]> git.ipfire.org Git - thirdparty/gcc.git/blob - gcc/config/i386/i386.md
i386: Fix TARGET_USE_VECTOR_FP_CONVERTS SF->DF float_extend splitter [PR113133]
[thirdparty/gcc.git] / gcc / config / i386 / i386.md
1 ;; GCC machine description for IA-32 and x86-64.
2 ;; Copyright (C) 1988-2023 Free Software Foundation, Inc.
3 ;; Mostly by William Schelter.
4 ;; x86_64 support added by Jan Hubicka
5 ;;
6 ;; This file is part of GCC.
7 ;;
8 ;; GCC is free software; you can redistribute it and/or modify
9 ;; it under the terms of the GNU General Public License as published by
10 ;; the Free Software Foundation; either version 3, or (at your option)
11 ;; any later version.
12 ;;
13 ;; GCC is distributed in the hope that it will be useful,
14 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
15 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 ;; GNU General Public License for more details.
17 ;;
18 ;; You should have received a copy of the GNU General Public License
19 ;; along with GCC; see the file COPYING3. If not see
20 ;; <http://www.gnu.org/licenses/>. */
21 ;;
22 ;; The original PO technology requires these to be ordered by speed,
23 ;; so that assigner will pick the fastest.
24 ;;
25 ;; See file "rtl.def" for documentation on define_insn, match_*, et al.
26 ;;
27 ;; The special asm out single letter directives following a '%' are:
28 ;; L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
29 ;; C -- print opcode suffix for set/cmov insn.
30 ;; c -- like C, but print reversed condition
31 ;; F,f -- likewise, but for floating-point.
32 ;; O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
33 ;; otherwise nothing
34 ;; R -- print the prefix for register names.
35 ;; z -- print the opcode suffix for the size of the current operand.
36 ;; Z -- likewise, with special suffixes for x87 instructions.
37 ;; * -- print a star (in certain assembler syntax)
38 ;; A -- print an absolute memory reference.
39 ;; E -- print address with DImode register names if TARGET_64BIT.
40 ;; w -- print the operand as if it's a "word" (HImode) even if it isn't.
41 ;; s -- print a shift double count, followed by the assemblers argument
42 ;; delimiter.
43 ;; b -- print the QImode name of the register for the indicated operand.
44 ;; %b0 would print %al if operands[0] is reg 0.
45 ;; w -- likewise, print the HImode name of the register.
46 ;; k -- likewise, print the SImode name of the register.
47 ;; q -- likewise, print the DImode name of the register.
48 ;; x -- likewise, print the V4SFmode name of the register.
49 ;; t -- likewise, print the V8SFmode name of the register.
50 ;; h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
51 ;; y -- print "st(0)" instead of "st" as a register.
52 ;; d -- print duplicated register operand for AVX instruction.
53 ;; D -- print condition for SSE cmp instruction.
54 ;; P -- if PIC, print an @PLT suffix.
55 ;; p -- print raw symbol name.
56 ;; X -- don't print any sort of PIC '@' suffix for a symbol.
57 ;; & -- print some in-use local-dynamic symbol name.
58 ;; H -- print a memory address offset by 8; used for sse high-parts
59 ;; K -- print HLE lock prefix
60 ;; Y -- print condition for XOP pcom* instruction.
61 ;; + -- print a branch hint as 'cs' or 'ds' prefix
62 ;; ; -- print a semicolon (after prefixes due to bug in older gas).
63 ;; ~ -- print "i" if TARGET_AVX2, "f" otherwise.
64 ;; ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
65 ;; ! -- print NOTRACK prefix for jxx/call/ret instructions if required.
66
67 (define_c_enum "unspec" [
68 ;; Relocation specifiers
69 UNSPEC_GOT
70 UNSPEC_GOTOFF
71 UNSPEC_GOTPCREL
72 UNSPEC_GOTTPOFF
73 UNSPEC_TPOFF
74 UNSPEC_NTPOFF
75 UNSPEC_DTPOFF
76 UNSPEC_GOTNTPOFF
77 UNSPEC_INDNTPOFF
78 UNSPEC_PLTOFF
79 UNSPEC_MACHOPIC_OFFSET
80 UNSPEC_PCREL
81 UNSPEC_SIZEOF
82
83 ;; Prologue support
84 UNSPEC_STACK_ALLOC
85 UNSPEC_SET_GOT
86 UNSPEC_SET_RIP
87 UNSPEC_SET_GOT_OFFSET
88 UNSPEC_MEMORY_BLOCKAGE
89 UNSPEC_PROBE_STACK
90
91 ;; TLS support
92 UNSPEC_TP
93 UNSPEC_TLS_GD
94 UNSPEC_TLS_LD_BASE
95 UNSPEC_TLSDESC
96 UNSPEC_TLS_IE_SUN
97
98 ;; Other random patterns
99 UNSPEC_SCAS
100 UNSPEC_FNSTSW
101 UNSPEC_SAHF
102 UNSPEC_NOTRAP
103 UNSPEC_PARITY
104 UNSPEC_FSTCW
105 UNSPEC_REP
106 UNSPEC_LD_MPIC ; load_macho_picbase
107 UNSPEC_TRUNC_NOOP
108 UNSPEC_DIV_ALREADY_SPLIT
109 UNSPEC_PAUSE
110 UNSPEC_LEA_ADDR
111 UNSPEC_XBEGIN_ABORT
112 UNSPEC_STOS
113 UNSPEC_PEEPSIB
114 UNSPEC_INSN_FALSE_DEP
115 UNSPEC_SBB
116 UNSPEC_CC_NE
117 UNSPEC_STC
118 UNSPEC_PUSHFL
119 UNSPEC_POPFL
120
121 ;; For SSE/MMX support:
122 UNSPEC_FIX_NOTRUNC
123 UNSPEC_MASKMOV
124 UNSPEC_MOVCC_MASK
125 UNSPEC_MOVMSK
126 UNSPEC_INSERTPS
127 UNSPEC_BLENDV
128 UNSPEC_PSHUFB
129 UNSPEC_XOP_PERMUTE
130 UNSPEC_RCP
131 UNSPEC_RSQRT
132 UNSPEC_PSADBW
133
134 ;; Different from generic us_truncate RTX
135 ;; as it does unsigned saturation of signed source.
136 UNSPEC_US_TRUNCATE
137
138 ;; For AVX/AVX512F support
139 UNSPEC_SCALEF
140 UNSPEC_PCMP
141 UNSPEC_CVTBFSF
142
143 ;; Generic math support
144 UNSPEC_IEEE_MIN ; not commutative
145 UNSPEC_IEEE_MAX ; not commutative
146
147 ;; x87 Floating point
148 UNSPEC_SIN
149 UNSPEC_COS
150 UNSPEC_FPATAN
151 UNSPEC_FYL2X
152 UNSPEC_FYL2XP1
153 UNSPEC_FRNDINT
154 UNSPEC_FIST
155 UNSPEC_F2XM1
156 UNSPEC_TAN
157 UNSPEC_FXAM
158
159 ;; x87 Rounding
160 UNSPEC_FRNDINT_ROUNDEVEN
161 UNSPEC_FRNDINT_FLOOR
162 UNSPEC_FRNDINT_CEIL
163 UNSPEC_FRNDINT_TRUNC
164 UNSPEC_FIST_FLOOR
165 UNSPEC_FIST_CEIL
166
167 ;; x87 Double output FP
168 UNSPEC_SINCOS_COS
169 UNSPEC_SINCOS_SIN
170 UNSPEC_XTRACT_FRACT
171 UNSPEC_XTRACT_EXP
172 UNSPEC_FSCALE_FRACT
173 UNSPEC_FSCALE_EXP
174 UNSPEC_FPREM_F
175 UNSPEC_FPREM_U
176 UNSPEC_FPREM1_F
177 UNSPEC_FPREM1_U
178
179 UNSPEC_C2_FLAG
180 UNSPEC_FXAM_MEM
181
182 ;; SSP patterns
183 UNSPEC_SP_SET
184 UNSPEC_SP_TEST
185
186 ;; For ROUND support
187 UNSPEC_ROUND
188
189 ;; For CRC32 support
190 UNSPEC_CRC32
191
192 ;; For LZCNT support
193 UNSPEC_LZCNT
194
195 ;; For BMI support
196 UNSPEC_TZCNT
197 UNSPEC_BEXTR
198
199 ;; For BMI2 support
200 UNSPEC_PDEP
201 UNSPEC_PEXT
202
203 ;; IRET support
204 UNSPEC_INTERRUPT_RETURN
205
206 ;; For MOVDIRI and MOVDIR64B support
207 UNSPEC_MOVDIRI
208 UNSPEC_MOVDIR64B
209
210 ;; For insn_callee_abi:
211 UNSPEC_CALLEE_ABI
212
213 ;; For APX PUSH2/POP2 support
214 UNSPEC_APXPUSH2
215 UNSPEC_APXPOP2_LOW
216 UNSPEC_APXPOP2_HIGH
217
218 ;; For APX PPX support
219 UNSPEC_APX_PPX
220 ])
221
222 (define_c_enum "unspecv" [
223 UNSPECV_UD2
224 UNSPECV_BLOCKAGE
225 UNSPECV_STACK_PROBE
226 UNSPECV_PROBE_STACK_RANGE
227 UNSPECV_ALIGN
228 UNSPECV_PROLOGUE_USE
229 UNSPECV_SPLIT_STACK_RETURN
230 UNSPECV_CLD
231 UNSPECV_NOPS
232 UNSPECV_RDTSC
233 UNSPECV_RDTSCP
234 UNSPECV_RDPMC
235 UNSPECV_LLWP_INTRINSIC
236 UNSPECV_SLWP_INTRINSIC
237 UNSPECV_LWPVAL_INTRINSIC
238 UNSPECV_LWPINS_INTRINSIC
239 UNSPECV_RDFSBASE
240 UNSPECV_RDGSBASE
241 UNSPECV_WRFSBASE
242 UNSPECV_WRGSBASE
243 UNSPECV_FXSAVE
244 UNSPECV_FXRSTOR
245 UNSPECV_FXSAVE64
246 UNSPECV_FXRSTOR64
247 UNSPECV_XSAVE
248 UNSPECV_XRSTOR
249 UNSPECV_XSAVE64
250 UNSPECV_XRSTOR64
251 UNSPECV_XSAVEOPT
252 UNSPECV_XSAVEOPT64
253 UNSPECV_XSAVES
254 UNSPECV_XRSTORS
255 UNSPECV_XSAVES64
256 UNSPECV_XRSTORS64
257 UNSPECV_XSAVEC
258 UNSPECV_XSAVEC64
259 UNSPECV_XGETBV
260 UNSPECV_XSETBV
261 UNSPECV_WBINVD
262 UNSPECV_WBNOINVD
263
264 ;; For atomic compound assignments.
265 UNSPECV_FNSTENV
266 UNSPECV_FLDENV
267 UNSPECV_FNSTSW
268 UNSPECV_FNCLEX
269
270 ;; For RDRAND support
271 UNSPECV_RDRAND
272
273 ;; For RDSEED support
274 UNSPECV_RDSEED
275
276 ;; For RTM support
277 UNSPECV_XBEGIN
278 UNSPECV_XEND
279 UNSPECV_XABORT
280 UNSPECV_XTEST
281
282 UNSPECV_NLGR
283
284 ;; For CLWB support
285 UNSPECV_CLWB
286
287 ;; For CLFLUSHOPT support
288 UNSPECV_CLFLUSHOPT
289
290 ;; For MONITORX and MWAITX support
291 UNSPECV_MONITORX
292 UNSPECV_MWAITX
293
294 ;; For CLZERO support
295 UNSPECV_CLZERO
296
297 ;; For RDPKRU and WRPKRU support
298 UNSPECV_PKU
299
300 ;; For RDPID support
301 UNSPECV_RDPID
302
303 ;; For CET support
304 UNSPECV_NOP_ENDBR
305 UNSPECV_NOP_RDSSP
306 UNSPECV_INCSSP
307 UNSPECV_SAVEPREVSSP
308 UNSPECV_RSTORSSP
309 UNSPECV_WRSS
310 UNSPECV_WRUSS
311 UNSPECV_SETSSBSY
312 UNSPECV_CLRSSBSY
313
314 ;; For TSXLDTRK support
315 UNSPECV_XSUSLDTRK
316 UNSPECV_XRESLDTRK
317
318 ;; For WAITPKG support
319 UNSPECV_UMWAIT
320 UNSPECV_UMONITOR
321 UNSPECV_TPAUSE
322
323 ;; For UINTR support
324 UNSPECV_CLUI
325 UNSPECV_STUI
326 UNSPECV_TESTUI
327 UNSPECV_SENDUIPI
328
329 ;; For CLDEMOTE support
330 UNSPECV_CLDEMOTE
331
332 ;; For Speculation Barrier support
333 UNSPECV_SPECULATION_BARRIER
334
335 UNSPECV_PTWRITE
336
337 ;; For ENQCMD and ENQCMDS support
338 UNSPECV_ENQCMD
339 UNSPECV_ENQCMDS
340
341 ;; For SERIALIZE support
342 UNSPECV_SERIALIZE
343
344 ;; For patchable area support
345 UNSPECV_PATCHABLE_AREA
346
347 ;; For HRESET support
348 UNSPECV_HRESET
349
350 ;; For PREFETCHI support
351 UNSPECV_PREFETCHI
352
353 ;; For USER_MSR support
354 UNSPECV_URDMSR
355 UNSPECV_UWRMSR
356 ])
357
358 ;; Constants to represent rounding modes in the ROUND instruction
359 (define_constants
360 [(ROUND_ROUNDEVEN 0x0)
361 (ROUND_FLOOR 0x1)
362 (ROUND_CEIL 0x2)
363 (ROUND_TRUNC 0x3)
364 (ROUND_MXCSR 0x4)
365 (ROUND_NO_EXC 0x8)
366 ])
367
368 ;; Constants to represent AVX512F embedded rounding
369 (define_constants
370 [(ROUND_NEAREST_INT 0)
371 (ROUND_NEG_INF 1)
372 (ROUND_POS_INF 2)
373 (ROUND_ZERO 3)
374 (NO_ROUND 4)
375 (ROUND_SAE 8)
376 ])
377
378 ;; Constants to represent pcomtrue/pcomfalse variants
379 (define_constants
380 [(PCOM_FALSE 0)
381 (PCOM_TRUE 1)
382 (COM_FALSE_S 2)
383 (COM_FALSE_P 3)
384 (COM_TRUE_S 4)
385 (COM_TRUE_P 5)
386 ])
387
388 ;; Constants used in the XOP pperm instruction
389 (define_constants
390 [(PPERM_SRC 0x00) /* copy source */
391 (PPERM_INVERT 0x20) /* invert source */
392 (PPERM_REVERSE 0x40) /* bit reverse source */
393 (PPERM_REV_INV 0x60) /* bit reverse & invert src */
394 (PPERM_ZERO 0x80) /* all 0's */
395 (PPERM_ONES 0xa0) /* all 1's */
396 (PPERM_SIGN 0xc0) /* propagate sign bit */
397 (PPERM_INV_SIGN 0xe0) /* invert & propagate sign */
398 (PPERM_SRC1 0x00) /* use first source byte */
399 (PPERM_SRC2 0x10) /* use second source byte */
400 ])
401
402 ;; Registers by name.
403 (define_constants
404 [(AX_REG 0)
405 (DX_REG 1)
406 (CX_REG 2)
407 (BX_REG 3)
408 (SI_REG 4)
409 (DI_REG 5)
410 (BP_REG 6)
411 (SP_REG 7)
412 (ST0_REG 8)
413 (ST1_REG 9)
414 (ST2_REG 10)
415 (ST3_REG 11)
416 (ST4_REG 12)
417 (ST5_REG 13)
418 (ST6_REG 14)
419 (ST7_REG 15)
420 (ARGP_REG 16)
421 (FLAGS_REG 17)
422 (FPSR_REG 18)
423 (FRAME_REG 19)
424 (XMM0_REG 20)
425 (XMM1_REG 21)
426 (XMM2_REG 22)
427 (XMM3_REG 23)
428 (XMM4_REG 24)
429 (XMM5_REG 25)
430 (XMM6_REG 26)
431 (XMM7_REG 27)
432 (MM0_REG 28)
433 (MM1_REG 29)
434 (MM2_REG 30)
435 (MM3_REG 31)
436 (MM4_REG 32)
437 (MM5_REG 33)
438 (MM6_REG 34)
439 (MM7_REG 35)
440 (R8_REG 36)
441 (R9_REG 37)
442 (R10_REG 38)
443 (R11_REG 39)
444 (R12_REG 40)
445 (R13_REG 41)
446 (R14_REG 42)
447 (R15_REG 43)
448 (XMM8_REG 44)
449 (XMM9_REG 45)
450 (XMM10_REG 46)
451 (XMM11_REG 47)
452 (XMM12_REG 48)
453 (XMM13_REG 49)
454 (XMM14_REG 50)
455 (XMM15_REG 51)
456 (XMM16_REG 52)
457 (XMM17_REG 53)
458 (XMM18_REG 54)
459 (XMM19_REG 55)
460 (XMM20_REG 56)
461 (XMM21_REG 57)
462 (XMM22_REG 58)
463 (XMM23_REG 59)
464 (XMM24_REG 60)
465 (XMM25_REG 61)
466 (XMM26_REG 62)
467 (XMM27_REG 63)
468 (XMM28_REG 64)
469 (XMM29_REG 65)
470 (XMM30_REG 66)
471 (XMM31_REG 67)
472 (MASK0_REG 68)
473 (MASK1_REG 69)
474 (MASK2_REG 70)
475 (MASK3_REG 71)
476 (MASK4_REG 72)
477 (MASK5_REG 73)
478 (MASK6_REG 74)
479 (MASK7_REG 75)
480 (R16_REG 76)
481 (R17_REG 77)
482 (R18_REG 78)
483 (R19_REG 79)
484 (R20_REG 80)
485 (R21_REG 81)
486 (R22_REG 82)
487 (R23_REG 83)
488 (R24_REG 84)
489 (R25_REG 85)
490 (R26_REG 86)
491 (R27_REG 87)
492 (R28_REG 88)
493 (R29_REG 89)
494 (R30_REG 90)
495 (R31_REG 91)
496 (FIRST_PSEUDO_REG 92)
497 ])
498
499 ;; Insn callee abi index.
500 (define_constants
501 [(ABI_DEFAULT 0)
502 (ABI_VZEROUPPER 1)
503 (ABI_UNKNOWN 2)])
504
505 ;; Insns whose names begin with "x86_" are emitted by gen_FOO calls
506 ;; from i386.cc.
507
508 ;; In C guard expressions, put expressions which may be compile-time
509 ;; constants first. This allows for better optimization. For
510 ;; example, write "TARGET_64BIT && reload_completed", not
511 ;; "reload_completed && TARGET_64BIT".
512
513 \f
514 ;; Processor type.
515 (define_attr "cpu" "none,pentium,pentiumpro,geode,k6,athlon,k8,core2,nehalem,
516 atom,slm,glm,haswell,generic,lujiazui,yongfeng,amdfam10,bdver1,
517 bdver2,bdver3,bdver4,btver2,znver1,znver2,znver3,znver4"
518 (const (symbol_ref "ix86_schedule")))
519
520 ;; A basic instruction type. Refinements due to arguments to be
521 ;; provided in other attributes.
522 (define_attr "type"
523 "other,multi,
524 alu,alu1,negnot,imov,imovx,lea,
525 incdec,ishift,ishiftx,ishift1,rotate,rotatex,rotate1,
526 imul,imulx,idiv,icmp,test,ibr,setcc,icmov,
527 push,pop,call,callv,leave,
528 str,bitmanip,
529 fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,
530 fxch,fistp,fisttp,frndint,
531 sse,ssemov,sseadd,sseadd1,sseiadd,sseiadd1,
532 ssemul,sseimul,ssediv,sselog,sselog1,
533 sseishft,sseishft1,ssecmp,ssecomi,
534 ssecvt,ssecvt1,sseicvt,sseins,
535 sseshuf,sseshuf1,ssemuladd,sse4arg,
536 lwp,mskmov,msklog,
537 mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft"
538 (const_string "other"))
539
540 ;; Main data type used by the insn
541 (define_attr "mode"
542 "unknown,none,QI,HI,SI,DI,TI,OI,XI,HF,BF,SF,DF,XF,TF,V32HF,V16HF,V8HF,
543 V16SF,V8SF,V4DF,V4SF,V2DF,V2SF,V1DF,V8DF,V4HF,V4BF,V2HF,V2BF"
544 (const_string "unknown"))
545
546 ;; The CPU unit the operation uses, derived from the insn type attribute; defaults to integer.
547 (define_attr "unit" "integer,i387,sse,mmx,unknown"
548 (cond [(eq_attr "type" "fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,
549 fxch,fistp,fisttp,frndint")
550 (const_string "i387")
551 (eq_attr "type" "sse,ssemov,sseadd,sseadd1,sseiadd,sseiadd1,
552 ssemul,sseimul,ssediv,sselog,sselog1,
553 sseishft,sseishft1,ssecmp,ssecomi,
554 ssecvt,ssecvt1,sseicvt,sseins,
555 sseshuf,sseshuf1,ssemuladd,sse4arg,mskmov")
556 (const_string "sse")
557 (eq_attr "type" "mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft")
558 (const_string "mmx")
559 (eq_attr "type" "other")
560 (const_string "unknown")]
561 (const_string "integer")))
562
563 ;; Used to control the "enabled" attribute on a per-instruction basis.
564 (define_attr "isa" "base,x64,nox64,x64_sse2,x64_sse4,x64_sse4_noavx,
565 x64_avx,x64_avx512bw,x64_avx512dq,aes,apx_ndd,
566 sse_noavx,sse2,sse2_noavx,sse3,sse3_noavx,sse4,sse4_noavx,
567 avx,noavx,avx2,noavx2,bmi,bmi2,fma4,fma,avx512f,avx512f_512,
568 noavx512f,avx512bw,avx512bw_512,noavx512bw,avx512dq,
569 noavx512dq,fma_or_avx512vl,avx512vl,noavx512vl,avxvnni,
570 avx512vnnivl,avx512fp16,avxifma,avx512ifmavl,avxneconvert,
571 avx512bf16vl,vpclmulqdqvl,avx_noavx512f,avx_noavx512vl"
572 (const_string "base"))
573
574 ;; The (bounding maximum) length of an instruction immediate.
575 (define_attr "length_immediate" ""
576 (cond [(eq_attr "type" "incdec,setcc,icmov,str,lea,other,multi,idiv,leave,
577 bitmanip,imulx,msklog,mskmov")
578 (const_int 0)
579 (ior (eq_attr "type" "sse4arg")
580 (eq_attr "isa" "fma4"))
581 (const_int 1)
582 (eq_attr "unit" "i387,sse,mmx")
583 (const_int 0)
584 (eq_attr "type" "alu,alu1,negnot,imovx,ishift,ishiftx,ishift1,
585 rotate,rotatex,rotate1,imul,icmp,push,pop")
586 (symbol_ref "ix86_attr_length_immediate_default (insn, true)")
587 (eq_attr "type" "imov,test")
588 (symbol_ref "ix86_attr_length_immediate_default (insn, false)")
589 (eq_attr "type" "call")
590 (if_then_else (match_operand 0 "constant_call_address_operand")
591 (const_int 4)
592 (const_int 0))
593 (eq_attr "type" "callv")
594 (if_then_else (match_operand 1 "constant_call_address_operand")
595 (const_int 4)
596 (const_int 0))
597 ;; We don't know the size before shorten_branches. Expect
598 ;; the instruction to fit for better scheduling.
599 (eq_attr "type" "ibr")
600 (const_int 1)
601 ]
602 (symbol_ref "/* Update immediate_length and other attributes! */
603 gcc_unreachable (),1")))
604
605 ;; The (bounding maximum) length of an instruction address.
606 (define_attr "length_address" ""
607 (cond [(eq_attr "type" "str,other,multi,fxch")
608 (const_int 0)
609 (and (eq_attr "type" "call")
610 (match_operand 0 "constant_call_address_operand"))
611 (const_int 0)
612 (and (eq_attr "type" "callv")
613 (match_operand 1 "constant_call_address_operand"))
614 (const_int 0)
615 ]
616 (symbol_ref "ix86_attr_length_address_default (insn)")))
617
618 ;; Set when the data16 prefix is used: HImode insns and SSE-unit insns in V2DF or TI mode, except types ssemuladd, sse4arg, sseiadd1 and ssecvt1.
619 (define_attr "prefix_data16" ""
620 (cond [(eq_attr "type" "ssemuladd,sse4arg,sseiadd1,ssecvt1")
621 (const_int 0)
622 (eq_attr "mode" "HI")
623 (const_int 1)
624 (and (eq_attr "unit" "sse") (eq_attr "mode" "V2DF,TI"))
625 (const_int 1)
626 ]
627 (const_int 0)))
628
629 ;; Set when string REP prefix is used.
630 (define_attr "prefix_rep" ""
631 (cond [(eq_attr "type" "ssemuladd,sse4arg,sseiadd1,ssecvt1")
632 (const_int 0)
633 (and (eq_attr "unit" "sse") (eq_attr "mode" "SF,DF"))
634 (const_int 1)
635 ]
636 (const_int 0)))
637
638 ;; Set when 0f opcode prefix is used.
639 (define_attr "prefix_0f" ""
640 (if_then_else
641 (ior (eq_attr "type" "imovx,setcc,icmov,bitmanip,msklog,mskmov")
642 (eq_attr "unit" "sse,mmx"))
643 (const_int 1)
644 (const_int 0)))
645
646 ;; Set when REX opcode prefix is used.
647 (define_attr "prefix_rex" ""
648 (cond [(not (match_test "TARGET_64BIT"))
649 (const_int 0)
650 (and (eq_attr "mode" "DI")
651 (and (eq_attr "type" "!push,pop,call,callv,leave,ibr")
652 (eq_attr "unit" "!mmx")))
653 (const_int 1)
654 (and (eq_attr "mode" "QI")
655 (match_test "x86_extended_QIreg_mentioned_p (insn)"))
656 (const_int 1)
657 (match_test "x86_extended_reg_mentioned_p (insn)")
658 (const_int 1)
659 (and (eq_attr "type" "imovx")
660 (match_operand:QI 1 "ext_QIreg_operand"))
661 (const_int 1)
662 ]
663 (const_int 0)))
664
665 ;; There are also additional prefixes in 3DNOW, SSSE3.
666 ;; 3DNOW has 0f0f prefix, SSSE3 and SSE4_{1,2} 0f38/0f3a.
667 ;; While generally inapplicable to VEX/XOP/EVEX encodings, "length_vex" uses
668 ;; the attribute evaluating to zero to know that VEX2 encoding may be usable.
669 (define_attr "prefix_extra" ""
670 (cond [(eq_attr "type" "ssemuladd,sse4arg,sseiadd1,ssecvt1")
671 (const_int 1)
672 ]
673 (const_int 0)))
674
675 ;; Prefix used: original, VEX, maybe VEX, EVEX or maybe EVEX.
676 (define_attr "prefix" "orig,vex,maybe_vex,evex,maybe_evex"
677 (cond [(eq_attr "mode" "OI,V8SF,V4DF")
678 (const_string "vex")
679 (eq_attr "mode" "XI,V16SF,V8DF")
680 (const_string "evex")
681 (eq_attr "type" "ssemuladd")
682 (if_then_else (eq_attr "isa" "fma4")
683 (const_string "vex")
684 (const_string "maybe_evex"))
685 (eq_attr "type" "sse4arg")
686 (const_string "vex")
687 ]
688 (const_string "orig")))
689
690 ;; VEX W bit is used.
691 (define_attr "prefix_vex_w" "" (const_int 0))
692
693 ;; The length of VEX prefix
694 ;; Only instructions with 0f prefix can have 2 byte VEX prefix,
695 ;; 0f38/0f3a prefixes can't. In i386.md 0f3[8a] is
696 ;; still prefix_0f 1, with prefix_extra 1.
697 (define_attr "length_vex" ""
698 (if_then_else (and (eq_attr "prefix_0f" "1")
699 (eq_attr "prefix_extra" "0"))
700 (if_then_else (eq_attr "prefix_vex_w" "1")
701 (symbol_ref "ix86_attr_length_vex_default (insn, true, true)")
702 (symbol_ref "ix86_attr_length_vex_default (insn, true, false)"))
703 (if_then_else (eq_attr "prefix_vex_w" "1")
704 (symbol_ref "ix86_attr_length_vex_default (insn, false, true)")
705 (symbol_ref "ix86_attr_length_vex_default (insn, false, false)"))))
706
707 ;; 4-byte EVEX prefix and 1-byte opcode.
708 (define_attr "length_evex" "" (const_int 5))
709
710 ;; Set when modrm byte is used.
711 (define_attr "modrm" ""
712 (cond [(eq_attr "type" "str,leave")
713 (const_int 0)
714 (eq_attr "unit" "i387")
715 (const_int 0)
716 (and (eq_attr "type" "incdec")
717 (and (not (match_test "TARGET_64BIT"))
718 (ior (match_operand:SI 1 "register_operand")
719 (match_operand:HI 1 "register_operand"))))
720 (const_int 0)
721 (and (eq_attr "type" "push")
722 (not (match_operand 1 "memory_operand")))
723 (const_int 0)
724 (and (eq_attr "type" "pop")
725 (not (match_operand 0 "memory_operand")))
726 (const_int 0)
727 (and (eq_attr "type" "imov")
728 (and (not (eq_attr "mode" "DI"))
729 (ior (and (match_operand 0 "register_operand")
730 (match_operand 1 "immediate_operand"))
731 (ior (and (match_operand 0 "ax_reg_operand")
732 (match_operand 1 "memory_displacement_only_operand"))
733 (and (match_operand 0 "memory_displacement_only_operand")
734 (match_operand 1 "ax_reg_operand"))))))
735 (const_int 0)
736 (and (eq_attr "type" "call")
737 (match_operand 0 "constant_call_address_operand"))
738 (const_int 0)
739 (and (eq_attr "type" "callv")
740 (match_operand 1 "constant_call_address_operand"))
741 (const_int 0)
742 (and (eq_attr "type" "alu,alu1,icmp,test")
743 (match_operand 0 "ax_reg_operand"))
744 (symbol_ref "(get_attr_length_immediate (insn) <= (get_attr_mode (insn) != MODE_QI))")
745 ]
746 (const_int 1)))
747
748 ;; The (bounding maximum) length of an instruction in bytes.
749 ;; ??? fistp and frndint are in fact fldcw/{fistp,frndint}/fldcw sequences.
750 ;; Later we may want to split them and compute proper length as for
751 ;; other insns.
752 (define_attr "length" ""
753 (cond [(eq_attr "type" "other,multi,fistp,frndint")
754 (const_int 16)
755 (eq_attr "type" "fcmp")
756 (const_int 4)
757 (eq_attr "unit" "i387")
758 (plus (const_int 2)
759 (plus (attr "prefix_data16")
760 (attr "length_address")))
761 (ior (eq_attr "prefix" "evex")
762 (and (ior (eq_attr "prefix" "maybe_evex")
763 (eq_attr "prefix" "maybe_vex"))
764 (match_test "TARGET_AVX512F")))
765 (plus (attr "length_evex")
766 (plus (attr "length_immediate")
767 (plus (attr "modrm")
768 (attr "length_address"))))
769 (ior (eq_attr "prefix" "vex")
770 (and (ior (eq_attr "prefix" "maybe_vex")
771 (eq_attr "prefix" "maybe_evex"))
772 (match_test "TARGET_AVX")))
773 (plus (attr "length_vex")
774 (plus (attr "length_immediate")
775 (plus (attr "modrm")
776 (attr "length_address"))))]
777 (plus (plus (attr "modrm")
778 (plus (attr "prefix_0f")
779 (plus (attr "prefix_rex")
780 (plus (attr "prefix_extra")
781 (const_int 1)))))
782 (plus (attr "prefix_rep")
783 (plus (attr "prefix_data16")
784 (plus (attr "length_immediate")
785 (attr "length_address")))))))
786
787 ;; The `memory' attribute is `none' if no memory is referenced, `load' or
788 ;; `store' if there is a simple memory reference therein, `both' if the
789 ;; instruction both loads and stores, or `unknown' if the instruction is complex.
790
791 (define_attr "memory" "none,load,store,both,unknown"
792 (cond [(eq_attr "type" "other,multi,str,lwp")
793 (const_string "unknown")
794 (eq_attr "type" "lea,fcmov,fpspc")
795 (const_string "none")
796 (eq_attr "type" "fistp,leave")
797 (const_string "both")
798 (eq_attr "type" "frndint")
799 (const_string "load")
800 (eq_attr "type" "push")
801 (if_then_else (match_operand 1 "memory_operand")
802 (const_string "both")
803 (const_string "store"))
804 (eq_attr "type" "pop")
805 (if_then_else (match_operand 0 "memory_operand")
806 (const_string "both")
807 (const_string "load"))
808 (eq_attr "type" "setcc")
809 (if_then_else (match_operand 0 "memory_operand")
810 (const_string "store")
811 (const_string "none"))
812 (eq_attr "type" "icmp,test,ssecmp,ssecomi,mmxcmp,fcmp")
813 (if_then_else (ior (match_operand 0 "memory_operand")
814 (match_operand 1 "memory_operand"))
815 (const_string "load")
816 (const_string "none"))
817 (eq_attr "type" "ibr")
818 (if_then_else (match_operand 0 "memory_operand")
819 (const_string "load")
820 (const_string "none"))
821 (eq_attr "type" "call")
822 (if_then_else (match_operand 0 "constant_call_address_operand")
823 (const_string "none")
824 (const_string "load"))
825 (eq_attr "type" "callv")
826 (if_then_else (match_operand 1 "constant_call_address_operand")
827 (const_string "none")
828 (const_string "load"))
829 (and (eq_attr "type" "alu1,negnot,ishift1,rotate1,sselog1,sseshuf1")
830 (match_operand 1 "memory_operand"))
831 (const_string "both")
832 (and (match_operand 0 "memory_operand")
833 (match_operand 1 "memory_operand"))
834 (const_string "both")
835 (match_operand 0 "memory_operand")
836 (const_string "store")
837 (match_operand 1 "memory_operand")
838 (const_string "load")
839 (and (eq_attr "type"
840 "!alu1,negnot,ishift1,rotate1,
841 imov,imovx,icmp,test,bitmanip,
842 fmov,fcmp,fsgn,
843 sse,ssemov,ssecmp,ssecomi,ssecvt,ssecvt1,sseicvt,
844 sselog1,sseshuf1,sseadd1,sseiadd1,sseishft1,
845 mmx,mmxmov,mmxcmp,mmxcvt,mskmov,msklog")
846 (match_operand 2 "memory_operand"))
847 (const_string "load")
848 (and (eq_attr "type" "icmov,ssemuladd,sse4arg")
849 (match_operand 3 "memory_operand"))
850 (const_string "load")
851 ]
852 (const_string "none")))
853
854 ;; Indicates if an instruction has both an immediate and a displacement.
855
856 (define_attr "imm_disp" "false,true,unknown"
857 (cond [(eq_attr "type" "other,multi")
858 (const_string "unknown")
859 (and (eq_attr "type" "icmp,test,imov,alu1,ishift1,rotate1")
860 (and (match_operand 0 "memory_displacement_operand")
861 (match_operand 1 "immediate_operand")))
862 (const_string "true")
863 (and (eq_attr "type" "alu,ishift,ishiftx,rotate,rotatex,imul,idiv")
864 (and (match_operand 0 "memory_displacement_operand")
865 (match_operand 2 "immediate_operand")))
866 (const_string "true")
867 ]
868 (const_string "false")))
869
870 ;; Indicates if an FP operation has an integer source.
871
872 (define_attr "fp_int_src" "false,true"
873 (const_string "false"))
874
875 ;; Defines rounding mode of an FP operation.
876
877 (define_attr "i387_cw" "roundeven,floor,ceil,trunc,uninitialized,any"
878 (const_string "any"))
879
880 ;; Define attribute to indicate AVX insns with partial XMM register update.
881 (define_attr "avx_partial_xmm_update" "false,true"
882 (const_string "false"))
883
884 ;; Define attribute to classify add/sub insns that consume carry flag (CF)
885 (define_attr "use_carry" "0,1" (const_string "0"))
886
887 ;; Define attribute to indicate unaligned ssemov insns
888 (define_attr "movu" "0,1" (const_string "0"))
889
890 ;; Define attribute to limit memory address register set.
891 (define_attr "addr" "gpr8,gpr16,gpr32" (const_string "gpr32"))
892
893 ;; Define instruction set of MMX instructions
894 (define_attr "mmx_isa" "base,native,sse,sse_noavx,avx"
895 (const_string "base"))
896
897 (define_attr "enabled" ""
898 (cond [(eq_attr "isa" "x64") (symbol_ref "TARGET_64BIT")
899 (eq_attr "isa" "nox64") (symbol_ref "!TARGET_64BIT")
900 (eq_attr "isa" "x64_sse2")
901 (symbol_ref "TARGET_64BIT && TARGET_SSE2")
902 (eq_attr "isa" "x64_sse4")
903 (symbol_ref "TARGET_64BIT && TARGET_SSE4_1")
904 (eq_attr "isa" "x64_sse4_noavx")
905 (symbol_ref "TARGET_64BIT && TARGET_SSE4_1 && !TARGET_AVX")
906 (eq_attr "isa" "x64_avx")
907 (symbol_ref "TARGET_64BIT && TARGET_AVX")
908 (eq_attr "isa" "x64_avx512bw")
909 (symbol_ref "TARGET_64BIT && TARGET_AVX512BW")
910 (eq_attr "isa" "x64_avx512dq")
911 (symbol_ref "TARGET_64BIT && TARGET_AVX512DQ")
912 (eq_attr "isa" "aes") (symbol_ref "TARGET_AES")
913 (eq_attr "isa" "sse_noavx")
914 (symbol_ref "TARGET_SSE && !TARGET_AVX")
915 (eq_attr "isa" "sse2") (symbol_ref "TARGET_SSE2")
916 (eq_attr "isa" "sse2_noavx")
917 (symbol_ref "TARGET_SSE2 && !TARGET_AVX")
918 (eq_attr "isa" "sse3") (symbol_ref "TARGET_SSE3")
919 (eq_attr "isa" "sse3_noavx")
920 (symbol_ref "TARGET_SSE3 && !TARGET_AVX")
921 (eq_attr "isa" "sse4") (symbol_ref "TARGET_SSE4_1")
922 (eq_attr "isa" "sse4_noavx")
923 (symbol_ref "TARGET_SSE4_1 && !TARGET_AVX")
924 (eq_attr "isa" "avx") (symbol_ref "TARGET_AVX")
925 (eq_attr "isa" "avx_noavx512f")
926 (symbol_ref "TARGET_AVX && !TARGET_AVX512F")
927 (eq_attr "isa" "avx_noavx512vl")
928 (symbol_ref "TARGET_AVX && !TARGET_AVX512VL")
929 (eq_attr "isa" "noavx") (symbol_ref "!TARGET_AVX")
930 (eq_attr "isa" "avx2") (symbol_ref "TARGET_AVX2")
931 (eq_attr "isa" "noavx2") (symbol_ref "!TARGET_AVX2")
932 (eq_attr "isa" "bmi") (symbol_ref "TARGET_BMI")
933 (eq_attr "isa" "bmi2") (symbol_ref "TARGET_BMI2")
934 (eq_attr "isa" "fma4") (symbol_ref "TARGET_FMA4")
935 (eq_attr "isa" "fma") (symbol_ref "TARGET_FMA")
936 (eq_attr "isa" "fma_or_avx512vl")
937 (symbol_ref "TARGET_FMA || TARGET_AVX512VL")
938 (eq_attr "isa" "avx512f") (symbol_ref "TARGET_AVX512F")
939 (eq_attr "isa" "avx512f_512")
940 (symbol_ref "TARGET_AVX512F && TARGET_EVEX512")
941 (eq_attr "isa" "noavx512f") (symbol_ref "!TARGET_AVX512F")
942 (eq_attr "isa" "avx512bw") (symbol_ref "TARGET_AVX512BW")
943 (eq_attr "isa" "avx512bw_512")
944 (symbol_ref "TARGET_AVX512BW && TARGET_EVEX512")
945 (eq_attr "isa" "noavx512bw") (symbol_ref "!TARGET_AVX512BW")
946 (eq_attr "isa" "avx512dq") (symbol_ref "TARGET_AVX512DQ")
947 (eq_attr "isa" "noavx512dq") (symbol_ref "!TARGET_AVX512DQ")
948 (eq_attr "isa" "avx512vl") (symbol_ref "TARGET_AVX512VL")
949 (eq_attr "isa" "noavx512vl") (symbol_ref "!TARGET_AVX512VL")
950 (eq_attr "isa" "avxvnni") (symbol_ref "TARGET_AVXVNNI")
951 (eq_attr "isa" "avx512vnnivl")
952 (symbol_ref "TARGET_AVX512VNNI && TARGET_AVX512VL")
953 (eq_attr "isa" "avx512fp16")
954 (symbol_ref "TARGET_AVX512FP16")
955 (eq_attr "isa" "avxifma") (symbol_ref "TARGET_AVXIFMA")
956 (eq_attr "isa" "avx512ifmavl")
957 (symbol_ref "TARGET_AVX512IFMA && TARGET_AVX512VL")
958 (eq_attr "isa" "avxneconvert") (symbol_ref "TARGET_AVXNECONVERT")
959 (eq_attr "isa" "avx512bf16vl")
960 (symbol_ref "TARGET_AVX512BF16 && TARGET_AVX512VL")
961 (eq_attr "isa" "vpclmulqdqvl")
962 (symbol_ref "TARGET_VPCLMULQDQ && TARGET_AVX512VL")
963 (eq_attr "isa" "apx_ndd")
964 (symbol_ref "TARGET_APX_NDD")
965
966 (eq_attr "mmx_isa" "native")
967 (symbol_ref "!TARGET_MMX_WITH_SSE")
968 (eq_attr "mmx_isa" "sse")
969 (symbol_ref "TARGET_MMX_WITH_SSE")
970 (eq_attr "mmx_isa" "sse_noavx")
971 (symbol_ref "TARGET_MMX_WITH_SSE && !TARGET_AVX")
972 (eq_attr "mmx_isa" "avx")
973 (symbol_ref "TARGET_MMX_WITH_SSE && TARGET_AVX")
974 ]
975 (const_int 1)))
976
977 (define_attr "preferred_for_size" "" (const_int 1))
978 (define_attr "preferred_for_speed" "" (const_int 1))
979
980 ;; Describe a user's asm statement.
981 (define_asm_attributes
982 [(set_attr "length" "128")
983 (set_attr "type" "multi")])
984
985 (define_code_iterator plusminus [plus minus])
986 (define_code_iterator plusminusmult [plus minus mult])
987 (define_code_iterator plusminusmultdiv [plus minus mult div])
988
989 (define_code_iterator sat_plusminus [ss_plus us_plus ss_minus us_minus])
990
991 ;; Base name for insn mnemonic.
992 (define_code_attr plusminus_mnemonic
993 [(plus "add") (ss_plus "adds") (us_plus "addus")
994 (minus "sub") (ss_minus "subs") (us_minus "subus")])
995
996 (define_code_iterator multdiv [mult div])
997
998 (define_code_attr multdiv_mnemonic
999 [(mult "mul") (div "div")])
1000
1001 ;; Mark commutative operators as such in constraints.
1002 (define_code_attr comm [(plus "%") (ss_plus "%") (us_plus "%")
1003 (minus "") (ss_minus "") (us_minus "")
1004 (mult "%") (div "")])
1005
1006 ;; Mapping of max and min
1007 (define_code_iterator maxmin [smax smin umax umin])
1008
1009 ;; Mapping of signed max and min
1010 (define_code_iterator smaxmin [smax smin])
1011
1012 ;; Mapping of unsigned max and min
1013 (define_code_iterator umaxmin [umax umin])
1014
1015 ;; Base name for integer and FP insn mnemonic
1016 (define_code_attr maxmin_int [(smax "maxs") (smin "mins")
1017 (umax "maxu") (umin "minu")])
1018 (define_code_attr maxmin_float [(smax "max") (smin "min")])
1019
1020 (define_int_iterator IEEE_MAXMIN
1021 [UNSPEC_IEEE_MAX
1022 UNSPEC_IEEE_MIN])
1023
1024 (define_int_attr ieee_maxmin
1025 [(UNSPEC_IEEE_MAX "max")
1026 (UNSPEC_IEEE_MIN "min")])
1027
1028 ;; Mapping of logic operators
1029 (define_code_iterator any_logic [and ior xor])
1030 (define_code_iterator any_or [ior xor])
1031 (define_code_iterator fpint_logic [and xor])
1032
1033 ;; Base name for insn mnemonic.
1034 (define_code_attr logic [(and "and") (ior "or") (xor "xor")])
1035
1036 ;; Mapping of logical shift operators (left shift and logical right shift)
1037 (define_code_iterator any_lshift [ashift lshiftrt])
1038
1039 ;; Mapping of shift-right operators
1040 (define_code_iterator any_shiftrt [lshiftrt ashiftrt])
1041
1042 ;; Mapping of all shift operators
1043 (define_code_iterator any_shift [ashift lshiftrt ashiftrt])
1044
1045 ;; Base name for insn mnemonic.
1046 (define_code_attr shift [(ashift "sal") (lshiftrt "shr") (ashiftrt "sar")])
1047 (define_code_attr vshift [(ashift "sll") (lshiftrt "srl") (ashiftrt "sra")])
1048
1049 ;; Mapping of rotate operators
1050 (define_code_iterator any_rotate [rotate rotatert])
1051
1052 ;; Base name for insn mnemonic.
1053 (define_code_attr rotate [(rotate "rol") (rotatert "ror")])
1054
1055 ;; Mapping of abs neg operators
1056 (define_code_iterator absneg [abs neg])
1057
1058 ;; Mapping of abs neg operators to logic operation
1059 (define_code_attr absneg_op [(abs "and") (neg "xor")])
1060
1061 ;; Base name for x87 insn mnemonic.
1062 (define_code_attr absneg_mnemonic [(abs "fabs") (neg "fchs")])
1063
1064 ;; Mapping of extend operators
1065 (define_code_iterator any_extend [sign_extend zero_extend])
1066
1067 ;; Mapping of highpart multiply operators
1068 (define_code_iterator any_mul_highpart [smul_highpart umul_highpart])
1069
1070 ;; Prefix for insn mnemonic.
1071 (define_code_attr sgnprefix [(sign_extend "i") (zero_extend "")
1072 (smul_highpart "i") (umul_highpart "")
1073 (div "i") (udiv "")])
1074 ;; Prefix for define_insn
1075 (define_code_attr s [(sign_extend "s") (zero_extend "u")
1076 (smul_highpart "s") (umul_highpart "u")])
1077 (define_code_attr u [(sign_extend "") (zero_extend "u")
1078 (div "") (udiv "u")])
1079 (define_code_attr u_bool [(sign_extend "false") (zero_extend "true")
1080 (div "false") (udiv "true")])
1081
1082 ;; Used in signed and unsigned truncations.
1083 (define_code_iterator any_truncate [ss_truncate truncate us_truncate])
1084 ;; Instruction suffix for truncations.
1085 (define_code_attr trunsuffix
1086 [(ss_truncate "s") (truncate "") (us_truncate "us")])
1087
1088 ;; Instruction suffix for SSE sign and zero extensions.
1089 (define_code_attr extsuffix [(sign_extend "sx") (zero_extend "zx")])
1090
1091 ;; Used in signed and unsigned fix.
1092 (define_code_iterator any_fix [fix unsigned_fix])
1093 (define_code_attr fixsuffix [(fix "") (unsigned_fix "u")])
1094 (define_code_attr fixunssuffix [(fix "") (unsigned_fix "uns")])
1095 (define_code_attr fixprefix [(fix "s") (unsigned_fix "u")])
1096
1097 ;; Used in signed and unsigned float.
1098 (define_code_iterator any_float [float unsigned_float])
1099 (define_code_attr floatsuffix [(float "") (unsigned_float "u")])
1100 (define_code_attr floatunssuffix [(float "") (unsigned_float "uns")])
1101 (define_code_attr floatprefix [(float "s") (unsigned_float "u")])
1102
1103 ;; Base name for expression
1104 (define_code_attr insn
1105 [(plus "add") (ss_plus "ssadd") (us_plus "usadd")
1106 (minus "sub") (ss_minus "sssub") (us_minus "ussub")
1107 (sign_extend "extend") (zero_extend "zero_extend")
1108 (ashift "ashl") (lshiftrt "lshr") (ashiftrt "ashr")
1109 (rotate "rotl") (rotatert "rotr")
1110 (mult "mul") (div "div")])
1111
1112 ;; All integer modes.
1113 (define_mode_iterator SWI1248x [QI HI SI DI])
1114
1115 ;; All integer modes without QImode.
1116 (define_mode_iterator SWI248x [HI SI DI])
1117
1118 ;; All integer modes without QImode and HImode.
1119 (define_mode_iterator SWI48x [SI DI])
1120
1121 ;; All integer modes without SImode and DImode.
1122 (define_mode_iterator SWI12 [QI HI])
1123
1124 ;; All integer modes without DImode.
1125 (define_mode_iterator SWI124 [QI HI SI])
1126
1127 ;; All integer modes without QImode and DImode.
1128 (define_mode_iterator SWI24 [HI SI])
1129
1130 ;; Single word integer modes.
1131 (define_mode_iterator SWI [QI HI SI (DI "TARGET_64BIT")])
1132
1133 ;; Single word integer modes without QImode.
1134 (define_mode_iterator SWI248 [HI SI (DI "TARGET_64BIT")])
1135
1136 ;; Single word integer modes without QImode and HImode.
1137 (define_mode_iterator SWI48 [SI (DI "TARGET_64BIT")])
1138
1139 ;; All math-dependent single and double word integer modes.
1140 (define_mode_iterator SDWIM [(QI "TARGET_QIMODE_MATH")
1141 (HI "TARGET_HIMODE_MATH")
1142 SI DI (TI "TARGET_64BIT")])
1143
1144 ;; Math-dependent single word integer modes.
1145 (define_mode_iterator SWIM [(QI "TARGET_QIMODE_MATH")
1146 (HI "TARGET_HIMODE_MATH")
1147 SI (DI "TARGET_64BIT")])
1148
1149 ;; Math-dependent integer modes without DImode.
1150 (define_mode_iterator SWIM124 [(QI "TARGET_QIMODE_MATH")
1151 (HI "TARGET_HIMODE_MATH")
1152 SI])
1153
1154 ;; Math-dependent integer modes with DImode.
1155 (define_mode_iterator SWIM1248x
1156 [(QI "TARGET_QIMODE_MATH")
1157 (HI "TARGET_HIMODE_MATH")
1158 SI DI])
1159
1160 ;; Math-dependent single word integer modes without QImode.
1161 (define_mode_iterator SWIM248 [(HI "TARGET_HIMODE_MATH")
1162 SI (DI "TARGET_64BIT")])
1163
1164 ;; Double word integer modes.
1165 (define_mode_iterator DWI [(DI "!TARGET_64BIT")
1166 (TI "TARGET_64BIT")])
1167
1168 ;; SWI and DWI together.
1169 (define_mode_iterator SWIDWI [QI HI SI DI (TI "TARGET_64BIT")])
1170
1171 ;; SWI48 and DWI together.
1172 (define_mode_iterator SWI48DWI [SI DI (TI "TARGET_64BIT")])
1173
1174 ;; GET_MODE_SIZE for selected modes. As GET_MODE_SIZE is not a
1175 ;; compile-time constant, it is faster to use <MODE_SIZE> than
1176 ;; GET_MODE_SIZE (<MODE>mode). For XFmode, whose size depends on
1177 ;; command line options, just use the GET_MODE_SIZE macro.
1178 (define_mode_attr MODE_SIZE [(QI "1") (HI "2") (SI "4") (DI "8")
1179 (TI "16") (HF "2") (BF "2") (SF "4") (DF "8")
1180 (XF "GET_MODE_SIZE (XFmode)")
1181 (V16QI "16") (V32QI "32") (V64QI "64")
1182 (V8HI "16") (V16HI "32") (V32HI "64")
1183 (V4SI "16") (V8SI "32") (V16SI "64")
1184 (V2DI "16") (V4DI "32") (V8DI "64")
1185 (V1TI "16") (V2TI "32") (V4TI "64")
1186 (V2DF "16") (V4DF "32") (V8DF "64")
1187 (V4SF "16") (V8SF "32") (V16SF "64")
1188 (V8HF "16") (V16HF "32") (V32HF "64")
1189 (V4HF "8") (V2HF "4")
1190 (V8BF "16") (V16BF "32") (V32BF "64")
1191 (V4BF "8") (V2BF "4")])
1192
1193 ;; Double word integer modes as mode attribute.
1194 (define_mode_attr DWI [(QI "HI") (HI "SI") (SI "DI") (DI "TI") (TI "OI")])
1195 (define_mode_attr dwi [(QI "hi") (HI "si") (SI "di") (DI "ti") (TI "oi")])
1196
1197 ;; Half sized integer modes.
1198 (define_mode_attr HALF [(TI "DI") (DI "SI")])
1199 (define_mode_attr half [(TI "di") (DI "si")])
1200
1201 ;; LEA mode corresponding to an integer mode
1202 (define_mode_attr LEAMODE [(QI "SI") (HI "SI") (SI "SI") (DI "DI")])
1203
1204 ;; Half mode for double word integer modes.
1205 (define_mode_iterator DWIH [(SI "!TARGET_64BIT")
1206 (DI "TARGET_64BIT")])
1207
1208 ;; Instruction suffix for integer modes.
1209 (define_mode_attr imodesuffix [(QI "b") (HI "w") (SI "l") (DI "q")])
1210
1211 ;; Instruction suffix for masks.
1212 (define_mode_attr mskmodesuffix [(QI "b") (HI "w") (SI "d") (DI "q")])
1213
1214 ;; Pointer size prefix for integer modes (Intel asm dialect)
1215 (define_mode_attr iptrsize [(QI "BYTE")
1216 (HI "WORD")
1217 (SI "DWORD")
1218 (DI "QWORD")])
1219
1220 ;; Register class for integer modes.
1221 (define_mode_attr r [(QI "q") (HI "r") (SI "r") (DI "r")])
1222
1223 ;; Immediate operand constraint for integer modes.
1224 (define_mode_attr i [(QI "n") (HI "n") (SI "e") (DI "e")])
1225
1226 ;; General operand constraint for word modes.
1227 (define_mode_attr g [(QI "qmn") (HI "rmn") (SI "rme") (DI "rme")])
1228
1229 ;; Memory operand constraint for word modes.
1230 (define_mode_attr m [(QI "m") (HI "m") (SI "BM") (DI "BM")])
1231
1232 ;; Immediate operand constraint for double integer modes.
1233 (define_mode_attr di [(SI "nF") (DI "Wd")])
1234
1235 ;; Immediate operand constraint for shifts.
1236 (define_mode_attr S [(QI "I") (HI "I") (SI "I") (DI "J") (TI "O")])
1237 (define_mode_attr KS [(QI "Wb") (HI "Ww") (SI "I") (DI "J")])
1238
1239 ;; Print register name in the specified mode.
1240 (define_mode_attr k [(QI "b") (HI "w") (SI "k") (DI "q")])
1241
1242 ;; General operand predicate for integer modes.
1243 (define_mode_attr general_operand
1244 [(QI "general_operand")
1245 (HI "general_operand")
1246 (SI "x86_64_general_operand")
1247 (DI "x86_64_general_operand")
1248 (TI "x86_64_general_operand")])
1249
1250 ;; General operand predicate for integer modes, where for TImode
1251 ;; we need both words of the operand to be general operands.
1252 (define_mode_attr general_hilo_operand
1253 [(QI "general_operand")
1254 (HI "general_operand")
1255 (SI "x86_64_general_operand")
1256 (DI "x86_64_general_operand")
1257 (TI "x86_64_hilo_general_operand")])
1258
1259 ;; General sign extend operand predicate for integer modes,
1260 ;; which disallows VOIDmode operands and thus it is suitable
1261 ;; for use inside sign_extend.
1262 (define_mode_attr general_sext_operand
1263 [(QI "sext_operand")
1264 (HI "sext_operand")
1265 (SI "x86_64_sext_operand")
1266 (DI "x86_64_sext_operand")])
1267
1268 ;; General sign/zero extend operand predicate for integer modes.
1269 (define_mode_attr general_szext_operand
1270 [(QI "general_operand")
1271 (HI "general_operand")
1272 (SI "x86_64_szext_general_operand")
1273 (DI "x86_64_szext_general_operand")
1274 (TI "x86_64_hilo_general_operand")])
1275
;; Nonmemory sign/zero extend operand predicate for integer modes.
1276 (define_mode_attr nonmemory_szext_operand
1277 [(QI "nonmemory_operand")
1278 (HI "nonmemory_operand")
1279 (SI "x86_64_szext_nonmemory_operand")
1280 (DI "x86_64_szext_nonmemory_operand")])
1281
1282 ;; Immediate operand predicate for integer modes.
1283 (define_mode_attr immediate_operand
1284 [(QI "immediate_operand")
1285 (HI "immediate_operand")
1286 (SI "x86_64_immediate_operand")
1287 (DI "x86_64_immediate_operand")])
1288
1289 ;; Nonmemory operand predicate for integer modes.
1290 (define_mode_attr nonmemory_operand
1291 [(QI "nonmemory_operand")
1292 (HI "nonmemory_operand")
1293 (SI "x86_64_nonmemory_operand")
1294 (DI "x86_64_nonmemory_operand")])
1295
1296 ;; Operand predicate for shifts.
1297 (define_mode_attr shift_operand
1298 [(QI "nonimmediate_operand")
1299 (HI "nonimmediate_operand")
1300 (SI "nonimmediate_operand")
1301 (DI "shiftdi_operand")
1302 (TI "register_operand")])
1303
1304 ;; Operand predicate for shift argument.
1305 (define_mode_attr shift_immediate_operand
1306 [(QI "const_1_to_31_operand")
1307 (HI "const_1_to_31_operand")
1308 (SI "const_1_to_31_operand")
1309 (DI "const_1_to_63_operand")])
1310
1311 ;; Input operand predicate for arithmetic left shifts.
1312 (define_mode_attr ashl_input_operand
1313 [(QI "nonimmediate_operand")
1314 (HI "nonimmediate_operand")
1315 (SI "nonimmediate_operand")
1316 (DI "ashldi_input_operand")
1317 (TI "reg_or_pm1_operand")])
1318
1319 ;; SSE and x87 SFmode and DFmode floating point modes
1320 (define_mode_iterator MODEF [SF DF])
1321
1322 ;; SSE floating point modes, plus HFmode when AVX512FP16 is enabled
1323 (define_mode_iterator MODEFH [(HF "TARGET_AVX512FP16") SF DF])
1324
1325 ;; All x87 floating point modes
1326 (define_mode_iterator X87MODEF [SF DF XF])
1327
1328 ;; All x87 floating point modes plus HFmode and BFmode
1329 (define_mode_iterator X87MODEFH [HF SF DF XF BF])
1330
1331 ;; All SSE floating point modes
1332 (define_mode_iterator SSEMODEF [HF SF DF TF])
1333 (define_mode_attr ssevecmodef [(HF "V8HF") (SF "V4SF") (DF "V2DF") (TF "TF")])
1334
1335 ;; SSE instruction suffix for various modes
1336 (define_mode_attr ssemodesuffix
1337 [(HF "sh") (SF "ss") (DF "sd")
1338 (V32HF "ph") (V16SF "ps") (V8DF "pd")
1339 (V16HF "ph") (V16BF "bf") (V8SF "ps") (V4DF "pd")
1340 (V8HF "ph") (V8BF "bf") (V4SF "ps") (V2DF "pd")
1341 (V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")
1342 (V32QI "b") (V16HI "w") (V8SI "d") (V4DI "q")
1343 (V64QI "b") (V32HI "w") (V16SI "d") (V8DI "q")])
1344
1345 ;; SSE vector suffix for floating point modes
1346 (define_mode_attr ssevecmodesuffix [(SF "ps") (DF "pd")])
1347
1348 ;; SSE vector mode corresponding to a scalar mode
1349 (define_mode_attr ssevecmode
1350 [(QI "V16QI") (HI "V8HI") (SI "V4SI") (DI "V2DI") (HF "V8HF") (BF "V8BF") (SF "V4SF") (DF "V2DF")])
;; Lower-case spelling of the SSE vector mode for a scalar mode.  Unlike
;; ssevecmode, this table has no entries for HF and BF.
1351 (define_mode_attr ssevecmodelower
1352 [(QI "v16qi") (HI "v8hi") (SI "v4si") (DI "v2di") (SF "v4sf") (DF "v2df")])
1353
1354 ;; AVX512F vector mode corresponding to a scalar mode
1355 (define_mode_attr avx512fvecmode
1356 [(QI "V64QI") (HI "V32HI") (SI "V16SI") (DI "V8DI") (SF "V16SF") (DF "V8DF")])
1357
1358 ;; Instruction suffix for REX 64bit operators.
1359 (define_mode_attr rex64suffix [(SI "{l}") (DI "{q}")])
1360 (define_mode_attr rex64namesuffix [(SI "") (DI "q")])
1361
1362 ;; This mode iterator allows :P to be used for patterns that operate on
1363 ;; pointer-sized quantities. Exactly one of the two alternatives will match.
1364 (define_mode_iterator P [(SI "Pmode == SImode") (DI "Pmode == DImode")])
1365
1366 ;; This mode iterator allows :W to be used for patterns that operate on
1367 ;; word_mode sized quantities.
1368 (define_mode_iterator W
1369 [(SI "word_mode == SImode") (DI "word_mode == DImode")])
1370
1371 ;; This mode iterator allows :PTR to be used for patterns that operate on
1372 ;; ptr_mode sized quantities.
1373 (define_mode_iterator PTR
1374 [(SI "ptr_mode == SImode") (DI "ptr_mode == DImode")])
1375 \f
1376 ;; Scheduling descriptions
1377
1378 (include "pentium.md")
1379 (include "ppro.md")
1380 (include "k6.md")
1381 (include "athlon.md")
1382 (include "bdver1.md")
1383 (include "bdver3.md")
1384 (include "btver2.md")
1385 (include "znver.md")
1386 (include "znver4.md")
1387 (include "geode.md")
1388 (include "atom.md")
1389 (include "slm.md")
1390 (include "glm.md")
1391 (include "core2.md")
1392 (include "haswell.md")
1393 (include "lujiazui.md")
1394 (include "yongfeng.md")
1395
1396 \f
1397 ;; Operand and operator predicates and constraints
1398
1399 (include "predicates.md")
1400 (include "constraints.md")
1401
1402 \f
1403 ;; Compare and branch/compare and store instructions.
1404
;; Integer conditional branch for the SWIM1248x modes.  Operand 0 is the
;; comparison operator, operands 1 and 2 are the values being compared and
;; operand 3 is the branch target label.  The preparation statements end
;; in DONE, so the work is delegated to ix86_expand_branch instead of
;; emitting the RTL template.
1405 (define_expand "cbranch<mode>4"
1406 [(set (reg:CC FLAGS_REG)
1407 (compare:CC (match_operand:SWIM1248x 1 "nonimmediate_operand")
1408 (match_operand:SWIM1248x 2 "<general_operand>")))
1409 (set (pc) (if_then_else
1410 (match_operator 0 "ordered_comparison_operator"
1411 [(reg:CC FLAGS_REG) (const_int 0)])
1412 (label_ref (match_operand 3))
1413 (pc)))]
1414 ""
1415 {
/* When both compared values are in memory, load operand 1 into a
   register first.  */
1416 if (MEM_P (operands[1]) && MEM_P (operands[2]))
1417 operands[1] = force_reg (<MODE>mode, operands[1]);
1418 ix86_expand_branch (GET_CODE (operands[0]),
1419 operands[1], operands[2], operands[3]);
1420 DONE;
1421 })
1422
;; TImode conditional branch, enabled for TARGET_64BIT or TARGET_SSE4_1.
;; The accepted comparison codes and second operand are restricted by the
;; ix86_timode_comparison_operator / ix86_timode_comparison_operand
;; predicates.  Operand 3 is the target label; expansion is delegated to
;; ix86_expand_branch.
1423 (define_expand "cbranchti4"
1424 [(set (reg:CC FLAGS_REG)
1425 (compare:CC (match_operand:TI 1 "nonimmediate_operand")
1426 (match_operand:TI 2 "ix86_timode_comparison_operand")))
1427 (set (pc) (if_then_else
1428 (match_operator 0 "ix86_timode_comparison_operator"
1429 [(reg:CC FLAGS_REG) (const_int 0)])
1430 (label_ref (match_operand 3))
1431 (pc)))]
1432 "TARGET_64BIT || TARGET_SSE4_1"
1433 {
1434 ix86_expand_branch (GET_CODE (operands[0]),
1435 operands[1], operands[2], operands[3]);
1436 DONE;
1437 })
1438
;; OImode conditional branch, enabled for TARGET_AVX.  Only comparison
;; codes accepted by bt_comparison_operator are handled.  Operand 3 is the
;; target label; expansion is delegated to ix86_expand_branch.
1439 (define_expand "cbranchoi4"
1440 [(set (reg:CC FLAGS_REG)
1441 (compare:CC (match_operand:OI 1 "nonimmediate_operand")
1442 (match_operand:OI 2 "nonimmediate_operand")))
1443 (set (pc) (if_then_else
1444 (match_operator 0 "bt_comparison_operator"
1445 [(reg:CC FLAGS_REG) (const_int 0)])
1446 (label_ref (match_operand 3))
1447 (pc)))]
1448 "TARGET_AVX"
1449 {
1450 ix86_expand_branch (GET_CODE (operands[0]),
1451 operands[1], operands[2], operands[3]);
1452 DONE;
1453 })
1454
;; XImode conditional branch.  Enabled only with TARGET_AVX512F and
;; TARGET_EVEX512 and when TARGET_PREFER_AVX256 is not set.  Only
;; comparison codes accepted by bt_comparison_operator are handled.
;; Operand 3 is the target label; expansion is delegated to
;; ix86_expand_branch.
1455 (define_expand "cbranchxi4"
1456 [(set (reg:CC FLAGS_REG)
1457 (compare:CC (match_operand:XI 1 "nonimmediate_operand")
1458 (match_operand:XI 2 "nonimmediate_operand")))
1459 (set (pc) (if_then_else
1460 (match_operator 0 "bt_comparison_operator"
1461 [(reg:CC FLAGS_REG) (const_int 0)])
1462 (label_ref (match_operand 3))
1463 (pc)))]
1464 "TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256"
1465 {
1466 ix86_expand_branch (GET_CODE (operands[0]),
1467 operands[1], operands[2], operands[3]);
1468 DONE;
1469 })
1470
;; Store the result of an integer comparison into QImode register
;; operand 0.  Operand 1 is the comparison operator, operands 2 and 3 are
;; the values being compared (SDWIM modes).  Expansion is delegated to
;; ix86_expand_setcc.
1471 (define_expand "cstore<mode>4"
1472 [(set (reg:CC FLAGS_REG)
1473 (compare:CC (match_operand:SDWIM 2 "nonimmediate_operand")
1474 (match_operand:SDWIM 3 "<general_operand>")))
1475 (set (match_operand:QI 0 "register_operand")
1476 (match_operator 1 "ordered_comparison_operator"
1477 [(reg:CC FLAGS_REG) (const_int 0)]))]
1478 ""
1479 {
/* In the double-word mode (TImode for 64-bit targets, DImode otherwise)
   only EQ and NE are expanded here; any other code makes the expander
   FAIL.  */
1480 if (<MODE>mode == (TARGET_64BIT ? TImode : DImode))
1481 {
1482 if (GET_CODE (operands[1]) != EQ
1483 && GET_CODE (operands[1]) != NE)
1484 FAIL;
1485 }
/* For the other modes, when both compared values are in memory, load
   operand 2 into a register first.  */
1486 else if (MEM_P (operands[2]) && MEM_P (operands[3]))
1487 operands[2] = force_reg (<MODE>mode, operands[2]);
1488 ix86_expand_setcc (operands[0], GET_CODE (operands[1]),
1489 operands[2], operands[3]);
1490 DONE;
1491 })
1492
;; Plain CCmode compare of two SWI48 operands, with no preparation code.
;; The '@' prefix gives the generator a mode parameter; it is called as
;; gen_cmp_1 (<MODE>mode, ...) from the *cmp<dwi>_doubleword splitter.
1493 (define_expand "@cmp<mode>_1"
1494 [(set (reg:CC FLAGS_REG)
1495 (compare:CC (match_operand:SWI48 0 "nonimmediate_operand")
1496 (match_operand:SWI48 1 "<general_operand>")))])
1497
;; Integer modes gated on AVX512 features: QImode needs AVX512DQ, SImode
;; needs AVX512BW, DImode needs AVX512BW on a 64-bit target, and HImode is
;; unconditional.  Used by *cmp<mode>_ccz_1.
1498 (define_mode_iterator SWI1248_AVX512BWDQ_64
1499 [(QI "TARGET_AVX512DQ") HI
1500 (SI "TARGET_AVX512BW") (DI "TARGET_AVX512BW && TARGET_64BIT")])
1501
;; Compare a value against zero when the user of the flags only needs
;; CCZmode (checked with ix86_match_ccmode); requires TARGET_AVX512F.
;; Alternative 0 is test on a register, alternative 1 is cmp with the
;; zero immediate, and alternative 2 is kortest on a "k"-constraint
;; operand (insn type msklog).
1502 (define_insn "*cmp<mode>_ccz_1"
1503 [(set (reg FLAGS_REG)
1504 (compare (match_operand:SWI1248_AVX512BWDQ_64 0
1505 "nonimmediate_operand" "<r>,?m<r>,$k")
1506 (match_operand:SWI1248_AVX512BWDQ_64 1 "const0_operand")))]
1507 "TARGET_AVX512F && ix86_match_ccmode (insn, CCZmode)"
1508 "@
1509 test{<imodesuffix>}\t%0, %0
1510 cmp{<imodesuffix>}\t{%1, %0|%0, %1}
1511 kortest<mskmodesuffix>\t%0, %0"
1512 [(set_attr "type" "test,icmp,msklog")
1513 (set_attr "length_immediate" "0,1,*")
1514 (set_attr "prefix" "*,*,vex")
1515 (set_attr "mode" "<MODE>")])
1516
;; Compare a single-word integer against zero when CCNOmode is
;; sufficient (checked with ix86_match_ccmode).  Alternative 0 is test on
;; a register; alternative 1 is cmp with the zero immediate.
1517 (define_insn "*cmp<mode>_ccno_1"
1518 [(set (reg FLAGS_REG)
1519 (compare (match_operand:SWI 0 "nonimmediate_operand" "<r>,?m<r>")
1520 (match_operand:SWI 1 "const0_operand")))]
1521 "ix86_match_ccmode (insn, CCNOmode)"
1522 "@
1523 test{<imodesuffix>}\t%0, %0
1524 cmp{<imodesuffix>}\t{%1, %0|%0, %1}"
1525 [(set_attr "type" "test,icmp")
1526 (set_attr "length_immediate" "0,1")
1527 (set_attr "mode" "<MODE>")])
1528
;; General single-word integer compare (CCmode).  The two alternatives
;; pair a register-or-memory operand 0 with a register-or-immediate
;; operand 1, and a register operand 0 with a register-or-memory
;; operand 1; there is no alternative with both operands in memory.
1529 (define_insn "*cmp<mode>_1"
1530 [(set (reg FLAGS_REG)
1531 (compare (match_operand:SWI 0 "nonimmediate_operand" "<r>m,<r>")
1532 (match_operand:SWI 1 "<general_operand>" "<r><i>,<r><m>")))]
1533 "ix86_match_ccmode (insn, CCmode)"
1534 "cmp{<imodesuffix>}\t{%1, %0|%0, %1}"
1535 [(set_attr "type" "icmp")
1536 (set_attr "mode" "<MODE>")])
1537
;; Comparison of (operand 0 - operand 1) against zero when CCGOCmode is
;; sufficient (checked with ix86_match_ccmode).  It is output as an
;; ordinary cmp of the two operands.
1538 (define_insn "*cmp<mode>_minus_1"
1539 [(set (reg FLAGS_REG)
1540 (compare
1541 (minus:SWI (match_operand:SWI 0 "nonimmediate_operand" "<r>m,<r>")
1542 (match_operand:SWI 1 "<general_operand>" "<r><i>,<r><m>"))
1543 (const_int 0)))]
1544 "ix86_match_ccmode (insn, CCGOCmode)"
1545 "cmp{<imodesuffix>}\t{%1, %0|%0, %1}"
1546 [(set_attr "type" "icmp")
1547 (set_attr "mode" "<MODE>")])
1548
;; Compare QImode operand 0 with the 8-bit field at bit position 8 of
;; operand 1 (an extraction accepted by extract_operator, taken as a
;; QImode subreg).  The field is printed with the %h operand modifier.
1549 (define_insn "*cmpqi_ext<mode>_1"
1550 [(set (reg FLAGS_REG)
1551 (compare
1552 (match_operand:QI 0 "nonimmediate_operand" "QBn")
1553 (subreg:QI
1554 (match_operator:SWI248 2 "extract_operator"
1555 [(match_operand 1 "int248_register_operand" "Q")
1556 (const_int 8)
1557 (const_int 8)]) 0)))]
1558 "ix86_match_ccmode (insn, CCmode)"
1559 "cmp{b}\t{%h1, %0|%0, %h1}"
1560 [(set_attr "addr" "gpr8")
1561 (set_attr "type" "icmp")
1562 (set_attr "mode" "QI")])
1563
;; Compare the 8-bit field at bit position 8 of operand 0 against zero
;; when CCNOmode is sufficient; output as a test of that field with
;; itself (printed with the %h operand modifier).
1564 (define_insn "*cmpqi_ext<mode>_2"
1565 [(set (reg FLAGS_REG)
1566 (compare
1567 (subreg:QI
1568 (match_operator:SWI248 2 "extract_operator"
1569 [(match_operand 0 "int248_register_operand" "Q")
1570 (const_int 8)
1571 (const_int 8)]) 0)
1572 (match_operand:QI 1 "const0_operand")))]
1573 "ix86_match_ccmode (insn, CCNOmode)"
1574 "test{b}\t%h0, %h0"
1575 [(set_attr "type" "test")
1576 (set_attr "length_immediate" "0")
1577 (set_attr "mode" "QI")])
1578
;; Named expander that builds a CCmode compare of the 8-bit field at bit
;; position 8 of HImode register operand 0 (a zero_extract taken as a
;; QImode subreg) against the QImode integer constant in operand 1.
1579 (define_expand "cmpqi_ext_3"
1580 [(set (reg:CC FLAGS_REG)
1581 (compare:CC
1582 (subreg:QI
1583 (zero_extract:HI
1584 (match_operand:HI 0 "register_operand")
1585 (const_int 8)
1586 (const_int 8)) 0)
1587 (match_operand:QI 1 "const_int_operand")))])
1588
;; Compare the 8-bit field at bit position 8 of operand 0 (printed with
;; the %h operand modifier) with a general QImode operand 1.
1589 (define_insn "*cmpqi_ext<mode>_3"
1590 [(set (reg FLAGS_REG)
1591 (compare
1592 (subreg:QI
1593 (match_operator:SWI248 2 "extract_operator"
1594 [(match_operand 0 "int248_register_operand" "Q")
1595 (const_int 8)
1596 (const_int 8)]) 0)
1597 (match_operand:QI 1 "general_operand" "QnBn")))]
1598 "ix86_match_ccmode (insn, CCmode)"
1599 "cmp{b}\t{%1, %h0|%h0, %1}"
1600 [(set_attr "addr" "gpr8")
1601 (set_attr "type" "icmp")
1602 (set_attr "mode" "QI")])
1603
;; Compare the 8-bit fields at bit position 8 of operand 0 and of
;; operand 1 with each other; both are printed with the %h operand
;; modifier.
1604 (define_insn "*cmpqi_ext<mode>_4"
1605 [(set (reg FLAGS_REG)
1606 (compare
1607 (subreg:QI
1608 (match_operator:SWI248 2 "extract_operator"
1609 [(match_operand 0 "int248_register_operand" "Q")
1610 (const_int 8)
1611 (const_int 8)]) 0)
1612 (subreg:QI
1613 (match_operator:SWI248 3 "extract_operator"
1614 [(match_operand 1 "int248_register_operand" "Q")
1615 (const_int 8)
1616 (const_int 8)]) 0)))]
1617 "ix86_match_ccmode (insn, CCmode)"
1618 "cmp{b}\t{%h1, %h0|%h0, %h1}"
1619 [(set_attr "type" "icmp")
1620 (set_attr "mode" "QI")])
1621
;; Double-word equality comparison (CCZmode only), matched before reload
;; and always split into word-sized operations.  <MODE> is the DWIH word
;; mode and <DWI> the corresponding double-word mode.
;;
;; The preparation code splits both inputs into halves and computes, for
;; each pair of halves, a value that is zero exactly when the pair is
;; equal: operands[4] for operands[0]/operands[1] and operands[5] for
;; operands[2]/operands[3].  The replacement pattern then ORs operands[4]
;; and operands[5], which sets the zero flag exactly when both pairs are
;; equal.
;; NOTE(review): split_double_mode is defined elsewhere; from the argument
;; order operands[0]/[1] are presumably the low halves and operands[2]/[3]
;; the high halves -- confirm.
1622 (define_insn_and_split "*cmp<dwi>_doubleword"
1623 [(set (reg:CCZ FLAGS_REG)
1624 (compare:CCZ (match_operand:<DWI> 0 "nonimmediate_operand")
1625 (match_operand:<DWI> 1 "general_operand")))]
1626 "ix86_pre_reload_split ()"
1627 "#"
1628 "&& 1"
1629 [(parallel [(set (reg:CCZ FLAGS_REG)
1630 (compare:CCZ (ior:DWIH (match_dup 4) (match_dup 5))
1631 (const_int 0)))
1632 (set (match_dup 4) (ior:DWIH (match_dup 4) (match_dup 5)))])]
1633 {
1634 split_double_mode (<DWI>mode, &operands[0], 2, &operands[0], &operands[2]);
1635 /* Placing the SUBREG pieces in pseudos helps reload. */
1636 for (int i = 0; i < 4; i++)
1637 if (SUBREG_P (operands[i]))
1638 operands[i] = force_reg (<MODE>mode, operands[i]);
1639
1640 operands[4] = gen_reg_rtx (<MODE>mode);
1641
1642 /* Special case comparisons against -1. */
/* Both halves of the second input are -1: AND the halves of the first
   input and compare the result with -1.  This path emits everything
   itself and ends in DONE, so the replacement pattern is not used.  */
1643 if (operands[1] == constm1_rtx && operands[3] == constm1_rtx)
1644 {
1645 emit_insn (gen_and<mode>3 (operands[4], operands[0], operands[2]));
1646 emit_insn (gen_cmp_1 (<MODE>mode, operands[4], constm1_rtx));
1647 DONE;
1648 }
1649
/* Make operands[4] zero iff operands[0] equals operands[1]: a plain copy
   when one side is 0, a one's complement when one side is -1, and an XOR
   otherwise.  */
1650 if (operands[1] == const0_rtx)
1651 emit_move_insn (operands[4], operands[0]);
1652 else if (operands[0] == const0_rtx)
1653 emit_move_insn (operands[4], operands[1]);
1654 else if (operands[1] == constm1_rtx)
1655 emit_insn (gen_one_cmpl<mode>2 (operands[4], operands[0]));
1656 else if (operands[0] == constm1_rtx)
1657 emit_insn (gen_one_cmpl<mode>2 (operands[4], operands[1]));
1658 else
1659 {
/* A constant that is not accepted by x86_64_immediate_operand is loaded
   into a register before the XOR.  */
1660 if (CONST_SCALAR_INT_P (operands[1])
1661 && !x86_64_immediate_operand (operands[1], <MODE>mode))
1662 operands[1] = force_reg (<MODE>mode, operands[1]);
1663 emit_insn (gen_xor<mode>3 (operands[4], operands[0], operands[1]));
1664 }
1665
/* Same for operands[2]/operands[3], giving operands[5].  When one side
   is 0 the other side is used directly and no new pseudo is created.  */
1666 if (operands[3] == const0_rtx)
1667 operands[5] = operands[2];
1668 else if (operands[2] == const0_rtx)
1669 operands[5] = operands[3];
1670 else
1671 {
1672 operands[5] = gen_reg_rtx (<MODE>mode);
1673 if (operands[3] == constm1_rtx)
1674 emit_insn (gen_one_cmpl<mode>2 (operands[5], operands[2]));
1675 else if (operands[2] == constm1_rtx)
1676 emit_insn (gen_one_cmpl<mode>2 (operands[5], operands[3]));
1677 else
1678 {
1679 if (CONST_SCALAR_INT_P (operands[3])
1680 && !x86_64_immediate_operand (operands[3], <MODE>mode))
1681 operands[3] = force_reg (<MODE>mode, operands[3]);
1682 emit_insn (gen_xor<mode>3 (operands[5], operands[2], operands[3]));
1683 }
1684 }
1685 })
1686
1687 ;; These implement floating point compares.
1688 ;; %%% See if we can get away with VOIDmode operands on the actual insns,
1689 ;; which would allow mix and match FP modes on the compares. Which is what
1690 ;; the old patterns did, but with many more of them.
1691
;; XFmode conditional branch, enabled for TARGET_80387.  Operand 0 is the
;; comparison operator (ix86_fp_comparison_operator), operands 1 and 2
;; are the nonmemory values being compared and operand 3 is the target
;; label.  Expansion is delegated to ix86_expand_branch.
1692 (define_expand "cbranchxf4"
1693 [(set (reg:CC FLAGS_REG)
1694 (compare:CC (match_operand:XF 1 "nonmemory_operand")
1695 (match_operand:XF 2 "nonmemory_operand")))
1696 (set (pc) (if_then_else
1697 (match_operator 0 "ix86_fp_comparison_operator"
1698 [(reg:CC FLAGS_REG)
1699 (const_int 0)])
1700 (label_ref (match_operand 3))
1701 (pc)))]
1702 "TARGET_80387"
1703 {
1704 ix86_expand_branch (GET_CODE (operands[0]),
1705 operands[1], operands[2], operands[3]);
1706 DONE;
1707 })
1708
;; Store the result of an XFmode comparison into QImode register
;; operand 0, enabled for TARGET_80387.  Operand 1 is the comparison
;; operator, operands 2 and 3 are the nonmemory values being compared.
;; Expansion is delegated to ix86_expand_setcc.
1709 (define_expand "cstorexf4"
1710 [(set (reg:CC FLAGS_REG)
1711 (compare:CC (match_operand:XF 2 "nonmemory_operand")
1712 (match_operand:XF 3 "nonmemory_operand")))
1713 (set (match_operand:QI 0 "register_operand")
1714 (match_operator 1 "ix86_fp_comparison_operator"
1715 [(reg:CC FLAGS_REG)
1716 (const_int 0)]))]
1717 "TARGET_80387"
1718 {
1719 ix86_expand_setcc (operands[0], GET_CODE (operands[1]),
1720 operands[2], operands[3]);
1721 DONE;
1722 })
1723
;; HFmode conditional branch, enabled for TARGET_AVX512FP16.  Operand 0
;; is the comparison operator, operands 1 and 2 are the values being
;; compared and operand 3 is the target label.  Expansion is delegated to
;; ix86_expand_branch.
1724 (define_expand "cbranchhf4"
1725 [(set (reg:CC FLAGS_REG)
1726 (compare:CC (match_operand:HF 1 "cmp_fp_expander_operand")
1727 (match_operand:HF 2 "cmp_fp_expander_operand")))
1728 (set (pc) (if_then_else
1729 (match_operator 0 "ix86_fp_comparison_operator"
1730 [(reg:CC FLAGS_REG)
1731 (const_int 0)])
1732 (label_ref (match_operand 3))
1733 (pc)))]
1734 "TARGET_AVX512FP16"
1735 {
1736 ix86_expand_branch (GET_CODE (operands[0]),
1737 operands[1], operands[2], operands[3]);
1738 DONE;
1739 })
1740
;; SFmode/DFmode conditional branch.  Enabled when the x87 is available
;; (TARGET_80387) or when the mode is an SSE float mode and SSE math is
;; in use.  Operand 0 is the comparison operator, operands 1 and 2 are
;; the values being compared and operand 3 is the target label.
;; Expansion is delegated to ix86_expand_branch.
1741 (define_expand "cbranch<mode>4"
1742 [(set (reg:CC FLAGS_REG)
1743 (compare:CC (match_operand:MODEF 1 "cmp_fp_expander_operand")
1744 (match_operand:MODEF 2 "cmp_fp_expander_operand")))
1745 (set (pc) (if_then_else
1746 (match_operator 0 "ix86_fp_comparison_operator"
1747 [(reg:CC FLAGS_REG)
1748 (const_int 0)])
1749 (label_ref (match_operand 3))
1750 (pc)))]
1751 "TARGET_80387 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
1752 {
1753 ix86_expand_branch (GET_CODE (operands[0]),
1754 operands[1], operands[2], operands[3]);
1755 DONE;
1756 })
1757
;; BFmode conditional branch.  Both inputs are converted to SFmode with
;; ix86_expand_fast_convert_bf_to_sf and the branch is then emitted as an
;; SFmode compare-and-jump through do_compare_rtx_and_jump, so the enable
;; condition is the SFmode one.  Unlike the other FP branch expanders
;; here, operand 0 is matched with the generic comparison_operator
;; predicate.  Operand 3 is the target label.
1758 (define_expand "cbranchbf4"
1759 [(set (reg:CC FLAGS_REG)
1760 (compare:CC (match_operand:BF 1 "cmp_fp_expander_operand")
1761 (match_operand:BF 2 "cmp_fp_expander_operand")))
1762 (set (pc) (if_then_else
1763 (match_operator 0 "comparison_operator"
1764 [(reg:CC FLAGS_REG)
1765 (const_int 0)])
1766 (label_ref (match_operand 3))
1767 (pc)))]
1768 "TARGET_80387 || (SSE_FLOAT_MODE_P (SFmode) && TARGET_SSE_MATH)"
1769 {
1770 rtx op1 = ix86_expand_fast_convert_bf_to_sf (operands[1]);
1771 rtx op2 = ix86_expand_fast_convert_bf_to_sf (operands[2]);
1772 do_compare_rtx_and_jump (op1, op2, GET_CODE (operands[0]), 0,
1773 SFmode, NULL_RTX, NULL,
1774 as_a <rtx_code_label *> (operands[3]),
1775 /* Unfortunately this isn't propagated. */
1776 profile_probability::even ());
1777 DONE;
1778 })
1779
;; Store the result of an HFmode comparison into QImode register
;; operand 0, enabled for TARGET_AVX512FP16.  Operand 1 is the comparison
;; operator, operands 2 and 3 are the values being compared.  Expansion
;; is delegated to ix86_expand_setcc.
1780 (define_expand "cstorehf4"
1781 [(set (reg:CC FLAGS_REG)
1782 (compare:CC (match_operand:HF 2 "cmp_fp_expander_operand")
1783 (match_operand:HF 3 "cmp_fp_expander_operand")))
1784 (set (match_operand:QI 0 "register_operand")
1785 (match_operator 1 "ix86_fp_comparison_operator"
1786 [(reg:CC FLAGS_REG)
1787 (const_int 0)]))]
1788 "TARGET_AVX512FP16"
1789 {
1790 ix86_expand_setcc (operands[0], GET_CODE (operands[1]),
1791 operands[2], operands[3]);
1792 DONE;
1793 })
1794
;; Store the result of a BFmode comparison into QImode register
;; operand 0.  Both inputs are converted to SFmode with
;; ix86_expand_fast_convert_bf_to_sf and the comparison is carried out in
;; SFmode through emit_store_flag_force, so the enable condition is the
;; SFmode one.  Operand 1 is matched with the generic comparison_operator
;; predicate.
1795 (define_expand "cstorebf4"
1796 [(set (reg:CC FLAGS_REG)
1797 (compare:CC (match_operand:BF 2 "cmp_fp_expander_operand")
1798 (match_operand:BF 3 "cmp_fp_expander_operand")))
1799 (set (match_operand:QI 0 "register_operand")
1800 (match_operator 1 "comparison_operator"
1801 [(reg:CC FLAGS_REG)
1802 (const_int 0)]))]
1803 "TARGET_80387 || (SSE_FLOAT_MODE_P (SFmode) && TARGET_SSE_MATH)"
1804 {
1805 rtx op1 = ix86_expand_fast_convert_bf_to_sf (operands[2]);
1806 rtx op2 = ix86_expand_fast_convert_bf_to_sf (operands[3]);
1807 rtx res = emit_store_flag_force (operands[0], GET_CODE (operands[1]),
1808 op1, op2, SFmode, 0, 1);
/* The returned rtx may differ from operands[0]; copy it over in that
   case.  */
1809 if (!rtx_equal_p (res, operands[0]))
1810 emit_move_insn (operands[0], res);
1811 DONE;
1812 })
1813
;; Store the result of an SFmode/DFmode comparison into QImode register
;; operand 0.  Enabled when the x87 is available (TARGET_80387) or when
;; the mode is an SSE float mode and SSE math is in use.  Operand 1 is
;; the comparison operator, operands 2 and 3 are the values being
;; compared.  Expansion is delegated to ix86_expand_setcc.
1814 (define_expand "cstore<mode>4"
1815 [(set (reg:CC FLAGS_REG)
1816 (compare:CC (match_operand:MODEF 2 "cmp_fp_expander_operand")
1817 (match_operand:MODEF 3 "cmp_fp_expander_operand")))
1818 (set (match_operand:QI 0 "register_operand")
1819 (match_operator 1 "ix86_fp_comparison_operator"
1820 [(reg:CC FLAGS_REG)
1821 (const_int 0)]))]
1822 "TARGET_80387 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
1823 {
1824 ix86_expand_setcc (operands[0], GET_CODE (operands[1]),
1825 operands[2], operands[3]);
1826 DONE;
1827 })
1828
;; Conditional branch on a comparison of the flags register (operand 1,
;; flags_reg_operand) against zero (operand 2).  Operand 0 is the
;; comparison operator and operand 3 the target label.  Expansion is
;; delegated to ix86_expand_branch.
1829 (define_expand "cbranchcc4"
1830 [(set (pc) (if_then_else
1831 (match_operator 0 "comparison_operator"
1832 [(match_operand 1 "flags_reg_operand")
1833 (match_operand 2 "const0_operand")])
1834 (label_ref (match_operand 3))
1835 (pc)))]
1836 ""
1837 {
1838 ix86_expand_branch (GET_CODE (operands[0]),
1839 operands[1], operands[2], operands[3]);
1840 DONE;
1841 })
1842
;; Store into QImode register operand 0 the result of comparing the
;; flags register (operand 2, flags_reg_operand) against zero
;; (operand 3).  Operand 1 is the comparison operator.  Expansion is
;; delegated to ix86_expand_setcc.
1843 (define_expand "cstorecc4"
1844 [(set (match_operand:QI 0 "register_operand")
1845 (match_operator 1 "comparison_operator"
1846 [(match_operand 2 "flags_reg_operand")
1847 (match_operand 3 "const0_operand")]))]
1848 ""
1849 {
1850 ix86_expand_setcc (operands[0], GET_CODE (operands[1]),
1851 operands[2], operands[3]);
1852 DONE;
1853 })
1854
1855 ;; FP compares, step 1:
1856 ;; Set the FP condition codes and move fpsr to ax.
1857
1858 ;; We may not use "#" to split and emit these
1859 ;; due to reg-stack pops killing fpsr.
1860
;; XFmode x87 compare whose result is an UNSPEC_FNSTSW value placed in
;; the HImode "a"-constraint register.  Operand 2 is a register or zero.
;; The assembly text is produced by output_fp_compare.
1861 (define_insn "*cmpxf_i387"
1862 [(set (match_operand:HI 0 "register_operand" "=a")
1863 (unspec:HI
1864 [(compare:CCFP
1865 (match_operand:XF 1 "register_operand" "f")
1866 (match_operand:XF 2 "reg_or_0_operand" "fC"))]
1867 UNSPEC_FNSTSW))]
1868 "TARGET_80387"
1869 "* return output_fp_compare (insn, operands, false, false);"
1870 [(set_attr "type" "multi")
1871 (set_attr "unit" "i387")
1872 (set_attr "mode" "XF")])
1873
;; SFmode/DFmode counterpart of *cmpxf_i387: the UNSPEC_FNSTSW result
;; goes to the HImode "a"-constraint register.  Here operand 2 may also
;; be a memory operand (nonimm_or_0_operand, constraint "fmC").
1874 (define_insn "*cmp<mode>_i387"
1875 [(set (match_operand:HI 0 "register_operand" "=a")
1876 (unspec:HI
1877 [(compare:CCFP
1878 (match_operand:MODEF 1 "register_operand" "f")
1879 (match_operand:MODEF 2 "nonimm_or_0_operand" "fmC"))]
1880 UNSPEC_FNSTSW))]
1881 "TARGET_80387"
1882 "* return output_fp_compare (insn, operands, false, false);"
1883 [(set_attr "type" "multi")
1884 (set_attr "unit" "i387")
1885 (set_attr "mode" "<MODE>")])
1886
;; x87 compare of a floating point register against an HImode or SImode
;; integer in memory that is converted with float.  Enabled only when the
;; corresponding TARGET_USE_*MODE_FIOP tuning is set or the function is
;; optimized for size.  The UNSPEC_FNSTSW result goes to the HImode
;; "a"-constraint register, and the insn is marked with fp_int_src.
1887 (define_insn "*cmp<X87MODEF:mode>_<SWI24:mode>_i387"
1888 [(set (match_operand:HI 0 "register_operand" "=a")
1889 (unspec:HI
1890 [(compare:CCFP
1891 (match_operand:X87MODEF 1 "register_operand" "f")
1892 (float:X87MODEF
1893 (match_operand:SWI24 2 "nonimmediate_operand" "m")))]
1894 UNSPEC_FNSTSW))]
1895 "TARGET_80387
1896 && (TARGET_USE_<SWI24:MODE>MODE_FIOP
1897 || optimize_function_for_size_p (cfun))"
1898 "* return output_fp_compare (insn, operands, false, false);"
1899 [(set_attr "type" "multi")
1900 (set_attr "unit" "i387")
1901 (set_attr "fp_int_src" "true")
1902 (set_attr "mode" "<SWI24:MODE>")])
1903
;; Variant of the x87 compare-to-status-word insns in which the compare
;; is wrapped in UNSPEC_NOTRAP and both inputs are registers.  It passes
;; true as the last argument of output_fp_compare, where the other
;; FNSTSW patterns pass false.
1904 (define_insn "*cmpu<mode>_i387"
1905 [(set (match_operand:HI 0 "register_operand" "=a")
1906 (unspec:HI
1907 [(unspec:CCFP
1908 [(compare:CCFP
1909 (match_operand:X87MODEF 1 "register_operand" "f")
1910 (match_operand:X87MODEF 2 "register_operand" "f"))]
1911 UNSPEC_NOTRAP)]
1912 UNSPEC_FNSTSW))]
1913 "TARGET_80387"
1914 "* return output_fp_compare (insn, operands, false, true);"
1915 [(set_attr "type" "multi")
1916 (set_attr "unit" "i387")
1917 (set_attr "mode" "<MODE>")])
1918
1919 ;; FP compares, step 2:
1920 ;; Get ax into flags, general case.
1921
;; Set the flags register from the HImode value in the "a"-constraint
;; register (UNSPEC_SAHF); enabled for TARGET_SAHF.  The instruction is
;; output as "sahf", except that when HAVE_AS_IX86_SAHF is not defined
;; and the target is 64-bit it is output as the raw byte 0x9e.
1922 (define_insn "x86_sahf_1"
1923 [(set (reg:CC FLAGS_REG)
1924 (unspec:CC [(match_operand:HI 0 "register_operand" "a")]
1925 UNSPEC_SAHF))]
1926 "TARGET_SAHF"
1927 {
1928 #ifndef HAVE_AS_IX86_SAHF
1929 if (TARGET_64BIT)
1930 return ASM_BYTE "0x9e";
1931 else
1932 #endif
1933 return "sahf";
1934 }
1935 [(set_attr "length" "1")
1936 (set_attr "athlon_decode" "vector")
1937 (set_attr "amdfam10_decode" "direct")
1938 (set_attr "bdver1_decode" "direct")
1939 (set_attr "mode" "SI")])
1940
1941 ;; Pentium Pro can do both steps in one go.
1942 ;; (these instructions set flags directly)
1943
;; Substitution support for the direct-to-flags compares.  "unord_subst"
;; rewrites a CCFP set so that its source is wrapped in
;; (unspec:CCFP [...] UNSPEC_NOTRAP).  In the substituted copy of a pattern
;; <unord> expands to "u" and <unordered> to "true"; in the unsubstituted
;; copy they expand to "" and "false" respectively.
1944 (define_subst_attr "unord" "unord_subst" "" "u")
1945 (define_subst_attr "unordered" "unord_subst" "false" "true")
1946
1947 (define_subst "unord_subst"
1948 [(set (match_operand:CCFP 0)
1949 (match_operand:CCFP 1))]
1950 ""
1951 [(set (match_dup 0)
1952 (unspec:CCFP
1953 [(match_dup 1)]
1954 UNSPEC_NOTRAP))])
1955
;; XFmode x87 register/register compare that sets FLAGS_REG directly.
;; Requires both TARGET_80387 and TARGET_CMOVE.  The <unordered> subst
;; attribute is forwarded as the last argument of output_fp_compare.
1956 (define_insn "*cmpi<unord>xf_i387"
1957 [(set (reg:CCFP FLAGS_REG)
1958 (compare:CCFP
1959 (match_operand:XF 0 "register_operand" "f")
1960 (match_operand:XF 1 "register_operand" "f")))]
1961 "TARGET_80387 && TARGET_CMOVE"
1962 "* return output_fp_compare (insn, operands, true, <unordered>);"
1963 [(set_attr "type" "fcmp")
1964 (set_attr "mode" "XF")
1965 (set_attr "athlon_decode" "vector")
1966 (set_attr "amdfam10_decode" "direct")
1967 (set_attr "bdver1_decode" "double")
1968 (set_attr "znver1_decode" "double")])
1969
;; MODEF compare that sets FLAGS_REG directly.
;;   alternative 0: x87 registers, emitted through output_fp_compare;
;;   alternative 1: SSE register vs. SSE register or memory, emitted as
;;                  %v<unord>comi<ssemodesuffix>.
;; The "enabled" attribute selects alternatives at run time: when SSE math
;; is in use for this mode, alternative 1 is always enabled and alternative
;; 0 only under TARGET_MIX_SSE_I387; otherwise only alternative 0 is
;; enabled.  prefix_data16 is "1" only for the DFmode SSE alternative.
1970 (define_insn "*cmpi<unord><MODEF:mode>"
1971 [(set (reg:CCFP FLAGS_REG)
1972 (compare:CCFP
1973 (match_operand:MODEF 0 "register_operand" "f,v")
1974 (match_operand:MODEF 1 "register_ssemem_operand" "f,vm")))]
1975 "(SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH)
1976 || (TARGET_80387 && TARGET_CMOVE)"
1977 "@
1978 * return output_fp_compare (insn, operands, true, <unordered>);
1979 %v<unord>comi<MODEF:ssemodesuffix>\t{%1, %0|%0, %1}"
1980 [(set_attr "type" "fcmp,ssecomi")
1981 (set_attr "prefix" "orig,maybe_vex")
1982 (set_attr "mode" "<MODEF:MODE>")
1983 (set_attr "prefix_rep" "*,0")
1984 (set (attr "prefix_data16")
1985 (cond [(eq_attr "alternative" "0")
1986 (const_string "*")
1987 (eq_attr "mode" "DF")
1988 (const_string "1")
1989 ]
1990 (const_string "0")))
1991 (set_attr "athlon_decode" "vector")
1992 (set_attr "amdfam10_decode" "direct")
1993 (set_attr "bdver1_decode" "double")
1994 (set_attr "znver1_decode" "double")
1995 (set (attr "enabled")
1996 (if_then_else
1997 (match_test ("SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH"))
1998 (if_then_else
1999 (eq_attr "alternative" "0")
2000 (symbol_ref "TARGET_MIX_SSE_I387")
2001 (symbol_ref "true"))
2002 (if_then_else
2003 (eq_attr "alternative" "0")
2004 (symbol_ref "true")
2005 (symbol_ref "false"))))])
2006
;; HFmode compare that sets FLAGS_REG directly using v<unord>comish.
;; Operand 0 must be a register; operand 1 may be a register or memory.
;; Requires TARGET_AVX512FP16 and is always EVEX-prefixed.
2007 (define_insn "*cmpi<unord>hf"
2008 [(set (reg:CCFP FLAGS_REG)
2009 (compare:CCFP
2010 (match_operand:HF 0 "register_operand" "v")
2011 (match_operand:HF 1 "nonimmediate_operand" "vm")))]
2012 "TARGET_AVX512FP16"
2013 "v<unord>comish\t{%1, %0|%0, %1}"
2014 [(set_attr "type" "ssecomi")
2015 (set_attr "prefix" "evex")
2016 (set_attr "mode" "HF")])
2017
2018 ;; Set carry flag.
;; The carry set is modeled as an UNSPEC_STC assignment to FLAGS_REG in
;; CCCmode.  The insn has no operands and no enabling condition; its
;; attributes describe a one-byte encoding with no immediate and no modrm.
2019 (define_insn "x86_stc"
2020 [(set (reg:CCC FLAGS_REG) (unspec:CCC [(const_int 0)] UNSPEC_STC))]
2021 ""
2022 "stc"
2023 [(set_attr "length" "1")
2024 (set_attr "length_immediate" "0")
2025 (set_attr "modrm" "0")])
2026
2027 ;; On Pentium 4, set the carry flag using mov $1,%al;addb $-1,%al.
;; Requires a free QImode scratch register (operand 0).  Applies only under
;; TARGET_SLOW_STC and when the insn is not being optimized for size.  The
;; replacement loads 1 into the scratch and then adds -1 to it in a parallel
;; that also sets FLAGS_REG (CCCmode) from the addition.
2028 (define_peephole2
2029 [(match_scratch:QI 0 "r")
2030 (set (reg:CCC FLAGS_REG) (unspec:CCC [(const_int 0)] UNSPEC_STC))]
2031 "TARGET_SLOW_STC && !optimize_insn_for_size_p ()"
2032 [(set (match_dup 0) (const_int 1))
2033 (parallel
2034 [(set (reg:CCC FLAGS_REG)
2035 (compare:CCC (plus:QI (match_dup 0) (const_int -1))
2036 (match_dup 0)))
2037 (set (match_dup 0) (plus:QI (match_dup 0) (const_int -1)))])])
2038
2039 ;; Complement carry flag.
;; The new CCCmode flags value is written as a compare of
;; (neg (ltu FLAGS_REG 0)) against (geu FLAGS_REG 0), i.e. both inputs are
;; derived from the current flags.  The "use_carry" attribute records that
;; the insn reads the carry flag.
2040 (define_insn "*x86_cmc"
2041 [(set (reg:CCC FLAGS_REG)
2042 (compare:CCC (neg:QI (ltu:QI (reg:CCC FLAGS_REG) (const_int 0)))
2043 (geu:QI (reg:CCC FLAGS_REG) (const_int 0))))]
2044 ""
2045 "cmc"
2046 [(set_attr "length" "1")
2047 (set_attr "length_immediate" "0")
2048 (set_attr "use_carry" "1")
2049 (set_attr "modrm" "0")])
2050
2051 ;; On Pentium 4, cmc is replaced with setnc %al;addb $-1,%al.
;; Requires a free QImode scratch register (operand 0).  Applies only under
;; TARGET_SLOW_STC and when the insn is not being optimized for size.  The
;; replacement first materializes (ne FLAGS_REG 0) in the scratch, then adds
;; -1 to it in a parallel that also sets FLAGS_REG (CCCmode).
2052 (define_peephole2
2053 [(match_scratch:QI 0 "r")
2054 (set (reg:CCC FLAGS_REG)
2055 (compare:CCC (neg:QI (ltu:QI (reg:CCC FLAGS_REG) (const_int 0)))
2056 (geu:QI (reg:CCC FLAGS_REG) (const_int 0))))]
2057 "TARGET_SLOW_STC && !optimize_insn_for_size_p ()"
2058 [(set (match_dup 0) (ne:QI (reg:CCC FLAGS_REG) (const_int 0)))
2059 (parallel
2060 [(set (reg:CCC FLAGS_REG)
2061 (compare:CCC (plus:QI (match_dup 0) (const_int -1))
2062 (match_dup 0)))
2063 (set (match_dup 0) (plus:QI (match_dup 0) (const_int -1)))])])
2064 \f
2065 ;; Push/pop instructions.
2066
;; Push of a V1TImode register (constraint "v"); only for 64-bit targets
;; with TARGET_STV.  It is never output directly ("#").  After reload it is
;; split into an explicit stack-pointer adjustment by
;; -PUSH_ROUNDING (size of V1TImode) followed by a plain store whose
;; address is the stack pointer.
2067 (define_insn_and_split "*pushv1ti2"
2068 [(set (match_operand:V1TI 0 "push_operand" "=<")
2069 (match_operand:V1TI 1 "register_operand" "v"))]
2070 "TARGET_64BIT && TARGET_STV"
2071 "#"
2072 "&& reload_completed"
2073 [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (match_dup 2)))
2074 (set (match_dup 0) (match_dup 1))]
2075 {
2076 operands[2] = GEN_INT (-PUSH_ROUNDING (GET_MODE_SIZE (V1TImode)));
2077 /* Preserve memory attributes. */
2078 operands[0] = replace_equiv_address (operands[0], stack_pointer_rtx);
2079 }
2080 [(set_attr "type" "multi")
2081 (set_attr "mode" "TI")])
2082
;; Double-word integer (DWI) push.  Both alternatives output "#", so the
;; insn is a placeholder that relies on a splitter to produce real
;; instructions.  Alternative 1 takes its source with constraint "*v".
2083 (define_insn "*push<mode>2"
2084 [(set (match_operand:DWI 0 "push_operand" "=<,<")
2085 (match_operand:DWI 1 "general_no_elim_operand" "riF*o,*v"))]
2086 ""
2087 "#"
2088 [(set_attr "type" "multi")
2089 (set_attr "mode" "<MODE>")])
2090
;; After reload, a double-word push whose source satisfies
;; general_gr_operand is expanded by ix86_split_long_move.  The replacement
;; template (const_int 0) is a dummy: the C fragment emits the insns and
;; ends with DONE.
2091 (define_split
2092 [(set (match_operand:DWI 0 "push_operand")
2093 (match_operand:DWI 1 "general_gr_operand"))]
2094 "reload_completed"
2095 [(const_int 0)]
2096 "ix86_split_long_move (operands); DONE;")
2097
;; DImode push for 64-bit targets.
;;   alternative 0: a real push{q} of the source;
;;   alternative 1 (source "*v") and alternative 2 (source "n", destination
;;   marked "!<"): output "#" and therefore have to be split.
2098 (define_insn "*pushdi2_rex64"
2099 [(set (match_operand:DI 0 "push_operand" "=<,<,!<")
2100 (match_operand:DI 1 "general_no_elim_operand" "re*m,*v,n"))]
2101 "TARGET_64BIT"
2102 "@
2103 push{q}\t%1
2104 #
2105 #"
2106 [(set_attr "type" "push,multi,multi")
2107 (set_attr "mode" "DI")])
2108
2109 ;; Convert impossible pushes of immediate to existing instructions.
2110 ;; First try to get scratch register and go through it. In case this
2111 ;; fails, push sign extended lower part first and then overwrite
2112 ;; upper part by 32bit move.
2113
;; Scratch-register variant: matches a 64-bit push of a DImode immediate
;; that is neither symbolic nor accepted by x86_64_immediate_operand, when
;; a DImode scratch register (operand 2) is available.  The constant is
;; loaded into the scratch and the scratch is pushed instead.
2114 (define_peephole2
2115 [(match_scratch:DI 2 "r")
2116 (set (match_operand:DI 0 "push_operand")
2117 (match_operand:DI 1 "immediate_operand"))]
2118 "TARGET_64BIT
2119 && !symbolic_operand (operands[1], DImode)
2120 && !x86_64_immediate_operand (operands[1], DImode)"
2121 [(set (match_dup 2) (match_dup 1))
2122 (set (match_dup 0) (match_dup 2))])
2123
;; Fallback variant, active once epilogue_completed is set: the same kind
;; of immediate is split into two halves with split_double_mode.  The first
;; emitted insn pushes the DImode lowpart of one half; the second stores the
;; other half into an SImode memory slot at stack pointer + 4.
2124 (define_split
2125 [(set (match_operand:DI 0 "push_operand")
2126 (match_operand:DI 1 "immediate_operand"))]
2127 "TARGET_64BIT && epilogue_completed
2128 && !symbolic_operand (operands[1], DImode)
2129 && !x86_64_immediate_operand (operands[1], DImode)"
2130 [(set (match_dup 0) (match_dup 1))
2131 (set (match_dup 2) (match_dup 3))]
2132 {
2133 split_double_mode (DImode, &operands[1], 1, &operands[2], &operands[3]);
2134
2135 operands[1] = gen_lowpart (DImode, operands[2]);
2136 operands[2] = gen_rtx_MEM (SImode,
2137 plus_constant (Pmode, stack_pointer_rtx, 4));
2138 })
2139
2140 ;; For TARGET_64BIT we always round up to 8 bytes.
;; SImode push on 64-bit targets.  Alternative 0 emits push{q} on the %q1
;; form of the source and is typed "push"; alternative 1 (source "*v")
;; outputs "#" and must be split.  The mode attribute is DI for both.
2141 (define_insn "*pushsi2_rex64"
2142 [(set (match_operand:SI 0 "push_operand" "=X,X")
2143 (match_operand:SI 1 "nonmemory_no_elim_operand" "re,*v"))]
2144 "TARGET_64BIT"
2145 "@
2146 push{q}\t%q1
2147 #"
2148 [(set_attr "type" "push,multi")
2149 (set_attr "mode" "DI")])
2150
;; SImode push for non-64-bit targets.  Alternative 0 emits push{l};
;; alternative 1 (source "*v") outputs "#" and must be split.
2151 (define_insn "*pushsi2"
2152 [(set (match_operand:SI 0 "push_operand" "=<,<")
2153 (match_operand:SI 1 "general_no_elim_operand" "ri*m,*v"))]
2154 "!TARGET_64BIT"
2155 "@
2156 push{l}\t%1
2157 #"
2158 [(set_attr "type" "push,multi")
2159 (set_attr "mode" "SI")])
2160
;; After reload (with TARGET_SSE), a push whose source is an SSE register is
;; rewritten as a stack-pointer adjustment by -PUSH_ROUNDING (mode size)
;; followed by an ordinary store to the address in the stack pointer.
2161 (define_split
2162 [(set (match_operand:SWI48DWI 0 "push_operand")
2163 (match_operand:SWI48DWI 1 "sse_reg_operand"))]
2164 "TARGET_SSE && reload_completed"
2165 [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (match_dup 2)))
2166 (set (match_dup 0) (match_dup 1))]
2167 {
2168 operands[2] = GEN_INT (-PUSH_ROUNDING (GET_MODE_SIZE (<SWI48DWI:MODE>mode)));
2169 /* Preserve memory attributes. */
2170 operands[0] = replace_equiv_address (operands[0], stack_pointer_rtx);
2171 })
2172
2173 ;; emit_push_insn when it calls move_by_pieces requires an insn to
2174 ;; "push a byte/word". But actually we use push{l,q}, which has
2175 ;; the effect of rounding the amount pushed up to a word.
2176
;; SWI12 push of a register or constant.  The output template is chosen at
;; output time: push{q} of %q1 on 64-bit targets, push{l} of %k1 otherwise.
;; The "mode" attribute is likewise DI or SI depending on TARGET_64BIT.
2177 (define_insn "*push<mode>2"
2178 [(set (match_operand:SWI12 0 "push_operand" "=X")
2179 (match_operand:SWI12 1 "nonmemory_no_elim_operand" "rn"))]
2180 ""
2181 "* return TARGET_64BIT ? \"push{q}\t%q1\" : \"push{l}\t%k1\";"
2182 [(set_attr "type" "push")
2183 (set (attr "mode")
2184 (if_then_else (match_test "TARGET_64BIT")
2185 (const_string "DI")
2186 (const_string "SI")))])
2187
;; Word-mode (W) push that additionally carries
;; (clobber (mem:BLK (scratch))).
;; NOTE(review): the BLKmode scratch clobber presumably serves as a memory
;; barrier for pushes emitted in the prologue -- confirm with the callers.
2188 (define_insn "*push<mode>2_prologue"
2189 [(set (match_operand:W 0 "push_operand" "=<")
2190 (match_operand:W 1 "general_no_elim_operand" "r<i>*m"))
2191 (clobber (mem:BLK (scratch)))]
2192 ""
2193 "push{<imodesuffix>}\t%1"
2194 [(set_attr "type" "push")
2195 (set_attr "mode" "<MODE>")])
2196
;; Word-mode (W) pop: operand 1 is the pop_operand source (constraint ">"),
;; operand 0 the register or memory destination.
2197 (define_insn "*pop<mode>1"
2198 [(set (match_operand:W 0 "nonimmediate_operand" "=r*m")
2199 (match_operand:W 1 "pop_operand" ">"))]
2200 ""
2201 "pop{<imodesuffix>}\t%0"
2202 [(set_attr "type" "pop")
2203 (set_attr "mode" "<MODE>")])
2204
;; Same pop as "*pop<mode>1" but with an extra
;; (clobber (mem:BLK (scratch))) in the pattern.
;; NOTE(review): presumably a memory barrier for epilogue pops -- confirm.
2205 (define_insn "*pop<mode>1_epilogue"
2206 [(set (match_operand:W 0 "nonimmediate_operand" "=r*m")
2207 (match_operand:W 1 "pop_operand" ">"))
2208 (clobber (mem:BLK (scratch)))]
2209 ""
2210 "pop{<imodesuffix>}\t%0"
2211 [(set_attr "type" "pop")
2212 (set_attr "mode" "<MODE>")])
2213
;; Push the flags register: the pushed word-mode value is
;; UNSPEC_PUSHFL of the CCmode flags operand.  Emitted as
;; pushf{<imodesuffix>}.
2214 (define_insn "@pushfl<mode>2"
2215 [(set (match_operand:W 0 "push_operand" "=<")
2216 (unspec:W [(match_operand:CC 1 "flags_reg_operand")]
2217 UNSPEC_PUSHFL))]
2218 ""
2219 "pushf{<imodesuffix>}"
2220 [(set_attr "type" "push")
2221 (set_attr "mode" "<MODE>")])
2222
;; Pop the flags register: the CCmode flags operand is set to
;; UNSPEC_POPFL of the popped word-mode value.  Emitted as
;; popf{<imodesuffix>}.
2223 (define_insn "@popfl<mode>1"
2224 [(set (match_operand:CC 0 "flags_reg_operand")
2225 (unspec:CC [(match_operand:W 1 "pop_operand" ">")]
2226 UNSPEC_POPFL))]
2227 ""
2228 "popf{<imodesuffix>}"
2229 [(set_attr "type" "pop")
2230 (set_attr "mode" "<MODE>")])
2231
2232 \f
2233 ;; Reload patterns to support multi-word load/store
2234 ;; with non-offsettable address.
;; Store expander (64-bit only).  Operand 0 is the memory destination,
;; operand 1 the register to store, operand 2 an early-clobbered DImode
;; scratch.  The address of the MEM is copied into the scratch, the MEM is
;; rewritten to use the scratch as its address, and the store is emitted.
2235 (define_expand "reload_noff_store"
2236 [(parallel [(match_operand 0 "memory_operand" "=m")
2237 (match_operand 1 "register_operand" "r")
2238 (match_operand:DI 2 "register_operand" "=&r")])]
2239 "TARGET_64BIT"
2240 {
2241 rtx mem = operands[0];
2242 rtx addr = XEXP (mem, 0);
2243
2244 emit_move_insn (operands[2], addr);
2245 mem = replace_equiv_address_nv (mem, operands[2]);
2246
2247 emit_insn (gen_rtx_SET (mem, operands[1]));
2248 DONE;
2249 })
2250
;; Load expander (64-bit only).  Operand 0 is the destination register,
;; operand 1 the memory source, operand 2 a DImode scratch.  The address of
;; the MEM is copied into the scratch, the MEM is rewritten to use the
;; scratch as its address, and the load is emitted.
2251 (define_expand "reload_noff_load"
2252 [(parallel [(match_operand 0 "register_operand" "=r")
2253 (match_operand 1 "memory_operand" "m")
2254 (match_operand:DI 2 "register_operand" "=r")])]
2255 "TARGET_64BIT"
2256 {
2257 rtx mem = operands[1];
2258 rtx addr = XEXP (mem, 0);
2259
2260 emit_move_insn (operands[2], addr);
2261 mem = replace_equiv_address_nv (mem, operands[2]);
2262
2263 emit_insn (gen_rtx_SET (operands[0], mem));
2264 DONE;
2265 })
2266
2267 ;; Move instructions.
2268
;; XImode move expander; requires TARGET_AVX512F and TARGET_EVEX512 and
;; delegates entirely to ix86_expand_vector_move.
2269 (define_expand "movxi"
2270 [(set (match_operand:XI 0 "nonimmediate_operand")
2271 (match_operand:XI 1 "general_operand"))]
2272 "TARGET_AVX512F && TARGET_EVEX512"
2273 "ix86_expand_vector_move (XImode, operands); DONE;")
2274
;; OImode move expander; requires TARGET_AVX and delegates entirely to
;; ix86_expand_vector_move.
2275 (define_expand "movoi"
2276 [(set (match_operand:OI 0 "nonimmediate_operand")
2277 (match_operand:OI 1 "general_operand"))]
2278 "TARGET_AVX"
2279 "ix86_expand_vector_move (OImode, operands); DONE;")
2280
;; TImode move expander, available on 64-bit targets or with SSE.  On
;; 64-bit targets the move goes through ix86_expand_move; otherwise it is
;; handled as a vector move by ix86_expand_vector_move.
2281 (define_expand "movti"
2282 [(set (match_operand:TI 0 "nonimmediate_operand")
2283 (match_operand:TI 1 "general_operand"))]
2284 "TARGET_64BIT || TARGET_SSE"
2285 {
2286 if (TARGET_64BIT)
2287 ix86_expand_move (TImode, operands);
2288 else
2289 ix86_expand_vector_move (TImode, operands);
2290 DONE;
2291 })
2292
2293 ;; This expands to what emit_move_complex would generate if we didn't
2294 ;; have a movti pattern. Having this avoids problems with reload on
2295 ;; 32-bit targets when SSE is present, but doesn't seem to be harmful
2296 ;; to have around all the time.
;; CDImode move expander, unconditionally available.  A push destination is
;; handled by emit_move_complex_push; any other destination is moved with
;; emit_move_complex_parts.
2297 (define_expand "movcdi"
2298 [(set (match_operand:CDI 0 "nonimmediate_operand")
2299 (match_operand:CDI 1 "general_operand"))]
2300 ""
2301 {
2302 if (push_operand (operands[0], CDImode))
2303 emit_move_complex_push (CDImode, operands[0], operands[1]);
2304 else
2305 emit_move_complex_parts (operands[0], operands[1]);
2306 DONE;
2307 })
2308
;; Generic move expander for every mode in the SWI1248x iterator; all work
;; is done by ix86_expand_move.
2309 (define_expand "mov<mode>"
2310 [(set (match_operand:SWI1248x 0 "nonimmediate_operand")
2311 (match_operand:SWI1248x 1 "general_operand"))]
2312 ""
2313 "ix86_expand_move (<MODE>mode, operands); DONE;")
2314
;; Post-reload load of zero into a register, implemented with xor and
;; therefore clobbering FLAGS_REG.  Regardless of <mode> the template uses
;; xor{l} on the %k0 form of the register, and the mode attribute is SI.
2315 (define_insn "*mov<mode>_xor"
2316 [(set (match_operand:SWI48 0 "register_operand" "=r")
2317 (match_operand:SWI48 1 "const0_operand"))
2318 (clobber (reg:CC FLAGS_REG))]
2319 "reload_completed"
2320 "xor{l}\t%k0, %k0"
2321 [(set_attr "type" "alu1")
2322 (set_attr "mode" "SI")
2323 (set_attr "length_immediate" "0")])
2324
;; Post-reload store of zero to memory, implemented as an `and' with the
;; zero constant (operand 1); FLAGS_REG is clobbered.  The immediate is
;; accounted for as one byte (length_immediate "1").
2325 (define_insn "*mov<mode>_and"
2326 [(set (match_operand:SWI248 0 "memory_operand" "=m")
2327 (match_operand:SWI248 1 "const0_operand"))
2328 (clobber (reg:CC FLAGS_REG))]
2329 "reload_completed"
2330 "and{<imodesuffix>}\t{%1, %0|%0, %1}"
2331 [(set_attr "type" "alu1")
2332 (set_attr "mode" "<MODE>")
2333 (set_attr "length_immediate" "1")])
2334
;; Post-reload move of -1 (constm1_operand) to a register or memory,
;; implemented as an `or' with the constant; FLAGS_REG is clobbered.  The
;; immediate is accounted for as one byte (length_immediate "1").
2335 (define_insn "*mov<mode>_or"
2336 [(set (match_operand:SWI248 0 "nonimmediate_operand" "=rm")
2337 (match_operand:SWI248 1 "constm1_operand"))
2338 (clobber (reg:CC FLAGS_REG))]
2339 "reload_completed"
2340 "or{<imodesuffix>}\t{%1, %0|%0, %1}"
2341 [(set_attr "type" "alu1")
2342 (set_attr "mode" "<MODE>")
2343 (set_attr "length_immediate" "1")])
2344
;; XImode move insn; at least one of the two operands must be a register.
;;   alternatives 0-1 (sources "C" and "BC"): type sselog1, emitted through
;;                    standard_sse_constant_opcode;
;;   alternatives 2-3 (register/memory moves): type ssemov, emitted through
;;                    ix86_output_ssemov.
;; Any other insn type is unreachable.
2345 (define_insn "*movxi_internal_avx512f"
2346 [(set (match_operand:XI 0 "nonimmediate_operand" "=v,v ,v ,m")
2347 (match_operand:XI 1 "nonimmediate_or_sse_const_operand" " C,BC,vm,v"))]
2348 "TARGET_AVX512F && TARGET_EVEX512
2349 && (register_operand (operands[0], XImode)
2350 || register_operand (operands[1], XImode))"
2351 {
2352 switch (get_attr_type (insn))
2353 {
2354 case TYPE_SSELOG1:
2355 return standard_sse_constant_opcode (insn, operands);
2356
2357 case TYPE_SSEMOV:
2358 return ix86_output_ssemov (insn, operands);
2359
2360 default:
2361 gcc_unreachable ();
2362 }
2363 }
2364 [(set_attr "type" "sselog1,sselog1,ssemov,ssemov")
2365 (set_attr "prefix" "evex")
2366 (set_attr "mode" "XI")])
2367
;; OImode move insn; at least one of the two operands must be a register.
;;   alternatives 0-1 (sources "C" and "BC"): type sselog1, emitted through
;;                    standard_sse_constant_opcode; alternative 1 is
;;                    restricted to the "avx2" isa;
;;   alternatives 2-3 (register/memory moves): type ssemov, emitted through
;;                    ix86_output_ssemov.
2368 (define_insn "*movoi_internal_avx"
2369 [(set (match_operand:OI 0 "nonimmediate_operand" "=v,v ,v ,m")
2370 (match_operand:OI 1 "nonimmediate_or_sse_const_operand" " C,BC,vm,v"))]
2371 "TARGET_AVX
2372 && (register_operand (operands[0], OImode)
2373 || register_operand (operands[1], OImode))"
2374 {
2375 switch (get_attr_type (insn))
2376 {
2377 case TYPE_SSELOG1:
2378 return standard_sse_constant_opcode (insn, operands);
2379
2380 case TYPE_SSEMOV:
2381 return ix86_output_ssemov (insn, operands);
2382
2383 default:
2384 gcc_unreachable ();
2385 }
2386 }
2387 [(set_attr "isa" "*,avx2,*,*")
2388 (set_attr "type" "sselog1,sselog1,ssemov,ssemov")
2389 (set_attr "prefix" "vex")
2390 (set_attr "mode" "OI")])
2391
;; TImode move insn with eight alternatives (0-7).  It matches either on a
;; 64-bit target when the move is not memory-to-memory, or with SSE when the
;; source satisfies nonimmediate_or_sse_const_operand and at least one
;; operand is a register.
;;   alternatives 0,1,6,7: isa "x64", type "multi", output "#" (split later);
;;   alternatives 2,3:     type "sselog1" (standard_sse_constant_opcode);
;;                         alternative 3 additionally needs isa "sse2";
;;   alternatives 4,5:     type "ssemov" (ix86_output_ssemov).
;; The "mode" attribute is DI for alternatives 0-1; otherwise TI under AVX,
;; V4SF without SSE2 or when optimizing the function for size, V4SF for
;; alternative 5 under TARGET_SSE_TYPELESS_STORES, and TI in all remaining
;; cases.  Alternatives 6 and 7 are preferred for speed only when
;; TARGET_INTER_UNIT_MOVES_FROM_VEC / TARGET_INTER_UNIT_MOVES_TO_VEC hold.
2392 (define_insn "*movti_internal"
2393 [(set (match_operand:TI 0 "nonimmediate_operand" "=!r ,o ,v,v ,v ,m,?jc,?Yd")
2394 (match_operand:TI 1 "general_operand" "riFo,re,C,BC,vm,v,Yd,jc"))]
2395 "(TARGET_64BIT
2396 && !(MEM_P (operands[0]) && MEM_P (operands[1])))
2397 || (TARGET_SSE
2398 && nonimmediate_or_sse_const_operand (operands[1], TImode)
2399 && (register_operand (operands[0], TImode)
2400 || register_operand (operands[1], TImode)))"
2401 {
2402 switch (get_attr_type (insn))
2403 {
2404 case TYPE_MULTI:
2405 return "#";
2406
2407 case TYPE_SSELOG1:
2408 return standard_sse_constant_opcode (insn, operands);
2409
2410 case TYPE_SSEMOV:
2411 return ix86_output_ssemov (insn, operands);
2412
2413 default:
2414 gcc_unreachable ();
2415 }
2416 }
2417 [(set (attr "isa")
2418 (cond [(eq_attr "alternative" "0,1,6,7")
2419 (const_string "x64")
2420 (eq_attr "alternative" "3")
2421 (const_string "sse2")
2422 ]
2423 (const_string "*")))
2424 (set (attr "type")
2425 (cond [(eq_attr "alternative" "0,1,6,7")
2426 (const_string "multi")
2427 (eq_attr "alternative" "2,3")
2428 (const_string "sselog1")
2429 ]
2430 (const_string "ssemov")))
2431 (set (attr "prefix")
2432 (if_then_else (eq_attr "type" "sselog1,ssemov")
2433 (const_string "maybe_vex")
2434 (const_string "orig")))
2435 (set (attr "mode")
2436 (cond [(eq_attr "alternative" "0,1")
2437 (const_string "DI")
2438 (match_test "TARGET_AVX")
2439 (const_string "TI")
2440 (ior (not (match_test "TARGET_SSE2"))
2441 (match_test "optimize_function_for_size_p (cfun)"))
2442 (const_string "V4SF")
2443 (and (eq_attr "alternative" "5")
2444 (match_test "TARGET_SSE_TYPELESS_STORES"))
2445 (const_string "V4SF")
2446 ]
2447 (const_string "TI")))
2448 (set (attr "preferred_for_speed")
2449 (cond [(eq_attr "alternative" "6")
2450 (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC")
2451 (eq_attr "alternative" "7")
2452 (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
2453 ]
2454 (symbol_ref "true")))])
2455
;; After reload, on 64-bit targets with SSE4.1: TImode move from a general
;; register operand into an SSE register.  The C fragment first emits a move
;; of the low DImode part; the generated pattern then merges the high DImode
;; part of the source (as a vec_duplicate) into the V2DI view of the
;; destination using vec_merge mask 2.
2456 (define_split
2457 [(set (match_operand:TI 0 "sse_reg_operand")
2458 (match_operand:TI 1 "general_reg_operand"))]
2459 "TARGET_64BIT && TARGET_SSE4_1
2460 && reload_completed"
2461 [(set (match_dup 2)
2462 (vec_merge:V2DI
2463 (vec_duplicate:V2DI (match_dup 3))
2464 (match_dup 2)
2465 (const_int 2)))]
2466 {
2467 operands[2] = lowpart_subreg (V2DImode, operands[0], TImode);
2468 operands[3] = gen_highpart (DImode, operands[1]);
2469
2470 emit_move_insn (gen_lowpart (DImode, operands[0]),
2471 gen_lowpart (DImode, operands[1]));
2472 })
2473
;; DImode move insn with 28 alternatives (0-27).  Memory-to-memory moves are
;; rejected, and ix86_hardreg_mov_ok must accept the operand pair.
;; Alternative groups, as assigned by the "isa" and "type" attributes:
;;   0,1,17,18      isa nox64, type multi  -> output "#" (split later)
;;   2,3,4,5        isa x64; type lea when the destination is a register and
;;                  the source is a pic_32bit_operand, otherwise imov
;;   6              type mmx     -> pxor
;;   7-11           type mmxmov  -> movq, or movd when the assembler lacks
;;                  inter-unit movq and a general register is involved
;;                  (10,11 are x64 only)
;;   12             type sselog1 -> standard_sse_constant_opcode
;;   13-16,19,20    type ssemov  -> ix86_output_ssemov (19,20: isa x64_sse2)
;;   21,22          isa sse2, type ssecvt -> movq2dq / movdq2q
;;   23-26          type mskmov  -> kmovq (23,25 are x64 only)
;;   27             type msklog  -> kxorq for 0, kxnorq for -1
;; For imov: mode SI (alternative 2) emits mov{l}; alternative 4 emits
;; movabs{q} and has modrm "0" and an 8-byte immediate; otherwise lea{q} is
;; used when ix86_use_lea_for_mov says so, else mov{q}.
;; The "enabled" attribute disables alternative 15 and enables alternative 16
;; exactly when TARGET_STV && TARGET_SSE2.
2474 (define_insn "*movdi_internal"
2475 [(set (match_operand:DI 0 "nonimmediate_operand"
2476 "=r ,o ,r,r ,r,m ,*y,*y,?*y,?m,?r,?*y,?Yv,?v,?v,m ,m,?jc,?*Yd,?r,?v,?*y,?*x,*k,*k ,*r,*m,*k")
2477 (match_operand:DI 1 "general_operand"
2478 "riFo,riF,Z,rem,i,re,C ,*y,Bk ,*y,*y,r ,C ,?v,Bk,?v,v,*Yd,jc ,?v,r ,*x ,*y ,*r,*kBk,*k,*k,CBC"))]
2479 "!(MEM_P (operands[0]) && MEM_P (operands[1]))
2480 && ix86_hardreg_mov_ok (operands[0], operands[1])"
2481 {
2482 switch (get_attr_type (insn))
2483 {
2484 case TYPE_MSKMOV:
2485 return "kmovq\t{%1, %0|%0, %1}";
2486
2487 case TYPE_MSKLOG:
2488 if (operands[1] == const0_rtx)
2489 return "kxorq\t%0, %0, %0";
2490 else if (operands[1] == constm1_rtx)
2491 return "kxnorq\t%0, %0, %0";
2492 gcc_unreachable ();
2493
2494 case TYPE_MULTI:
2495 return "#";
2496
2497 case TYPE_MMX:
2498 return "pxor\t%0, %0";
2499
2500 case TYPE_MMXMOV:
2501 /* Handle broken assemblers that require movd instead of movq. */
2502 if (!HAVE_AS_IX86_INTERUNIT_MOVQ
2503 && (GENERAL_REG_P (operands[0]) || GENERAL_REG_P (operands[1])))
2504 return "movd\t{%1, %0|%0, %1}";
2505 return "movq\t{%1, %0|%0, %1}";
2506
2507 case TYPE_SSELOG1:
2508 return standard_sse_constant_opcode (insn, operands);
2509
2510 case TYPE_SSEMOV:
2511 return ix86_output_ssemov (insn, operands);
2512
2513 case TYPE_SSECVT:
2514 if (SSE_REG_P (operands[0]))
2515 return "movq2dq\t{%1, %0|%0, %1}";
2516 else
2517 return "movdq2q\t{%1, %0|%0, %1}";
2518
2519 case TYPE_LEA:
2520 return "lea{q}\t{%E1, %0|%0, %E1}";
2521
2522 case TYPE_IMOV:
2523 gcc_assert (!flag_pic || LEGITIMATE_PIC_OPERAND_P (operands[1]));
2524 if (get_attr_mode (insn) == MODE_SI)
2525 return "mov{l}\t{%k1, %k0|%k0, %k1}";
2526 else if (which_alternative == 4)
2527 return "movabs{q}\t{%1, %0|%0, %1}";
2528 else if (ix86_use_lea_for_mov (insn, operands))
2529 return "lea{q}\t{%E1, %0|%0, %E1}";
2530 else
2531 return "mov{q}\t{%1, %0|%0, %1}";
2532
2533 default:
2534 gcc_unreachable ();
2535 }
2536 }
2537 [(set (attr "isa")
2538 (cond [(eq_attr "alternative" "0,1,17,18")
2539 (const_string "nox64")
2540 (eq_attr "alternative" "2,3,4,5,10,11,23,25")
2541 (const_string "x64")
2542 (eq_attr "alternative" "19,20")
2543 (const_string "x64_sse2")
2544 (eq_attr "alternative" "21,22")
2545 (const_string "sse2")
2546 ]
2547 (const_string "*")))
2548 (set (attr "type")
2549 (cond [(eq_attr "alternative" "0,1,17,18")
2550 (const_string "multi")
2551 (eq_attr "alternative" "6")
2552 (const_string "mmx")
2553 (eq_attr "alternative" "7,8,9,10,11")
2554 (const_string "mmxmov")
2555 (eq_attr "alternative" "12")
2556 (const_string "sselog1")
2557 (eq_attr "alternative" "13,14,15,16,19,20")
2558 (const_string "ssemov")
2559 (eq_attr "alternative" "21,22")
2560 (const_string "ssecvt")
2561 (eq_attr "alternative" "23,24,25,26")
2562 (const_string "mskmov")
2563 (eq_attr "alternative" "27")
2564 (const_string "msklog")
2565 (and (match_operand 0 "register_operand")
2566 (match_operand 1 "pic_32bit_operand"))
2567 (const_string "lea")
2568 ]
2569 (const_string "imov")))
2570 (set (attr "modrm")
2571 (if_then_else
2572 (and (eq_attr "alternative" "4") (eq_attr "type" "imov"))
2573 (const_string "0")
2574 (const_string "*")))
2575 (set (attr "length_immediate")
2576 (if_then_else
2577 (and (eq_attr "alternative" "4") (eq_attr "type" "imov"))
2578 (const_string "8")
2579 (const_string "*")))
2580 (set (attr "prefix_rex")
2581 (if_then_else
2582 (eq_attr "alternative" "10,11,19,20")
2583 (const_string "1")
2584 (const_string "*")))
2585 (set (attr "prefix")
2586 (if_then_else (eq_attr "type" "sselog1,ssemov")
2587 (const_string "maybe_vex")
2588 (const_string "orig")))
2589 (set (attr "prefix_data16")
2590 (if_then_else (and (eq_attr "type" "ssemov") (eq_attr "mode" "DI"))
2591 (const_string "1")
2592 (const_string "*")))
2593 (set (attr "mode")
2594 (cond [(eq_attr "alternative" "2")
2595 (const_string "SI")
2596 (eq_attr "alternative" "12")
2597 (cond [(match_test "TARGET_AVX")
2598 (const_string "TI")
2599 (ior (not (match_test "TARGET_SSE2"))
2600 (match_test "optimize_function_for_size_p (cfun)"))
2601 (const_string "V4SF")
2602 ]
2603 (const_string "TI"))
2604 (eq_attr "alternative" "13")
2605 (cond [(match_test "TARGET_AVX512VL")
2606 (const_string "TI")
2607 (match_test "TARGET_AVX512F")
2608 (const_string "DF")
2609 (match_test "TARGET_AVX")
2610 (const_string "TI")
2611 (ior (not (match_test "TARGET_SSE2"))
2612 (match_test "optimize_function_for_size_p (cfun)"))
2613 (const_string "V4SF")
2614 ]
2615 (const_string "TI"))
2616
2617 (and (eq_attr "alternative" "14,15,16")
2618 (not (match_test "TARGET_SSE2")))
2619 (const_string "V2SF")
2620 ]
2621 (const_string "DI")))
2622 (set (attr "preferred_for_speed")
2623 (cond [(eq_attr "alternative" "10,17,19")
2624 (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC")
2625 (eq_attr "alternative" "11,18,20")
2626 (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
2627 ]
2628 (symbol_ref "true")))
2629 (set (attr "enabled")
2630 (cond [(eq_attr "alternative" "15")
2631 (if_then_else
2632 (match_test "TARGET_STV && TARGET_SSE2")
2633 (symbol_ref "false")
2634 (const_string "*"))
2635 (eq_attr "alternative" "16")
2636 (if_then_else
2637 (match_test "TARGET_STV && TARGET_SSE2")
2638 (symbol_ref "true")
2639 (symbol_ref "false"))
2640 ]
2641 (const_string "*")))])
2642
;; After reload with SSE4.1: double-word (<DWI>) move from an SSE register
;; into a general register operand.  The C fragment emits a move of the low
;; <MODE> part; the generated pattern then sets the high part of the
;; destination from element 1 (vec_select) of the <ssevecmode> view of the
;; source.
2643 (define_split
2644 [(set (match_operand:<DWI> 0 "general_reg_operand")
2645 (match_operand:<DWI> 1 "sse_reg_operand"))]
2646 "TARGET_SSE4_1
2647 && reload_completed"
2648 [(set (match_dup 2)
2649 (vec_select:DWIH
2650 (match_dup 3)
2651 (parallel [(const_int 1)])))]
2652 {
2653 operands[2] = gen_highpart (<MODE>mode, operands[0]);
2654 operands[3] = lowpart_subreg (<ssevecmode>mode, operands[1], <DWI>mode);
2655
2656 emit_move_insn (gen_lowpart (<MODE>mode, operands[0]),
2657 gen_lowpart (<MODE>mode, operands[1]));
2658 })
2659
;; After reload, a double-word move whose operands satisfy
;; nonimmediate_gr_operand / general_gr_operand is expanded by
;; ix86_split_long_move.  The (const_int 0) template is a dummy because the
;; C fragment ends with DONE.
2660 (define_split
2661 [(set (match_operand:DWI 0 "nonimmediate_gr_operand")
2662 (match_operand:DWI 1 "general_gr_operand"))]
2663 "reload_completed"
2664 [(const_int 0)]
2665 "ix86_split_long_move (operands); DONE;")
2666
;; After reload, on non-64-bit targets with SSE4.1: DImode move from a
;; general register operand into an SSE register.  The C fragment emits a
;; move of the low SImode part; the generated pattern then merges the high
;; SImode part of the source (as a vec_duplicate) into the V4SI view of the
;; destination using vec_merge mask 2.
2667 (define_split
2668 [(set (match_operand:DI 0 "sse_reg_operand")
2669 (match_operand:DI 1 "general_reg_operand"))]
2670 "!TARGET_64BIT && TARGET_SSE4_1
2671 && reload_completed"
2672 [(set (match_dup 2)
2673 (vec_merge:V4SI
2674 (vec_duplicate:V4SI (match_dup 3))
2675 (match_dup 2)
2676 (const_int 2)))]
2677 {
2678 operands[2] = lowpart_subreg (V4SImode, operands[0], DImode);
2679 operands[3] = gen_highpart (SImode, operands[1]);
2680
2681 emit_move_insn (gen_lowpart (SImode, operands[0]),
2682 gen_lowpart (SImode, operands[1]));
2683 })
2684
2685 ;; movabsq $0x0012345678000000, %rax is longer
2686 ;; than movl $0x12345678, %eax; shlq $24, %rax.
;; Size optimization for 64-bit targets.  It applies to a DImode constant
;; load into a legacy integer register when the constant is neither a valid
;; x86_64 immediate nor a valid zero-extended immediate, when the constant
;; with its trailing zero bits shifted out fits in the low 32 bits, and when
;; FLAGS_REG is dead (the shift clobbers it).  The replacement loads the
;; shifted-down constant and then shifts the register left by the number of
;; trailing zeros.  The transformation FAILs if the shifted-down constant
;; is an endbr immediate.
2687 (define_peephole2
2688 [(set (match_operand:DI 0 "register_operand")
2689 (match_operand:DI 1 "const_int_operand"))]
2690 "TARGET_64BIT
2691 && optimize_insn_for_size_p ()
2692 && LEGACY_INT_REG_P (operands[0])
2693 && !x86_64_immediate_operand (operands[1], DImode)
2694 && !x86_64_zext_immediate_operand (operands[1], DImode)
2695 && !((UINTVAL (operands[1]) >> ctz_hwi (UINTVAL (operands[1])))
2696 & ~(HOST_WIDE_INT) 0xffffffff)
2697 && peep2_regno_dead_p (0, FLAGS_REG)"
2698 [(set (match_dup 0) (match_dup 1))
2699 (parallel [(set (match_dup 0) (ashift:DI (match_dup 0) (match_dup 2)))
2700 (clobber (reg:CC FLAGS_REG))])]
2701 {
2702 int shift = ctz_hwi (UINTVAL (operands[1]));
2703 rtx op1 = gen_int_mode (UINTVAL (operands[1]) >> shift, DImode);
2704 if (ix86_endbr_immediate_operand (op1, VOIDmode))
2705 FAIL;
2706 operands[1] = op1;
2707 operands[2] = gen_int_mode (shift, QImode);
2708 })
2709
;; SImode move insn with 18 alternatives (0-17).  Memory-to-memory moves are
;; rejected, and ix86_hardreg_mov_ok must accept the operand pair.
;; Alternative groups, as assigned by the "type" attribute:
;;   0,1     type lea when the destination is a register and the source is a
;;           pic_32bit_operand, otherwise imov (lea{l} is also used for imov
;;           when ix86_use_lea_for_mov says so)
;;   2       type mmx     -> pxor
;;   3-7     type mmxmov  -> movq for mode DI (alternative 3), movd for SI
;;   8       type sselog1 -> standard_sse_constant_opcode
;;   9-13    type ssemov  -> ix86_output_ssemov (12,13 need isa sse2)
;;   14-16   type mskmov  -> kmovd
;;   17      type msklog  -> kxord for 0, kxnord for -1
;; Alternatives 6/12 and 7/13 are preferred for speed only when
;; TARGET_INTER_UNIT_MOVES_FROM_VEC / TARGET_INTER_UNIT_MOVES_TO_VEC hold.
2710 (define_insn "*movsi_internal"
2711 [(set (match_operand:SI 0 "nonimmediate_operand"
2712 "=r,m ,*y,*y,?*y,?m,?r,?*y,?Yv,?v,?v,m ,?r,?v,*k,*k ,*rm,*k")
2713 (match_operand:SI 1 "general_operand"
2714 "g ,re,C ,*y,Bk ,*y,*y,r ,C ,?v,Bk,?v,?v,r ,*r,*kBk,*k ,CBC"))]
2715 "!(MEM_P (operands[0]) && MEM_P (operands[1]))
2716 && ix86_hardreg_mov_ok (operands[0], operands[1])"
2717 {
2718 switch (get_attr_type (insn))
2719 {
2720 case TYPE_SSELOG1:
2721 return standard_sse_constant_opcode (insn, operands);
2722
2723 case TYPE_MSKMOV:
2724 return "kmovd\t{%1, %0|%0, %1}";
2725
2726 case TYPE_MSKLOG:
2727 if (operands[1] == const0_rtx)
2728 return "kxord\t%0, %0, %0";
2729 else if (operands[1] == constm1_rtx)
2730 return "kxnord\t%0, %0, %0";
2731 gcc_unreachable ();
2732
2733 case TYPE_SSEMOV:
2734 return ix86_output_ssemov (insn, operands);
2735
2736 case TYPE_MMX:
2737 return "pxor\t%0, %0";
2738
2739 case TYPE_MMXMOV:
2740 switch (get_attr_mode (insn))
2741 {
2742 case MODE_DI:
2743 return "movq\t{%1, %0|%0, %1}";
2744 case MODE_SI:
2745 return "movd\t{%1, %0|%0, %1}";
2746
2747 default:
2748 gcc_unreachable ();
2749 }
2750
2751 case TYPE_LEA:
2752 return "lea{l}\t{%E1, %0|%0, %E1}";
2753
2754 case TYPE_IMOV:
2755 gcc_assert (!flag_pic || LEGITIMATE_PIC_OPERAND_P (operands[1]));
2756 if (ix86_use_lea_for_mov (insn, operands))
2757 return "lea{l}\t{%E1, %0|%0, %E1}";
2758 else
2759 return "mov{l}\t{%1, %0|%0, %1}";
2760
2761 default:
2762 gcc_unreachable ();
2763 }
2764 }
2765 [(set (attr "isa")
2766 (cond [(eq_attr "alternative" "12,13")
2767 (const_string "sse2")
2768 ]
2769 (const_string "*")))
2770 (set (attr "type")
2771 (cond [(eq_attr "alternative" "2")
2772 (const_string "mmx")
2773 (eq_attr "alternative" "3,4,5,6,7")
2774 (const_string "mmxmov")
2775 (eq_attr "alternative" "8")
2776 (const_string "sselog1")
2777 (eq_attr "alternative" "9,10,11,12,13")
2778 (const_string "ssemov")
2779 (eq_attr "alternative" "14,15,16")
2780 (const_string "mskmov")
2781 (eq_attr "alternative" "17")
2782 (const_string "msklog")
2783 (and (match_operand 0 "register_operand")
2784 (match_operand 1 "pic_32bit_operand"))
2785 (const_string "lea")
2786 ]
2787 (const_string "imov")))
2788 (set (attr "prefix")
2789 (if_then_else (eq_attr "type" "sselog1,ssemov")
2790 (const_string "maybe_vex")
2791 (const_string "orig")))
2792 (set (attr "prefix_data16")
2793 (if_then_else (and (eq_attr "type" "ssemov") (eq_attr "mode" "SI"))
2794 (const_string "1")
2795 (const_string "*")))
2796 (set (attr "mode")
2797 (cond [(eq_attr "alternative" "2,3")
2798 (const_string "DI")
2799 (eq_attr "alternative" "8")
2800 (cond [(match_test "TARGET_AVX")
2801 (const_string "TI")
2802 (ior (not (match_test "TARGET_SSE2"))
2803 (match_test "optimize_function_for_size_p (cfun)"))
2804 (const_string "V4SF")
2805 ]
2806 (const_string "TI"))
2807 (eq_attr "alternative" "9")
2808 (cond [(match_test "TARGET_AVX512VL")
2809 (const_string "TI")
2810 (match_test "TARGET_AVX512F")
2811 (const_string "SF")
2812 (match_test "TARGET_AVX")
2813 (const_string "TI")
2814 (ior (not (match_test "TARGET_SSE2"))
2815 (match_test "optimize_function_for_size_p (cfun)"))
2816 (const_string "V4SF")
2817 ]
2818 (const_string "TI"))
2819
2820 (and (eq_attr "alternative" "10,11")
2821 (not (match_test "TARGET_SSE2")))
2822 (const_string "SF")
2823 ]
2824 (const_string "SI")))
2825 (set (attr "preferred_for_speed")
2826 (cond [(eq_attr "alternative" "6,12")
2827 (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC")
2828 (eq_attr "alternative" "7,13")
2829 (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
2830 ]
2831 (symbol_ref "true")))])
2832
2833 ;; With -Oz, transform mov $imm,reg to the shorter push $imm; pop reg.
;; Conditions: the insn is optimized for size with optimize_size > 1, the
;; constant is non-zero and lies in [-128, 127], the red zone is not in use,
;; and the destination is not the stack pointer.  If the destination is not
;; already in word_mode it is replaced by the word_mode register with the
;; same number.  Operand 2 is the push slot (MEM of PRE_DEC stack pointer)
;; and operand 3 the pop slot (MEM of POST_INC stack pointer).
2834 (define_peephole2
2835 [(set (match_operand:SWI248 0 "general_reg_operand")
2836 (match_operand:SWI248 1 "const_int_operand"))]
2837 "optimize_insn_for_size_p () && optimize_size > 1
2838 && operands[1] != const0_rtx
2839 && IN_RANGE (INTVAL (operands[1]), -128, 127)
2840 && !ix86_red_zone_used
2841 && REGNO (operands[0]) != SP_REG"
2842 [(set (match_dup 2) (match_dup 1))
2843 (set (match_dup 0) (match_dup 3))]
2844 {
2845 if (GET_MODE (operands[0]) != word_mode)
2846 operands[0] = gen_rtx_REG (word_mode, REGNO (operands[0]));
2847
2848 operands[2] = gen_rtx_MEM (word_mode,
2849 gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx));
2850 operands[3] = gen_rtx_MEM (word_mode,
2851 gen_rtx_POST_INC (Pmode, stack_pointer_rtx));
2852 })
2853
2854 ;; With -Oz, transform mov $0,mem to the shorter and $0,mem.
2855 ;; Likewise, transform mov $-1,mem to the shorter or $-1,mem.
;; Applies only to stores of 0 or -1 when optimizing the insn for size with
;; optimize_size > 1 and FLAGS_REG is dead.  The replacement is the same set
;; wrapped in a parallel with a FLAGS_REG clobber, which is the shape of the
;; "*mov<mode>_and" and "*mov<mode>_or" insns.
2856 (define_peephole2
2857 [(set (match_operand:SWI248 0 "memory_operand")
2858 (match_operand:SWI248 1 "const_int_operand"))]
2859 "(operands[1] == const0_rtx || operands[1] == constm1_rtx)
2860 && optimize_insn_for_size_p () && optimize_size > 1
2861 && peep2_regno_dead_p (0, FLAGS_REG)"
2862 [(parallel [(set (match_dup 0) (match_dup 1))
2863 (clobber (reg:CC FLAGS_REG))])])
2864
;; HImode move.  Alternatives 0-3 are the general register, immediate and
;; memory forms; alternatives 4-8 have a mask (k) register operand;
;; alternatives 9-15 have an SSE register operand.
(define_insn "*movhi_internal"
  [(set (match_operand:HI 0 "nonimmediate_operand"
         "=r,r,r,m ,*k,*k ,r ,m ,*k ,?r,?*v,*Yv,*v,*v,jm,m")
        (match_operand:HI 1 "general_operand"
         "r ,n,m,rn,r ,*km,*k,*k,CBC,*v,r ,C ,*v,m ,*x,*v"))]
  "!(MEM_P (operands[0]) && MEM_P (operands[1]))
   && ix86_hardreg_mov_ok (operands[0], operands[1])"
{
  /* The output template is selected from the computed "type" attribute.  */
  switch (get_attr_type (insn))
    {
    case TYPE_SSEMOV:
      return ix86_output_ssemov (insn, operands);

    case TYPE_SSELOG1:
      if (satisfies_constraint_C (operands[1]))
        return standard_sse_constant_opcode (insn, operands);

      /* Insert into / extract from word 0 of the SSE register.  */
      return (SSE_REG_P (operands[0])
              ? "%vpinsrw\t{$0, %1, %d0|%d0, %1, 0}"
              : "%vpextrw\t{$0, %1, %0|%0, %1, 0}");

    case TYPE_MSKMOV:
      if (which_alternative == 4)
        return "kmovw\t{%k1, %0|%0, %k1}";
      if (which_alternative == 6)
        return "kmovw\t{%1, %k0|%k0, %1}";
      if (which_alternative == 5 || which_alternative == 7)
        return "kmovw\t{%1, %0|%0, %1}";
      gcc_unreachable ();

    case TYPE_MSKLOG:
      /* Only the constants 0 and -1 are expected here.  */
      if (operands[1] == const0_rtx)
        return "kxorw\t%0, %0, %0";
      if (operands[1] == constm1_rtx)
        return "kxnorw\t%0, %0, %0";
      gcc_unreachable ();

    case TYPE_IMOVX:
      /* movzwl is faster than movw on p2 due to partial word stalls,
         though not as fast as an aligned movl.  */
      return "movz{wl|x}\t{%1, %k0|%k0, %1}";

    default:
      return (get_attr_mode (insn) == MODE_SI
              ? "mov{l}\t{%k1, %k0|%k0, %k1}"
              : "mov{w}\t{%1, %0|%0, %1}");
    }
}
  [(set (attr "isa")
     (cond [(eq_attr "alternative" "9,10,11,12,13")
              (const_string "sse2")
            (eq_attr "alternative" "14")
              (const_string "sse4_noavx")
            (eq_attr "alternative" "15")
              (const_string "avx")
           ]
           (const_string "*")))
   (set (attr "addr")
     (cond [(eq_attr "alternative" "14")
              (const_string "gpr16")
           ]
           (const_string "*")))
   (set (attr "type")
     (cond [(eq_attr "alternative" "4,5,6,7")
              (const_string "mskmov")
            (eq_attr "alternative" "8")
              (const_string "msklog")
            (eq_attr "alternative" "13,14,15")
              (if_then_else (match_test "TARGET_AVX512FP16")
                (const_string "ssemov")
                (const_string "sselog1"))
            (eq_attr "alternative" "11")
              (const_string "sselog1")
            (eq_attr "alternative" "9,10,12")
              (const_string "ssemov")
            (match_test "optimize_function_for_size_p (cfun)")
              (const_string "imov")
            (and (eq_attr "alternative" "0")
                 (ior (not (match_test "TARGET_PARTIAL_REG_STALL"))
                      (not (match_test "TARGET_HIMODE_MATH"))))
              (const_string "imov")
            (and (eq_attr "alternative" "1,2")
                 (match_operand:HI 1 "aligned_operand"))
              (const_string "imov")
            (and (match_test "TARGET_MOVX")
                 (eq_attr "alternative" "0,2"))
              (const_string "imovx")
           ]
           (const_string "imov")))
   (set (attr "prefix")
     (cond [(eq_attr "alternative" "4,5,6,7,8")
              (const_string "vex")
            (eq_attr "alternative" "9,10,11,12,13,14,15")
              (const_string "maybe_evex")
           ]
           (const_string "orig")))
   (set (attr "mode")
     (cond [(eq_attr "alternative" "9,10")
              (if_then_else (match_test "TARGET_AVX512FP16")
                (const_string "HI")
                (const_string "SI"))
            (eq_attr "alternative" "13,14,15")
              (if_then_else (match_test "TARGET_AVX512FP16")
                (const_string "HI")
                (const_string "TI"))
            (eq_attr "alternative" "11")
              (cond [(match_test "TARGET_AVX")
                       (const_string "TI")
                     (ior (not (match_test "TARGET_SSE2"))
                          (match_test "optimize_function_for_size_p (cfun)"))
                       (const_string "V4SF")
                    ]
                    (const_string "TI"))
            (eq_attr "alternative" "12")
              (cond [(match_test "TARGET_AVX512VL")
                       (const_string "TI")
                     (match_test "TARGET_AVX512FP16")
                       (const_string "HF")
                     (match_test "TARGET_AVX512F")
                       (const_string "SF")
                     (match_test "TARGET_AVX")
                       (const_string "TI")
                     (ior (not (match_test "TARGET_SSE2"))
                          (match_test "optimize_function_for_size_p (cfun)"))
                       (const_string "V4SF")
                    ]
                    (const_string "TI"))
            (eq_attr "type" "imovx")
              (const_string "SI")
            (and (eq_attr "alternative" "1,2")
                 (match_operand:HI 1 "aligned_operand"))
              (const_string "SI")
            (and (eq_attr "alternative" "0")
                 (ior (not (match_test "TARGET_PARTIAL_REG_STALL"))
                      (not (match_test "TARGET_HIMODE_MATH"))))
              (const_string "SI")
           ]
           (const_string "HI")))
   (set (attr "preferred_for_speed")
     (cond [(eq_attr "alternative" "9")
              (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC")
            (eq_attr "alternative" "10")
              (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
           ]
           (symbol_ref "true")))])
3016
;; Choosing between a full-sized (SImode) move and a QImode move is
;; delicate.  For Q_REG -> Q_REG moves the full size is used only on
;; partial register dependency machines (such as AMD Athlon), where QImode
;; moves create an extra dependency, and on partial register stall machines
;; that don't use QImode patterns (where a QImode move causes a stall on
;; the next instruction).
;;
;; Loads of a Q_REG into a NONQ_REG use full-sized moves, except on partial
;; register stall machines, where QImode instructions are used because a
;; partial register stall can be caused there.  Then movzx is used.

(define_insn "*movqi_internal"
  [(set (match_operand:QI 0 "nonimmediate_operand"
         "=Q,R,r,q,q,r,r ,?r,m ,*k,*k,*r,*m,*k,*k,*k")
        (match_operand:QI 1 "general_operand"
         "Q ,R,r,n,m,q,rn, m,qn,*r,*k,*k,*k,*m,C,BC"))]
  "!(MEM_P (operands[0]) && MEM_P (operands[1]))
   && ix86_hardreg_mov_ok (operands[0], operands[1])"
{
  switch (get_attr_type (insn))
    {
    case TYPE_IMOVX:
      gcc_assert (ANY_QI_REG_P (operands[1]) || MEM_P (operands[1]));
      return "movz{bl|x}\t{%1, %k0|%k0, %1}";

    case TYPE_MSKMOV:
      {
        /* Pick the operand layout first; the kmov size suffix is
           substituted afterwards from the insn's mode attribute.  */
        const char *tmpl;

        if (which_alternative == 9)
          tmpl = "kmov%s\t{%%k1, %%0|%%0, %%k1}";
        else if (which_alternative == 11)
          tmpl = "kmov%s\t{%%1, %%k0|%%k0, %%1}";
        else if (which_alternative == 10
                 || which_alternative == 12
                 || which_alternative == 13)
          {
            /* Alternatives 12 and 13 must only be reached with
               AVX512DQ enabled.  */
            if (which_alternative != 10)
              gcc_assert (TARGET_AVX512DQ);
            tmpl = "kmov%s\t{%%1, %%0|%%0, %%1}";
          }
        else
          gcc_unreachable ();

        char text[128];
        snprintf (text, sizeof (text), tmpl,
                  get_attr_mode (insn) == MODE_HI ? "w" : "b");
        output_asm_insn (text, operands);
        return "";
      }

    case TYPE_MSKLOG:
      if (operands[1] == const0_rtx)
        return (get_attr_mode (insn) == MODE_HI
                ? "kxorw\t%0, %0, %0"
                : "kxorb\t%0, %0, %0");
      if (operands[1] == constm1_rtx)
        {
          gcc_assert (TARGET_AVX512DQ);
          return "kxnorb\t%0, %0, %0";
        }
      gcc_unreachable ();

    default:
      return (get_attr_mode (insn) == MODE_SI
              ? "mov{l}\t{%k1, %k0|%k0, %k1}"
              : "mov{b}\t{%1, %0|%0, %1}");
    }
}
  [(set (attr "isa")
     (cond [(eq_attr "alternative" "1,2")
              (const_string "x64")
            (eq_attr "alternative" "12,13,15")
              (const_string "avx512dq")
           ]
           (const_string "*")))
   (set (attr "type")
     (cond [(eq_attr "alternative" "9,10,11,12,13")
              (const_string "mskmov")
            (eq_attr "alternative" "14,15")
              (const_string "msklog")
            (and (eq_attr "alternative" "7")
                 (not (match_operand:QI 1 "aligned_operand")))
              (const_string "imovx")
            (match_test "optimize_function_for_size_p (cfun)")
              (const_string "imov")
            (and (eq_attr "alternative" "5")
                 (ior (not (match_test "TARGET_PARTIAL_REG_STALL"))
                      (not (match_test "TARGET_QIMODE_MATH"))))
              (const_string "imov")
            (eq_attr "alternative" "5,7")
              (const_string "imovx")
            (and (match_test "TARGET_MOVX")
                 (eq_attr "alternative" "4"))
              (const_string "imovx")
           ]
           (const_string "imov")))
   (set (attr "prefix")
     (cond [(eq_attr "alternative" "9,10,11,12,13,14,15")
              (const_string "vex")
           ]
           (const_string "orig")))
   (set (attr "mode")
     (cond [(eq_attr "alternative" "5,6,7")
              (const_string "SI")
            (eq_attr "alternative" "8")
              (const_string "QI")
            (and (eq_attr "alternative" "9,10,11,14")
                 (not (match_test "TARGET_AVX512DQ")))
              (const_string "HI")
            (eq_attr "type" "imovx")
              (const_string "SI")
            ;; For -Os, 8-bit immediates are always shorter than 32-bit
            ;; ones.
            (and (eq_attr "type" "imov")
                 (and (eq_attr "alternative" "3")
                      (match_test "optimize_function_for_size_p (cfun)")))
              (const_string "QI")
            ;; For -Os, movl where one or both operands are NON_Q_REGS
            ;; and both are LEGACY_REGS is shorter than movb.
            ;; Otherwise movb and movl sizes are the same, so decide purely
            ;; based on speed factors.
            (and (eq_attr "type" "imov")
                 (and (eq_attr "alternative" "1")
                      (match_test "optimize_function_for_size_p (cfun)")))
              (const_string "SI")
            (and (eq_attr "type" "imov")
                 (and (eq_attr "alternative" "0,1,2,3")
                      (and (match_test "TARGET_PARTIAL_REG_DEPENDENCY")
                           (not (match_test "TARGET_PARTIAL_REG_STALL")))))
              (const_string "SI")
            ;; Avoid partial register stalls when not using QImode arithmetic
            (and (eq_attr "type" "imov")
                 (and (eq_attr "alternative" "0,1,2,3")
                      (and (match_test "TARGET_PARTIAL_REG_STALL")
                           (not (match_test "TARGET_QIMODE_MATH")))))
              (const_string "SI")
           ]
           (const_string "QI")))])
3164
;; Reload dislikes loading 0/-1 directly into mask registers, so it goes
;; through a general register.  Tidy that up here: when the general
;; register is dead after the copy, load the constant into the mask
;; register directly.  The -1 case is taken only when the mode is wider
;; than one byte or TARGET_AVX512DQ is set.
(define_peephole2
  [(set (match_operand:SWI 0 "general_reg_operand")
        (match_operand:SWI 1 "immediate_operand"))
   (set (match_operand:SWI 2 "mask_reg_operand")
        (match_dup 0))]
  "peep2_reg_dead_p (2, operands[0])
   && (const0_operand (operands[1], <MODE>mode)
       || (constm1_operand (operands[1], <MODE>mode)
           && (<MODE_SIZE> > 1 || TARGET_AVX512DQ)))"
  [(set (match_dup 2)
        (match_dup 1))])
3177
;; Stores and loads of ax to an arbitrary constant address.
;; A second form of the instruction is faked to force reload to load the
;; address into a register when rax is not available.
(define_insn "*movabs<mode>_1"
  [(set (mem:SWI1248x
          (match_operand:DI 0 "x86_64_movabs_operand" "i,r"))
        (match_operand:SWI1248x 1 "nonmemory_operand" "a,r<i>"))]
  "TARGET_LP64 && ix86_check_movabs (insn, 0)"
{
  /* Operand 0 is only the address; recover the full memory rtx so the
     templates below print a memory reference.  */
  operands[0] = SET_DEST (PATTERN (insn));

  if (which_alternative == 0)
    return "movabs{<imodesuffix>}\t{%1, %P0|<iptrsize> PTR [%P0], %1}";
  if (which_alternative == 1)
    return "mov{<imodesuffix>}\t{%1, %0|%0, %1}";
  gcc_unreachable ();
}
  [(set_attr "type" "imov")
   (set_attr "modrm" "0,*")
   (set_attr "length_address" "8,0")
   (set_attr "length_immediate" "0,*")
   (set_attr "memory" "store")
   (set_attr "mode" "<MODE>")])
3204
;; Load from an arbitrary constant address: movabs into ax (alternative 0)
;; or a plain mov through an address register (alternative 1).
(define_insn "*movabs<mode>_2"
  [(set (match_operand:SWI1248x 0 "register_operand" "=a,r")
        (mem:SWI1248x
          (match_operand:DI 1 "x86_64_movabs_operand" "i,r")))]
  "TARGET_LP64 && ix86_check_movabs (insn, 1)"
{
  /* Operand 1 is only the address; recover the full memory rtx so the
     templates below print a memory reference.  */
  operands[1] = SET_SRC (PATTERN (insn));

  if (which_alternative == 0)
    return "movabs{<imodesuffix>}\t{%P1, %0|%0, <iptrsize> PTR [%P1]}";
  if (which_alternative == 1)
    return "mov{<imodesuffix>}\t{%1, %0|%0, %1}";
  gcc_unreachable ();
}
  [(set_attr "type" "imov")
   (set_attr "modrm" "0,*")
   (set_attr "length_address" "8,0")
   (set_attr "length_immediate" "0")
   (set_attr "memory" "load")
   (set_attr "mode" "<MODE>")])
3228
;; Exchange the contents of two SWI48 general registers with xchg.
(define_insn "swap<mode>"
  [(set (match_operand:SWI48 0 "register_operand" "+r")
        (match_operand:SWI48 1 "register_operand" "+r"))
   (set (match_dup 1)
        (match_dup 0))]
  ""
  "xchg{<imodesuffix>}\t%1, %0"
  [(set_attr "type" "imov")
   (set_attr "mode" "<MODE>")
   (set_attr "pent_pair" "np")
   (set_attr "athlon_decode" "vector")
   (set_attr "amdfam10_decode" "double")
   (set_attr "bdver1_decode" "double")])
3242
;; Exchange two SWI12 registers.  Alternative 0 uses the narrow xchg;
;; alternative 1 exchanges the containing 32-bit registers instead.
(define_insn "*swap<mode>"
  [(set (match_operand:SWI12 0 "register_operand" "+<r>,r")
        (match_operand:SWI12 1 "register_operand" "+<r>,r"))
   (set (match_dup 1)
        (match_dup 0))]
  ""
  "@
   xchg{<imodesuffix>}\t%1, %0
   xchg{l}\t%k1, %k0"
  [(set_attr "type" "imov")
   (set_attr "mode" "<MODE>,SI")
   ;; Alternative 0 is not preferred when optimizing for size.
   (set (attr "preferred_for_size")
     (if_then_else (eq_attr "alternative" "0")
       (symbol_ref "false")
       (symbol_ref "true")))
   ;; Potential partial reg stall on alternative 1.
   (set (attr "preferred_for_speed")
     (if_then_else (eq_attr "alternative" "1")
       (symbol_ref "!TARGET_PARTIAL_REG_STALL")
       (symbol_ref "true")))
   (set_attr "pent_pair" "np")
   (set_attr "athlon_decode" "vector")
   (set_attr "amdfam10_decode" "double")
   (set_attr "bdver1_decode" "double")])
3267
;; When optimizing for size, turn the three-move sequence
;;   op0 = op1; op1 = op2; op2 = op0
;; into a single exchange of op1 and op2, provided the temporary op0 is
;; dead afterwards.
(define_peephole2
  [(set (match_operand:SWI 0 "general_reg_operand")
        (match_operand:SWI 1 "general_reg_operand"))
   (set (match_dup 1)
        (match_operand:SWI 2 "general_reg_operand"))
   (set (match_dup 2)
        (match_dup 0))]
  "peep2_reg_dead_p (3, operands[0])
   && optimize_insn_for_size_p ()"
  [(parallel
     [(set (match_dup 1) (match_dup 2))
      (set (match_dup 2) (match_dup 1))])])
3278
;; Convert xchg with a REG_UNUSED note to a mov (variant #1):
;; operand 0 is dead after the exchange, so only the copy into operand 1
;; is kept.  Not done when AX_REG is involved, optimize_size is at least 2
;; and the insn is optimized for size.
(define_peephole2
  [(parallel
     [(set (match_operand:SWI 0 "general_reg_operand")
           (match_operand:SWI 1 "general_reg_operand"))
      (set (match_dup 1)
           (match_dup 0))])]
  "((REGNO (operands[0]) != AX_REG
     && REGNO (operands[1]) != AX_REG)
    || optimize_size < 2
    || !optimize_insn_for_size_p ())
   && peep2_reg_dead_p (1, operands[0])"
  [(set (match_dup 1)
        (match_dup 0))])
3290
;; Convert xchg with a REG_UNUSED note to a mov (variant #2):
;; operand 1 is dead after the exchange, so only the copy into operand 0
;; is kept.  Not done when AX_REG is involved, optimize_size is at least 2
;; and the insn is optimized for size.
(define_peephole2
  [(parallel
     [(set (match_operand:SWI 0 "general_reg_operand")
           (match_operand:SWI 1 "general_reg_operand"))
      (set (match_dup 1)
           (match_dup 0))])]
  "((REGNO (operands[0]) != AX_REG
     && REGNO (operands[1]) != AX_REG)
    || optimize_size < 2
    || !optimize_insn_for_size_p ())
   && peep2_reg_dead_p (1, operands[1])"
  [(set (match_dup 0)
        (match_dup 1))])
3302
;; Convert moves to/from AX_REG into xchg with -Oz.  Applies when exactly
;; one of the two registers is AX_REG and the source register is dead
;; after the move.
(define_peephole2
  [(set (match_operand:SWI48 0 "general_reg_operand")
        (match_operand:SWI48 1 "general_reg_operand"))]
  "optimize_size > 1
   && ((REGNO (operands[0]) == AX_REG)
       != (REGNO (operands[1]) == AX_REG))
   && optimize_insn_for_size_p ()
   && peep2_reg_dead_p (1, operands[1])"
  [(parallel
     [(set (match_dup 0) (match_dup 1))
      (set (match_dup 1) (match_dup 0))])])
3314
;; Expander for a strict_low_part store into an SWI12 register.  Operand 0
;; must be a SUBREG.
(define_expand "movstrict<mode>"
  [(set (strict_low_part (match_operand:SWI12 0 "register_operand"))
        (match_operand:SWI12 1 "general_operand"))]
  ""
{
  gcc_assert (SUBREG_P (operands[0]));

  /* Give up on partial register stall targets when optimizing the
     function for speed.  */
  if (TARGET_PARTIAL_REG_STALL && optimize_function_for_speed_p (cfun))
    FAIL;

  /* Give up when the inner register does not have a valid integer
     mode.  */
  if (!VALID_INT_MODE_P (GET_MODE (SUBREG_REG (operands[0]))))
    FAIL;
})
3325
;; Narrow move into the low part of a register, leaving the remaining
;; bits untouched.  Enabled unless the target has partial register stalls
;; and the function is not optimized for size.
(define_insn "*movstrict<mode>_1"
  [(set (strict_low_part
          (match_operand:SWI12 0 "register_operand" "+<r>"))
        (match_operand:SWI12 1 "general_operand" "<r>mn"))]
  "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
  "mov{<imodesuffix>}\t{%1, %0|%0, %1}"
  [(set_attr "type" "imov")
   (set_attr "mode" "<MODE>")])
3334
;; Clear the low part of a register with a narrow xor.  Only matched after
;; reload; the pattern clobbers the flags register.
(define_insn "*movstrict<mode>_xor"
  [(set (strict_low_part
          (match_operand:SWI12 0 "register_operand" "+<r>"))
        (match_operand:SWI12 1 "const0_operand"))
   (clobber (reg:CC FLAGS_REG))]
  "reload_completed"
  "xor{<imodesuffix>}\t%0, %0"
  [(set_attr "type" "alu1")
   (set_attr "mode" "<MODE>")
   (set_attr "length_immediate" "0")])
3344
;; Copy the second byte of operand 1 (an 8-bit field at bit 8, printed
;; as %h1) into the low byte of operand 0, leaving the other bits of
;; operand 0 untouched.
(define_insn "*movstrictqi_ext<mode>_1"
  [(set (strict_low_part
          (match_operand:QI 0 "register_operand" "+Q"))
        (subreg:QI
          (match_operator:SWI248 2 "extract_operator"
            [(match_operand 1 "int248_register_operand" "Q")
             (const_int 8)
             (const_int 8)])
          0))]
  "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
  "mov{b}\t{%h1, %0|%0, %h1}"
  [(set_attr "type" "imov")
   (set_attr "mode" "QI")])
3357
;; Signed bit-field extraction.  Only an 8-bit field at bit position 8
;; (%ah et al.) is expanded here; everything else FAILs.
(define_expand "extv<mode>"
  [(set (match_operand:SWI24 0 "register_operand")
        (sign_extract:SWI24
          (match_operand:SWI24 1 "register_operand")
          (match_operand:QI 2 "const_int_operand")
          (match_operand:QI 3 "const_int_operand")))]
  ""
{
  /* Handle extractions from %ah et al.  */
  if (!(INTVAL (operands[2]) == 8 && INTVAL (operands[3]) == 8))
    FAIL;

  unsigned int src_regno = reg_or_subregno (operands[1]);

  /* Be careful to expand only with registers having upper parts; copy
     anything else into a fresh register first.  */
  if (src_regno <= LAST_VIRTUAL_REGISTER && !QI_REGNO_P (src_regno))
    operands[1] = copy_to_reg (operands[1]);
})
3375
;; Sign-extend the 8-bit field at bit 8 of operand 1 (printed as %h1)
;; into operand 0 with movs{bl|x}.
(define_insn "*extv<mode>"
  [(set (match_operand:SWI24 0 "register_operand" "=R")
        (sign_extract:SWI24
          (match_operand 1 "int248_register_operand" "Q")
          (const_int 8)
          (const_int 8)))]
  ""
  "movs{bl|x}\t{%h1, %k0|%k0, %h1}"
  [(set_attr "type" "imovx")
   (set_attr "mode" "SI")])
3385
;; Sign-extension of the single least significant bit.  Always split into
;; two insns: "and x,$1" followed by "neg x", each clobbering the flags.
(define_insn_and_split "*extv<mode>_1_0"
  [(set (match_operand:SWI48 0 "register_operand" "=r")
        (sign_extract:SWI48
          (match_operand:SWI48 1 "register_operand" "0")
          (const_int 1)
          (const_int 0)))
   (clobber (reg:CC FLAGS_REG))]
  ""
  "#"
  ""
  [(parallel
     [(set (match_dup 0)
           (and:SWI48 (match_dup 1) (const_int 1)))
      (clobber (reg:CC FLAGS_REG))])
   (parallel
     [(set (match_dup 0)
           (neg:SWI48 (match_dup 0)))
      (clobber (reg:CC FLAGS_REG))])])
3400
;; Unsigned bit-field extraction.  ix86_expand_pextr gets the first chance;
;; otherwise only an 8-bit field at bit position 8 (%ah et al.) is
;; expanded here and everything else FAILs.
(define_expand "extzv<mode>"
  [(set (match_operand:SWI248 0 "register_operand")
        (zero_extract:SWI248
          (match_operand:SWI248 1 "register_operand")
          (match_operand:QI 2 "const_int_operand")
          (match_operand:QI 3 "const_int_operand")))]
  ""
{
  if (ix86_expand_pextr (operands))
    DONE;

  /* Handle extractions from %ah et al.  */
  if (!(INTVAL (operands[2]) == 8 && INTVAL (operands[3]) == 8))
    FAIL;

  unsigned int src_regno = reg_or_subregno (operands[1]);

  /* Be careful to expand only with registers having upper parts; copy
     anything else into a fresh register first.  */
  if (src_regno <= LAST_VIRTUAL_REGISTER && !QI_REGNO_P (src_regno))
    operands[1] = copy_to_reg (operands[1]);
})
3421
;; Zero-extend the 8-bit field at bit 8 of operand 1 (printed as %h1)
;; into operand 0 with movz{bl|x}.
(define_insn "*extzv<mode>"
  [(set (match_operand:SWI248 0 "register_operand" "=R")
        (zero_extract:SWI248
          (match_operand 1 "int248_register_operand" "Q")
          (const_int 8)
          (const_int 8)))]
  ""
  "movz{bl|x}\t{%h1, %k0|%k0, %h1}"
  [(set_attr "type" "imovx")
   (set_attr "mode" "SI")])
3431
;; Move the 8-bit field at bit 8 of operand 1 (printed as %h1) into a
;; QImode destination.  The "type" attribute selects between a
;; zero-extending movz and a plain byte move.
(define_insn "*extzvqi"
  [(set (match_operand:QI 0 "nonimmediate_operand" "=QBn,?R")
        (subreg:QI
          (match_operator:SWI248 2 "extract_operator"
            [(match_operand 1 "int248_register_operand" "Q,Q")
             (const_int 8)
             (const_int 8)])
          0))]
  ""
{
  if (get_attr_type (insn) == TYPE_IMOVX)
    return "movz{bl|x}\t{%h1, %k0|%k0, %h1}";

  return "mov{b}\t{%h1, %0|%0, %h1}";
}
  [(set_attr "addr" "gpr8,*")
   ;; imovx is used for a register destination that is not a QIreg_operand,
   ;; or for any register destination when TARGET_MOVX is set.
   (set (attr "type")
     (if_then_else
       (and (match_operand:QI 0 "register_operand")
            (ior (not (match_operand:QI 0 "QIreg_operand"))
                 (match_test "TARGET_MOVX")))
       (const_string "imovx")
       (const_string "imov")))
   (set (attr "mode")
     (if_then_else (eq_attr "type" "imovx")
       (const_string "SI")
       (const_string "QI")))])
3460
;; Bit-field insertion.  ix86_expand_pinsr gets the first chance; otherwise
;; only an 8-bit field at bit position 8 (%ah et al.) is expanded here and
;; everything else FAILs.
(define_expand "insv<mode>"
  [(set (zero_extract:SWI248
          (match_operand:SWI248 0 "register_operand")
          (match_operand:QI 1 "const_int_operand")
          (match_operand:QI 2 "const_int_operand"))
        (match_operand:SWI248 3 "register_operand"))]
  ""
{
  if (ix86_expand_pinsr (operands))
    DONE;

  /* Handle insertions to %ah et al.  */
  if (!(INTVAL (operands[1]) == 8 && INTVAL (operands[2]) == 8))
    FAIL;

  unsigned int dest_regno = reg_or_subregno (operands[0]);

  /* Be careful to expand only with registers having upper parts: work on
     a copy when the destination does not qualify.  */
  rtx target = operands[0];
  if (dest_regno <= LAST_VIRTUAL_REGISTER && !QI_REGNO_P (dest_regno))
    target = copy_to_reg (operands[0]);

  emit_insn (gen_insv_1 (<MODE>mode, target, operands[3]));

  /* Fix up the destination if needed.  */
  if (target != operands[0])
    emit_move_insn (operands[0], target);

  DONE;
})
3493
;; Store operand 1 into the 8-bit field at bit 8 of operand 0 (printed
;; as %h0) with a byte move.
(define_insn "@insv<mode>_1"
  [(set (zero_extract:SWI248
          (match_operand 0 "int248_register_operand" "+Q")
          (const_int 8)
          (const_int 8))
        (match_operand:SWI248 1 "general_operand" "QnBn"))]
  ""
{
  /* Reduce an integer constant to its QImode value before printing.  */
  if (CONST_INT_P (operands[1]))
    operands[1] = gen_int_mode (INTVAL (operands[1]), QImode);

  return "mov{b}\t{%b1, %h0|%h0, %b1}";
}
  [(set_attr "addr" "gpr8")
   (set_attr "type" "imov")
   (set_attr "mode" "QI")])
3509
;; Store a QImode operand, wrapped in a subreg of the wider mode, into the
;; 8-bit field at bit 8 of operand 0 (printed as %h0).
(define_insn "*insvqi_1"
  [(set (zero_extract:SWI248
          (match_operand 0 "int248_register_operand" "+Q")
          (const_int 8)
          (const_int 8))
        (subreg:SWI248
          (match_operand:QI 1 "general_operand" "QnBn")
          0))]
  ""
  "mov{b}\t{%1, %h0|%h0, %1}"
  [(set_attr "addr" "gpr8")
   (set_attr "type" "imov")
   (set_attr "mode" "QI")])
3522
;; Eliminate redundant insv, e.g. xorl %eax,%eax; movb $0, %ah.
;; Once the whole register has been cleared, also clearing the 8-bit field
;; at bit 8 of the same register is unnecessary, so only the first insn is
;; kept.
(define_peephole2
  [(parallel
     [(set (match_operand:SWI48 0 "general_reg_operand")
           (const_int 0))
      (clobber (reg:CC FLAGS_REG))])
   (set (zero_extract:SWI248
          (match_operand 1 "int248_register_operand")
          (const_int 8)
          (const_int 8))
        (const_int 0))]
  "REGNO (operands[0]) == REGNO (operands[1])"
  [(parallel
     [(set (match_operand:SWI48 0 "general_reg_operand")
           (const_int 0))
      (clobber (reg:CC FLAGS_REG))])])
3536
;; Combine movl followed by movb: a constant load into a register followed
;; by a constant store into the 8-bit field at bit 8 of the same register
;; becomes a single load of the merged constant.
(define_peephole2
  [(set (match_operand:SWI48 0 "general_reg_operand")
        (match_operand:SWI48 1 "const_int_operand"))
   (set (zero_extract:SWI248
          (match_operand 2 "int248_register_operand")
          (const_int 8)
          (const_int 8))
        (match_operand:SWI248 3 "const_int_operand"))]
  "REGNO (operands[0]) == REGNO (operands[2])"
  [(set (match_operand:SWI48 0 "general_reg_operand")
        (match_dup 4))]
{
  /* Bits 8..15 come from the low byte of operand 3; every other bit comes
     from operand 1.  */
  const HOST_WIDE_INT field = (INTVAL (operands[3]) & 0xff) << 8;
  const HOST_WIDE_INT rest = INTVAL (operands[1]) & ~(HOST_WIDE_INT)0xff00;

  operands[4] = gen_int_mode (rest | field, <SWI48:MODE>mode);
})
3553
;; Copy the 8-bit field at bit 8 of operand 1 into the 8-bit field at
;; bit 8 of operand 0 (a %h1 -> %h0 byte move).
(define_insn "*insvqi_2"
  [(set (zero_extract:SWI248
          (match_operand 0 "int248_register_operand" "+Q")
          (const_int 8)
          (const_int 8))
        (match_operator:SWI248 2 "extract_operator"
          [(match_operand 1 "int248_register_operand" "Q")
           (const_int 8)
           (const_int 8)]))]
  ""
  "mov{b}\t{%h1, %h0|%h0, %h1}"
  [(set_attr "type" "imov")
   (set_attr "mode" "QI")])
3567
;; Same %h1 -> %h0 byte move, with the source written as a right shift of
;; operand 1 by 8 (either shift kind in any_shiftrt).
(define_insn "*insvqi_3"
  [(set (zero_extract:SWI248
          (match_operand 0 "int248_register_operand" "+Q")
          (const_int 8)
          (const_int 8))
        (any_shiftrt:SWI248
          (match_operand:SWI248 1 "register_operand" "Q")
          (const_int 8)))]
  ""
  "mov{b}\t{%h1, %h0|%h0, %h1}"
  [(set_attr "type" "imov")
   (set_attr "mode" "QI")])
3580
;; Code iterator over the three operators plus, ior and xor.
(define_code_iterator any_or_plus
  [plus ior xor])
3582
;; Matches (op1 & mask) combined by any_or_plus with (zext (op2) << 64),
;; where mask is a two-element wide constant whose element 0 is -1 and
;; element 1 is 0.  Split after reload into a concatenation of the low
;; DImode part of op1 with op2.
(define_insn_and_split "*insvti_highpart_1"
  [(set (match_operand:TI 0 "nonimmediate_operand" "=ro,r,r,&r")
        (any_or_plus:TI
          (and:TI
            (match_operand:TI 1 "nonimmediate_operand" "r,m,r,m")
            (match_operand:TI 3 "const_scalar_int_operand" "n,n,n,n"))
          (ashift:TI
            (zero_extend:TI
              (match_operand:DI 2 "nonimmediate_operand" "r,r,m,m"))
            (const_int 64))))]
  "TARGET_64BIT
   && CONST_WIDE_INT_P (operands[3])
   && CONST_WIDE_INT_NUNITS (operands[3]) == 2
   && CONST_WIDE_INT_ELT (operands[3], 0) == -1
   && CONST_WIDE_INT_ELT (operands[3], 1) == 0"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  /* Operand 4 is the low half of operand 1 that survives the mask.  */
  operands[4] = gen_lowpart (DImode, operands[1]);
  split_double_concat (TImode, operands[0], operands[4], operands[2]);
  DONE;
})
3606
;; Matches (op1 & mask) combined by any_or_plus with zext (op2), where mask
;; is a two-element wide constant whose element 0 is 0 and element 1 is -1.
;; Split after reload into a concatenation of op2 with the high DImode part
;; of op1.
(define_insn_and_split "*insvti_lowpart_1"
  [(set (match_operand:TI 0 "nonimmediate_operand" "=ro,r,r,&r")
        (any_or_plus:TI
          (and:TI
            (match_operand:TI 1 "nonimmediate_operand" "r,m,r,m")
            (match_operand:TI 3 "const_scalar_int_operand" "n,n,n,n"))
          (zero_extend:TI
            (match_operand:DI 2 "nonimmediate_operand" "r,r,m,m"))))]
  "TARGET_64BIT
   && CONST_WIDE_INT_P (operands[3])
   && CONST_WIDE_INT_NUNITS (operands[3]) == 2
   && CONST_WIDE_INT_ELT (operands[3], 0) == 0
   && CONST_WIDE_INT_ELT (operands[3], 1) == -1"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  /* Operand 4 is the high half of operand 1 that survives the mask.  */
  operands[4] = gen_highpart (DImode, operands[1]);
  split_double_concat (TImode, operands[0], operands[2], operands[4]);
  DONE;
})
3628
;; 32-bit target counterpart for DImode: matches (op1 & 0xffffffff00000000)
;; combined by any_or_plus with zext (op2).  Split after reload into a
;; concatenation of op2 with the high SImode part of op1.
(define_insn_and_split "*insvdi_lowpart_1"
  [(set (match_operand:DI 0 "nonimmediate_operand" "=ro,r,r,&r")
        (any_or_plus:DI
          (and:DI
            (match_operand:DI 1 "nonimmediate_operand" "r,m,r,m")
            (match_operand:DI 3 "const_int_operand" "n,n,n,n"))
          (zero_extend:DI
            (match_operand:SI 2 "nonimmediate_operand" "r,r,m,m"))))]
  "!TARGET_64BIT
   && CONST_INT_P (operands[3])
   && UINTVAL (operands[3]) == 0xffffffff00000000ll"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  /* Operand 4 is the high half of operand 1 that survives the mask.  */
  operands[4] = gen_highpart (SImode, operands[1]);
  split_double_concat (DImode, operands[0], operands[2], operands[4]);
  DONE;
})
3648 \f
3649 ;; Floating point push instructions.
3650
;; TFmode push.  Never output directly: the template is always "#", so the
;; insn has to be split.
(define_insn "*pushtf"
  [(set (match_operand:TF 0 "push_operand" "=<,<")
        (match_operand:TF 1 "general_no_elim_operand" "v,*roC"))]
  "TARGET_64BIT || TARGET_SSE"
{
  /* This insn should be already split before reg-stack.  */
  return "#";
}
  [(set_attr "isa" "*,x64")
   (set_attr "type" "multi")
   (set_attr "unit" "sse,*")
   (set_attr "mode" "TF,DI")])
3663
;; %%% Kill this when call knows how to work this out.
;; After reload, split a TFmode push of an SSE register into an explicit
;; 16-byte stack pointer decrement followed by a store at the stack
;; pointer.
(define_split
  [(set (match_operand:TF 0 "push_operand")
        (match_operand:TF 1 "sse_reg_operand"))]
  "TARGET_SSE && reload_completed"
  [(set (reg:P SP_REG)
        (plus:P (reg:P SP_REG) (const_int -16)))
   (set (match_dup 0)
        (match_dup 1))]
{
  /* Preserve memory attributes.  */
  operands[0] = replace_equiv_address (operands[0], stack_pointer_rtx);
})
3675
;; XFmode push.  Never output directly: the template is always "#", so the
;; insn has to be split.
(define_insn "*pushxf"
  [(set (match_operand:XF 0 "push_operand" "=<,<,<,<,<")
        (match_operand:XF 1 "general_no_elim_operand" "f,r,*r,oF,oC"))]
  ""
{
  /* This insn should be already split before reg-stack.  */
  return "#";
}
  [(set_attr "isa" "*,*,*,nox64,x64")
   (set_attr "type" "multi")
   (set_attr "unit" "i387,*,*,*,*")
   ;; Alternatives 1-4 have word-sized mode (DI or SI); alternative 0 is XF.
   (set (attr "mode")
     (if_then_else (eq_attr "alternative" "1,2,3,4")
       (if_then_else (match_test "TARGET_64BIT")
         (const_string "DI")
         (const_string "SI"))
       (const_string "XF")))
   (set (attr "preferred_for_size")
     (if_then_else (eq_attr "alternative" "1")
       (symbol_ref "false")
       (symbol_ref "true")))])
3698
;; %%% Kill this when call knows how to work this out.
;; After reload, split an XFmode push of an x87 register into an explicit
;; stack pointer adjustment followed by a store at the stack pointer.
(define_split
  [(set (match_operand:XF 0 "push_operand")
        (match_operand:XF 1 "fp_register_operand"))]
  "reload_completed"
  [(set (reg:P SP_REG)
        (plus:P (reg:P SP_REG) (match_dup 2)))
   (set (match_dup 0)
        (match_dup 1))]
{
  /* The adjustment is the negated, push-rounded size of XFmode.  */
  operands[2] = GEN_INT (-PUSH_ROUNDING (GET_MODE_SIZE (XFmode)));
  /* Preserve memory attributes.  */
  operands[0] = replace_equiv_address (operands[0], stack_pointer_rtx);
})
3711
;; DFmode push.  Never output directly: the template is always "#", so the
;; insn has to be split.
(define_insn "*pushdf"
  [(set (match_operand:DF 0 "push_operand" "=<,<,<,<,<,<")
        (match_operand:DF 1 "general_no_elim_operand" "f,r,*r,oF,rmC,v"))]
  ""
{
  /* This insn should be already split before reg-stack.  */
  return "#";
}
  [(set_attr "isa" "*,nox64,nox64,nox64,x64,sse2")
   (set_attr "type" "multi")
   (set_attr "unit" "i387,*,*,*,*,sse")
   (set_attr "mode" "DF,SI,SI,SI,DI,DF")
   (set (attr "preferred_for_size")
     (if_then_else (eq_attr "alternative" "1")
       (symbol_ref "false")
       (symbol_ref "true")))
   (set (attr "preferred_for_speed")
     (if_then_else (eq_attr "alternative" "1")
       (symbol_ref "TARGET_INTEGER_DFMODE_MOVES")
       (symbol_ref "true")))])
3732
;; %%% Kill this when call knows how to work this out.
;; After reload, split a DFmode push of a floating-point register into an
;; explicit 8-byte stack pointer decrement followed by a store at the
;; stack pointer.
(define_split
  [(set (match_operand:DF 0 "push_operand")
        (match_operand:DF 1 "any_fp_register_operand"))]
  "reload_completed"
  [(set (reg:P SP_REG)
        (plus:P (reg:P SP_REG) (const_int -8)))
   (set (match_dup 0)
        (match_dup 1))]
{
  /* Preserve memory attributes.  */
  operands[0] = replace_equiv_address (operands[0], stack_pointer_rtx);
})
3744
;; Mode iterator over the two modes HF and BF.
(define_mode_iterator HFBF
  [HF BF])
3746
;; 64-bit push of an HF/BF value.  Only alternative 0 (general register) is
;; output directly, as a 64-bit push; alternative 1 (SSE register) must be
;; split first.
(define_insn "*push<mode>_rex64"
  [(set (match_operand:HFBF 0 "push_operand" "=X,X")
        (match_operand:HFBF 1 "nonmemory_no_elim_operand" "r,x"))]
  "TARGET_64BIT"
{
  /* Anything else should be already split before reg-stack.  */
  gcc_assert (which_alternative == 0);

  return "push{q}\t%q1";
}
  [(set_attr "isa" "*,sse4")
   (set_attr "type" "push,multi")
   (set_attr "mode" "DI,TI")])
3759
;; 32-bit push of an HF/BF value.  Only alternative 0 is output directly,
;; as a 32-bit push; alternative 1 (SSE register) must be split first.
(define_insn "*push<mode>"
  [(set (match_operand:HFBF 0 "push_operand" "=X,X")
        (match_operand:HFBF 1 "general_no_elim_operand" "rmF,x"))]
  "!TARGET_64BIT"
{
  /* Anything else should be already split before reg-stack.  */
  gcc_assert (which_alternative == 0);

  return "push{l}\t%k1";
}
  [(set_attr "isa" "*,sse4")
   (set_attr "type" "push,multi")
   (set_attr "mode" "SI,TI")])
3772
;; push2: push two DImode registers as one TImode push, modelled with
;; UNSPEC_APXPUSH2.  Requires TARGET_APX_PUSH2POP2.
(define_insn "push2_di"
  [(set (match_operand:TI 0 "push_operand" "=<")
        (unspec:TI
          [(match_operand:DI 1 "register_operand" "r")
           (match_operand:DI 2 "register_operand" "r")]
          UNSPEC_APXPUSH2))]
  "TARGET_APX_PUSH2POP2"
  "push2\t{%2, %1|%1, %2}"
  [(set_attr "mode" "TI")
   (set_attr "type" "multi")
   (set_attr "prefix" "evex")])
3783
;; pop2: pop one TImode stack slot into two DImode registers.  Operand 0 is
;; set from UNSPEC_APXPOP2_LOW of the popped slot and operand 2 from
;; UNSPEC_APXPOP2_HIGH.  Requires TARGET_APX_PUSH2POP2.
(define_insn "pop2_di"
  [(parallel
     [(set (match_operand:DI 0 "register_operand" "=r")
           (unspec:DI
             [(match_operand:TI 1 "pop_operand" ">")]
             UNSPEC_APXPOP2_LOW))
      (set (match_operand:DI 2 "register_operand" "=r")
           (unspec:DI [(const_int 0)] UNSPEC_APXPOP2_HIGH))])]
  "TARGET_APX_PUSH2POP2"
  "pop2\t{%2, %0|%0, %2}"
  [(set_attr "mode" "TI")
   (set_attr "prefix" "evex")])
3794
;; PPX form of a DImode register push; the UNSPEC_APX_PPX marker sets it
;; apart from an ordinary push.  The pushp mnemonic is only meaningful with
;; APX PPX, so the pattern is gated on TARGET_APX_PPX in addition to
;; TARGET_64BIT, in line with popp_di, push2p_di and pop2p_di.
;; NOTE(review): assumes every caller of gen_pushp_di already checks
;; TARGET_APX_PPX -- confirm against the prologue code.
(define_insn "pushp_di"
  [(set (match_operand:DI 0 "push_operand" "=<")
        (match_operand:DI 1 "register_operand" "r"))
   (unspec:DI [(const_int 0)] UNSPEC_APX_PPX)]
  "TARGET_64BIT && TARGET_APX_PPX"
  "pushp\t%1"
  [(set_attr "mode" "DI")])
3802
;; PPX form of a DImode register pop; the UNSPEC_APX_PPX marker sets it
;; apart from an ordinary pop.  Requires TARGET_APX_PPX.
(define_insn "popp_di"
  [(set (match_operand:DI 0 "register_operand" "=r")
        (match_operand:DI 1 "pop_operand" ">"))
   (unspec:DI [(const_int 0)] UNSPEC_APX_PPX)]
  "TARGET_APX_PPX"
  "popp\t%0"
  [(set_attr "mode" "DI")])
3810
;; push2p: the two-register TImode push carrying the UNSPEC_APX_PPX
;; marker.  Requires both TARGET_APX_PUSH2POP2 and TARGET_APX_PPX.
(define_insn "push2p_di"
  [(set (match_operand:TI 0 "push_operand" "=<")
        (unspec:TI
          [(match_operand:DI 1 "register_operand" "r")
           (match_operand:DI 2 "register_operand" "r")]
          UNSPEC_APXPUSH2))
   (unspec:DI [(const_int 0)] UNSPEC_APX_PPX)]
  "TARGET_APX_PUSH2POP2 && TARGET_APX_PPX"
  "push2p\t{%2, %1|%1, %2}"
  [(set_attr "mode" "TI")
   (set_attr "type" "multi")
   (set_attr "prefix" "evex")])
3822
;; pop2p: the two-register TImode pop carrying the UNSPEC_APX_PPX marker.
;; Requires both TARGET_APX_PUSH2POP2 and TARGET_APX_PPX.
(define_insn "pop2p_di"
  [(parallel
     [(set (match_operand:DI 0 "register_operand" "=r")
           (unspec:DI
             [(match_operand:TI 1 "pop_operand" ">")]
             UNSPEC_APXPOP2_LOW))
      (set (match_operand:DI 2 "register_operand" "=r")
           (unspec:DI [(const_int 0)] UNSPEC_APXPOP2_HIGH))
      (unspec:DI [(const_int 0)] UNSPEC_APX_PPX)])]
  "TARGET_APX_PUSH2POP2 && TARGET_APX_PPX"
  "pop2p\t{%2, %0|%0, %2}"
  [(set_attr "mode" "TI")
   (set_attr "prefix" "evex")])
3834
;; 64-bit SFmode push.  Only alternative 1 is output directly, as a 64-bit
;; push; alternatives 0 and 2 return "#" and must be split.
(define_insn "*pushsf_rex64"
  [(set (match_operand:SF 0 "push_operand" "=X,X,X")
        (match_operand:SF 1 "nonmemory_no_elim_operand" "f,rF,v"))]
  "TARGET_64BIT"
{
  /* Anything else should be already split before reg-stack.  */
  return which_alternative == 1 ? "push{q}\t%q1" : "#";
}
  [(set_attr "type" "multi,push,multi")
   (set_attr "unit" "i387,*,*")
   (set_attr "mode" "SF,DI,SF")])
3848
;; 32-bit SFmode push.  Only alternative 1 is output directly, as a 32-bit
;; push; alternatives 0 and 2 return "#" and must be split.
(define_insn "*pushsf"
  [(set (match_operand:SF 0 "push_operand" "=<,<,<")
        (match_operand:SF 1 "general_no_elim_operand" "f,rmF,v"))]
  "!TARGET_64BIT"
{
  /* Anything else should be already split before reg-stack.  */
  return which_alternative == 1 ? "push{l}\t%1" : "#";
}
  [(set_attr "type" "multi,push,multi")
   (set_attr "unit" "i387,*,*")
   (set_attr "mode" "SF,SI,SF")])
3862
;; Mode iterator over the three modes SF, HF and BF.
(define_mode_iterator MODESH
  [SF HF BF])
;; %%% Kill this when call knows how to work this out.
;; After reload, split an SF/HF/BF push of a floating-point register into
;; an explicit stack pointer adjustment followed by a store at the stack
;; pointer.
(define_split
  [(set (match_operand:MODESH 0 "push_operand")
        (match_operand:MODESH 1 "any_fp_register_operand"))]
  "reload_completed"
  [(set (reg:P SP_REG)
        (plus:P (reg:P SP_REG) (match_dup 2)))
   (set (match_dup 0)
        (match_dup 1))]
{
  rtx push_addr = XEXP (operands[0], 0);
  rtx adjust;

  if (GET_CODE (push_addr) == PRE_DEC)
    {
      /* A plain pre-decrement push is only expected on 32-bit targets and
         moves the stack pointer by 4 bytes.  */
      gcc_assert (!TARGET_64BIT);
      adjust = GEN_INT (-4);
    }
  else
    {
      /* Otherwise take the constant from the address expression itself.  */
      adjust = XEXP (XEXP (push_addr, 1), 1);
      gcc_assert (CONST_INT_P (adjust));
    }

  operands[2] = adjust;
  /* Preserve memory attributes.  */
  operands[0] = replace_equiv_address (operands[0], stack_pointer_rtx);
})
3887
;; After reload, when find_constant_src returns a non-null source for an
;; SFmode push from memory, rewrite the push to use that returned value
;; (operands[2]) as its source instead of the memory operand.
3888 (define_split
3889 [(set (match_operand:SF 0 "push_operand")
3890 (match_operand:SF 1 "memory_operand"))]
3891 "reload_completed
3892 && find_constant_src (insn)"
3893 [(set (match_dup 0) (match_dup 2))]
3894 "operands[2] = find_constant_src (curr_insn);")
3895
;; After reload, a push of a TFmode, XFmode or DFmode value from a
;; general_gr_operand source is handed to ix86_split_long_move.  The split
;; template is only the (const_int 0) placeholder: the preparation code
;; ends with DONE, so whatever ix86_split_long_move emitted is the result.
3896 (define_split
3897 [(set (match_operand 0 "push_operand")
3898 (match_operand 1 "general_gr_operand"))]
3899 "reload_completed
3900 && (GET_MODE (operands[0]) == TFmode
3901 || GET_MODE (operands[0]) == XFmode
3902 || GET_MODE (operands[0]) == DFmode)"
3903 [(const_int 0)]
3904 "ix86_split_long_move (operands); DONE;")
3905 \f
3906 ;; Floating point move instructions.
3907
;; TFmode move expander, available on 64-bit targets or with SSE.  All of
;; the work is delegated to ix86_expand_move; the RTL template is never
;; emitted because the preparation code ends with DONE.
3908 (define_expand "movtf"
3909 [(set (match_operand:TF 0 "nonimmediate_operand")
3910 (match_operand:TF 1 "nonimmediate_operand"))]
3911 "TARGET_64BIT || TARGET_SSE"
3912 "ix86_expand_move (TFmode, operands); DONE;")
3913
;; Unconditional move expander for every mode of the X87MODEFH iterator
;; (the iterator is defined outside this part of the file).  All of the
;; work is delegated to ix86_expand_move; the preparation code ends with
;; DONE, so the RTL template itself is never emitted.
3914 (define_expand "mov<mode>"
3915 [(set (match_operand:X87MODEFH 0 "nonimmediate_operand")
3916 (match_operand:X87MODEFH 1 "general_operand"))]
3917 ""
3918 "ix86_expand_move (<MODE>mode, operands); DONE;")
3919
;; TFmode move.
;; Alternatives (see the "type" attribute):
;;   0     SSE register <- constant "C"            (sselog1)
;;   1, 2  SSE register <- SSE reg/mem, mem <- SSE (ssemov)
;;   3, 4  general-register / offsettable-memory forms, x64 only (multi);
;;         these are output as "#" and have to be split.
;; The insn condition rejects memory-to-memory moves.  Before LRA a
;; CONST_DOUBLE source is only accepted when it is a standard SSE constant
;; (standard_sse_constant_p == 1) going to a non-memory destination, or
;; when the destination is memory and !TARGET_MEMORY_MISMATCH_STALL.
3920 (define_insn "*movtf_internal"
3921 [(set (match_operand:TF 0 "nonimmediate_operand" "=v,v ,m,?*r ,!o")
3922 (match_operand:TF 1 "general_operand" "C ,vm,v,*roF,*rC"))]
3923 "(TARGET_64BIT || TARGET_SSE)
3924 && !(MEM_P (operands[0]) && MEM_P (operands[1]))
3925 && (lra_in_progress || reload_completed
3926 || !CONST_DOUBLE_P (operands[1])
3927 || (standard_sse_constant_p (operands[1], TFmode) == 1
3928 && !memory_operand (operands[0], TFmode))
3929 || (!TARGET_MEMORY_MISMATCH_STALL
3930 && memory_operand (operands[0], TFmode)))"
3931 {
3932 switch (get_attr_type (insn))
3933 {
3934 case TYPE_SSELOG1:
3935 return standard_sse_constant_opcode (insn, operands);
3936
3937 case TYPE_SSEMOV:
3938 return ix86_output_ssemov (insn, operands);
3939
3940 case TYPE_MULTI:
3941 return "#";
3942
3943 default:
3944 gcc_unreachable ();
3945 }
3946 }
3947 [(set_attr "isa" "*,*,*,x64,x64")
3948 (set_attr "type" "sselog1,ssemov,ssemov,multi,multi")
3949 (set (attr "prefix")
3950 (if_then_else (eq_attr "type" "sselog1,ssemov")
3951 (const_string "maybe_vex")
3952 (const_string "orig")))
;; Mode selection: DI for the integer alternatives; for the SSE ones TI
;; with AVX, otherwise V4SF when SSE2 is missing, when optimizing for
;; size, when packed-single is optimal, or for typeless stores (alt 2).
3953 (set (attr "mode")
3954 (cond [(eq_attr "alternative" "3,4")
3955 (const_string "DI")
3956 (match_test "TARGET_AVX")
3957 (const_string "TI")
3958 (ior (not (match_test "TARGET_SSE2"))
3959 (match_test "optimize_function_for_size_p (cfun)"))
3960 (const_string "V4SF")
3961 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
3962 (const_string "V4SF")
3963 (and (eq_attr "alternative" "2")
3964 (match_test "TARGET_SSE_TYPELESS_STORES"))
3965 (const_string "V4SF")
3966 ]
3967 (const_string "TI")))])
3968
;; After reload, a TFmode move between a nonimmediate_gr_operand
;; destination and a general_gr_operand source is expanded by
;; ix86_split_long_move; the (const_int 0) template is a placeholder
;; because the preparation code ends with DONE.
3969 (define_split
3970 [(set (match_operand:TF 0 "nonimmediate_gr_operand")
3971 (match_operand:TF 1 "general_gr_operand"))]
3972 "reload_completed"
3973 [(const_int 0)]
3974 "ix86_split_long_move (operands); DONE;")
3975
;; XFmode move.
;; Alternatives 0-2 are x87 moves (type fmov); alternative 2 loads a
;; standard x87 constant ("G").  Alternatives 3-11 go through general
;; registers / offsettable memory, are of type multi and are output as
;; "#", so they have to be split.
;; Alternatives 9-11 are only enabled when !TARGET_HARD_XF_REGS, and all
;; other alternatives only when TARGET_HARD_XF_REGS (see "enabled").
3976 ;; Possible store forwarding (partial memory) stall
3977 ;; in alternatives 4, 6, 7 and 8.
3978 (define_insn "*movxf_internal"
3979 [(set (match_operand:XF 0 "nonimmediate_operand"
3980 "=f,m,f,?r ,!o,?*r ,!o,!o,!o,r ,o ,o")
3981 (match_operand:XF 1 "general_operand"
3982 "fm,f,G,roF,r ,*roF,*r,F ,C ,roF,rF,rC"))]
3983 "!(MEM_P (operands[0]) && MEM_P (operands[1]))
3984 && (lra_in_progress || reload_completed
3985 || !CONST_DOUBLE_P (operands[1])
3986 || ((optimize_function_for_size_p (cfun)
3987 || (ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC))
3988 && standard_80387_constant_p (operands[1]) > 0
3989 && !memory_operand (operands[0], XFmode))
3990 || (!TARGET_MEMORY_MISMATCH_STALL
3991 && memory_operand (operands[0], XFmode))
3992 || !TARGET_HARD_XF_REGS)"
3993 {
3994 switch (get_attr_type (insn))
3995 {
3996 case TYPE_FMOV:
/* Alternative 2 is the standard-constant load; the rest are plain
   x87 register/memory moves. */
3997 if (which_alternative == 2)
3998 return standard_80387_constant_opcode (operands[1]);
3999 return output_387_reg_move (insn, operands);
4000
4001 case TYPE_MULTI:
4002 return "#";
4003
4004 default:
4005 gcc_unreachable ();
4006 }
4007 }
4008 [(set (attr "isa")
4009 (cond [(eq_attr "alternative" "7,10")
4010 (const_string "nox64")
4011 (eq_attr "alternative" "8,11")
4012 (const_string "x64")
4013 ]
4014 (const_string "*")))
4015 (set (attr "type")
4016 (cond [(eq_attr "alternative" "3,4,5,6,7,8,9,10,11")
4017 (const_string "multi")
4018 ]
4019 (const_string "fmov")))
4020 (set (attr "mode")
4021 (cond [(eq_attr "alternative" "3,4,5,6,7,8,9,10,11")
4022 (if_then_else (match_test "TARGET_64BIT")
4023 (const_string "DI")
4024 (const_string "SI"))
4025 ]
4026 (const_string "XF")))
4027 (set (attr "preferred_for_size")
4028 (cond [(eq_attr "alternative" "3,4")
4029 (symbol_ref "false")]
4030 (symbol_ref "true")))
;; Alternatives 9-11 replace the others when there are no hard XF
;; registers: exactly one of the two groups is enabled.
4031 (set (attr "enabled")
4032 (cond [(eq_attr "alternative" "9,10,11")
4033 (if_then_else
4034 (match_test "TARGET_HARD_XF_REGS")
4035 (symbol_ref "false")
4036 (const_string "*"))
4037 (not (match_test "TARGET_HARD_XF_REGS"))
4038 (symbol_ref "false")
4039 ]
4040 (const_string "*")))])
4041
;; After reload, an XFmode move between a nonimmediate_gr_operand
;; destination and a general_gr_operand source is expanded by
;; ix86_split_long_move; the (const_int 0) template is a placeholder
;; because the preparation code ends with DONE.
4042 (define_split
4043 [(set (match_operand:XF 0 "nonimmediate_gr_operand")
4044 (match_operand:XF 1 "general_gr_operand"))]
4045 "reload_completed"
4046 [(const_int 0)]
4047 "ix86_split_long_move (operands); DONE;")
4048
;; DFmode move.
;; Alternative groups (see the "type" and "isa" attributes):
;;   0-2           x87 moves (fmov); alternative 2 loads a standard constant
;;   3-7, 22, 23   general-register forms for !x64 (multi, output as "#")
;;   8-11, 24, 25  general-register forms for x64 (imov); alternative 11
;;                 is the movabs form
;;   12, 16        SSE constant load (sselog1)
;;   remaining     SSE moves (ssemov), including the x64 SSE2 inter-unit
;;                 moves in alternatives 20 and 21
;; Alternatives 22-25 are only enabled when !TARGET_HARD_DF_REGS, and all
;; other alternatives only when TARGET_HARD_DF_REGS (see "enabled").
4049 ;; Possible store forwarding (partial memory) stall in alternatives 4, 6 and 7.
4050 (define_insn "*movdf_internal"
4051 [(set (match_operand:DF 0 "nonimmediate_operand"
4052 "=Yf*f,m ,Yf*f,?r ,!o,?*r ,!o,!o,?r,?m,?r,?r,Yv,v,v,m,*x,*x,*x,m ,?r,?v,r ,o ,r ,m")
4053 (match_operand:DF 1 "general_operand"
4054 "Yf*fm,Yf*f,G ,roF,r ,*roF,*r,F ,rm,rC,C ,F ,C ,v,m,v,C ,*x,m ,*x, v, r,roF,rF,rmF,rC"))]
4055 "!(MEM_P (operands[0]) && MEM_P (operands[1]))
4056 && (lra_in_progress || reload_completed
4057 || !CONST_DOUBLE_P (operands[1])
4058 || ((optimize_function_for_size_p (cfun)
4059 || (ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC))
4060 && IS_STACK_MODE (DFmode)
4061 && standard_80387_constant_p (operands[1]) > 0
4062 && !memory_operand (operands[0], DFmode))
4063 || (TARGET_SSE2 && TARGET_SSE_MATH
4064 && standard_sse_constant_p (operands[1], DFmode) == 1
4065 && !memory_operand (operands[0], DFmode))
4066 || ((TARGET_64BIT || !TARGET_MEMORY_MISMATCH_STALL)
4067 && memory_operand (operands[0], DFmode))
4068 || !TARGET_HARD_DF_REGS)"
4069 {
4070 switch (get_attr_type (insn))
4071 {
4072 case TYPE_FMOV:
4073 if (which_alternative == 2)
4074 return standard_80387_constant_opcode (operands[1]);
4075 return output_387_reg_move (insn, operands);
4076
4077 case TYPE_MULTI:
4078 return "#";
4079
4080 case TYPE_IMOV:
/* SImode (alternative 10, see the "mode" attribute) writes only the
   32-bit register; alternative 11 takes a full FP immediate and needs
   movabs; everything else is a plain 64-bit mov. */
4081 if (get_attr_mode (insn) == MODE_SI)
4082 return "mov{l}\t{%1, %k0|%k0, %1}";
4083 else if (which_alternative == 11)
4084 return "movabs{q}\t{%1, %0|%0, %1}";
4085 else
4086 return "mov{q}\t{%1, %0|%0, %1}";
4087
4088 case TYPE_SSELOG1:
4089 return standard_sse_constant_opcode (insn, operands);
4090
4091 case TYPE_SSEMOV:
4092 return ix86_output_ssemov (insn, operands);
4093
4094 default:
4095 gcc_unreachable ();
4096 }
4097 }
4098 [(set (attr "isa")
4099 (cond [(eq_attr "alternative" "3,4,5,6,7,22,23")
4100 (const_string "nox64")
4101 (eq_attr "alternative" "8,9,10,11,24,25")
4102 (const_string "x64")
4103 (eq_attr "alternative" "12,13,14,15")
4104 (const_string "sse2")
4105 (eq_attr "alternative" "20,21")
4106 (const_string "x64_sse2")
4107 ]
4108 (const_string "*")))
4109 (set (attr "type")
4110 (cond [(eq_attr "alternative" "0,1,2")
4111 (const_string "fmov")
4112 (eq_attr "alternative" "3,4,5,6,7,22,23")
4113 (const_string "multi")
4114 (eq_attr "alternative" "8,9,10,11,24,25")
4115 (const_string "imov")
4116 (eq_attr "alternative" "12,16")
4117 (const_string "sselog1")
4118 ]
4119 (const_string "ssemov")))
;; Alternative 11 is the movabs form: no ModRM byte, 8-byte immediate.
4120 (set (attr "modrm")
4121 (if_then_else (eq_attr "alternative" "11")
4122 (const_string "0")
4123 (const_string "*")))
4124 (set (attr "length_immediate")
4125 (if_then_else (eq_attr "alternative" "11")
4126 (const_string "8")
4127 (const_string "*")))
4128 (set (attr "prefix")
4129 (if_then_else (eq_attr "type" "sselog1,ssemov")
4130 (const_string "maybe_vex")
4131 (const_string "orig")))
4132 (set (attr "prefix_data16")
4133 (if_then_else
4134 (ior (and (eq_attr "type" "ssemov") (eq_attr "mode" "DI"))
4135 (eq_attr "mode" "V1DF"))
4136 (const_string "1")
4137 (const_string "*")))
4138 (set (attr "mode")
4139 (cond [(eq_attr "alternative" "3,4,5,6,7,10,22,23")
4140 (const_string "SI")
4141 (eq_attr "alternative" "8,9,11,20,21,24,25")
4142 (const_string "DI")
4143
4144 /* xorps is one byte shorter for non-AVX targets. */
4145 (eq_attr "alternative" "12,16")
4146 (cond [(match_test "TARGET_AVX")
4147 (const_string "V2DF")
4148 (ior (not (match_test "TARGET_SSE2"))
4149 (match_test "optimize_function_for_size_p (cfun)"))
4150 (const_string "V4SF")
4151 (match_test "TARGET_SSE_LOAD0_BY_PXOR")
4152 (const_string "TI")
4153 ]
4154 (const_string "V2DF"))
4155
4156 /* For architectures resolving dependencies on
4157 whole SSE registers use movapd to break dependency
4158 chains, otherwise use short move to avoid extra work. */
4159
4160 /* movaps is one byte shorter for non-AVX targets. */
4161 (eq_attr "alternative" "13,17")
4162 (cond [(match_test "TARGET_AVX512VL")
4163 (const_string "V2DF")
4164 (match_test "TARGET_AVX512F")
4165 (const_string "DF")
4166 (match_test "TARGET_AVX")
4167 (const_string "V2DF")
4168 (ior (not (match_test "TARGET_SSE2"))
4169 (match_test "optimize_function_for_size_p (cfun)"))
4170 (const_string "V4SF")
4171 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
4172 (const_string "V4SF")
4173 (match_test "TARGET_SSE_PARTIAL_REG_DEPENDENCY")
4174 (const_string "V2DF")
4175 ]
4176 (const_string "DF"))
4177
4178 /* For architectures resolving dependencies on register
4179 parts we may avoid extra work to zero out upper part
4180 of register. */
4181 (eq_attr "alternative" "14,18")
4182 (cond [(not (match_test "TARGET_SSE2"))
4183 (const_string "V2SF")
4184 (match_test "TARGET_AVX")
4185 (const_string "DF")
4186 (match_test "TARGET_SSE_SPLIT_REGS")
4187 (const_string "V1DF")
4188 ]
4189 (const_string "DF"))
4190
4191 (and (eq_attr "alternative" "15,19")
4192 (not (match_test "TARGET_SSE2")))
4193 (const_string "V2SF")
4194 ]
4195 (const_string "DF")))
4196 (set (attr "preferred_for_size")
4197 (cond [(eq_attr "alternative" "3,4")
4198 (symbol_ref "false")]
4199 (symbol_ref "true")))
4200 (set (attr "preferred_for_speed")
4201 (cond [(eq_attr "alternative" "3,4")
4202 (symbol_ref "TARGET_INTEGER_DFMODE_MOVES")
4203 (eq_attr "alternative" "20")
4204 (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC")
4205 (eq_attr "alternative" "21")
4206 (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
4207 ]
4208 (symbol_ref "true")))
;; Alternatives 22-25 replace the others when there are no hard DF
;; registers: exactly one of the two groups is enabled.
4209 (set (attr "enabled")
4210 (cond [(eq_attr "alternative" "22,23,24,25")
4211 (if_then_else
4212 (match_test "TARGET_HARD_DF_REGS")
4213 (symbol_ref "false")
4214 (const_string "*"))
4215 (not (match_test "TARGET_HARD_DF_REGS"))
4216 (symbol_ref "false")
4217 ]
4218 (const_string "*")))])
4219
;; On 32-bit targets only, after reload, a DFmode move between a
;; nonimmediate_gr_operand destination and a general_gr_operand source is
;; expanded by ix86_split_long_move; the (const_int 0) template is a
;; placeholder because the preparation code ends with DONE.
4220 (define_split
4221 [(set (match_operand:DF 0 "nonimmediate_gr_operand")
4222 (match_operand:DF 1 "general_gr_operand"))]
4223 "!TARGET_64BIT && reload_completed"
4224 [(const_int 0)]
4225 "ix86_split_long_move (operands); DONE;")
4226
;; SFmode move.
;; Alternative groups (see the "type" attribute):
;;   0-2          x87 moves (fmov); alternative 2 loads a standard constant
;;   3, 4, 16, 17 general-register moves (imov), output as mov{l}
;;   5            SSE constant load (sselog1)
;;   6-10         SSE moves (ssemov); 9 and 10 move between SSE and
;;                general registers and need sse2
;;   11-15        MMX-register moves (mmxmov)
;; Alternatives 16 and 17 are only enabled when !TARGET_HARD_SF_REGS, and
;; all other alternatives only when TARGET_HARD_SF_REGS (see "enabled").
4227 (define_insn "*movsf_internal"
4228 [(set (match_operand:SF 0 "nonimmediate_operand"
4229 "=Yf*f,m ,Yf*f,?r ,?m,Yv,v,v,m,?r,?v,!*y,!*y,!m,!r,!*y,r ,m")
4230 (match_operand:SF 1 "general_operand"
4231 "Yf*fm,Yf*f,G ,rmF,rF,C ,v,m,v,v ,r ,*y ,m ,*y,*y,r ,rmF,rF"))]
4232 "!(MEM_P (operands[0]) && MEM_P (operands[1]))
4233 && (lra_in_progress || reload_completed
4234 || !CONST_DOUBLE_P (operands[1])
4235 || ((optimize_function_for_size_p (cfun)
4236 || (ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC))
4237 && IS_STACK_MODE (SFmode)
4238 && standard_80387_constant_p (operands[1]) > 0)
4239 || (TARGET_SSE && TARGET_SSE_MATH
4240 && standard_sse_constant_p (operands[1], SFmode) == 1)
4241 || memory_operand (operands[0], SFmode)
4242 || !TARGET_HARD_SF_REGS)"
4243 {
4244 switch (get_attr_type (insn))
4245 {
4246 case TYPE_FMOV:
4247 if (which_alternative == 2)
4248 return standard_80387_constant_opcode (operands[1]);
4249 return output_387_reg_move (insn, operands);
4250
4251 case TYPE_IMOV:
4252 return "mov{l}\t{%1, %0|%0, %1}";
4253
4254 case TYPE_SSELOG1:
4255 return standard_sse_constant_opcode (insn, operands);
4256
4257 case TYPE_SSEMOV:
4258 return ix86_output_ssemov (insn, operands);
4259
4260 case TYPE_MMXMOV:
/* Only alternative 11 (MMX reg <- MMX reg) has DImode and uses movq;
   the other MMX alternatives have SImode and use movd. */
4261 switch (get_attr_mode (insn))
4262 {
4263 case MODE_DI:
4264 return "movq\t{%1, %0|%0, %1}";
4265 case MODE_SI:
4266 return "movd\t{%1, %0|%0, %1}";
4267
4268 default:
4269 gcc_unreachable ();
4270 }
4271
4272 default:
4273 gcc_unreachable ();
4274 }
4275 }
4276 [(set (attr "isa")
4277 (cond [(eq_attr "alternative" "9,10")
4278 (const_string "sse2")
4279 ]
4280 (const_string "*")))
4281 (set (attr "type")
4282 (cond [(eq_attr "alternative" "0,1,2")
4283 (const_string "fmov")
4284 (eq_attr "alternative" "3,4,16,17")
4285 (const_string "imov")
4286 (eq_attr "alternative" "5")
4287 (const_string "sselog1")
4288 (eq_attr "alternative" "11,12,13,14,15")
4289 (const_string "mmxmov")
4290 ]
4291 (const_string "ssemov")))
4292 (set (attr "prefix")
4293 (if_then_else (eq_attr "type" "sselog1,ssemov")
4294 (const_string "maybe_vex")
4295 (const_string "orig")))
4296 (set (attr "prefix_data16")
4297 (if_then_else (and (eq_attr "type" "ssemov") (eq_attr "mode" "SI"))
4298 (const_string "1")
4299 (const_string "*")))
4300 (set (attr "mode")
4301 (cond [(eq_attr "alternative" "3,4,9,10,12,13,14,15,16,17")
4302 (const_string "SI")
4303 (eq_attr "alternative" "11")
4304 (const_string "DI")
4305 (eq_attr "alternative" "5")
4306 (cond [(and (match_test "TARGET_AVX512F && TARGET_EVEX512")
4307 (not (match_test "TARGET_PREFER_AVX256")))
4308 (const_string "V16SF")
4309 (match_test "TARGET_AVX")
4310 (const_string "V4SF")
4311 (ior (not (match_test "TARGET_SSE2"))
4312 (match_test "optimize_function_for_size_p (cfun)"))
4313 (const_string "V4SF")
4314 (match_test "TARGET_SSE_LOAD0_BY_PXOR")
4315 (const_string "TI")
4316 ]
4317 (const_string "V4SF"))
4318
4319 /* For architectures resolving dependencies on
4320 whole SSE registers use APS move to break dependency
4321 chains, otherwise use short move to avoid extra work.
4322
4323 Do the same for architectures resolving dependencies on
4324 the parts. While in DF mode it is better to always handle
4325 just register parts, the SF mode is different due to lack
4326 of instructions to load just part of the register. It is
4327 better to maintain the whole registers in single format
4328 to avoid problems on using packed logical operations. */
4329 (eq_attr "alternative" "6")
4330 (cond [(match_test "TARGET_AVX512VL")
4331 (const_string "V4SF")
4332 (match_test "TARGET_AVX512F")
4333 (const_string "SF")
4334 (ior (match_test "TARGET_SSE_PARTIAL_REG_DEPENDENCY")
4335 (match_test "TARGET_SSE_SPLIT_REGS"))
4336 (const_string "V4SF")
4337 ]
4338 (const_string "SF"))
4339 ]
4340 (const_string "SF")))
4341 (set (attr "preferred_for_speed")
4342 (cond [(eq_attr "alternative" "9,14")
4343 (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC")
4344 (eq_attr "alternative" "10,15")
4345 (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
4346 ]
4347 (symbol_ref "true")))
;; Alternatives 16 and 17 replace the others when there are no hard SF
;; registers: exactly one of the two groups is enabled.
4348 (set (attr "enabled")
4349 (cond [(eq_attr "alternative" "16,17")
4350 (if_then_else
4351 (match_test "TARGET_HARD_SF_REGS")
4352 (symbol_ref "false")
4353 (const_string "*"))
4354 (not (match_test "TARGET_HARD_SF_REGS"))
4355 (symbol_ref "false")
4356 ]
4357 (const_string "*")))])
4358
;; Per-mode constraint fragment: "F" for HFmode, empty for BFmode.  It is
;; spliced into the source constraint "r<hfbfconstf>" of the HF/BF move
;; pattern, so only the HFmode variant accepts an FP constant there.
4359 (define_mode_attr hfbfconstf
4360 [(HF "F") (BF "")])
4361
;; Move pattern for the 16-bit FP modes of the HFBF iterator.
;; Alternatives 0-3 use general registers / memory and are output as
;; integer moves (movw, movl or movzwl, see the "type" and "mode"
;; attributes).  Alternative 4 loads an SSE constant.  Alternatives 5, 6
;; and 9 are SSE moves.  Alternatives 7, 8 and 10 (SSE <-> memory) are SSE
;; moves with TARGET_AVX512FP16 and pinsrw/pextrw otherwise.
;; Alternative 1 (FP-constant source) is disabled for BFmode.
4362 (define_insn "*mov<mode>_internal"
4363 [(set (match_operand:HFBF 0 "nonimmediate_operand"
4364 "=?r,?r,?r,?m ,Yv,v,?r,jm,m,?v,v")
4365 (match_operand:HFBF 1 "general_operand"
4366 "r ,F ,m ,r<hfbfconstf>,C ,v, v,v ,v,r ,m"))]
4367 "!(MEM_P (operands[0]) && MEM_P (operands[1]))
4368 && (lra_in_progress
4369 || reload_completed
4370 || !CONST_DOUBLE_P (operands[1])
4371 || (TARGET_SSE2
4372 && standard_sse_constant_p (operands[1], <MODE>mode) == 1)
4373 || memory_operand (operands[0], <MODE>mode))"
4374 {
4375 switch (get_attr_type (insn))
4376 {
4377 case TYPE_IMOVX:
4378 /* movzwl is faster than movw on p2 due to partial word stalls,
4379 though not as fast as an aligned movl. */
4380 return "movz{wl|x}\t{%1, %k0|%k0, %1}";
4381
4382 case TYPE_SSEMOV:
4383 return ix86_output_ssemov (insn, operands);
4384
4385 case TYPE_SSELOG1:
/* Either the constant load of alternative 4, or (without AVX512FP16)
   a word insert into / extract from element 0 of an SSE register. */
4386 if (satisfies_constraint_C (operands[1]))
4387 return standard_sse_constant_opcode (insn, operands);
4388
4389 if (SSE_REG_P (operands[0]))
4390 return "%vpinsrw\t{$0, %1, %d0|%d0, %1, 0}";
4391 else
4392 return "%vpextrw\t{$0, %1, %0|%0, %1, 0}";
4393
4394 default:
/* Remaining integer moves: 32-bit when the mode attribute says SI,
   16-bit otherwise. */
4395 if (get_attr_mode (insn) == MODE_SI)
4396 return "mov{l}\t{%k1, %k0|%k0, %k1}";
4397 else
4398 return "mov{w}\t{%1, %0|%0, %1}";
4399 }
4400 }
4401 [(set (attr "isa")
4402 (cond [(eq_attr "alternative" "4,5,6,9,10")
4403 (const_string "sse2")
4404 (eq_attr "alternative" "7")
4405 (const_string "sse4_noavx")
4406 (eq_attr "alternative" "8")
4407 (const_string "avx")
4408 ]
4409 (const_string "*")))
4410 (set (attr "addr")
4411 (if_then_else (eq_attr "alternative" "7")
4412 (const_string "gpr16")
4413 (const_string "*")))
4414 (set (attr "type")
4415 (cond [(eq_attr "alternative" "4")
4416 (const_string "sselog1")
4417 (eq_attr "alternative" "5,6,9")
4418 (const_string "ssemov")
4419 (eq_attr "alternative" "7,8,10")
4420 (if_then_else
4421 (match_test ("TARGET_AVX512FP16"))
4422 (const_string "ssemov")
4423 (const_string "sselog1"))
4424 (match_test "optimize_function_for_size_p (cfun)")
4425 (const_string "imov")
4426 (and (eq_attr "alternative" "0")
4427 (ior (not (match_test "TARGET_PARTIAL_REG_STALL"))
4428 (not (match_test "TARGET_HIMODE_MATH"))))
4429 (const_string "imov")
4430 (and (eq_attr "alternative" "1,2")
4431 (match_operand:HI 1 "aligned_operand"))
4432 (const_string "imov")
4433 (and (match_test "TARGET_MOVX")
4434 (eq_attr "alternative" "0,2"))
4435 (const_string "imovx")
4436 ]
4437 (const_string "imov")))
4438 (set (attr "prefix")
4439 (cond [(eq_attr "alternative" "4,5,6,7,8,9,10")
4440 (const_string "maybe_vex")
4441 ]
4442 (const_string "orig")))
4443 (set (attr "mode")
4444 (cond [(eq_attr "alternative" "4")
4445 (const_string "V4SF")
4446 (eq_attr "alternative" "6,9")
4447 (if_then_else
4448 (match_test "TARGET_AVX512FP16")
4449 (const_string "HI")
4450 (const_string "SI"))
4451 (eq_attr "alternative" "7,8,10")
4452 (if_then_else
4453 (match_test "TARGET_AVX512FP16")
4454 (const_string "HI")
4455 (const_string "TI"))
4456 (eq_attr "alternative" "5")
4457 (cond [(match_test "TARGET_AVX512VL")
4458 (const_string "V4SF")
4459 (match_test "TARGET_AVX512FP16")
4460 (const_string "HF")
4461 (match_test "TARGET_AVX512F")
4462 (const_string "SF")
4463 (match_test "TARGET_AVX")
4464 (const_string "V4SF")
4465 (ior (match_test "TARGET_SSE_PARTIAL_REG_DEPENDENCY")
4466 (match_test "TARGET_SSE_SPLIT_REGS"))
4467 (const_string "V4SF")
4468 ]
4469 (const_string "SF"))
4470 (eq_attr "type" "imovx")
4471 (const_string "SI")
4472 (and (eq_attr "alternative" "1,2")
4473 (match_operand:HI 1 "aligned_operand"))
4474 (const_string "SI")
4475 (and (eq_attr "alternative" "0")
4476 (ior (not (match_test "TARGET_PARTIAL_REG_STALL"))
4477 (not (match_test "TARGET_HIMODE_MATH"))))
4478 (const_string "SI")
4479 ]
4480 (const_string "HI")))
;; The FP-constant alternative 1 is switched off for BFmode.
4481 (set (attr "enabled")
4482 (cond [(and (match_test "<MODE>mode == BFmode")
4483 (eq_attr "alternative" "1"))
4484 (symbol_ref "false")
4485 ]
4486 (const_string "*")))])
4487
;; After reload, when ix86_standard_x87sse_constant_load_p accepts the
;; insn, replace a TF/XF/DF/SFmode load from memory into an FP register
;; by a direct set from the value that find_constant_src returns.
4488 (define_split
4489 [(set (match_operand 0 "any_fp_register_operand")
4490 (match_operand 1 "memory_operand"))]
4491 "reload_completed
4492 && (GET_MODE (operands[0]) == TFmode
4493 || GET_MODE (operands[0]) == XFmode
4494 || GET_MODE (operands[0]) == DFmode
4495 || GET_MODE (operands[0]) == SFmode)
4496 && ix86_standard_x87sse_constant_load_p (insn, operands[0])"
4497 [(set (match_dup 0) (match_dup 2))]
4498 "operands[2] = find_constant_src (curr_insn);")
4499
;; Same as the plain-load splitter, but for a float_extend of a memory
;; operand into a TF/XF/DFmode FP register: after reload, when
;; ix86_standard_x87sse_constant_load_p accepts the insn, the extending
;; load becomes a direct set from the value find_constant_src returns.
4500 (define_split
4501 [(set (match_operand 0 "any_fp_register_operand")
4502 (float_extend (match_operand 1 "memory_operand")))]
4503 "reload_completed
4504 && (GET_MODE (operands[0]) == TFmode
4505 || GET_MODE (operands[0]) == XFmode
4506 || GET_MODE (operands[0]) == DFmode)
4507 && ix86_standard_x87sse_constant_load_p (insn, operands[0])"
4508 [(set (match_dup 0) (match_dup 2))]
4509 "operands[2] = find_constant_src (curr_insn);")
4510
4511 ;; Split the load of -0.0 or -1.0 into fldz;fchs or fld1;fchs sequence
;; This applies after reload to immediates for which
;; standard_80387_constant_p returns 8 or 9.  The result is a load of the
;; positive constant followed by a negation of the destination register.
4512 (define_split
4513 [(set (match_operand:X87MODEF 0 "fp_register_operand")
4514 (match_operand:X87MODEF 1 "immediate_operand"))]
4515 "reload_completed
4516 && (standard_80387_constant_p (operands[1]) == 8
4517 || standard_80387_constant_p (operands[1]) == 9)"
4518 [(set (match_dup 0)(match_dup 1))
4519 (set (match_dup 0)
4520 (neg:X87MODEF (match_dup 0)))]
4521 {
/* Replace the negative constant by its positive counterpart: 0.0 for
   negative zero, 1.0 otherwise. */
4522 if (real_isnegzero (CONST_DOUBLE_REAL_VALUE (operands[1])))
4523 operands[1] = CONST0_RTX (<MODE>mode);
4524 else
4525 operands[1] = CONST1_RTX (<MODE>mode);
4526 })
4527
;; Exchange two XFmode x87 registers (both operands are read-write,
;; constraint "+f").  The fxch operand is whichever of the two registers
;; is not the stack top: operand 1 when STACK_TOP_P (operands[0]) holds,
;; operand 0 otherwise.
4528 (define_insn "*swapxf"
4529 [(set (match_operand:XF 0 "register_operand" "+f")
4530 (match_operand:XF 1 "register_operand" "+f"))
4531 (set (match_dup 1)
4532 (match_dup 0))]
4533 "TARGET_80387"
4534 {
4535 if (STACK_TOP_P (operands[0]))
4536 return "fxch\t%1";
4537 else
4538 return "fxch\t%0";
4539 }
4540 [(set_attr "type" "fxch")
4541 (set_attr "mode" "XF")])
4542 \f
4543
4544 ;; Zero extension instructions
4545
;; DImode -> TImode zero extension on 64-bit targets.  The insn is always
;; output as "#" and split after reload: split_double_mode breaks the
;; TImode destination into operands[3] and operands[4] (presumably the low
;; and high DImode halves -- confirm), then operand 1 is copied into
;; operands[3] and operands[4] is set to zero.
4546 (define_insn_and_split "zero_extendditi2"
4547 [(set (match_operand:TI 0 "nonimmediate_operand" "=r,o")
4548 (zero_extend:TI (match_operand:DI 1 "nonimmediate_operand" "rm,r")))]
4549 "TARGET_64BIT"
4550 "#"
4551 "&& reload_completed"
4552 [(set (match_dup 3) (match_dup 1))
4553 (set (match_dup 4) (const_int 0))]
4554 "split_double_mode (TImode, &operands[0], 1, &operands[3], &operands[4]);")
4555
;; Named SImode -> DImode zero-extension expander.  It has no condition
;; and no preparation code, so it simply emits the zero_extend set, which
;; is then matched by an insn pattern.
4556 (define_expand "zero_extendsidi2"
4557 [(set (match_operand:DI 0 "nonimmediate_operand")
4558 (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand")))])
4559
;; SImode -> DImode zero extension.
;; Alternative groups (see the "type" and "isa" attributes):
;;   0, 1, 2, 4  multi, output as "#" (0-2 are !x64 only)
;;   3           x64 integer form (imovx): a 32-bit mov, or lea when
;;               ix86_use_lea_for_mov asks for it
;;   5, 6        MMX movd
;;   7           general reg <- SSE reg: ssemov on 64-bit, multi otherwise
;;   8-11        SSE forms (movd, or pmovzxdq for SSE reg <- SSE reg)
;;   12, 13      mask-register forms (kmovd)
4560 (define_insn "*zero_extendsidi2"
4561 [(set (match_operand:DI 0 "nonimmediate_operand"
4562 "=r,?r,?o,r ,o,?*y,?!*y,$r,$v,$x,*x,*v,*r,*k")
4563 (zero_extend:DI
4564 (match_operand:SI 1 "x86_64_zext_operand"
4565 "0 ,rm,r ,rmWz,0,r ,m ,v ,r ,m ,*x,*v,*k,*km")))]
4566 ""
4567 {
4568 switch (get_attr_type (insn))
4569 {
4570 case TYPE_IMOVX:
4571 if (ix86_use_lea_for_mov (insn, operands))
4572 return "lea{l}\t{%E1, %k0|%k0, %E1}";
4573 else
4574 return "mov{l}\t{%1, %k0|%k0, %1}";
4575
4576 case TYPE_MULTI:
4577 return "#";
4578
4579 case TYPE_MMXMOV:
4580 return "movd\t{%1, %0|%0, %1}";
4581
4582 case TYPE_SSEMOV:
/* SSE reg <- SSE reg uses pmovzxdq; when either register satisfies
   EXT_REX_SSE_REG_P the template without the %v prefix and with the
   %t / %g operand modifiers is used instead. */
4583 if (SSE_REG_P (operands[0]) && SSE_REG_P (operands[1]))
4584 {
4585 if (EXT_REX_SSE_REG_P (operands[0])
4586 || EXT_REX_SSE_REG_P (operands[1]))
4587 return "vpmovzxdq\t{%t1, %g0|%g0, %t1}";
4588 else
4589 return "%vpmovzxdq\t{%1, %0|%0, %1}";
4590 }
4591
4592 if (GENERAL_REG_P (operands[0]))
4593 return "%vmovd\t{%1, %k0|%k0, %1}";
4594
4595 return "%vmovd\t{%1, %0|%0, %1}";
4596
4597 case TYPE_MSKMOV:
4598 return "kmovd\t{%1, %k0|%k0, %1}";
4599
4600 default:
4601 gcc_unreachable ();
4602 }
4603 }
4604 [(set (attr "isa")
4605 (cond [(eq_attr "alternative" "0,1,2")
4606 (const_string "nox64")
4607 (eq_attr "alternative" "3")
4608 (const_string "x64")
4609 (eq_attr "alternative" "7,8,9")
4610 (const_string "sse2")
4611 (eq_attr "alternative" "10")
4612 (const_string "sse4")
4613 (eq_attr "alternative" "11")
4614 (const_string "avx512f")
4615 (eq_attr "alternative" "12")
4616 (const_string "x64_avx512bw")
4617 (eq_attr "alternative" "13")
4618 (const_string "avx512bw")
4619 ]
4620 (const_string "*")))
4621 (set (attr "mmx_isa")
4622 (if_then_else (eq_attr "alternative" "5,6")
4623 (const_string "native")
4624 (const_string "*")))
4625 (set (attr "type")
4626 (cond [(eq_attr "alternative" "0,1,2,4")
4627 (const_string "multi")
4628 (eq_attr "alternative" "5,6")
4629 (const_string "mmxmov")
4630 (eq_attr "alternative" "7")
4631 (if_then_else (match_test "TARGET_64BIT")
4632 (const_string "ssemov")
4633 (const_string "multi"))
4634 (eq_attr "alternative" "8,9,10,11")
4635 (const_string "ssemov")
4636 (eq_attr "alternative" "12,13")
4637 (const_string "mskmov")
4638 ]
4639 (const_string "imovx")))
4640 (set (attr "prefix_extra")
4641 (if_then_else (eq_attr "alternative" "10,11")
4642 (const_string "1")
4643 (const_string "*")))
4644 (set (attr "prefix")
4645 (if_then_else (eq_attr "type" "ssemov")
4646 (const_string "maybe_vex")
4647 (const_string "orig")))
4648 (set (attr "prefix_0f")
4649 (if_then_else (eq_attr "type" "imovx")
4650 (const_string "0")
4651 (const_string "*")))
4652 (set (attr "mode")
4653 (cond [(eq_attr "alternative" "5,6")
4654 (const_string "DI")
4655 (and (eq_attr "alternative" "7")
4656 (match_test "TARGET_64BIT"))
4657 (const_string "TI")
4658 (eq_attr "alternative" "8,10,11")
4659 (const_string "TI")
4660 ]
4661 (const_string "SI")))
4662 (set (attr "preferred_for_speed")
4663 (cond [(eq_attr "alternative" "7")
4664 (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC")
4665 (eq_attr "alternative" "5,8")
4666 (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
4667 ]
4668 (symbol_ref "true")))])
4669
;; After reload, an SI -> DI zero extension with both operands in memory
;; is reduced to clearing operands[4], one of the two halves that
;; split_double_mode produces for the DImode destination.
;; NOTE(review): the other half is not written, so this presumably relies
;; on source and destination being the same location, as in the "o" <- "0"
;; alternative of *zero_extendsidi2 -- confirm.
4670 (define_split
4671 [(set (match_operand:DI 0 "memory_operand")
4672 (zero_extend:DI (match_operand:SI 1 "memory_operand")))]
4673 "reload_completed"
4674 [(set (match_dup 4) (const_int 0))]
4675 "split_double_mode (DImode, &operands[0], 1, &operands[3], &operands[4]);")
4676
;; On 32-bit targets, after reload, when source and destination are the
;; same general register (equal REGNO), the zero extension only needs to
;; clear operands[4], one of the two halves that split_double_mode
;; produces for the DImode destination.
4677 (define_split
4678 [(set (match_operand:DI 0 "general_reg_operand")
4679 (zero_extend:DI (match_operand:SI 1 "general_reg_operand")))]
4680 "!TARGET_64BIT && reload_completed
4681 && REGNO (operands[0]) == REGNO (operands[1])"
4682 [(set (match_dup 4) (const_int 0))]
4683 "split_double_mode (DImode, &operands[0], 1, &operands[3], &operands[4]);")
4684
;; General 32-bit case, after reload and excluding memory-to-memory:
;; split_double_mode breaks the DImode destination into operands[3] and
;; operands[4]; the SImode source is copied into operands[3] and
;; operands[4] is cleared.
4685 (define_split
4686 [(set (match_operand:DI 0 "nonimmediate_gr_operand")
4687 (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand")))]
4688 "!TARGET_64BIT && reload_completed
4689 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4690 [(set (match_dup 3) (match_dup 1))
4691 (set (match_dup 4) (const_int 0))]
4692 "split_double_mode (DImode, &operands[0], 1, &operands[3], &operands[4]);")
4693
;; Maps an integer mode to the ISA name used as the "isa" attribute of
;; the kmov (mask register) alternatives in the zero_extend patterns.
4694 (define_mode_attr kmov_isa
4695 [(QI "avx512dq") (HI "avx512f") (SI "avx512bw") (DI "avx512bw")])
4696
;; QI/HI (SWI12) -> DImode zero extension on 64-bit targets.
;; Alternative 0 is a movz into the 32-bit form (%k0) of the destination;
;; alternatives 1 and 2 are kmov forms involving mask registers and are
;; gated by the per-mode <kmov_isa> ISA.
4697 (define_insn "zero_extend<mode>di2"
4698 [(set (match_operand:DI 0 "register_operand" "=r,*r,*k")
4699 (zero_extend:DI
4700 (match_operand:SWI12 1 "nonimmediate_operand" "<r>m,*k,*km")))]
4701 "TARGET_64BIT"
4702 "@
4703 movz{<imodesuffix>l|x}\t{%1, %k0|%k0, %1}
4704 kmov<mskmodesuffix>\t{%1, %k0|%k0, %1}
4705 kmov<mskmodesuffix>\t{%1, %k0|%k0, %1}"
4706 [(set_attr "isa" "*,<kmov_isa>,<kmov_isa>")
4707 (set_attr "type" "imovx,mskmov,mskmov")
4708 (set_attr "mode" "SI,<MODE>,<MODE>")])
4709
;; QI/HI (SWI12) -> SImode zero-extension expander.  When
;; TARGET_ZERO_EXTEND_WITH_AND is set and the function is optimized for
;; speed, the source is forced into a register and the "_and" variant
;; (which clobbers the flags) is emitted instead; otherwise the plain
;; zero_extend template is emitted.
4710 (define_expand "zero_extend<mode>si2"
4711 [(set (match_operand:SI 0 "register_operand")
4712 (zero_extend:SI (match_operand:SWI12 1 "nonimmediate_operand")))]
4713 ""
4714 {
4715 if (TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun))
4716 {
4717 operands[1] = force_reg (<MODE>mode, operands[1]);
4718 emit_insn (gen_zero_extend<mode>si2_and (operands[0], operands[1]));
4719 DONE;
4720 }
4721 })
4722
;; QI/HI (SWI12) -> SImode zero extension for TARGET_ZERO_EXTEND_WITH_AND
;; when optimizing for speed.  The insn is always output as "#" and split
;; after reload:
;;  - when the source is the destination register itself, into an AND of
;;    the destination with the mode mask (operands[2]);
;;  - otherwise, into a clear of the destination followed by a
;;    strict_low_part store of the source into its low part.
4723 (define_insn_and_split "zero_extend<mode>si2_and"
4724 [(set (match_operand:SI 0 "register_operand" "=r,?&<r>")
4725 (zero_extend:SI
4726 (match_operand:SWI12 1 "nonimmediate_operand" "0,<r>m")))
4727 (clobber (reg:CC FLAGS_REG))]
4728 "TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun)"
4729 "#"
4730 "&& reload_completed"
4731 [(parallel [(set (match_dup 0) (and:SI (match_dup 0) (match_dup 2)))
4732 (clobber (reg:CC FLAGS_REG))])]
4733 {
/* Source is memory or a different register: clear the destination and
   then write only its low part.  The insns are emitted by hand and the
   AND template above is not used (DONE). */
4734 if (!REG_P (operands[1])
4735 || REGNO (operands[0]) != REGNO (operands[1]))
4736 {
4737 ix86_expand_clear (operands[0]);
4738
4739 gcc_assert (!TARGET_PARTIAL_REG_STALL);
4740 emit_insn (gen_rtx_SET
4741 (gen_rtx_STRICT_LOW_PART
4742 (VOIDmode, gen_lowpart (<MODE>mode, operands[0])),
4743 operands[1]));
4744 DONE;
4745 }
4746
/* Same register: mask the destination in place via the AND template. */
4747 operands[2] = GEN_INT (GET_MODE_MASK (<MODE>mode));
4748 }
4749 [(set_attr "type" "alu1")
4750 (set_attr "mode" "SI")])
4751
;; QI/HI (SWI12) -> SImode zero extension for the case not covered by the
;; "_and" variant (the condition is the exact negation of that pattern's
;; condition).  Alternative 0 is a movz; alternatives 1 and 2 are kmov
;; forms involving mask registers, gated by the per-mode <kmov_isa> ISA.
4752 (define_insn "*zero_extend<mode>si2"
4753 [(set (match_operand:SI 0 "register_operand" "=r,*r,*k")
4754 (zero_extend:SI
4755 (match_operand:SWI12 1 "nonimmediate_operand" "<r>m,*k,*km")))]
4756 "!(TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun))"
4757 "@
4758 movz{<imodesuffix>l|x}\t{%1, %0|%0, %1}
4759 kmov<mskmodesuffix>\t{%1, %0|%0, %1}
4760 kmov<mskmodesuffix>\t{%1, %0|%0, %1}"
4761 [(set_attr "isa" "*,<kmov_isa>,<kmov_isa>")
4762 (set_attr "type" "imovx,mskmov,mskmov")
4763 (set_attr "mode" "SI,<MODE>,<MODE>")])
4764
;; QImode -> HImode zero-extension expander.  When
;; TARGET_ZERO_EXTEND_WITH_AND is set and the function is optimized for
;; speed, the source is forced into a register and the "_and" variant
;; (which clobbers the flags) is emitted instead; otherwise the plain
;; zero_extend template is emitted.
4765 (define_expand "zero_extendqihi2"
4766 [(set (match_operand:HI 0 "register_operand")
4767 (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand")))]
4768 ""
4769 {
4770 if (TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun))
4771 {
4772 operands[1] = force_reg (QImode, operands[1]);
4773 emit_insn (gen_zero_extendqihi2_and (operands[0], operands[1]));
4774 DONE;
4775 }
4776 })
4777
;; QImode -> HImode zero extension for TARGET_ZERO_EXTEND_WITH_AND when
;; optimizing for speed.  The insn is always output as "#" and split after
;; reload:
;;  - when the source is the destination register itself, into an SImode
;;    AND of the destination with 255 (operand 0 is re-expressed in SImode
;;    for that);
;;  - otherwise, into a clear of the destination followed by a
;;    strict_low_part store of the source into its low byte.
4778 (define_insn_and_split "zero_extendqihi2_and"
4779 [(set (match_operand:HI 0 "register_operand" "=r,?&q")
4780 (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "0,qm")))
4781 (clobber (reg:CC FLAGS_REG))]
4782 "TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun)"
4783 "#"
4784 "&& reload_completed"
4785 [(parallel [(set (match_dup 0) (and:SI (match_dup 0) (const_int 255)))
4786 (clobber (reg:CC FLAGS_REG))])]
4787 {
/* Source is memory or a different register: clear the destination and
   then write only its low byte.  The insns are emitted by hand and the
   AND template above is not used (DONE). */
4788 if (!REG_P (operands[1])
4789 || REGNO (operands[0]) != REGNO (operands[1]))
4790 {
4791 ix86_expand_clear (operands[0]);
4792
4793 gcc_assert (!TARGET_PARTIAL_REG_STALL);
4794 emit_insn (gen_rtx_SET
4795 (gen_rtx_STRICT_LOW_PART
4796 (VOIDmode, gen_lowpart (QImode, operands[0])),
4797 operands[1]));
4798 DONE;
4799 }
4800
/* Same register: the AND template works on the SImode view of it. */
4801 operands[0] = gen_lowpart (SImode, operands[0]);
4802 }
4803 [(set_attr "type" "alu1")
4804 (set_attr "mode" "SI")])
4805
4806 ; zero extend to SImode to avoid partial register stalls
;; QImode -> HImode zero extension for the case not covered by the "_and"
;; variant (the condition is the exact negation of that pattern's
;; condition).  Alternative 0 is a movz into the 32-bit form (%k0) of the
;; destination; alternatives 1 and 2 are kmovb forms involving mask
;; registers and require avx512dq.
4807 (define_insn "*zero_extendqihi2"
4808 [(set (match_operand:HI 0 "register_operand" "=r,*r,*k")
4809 (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "qm,*k,*km")))]
4810 "!(TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun))"
4811 "@
4812 movz{bl|x}\t{%1, %k0|%k0, %1}
4813 kmovb\t{%1, %k0|%k0, %1}
4814 kmovb\t{%1, %0|%0, %1}"
4815 [(set_attr "isa" "*,avx512dq,avx512dq")
4816 (set_attr "type" "imovx,mskmov,mskmov")
4817 (set_attr "mode" "SI,QI,QI")])
4818
4819 ;; Transform xorl; mov[bw] (set strict_low_part) into movz[bw]l.
;; The first insn clears a 32/64-bit general register (with a flags
;; clobber) and the second stores a QI/HI value into the low part of the
;; same register (equal REGNO).  The pair is replaced by one zero_extend.
;; For an SImode destination the replacement is skipped when
;; TARGET_ZERO_EXTEND_WITH_AND is set and the function is optimized for
;; speed.
4820 (define_peephole2
4821 [(parallel [(set (match_operand:SWI48 0 "general_reg_operand")
4822 (const_int 0))
4823 (clobber (reg:CC FLAGS_REG))])
4824 (set (strict_low_part (match_operand:SWI12 1 "general_reg_operand"))
4825 (match_operand:SWI12 2 "nonimmediate_operand"))]
4826 "REGNO (operands[0]) == REGNO (operands[1])
4827 && (<SWI48:MODE>mode != SImode
4828 || !TARGET_ZERO_EXTEND_WITH_AND
4829 || !optimize_function_for_speed_p (cfun))"
4830 [(set (match_dup 0) (zero_extend:SWI48 (match_dup 2)))])
4831
4832 ;; Likewise, but preserving FLAGS_REG.
;; Here the clearing insn is a plain set to zero without a flags clobber.
;; It is followed by a strict_low_part store into the same register, and
;; the pair is replaced by a single zero_extend under the same condition
;; as the flags-clobbering variant.
4833 (define_peephole2
4834 [(set (match_operand:SWI48 0 "general_reg_operand") (const_int 0))
4835 (set (strict_low_part (match_operand:SWI12 1 "general_reg_operand"))
4836 (match_operand:SWI12 2 "nonimmediate_operand"))]
4837 "REGNO (operands[0]) == REGNO (operands[1])
4838 && (<SWI48:MODE>mode != SImode
4839 || !TARGET_ZERO_EXTEND_WITH_AND
4840 || !optimize_function_for_speed_p (cfun))"
4841 [(set (match_dup 0) (zero_extend:SWI48 (match_dup 2)))])
4842 \f
4843 ;; Sign extension instructions
4844
;; Expander for SImode -> DImode sign extension.  When !TARGET_64BIT it
;; emits extendsidi2_1 and finishes; otherwise the expander's own
;; sign_extend pattern is emitted.
4845 (define_expand "extendsidi2"
4846 [(set (match_operand:DI 0 "register_operand")
4847 (sign_extend:DI (match_operand:SI 1 "register_operand")))]
4848 ""
4849 {
4850 if (!TARGET_64BIT)
4851 {
4852 emit_insn (gen_extendsidi2_1 (operands[0], operands[1]));
4853 DONE;
4854 }
4855 })
4856
;; TARGET_64BIT SImode -> DImode sign extension.  Alternative 0 (source
;; tied to the "a"-constrained destination) emits cltq/cdqe and has no
;; modrm byte; alternative 1 emits movs{lq|x} from a register or memory.
4857 (define_insn "*extendsidi2_rex64"
4858 [(set (match_operand:DI 0 "register_operand" "=*a,r")
4859 (sign_extend:DI (match_operand:SI 1 "nonimmediate_operand" "*0,rm")))]
4860 "TARGET_64BIT"
4861 "@
4862 {cltq|cdqe}
4863 movs{lq|x}\t{%1, %0|%0, %1}"
4864 [(set_attr "type" "imovx")
4865 (set_attr "mode" "DI")
4866 (set_attr "prefix_0f" "0")
4867 (set_attr "modrm" "0,1")])
4868
;; !TARGET_64BIT SImode -> DImode sign extension.  The output template is
;; "#", so the insn has to be split before final output.  It clobbers
;; FLAGS_REG; only the last alternative (offsettable memory destination)
;; asks for a real SImode scratch register, the others use "X".
4869 (define_insn "extendsidi2_1"
4870 [(set (match_operand:DI 0 "nonimmediate_operand" "=*A,r,?r,?*o")
4871 (sign_extend:DI (match_operand:SI 1 "register_operand" "0,0,r,r")))
4872 (clobber (reg:CC FLAGS_REG))
4873 (clobber (match_scratch:SI 2 "=X,X,X,&r"))]
4874 "!TARGET_64BIT"
4875 "#")
4876
;; TARGET_64BIT DImode -> TImode sign extension, with the same operand
;; layout and constraints as extendsidi2_1.  The output template is "#",
;; so the insn has to be split before final output.
4877 (define_insn "extendditi2"
4878 [(set (match_operand:TI 0 "nonimmediate_operand" "=*A,r,?r,?*o")
4879 (sign_extend:TI (match_operand:DI 1 "register_operand" "0,0,r,r")))
4880 (clobber (reg:CC FLAGS_REG))
4881 (clobber (match_scratch:DI 2 "=X,X,X,&r"))]
4882 "TARGET_64BIT"
4883 "#")
4884
4885 ;; Split the memory case. If the source register doesn't die, it will stay
4886 ;; this way, if it does die, following peephole2s take care of it.
;; Post-reload split of a double-word sign extension into memory.
;; split_double_mode yields the two word-sized halves of the destination
;; in operands[3] and operands[4].  Operand 1 is stored to operands[3],
;; the sign word is computed in scratch operand 2 by an arithmetic right
;; shift by (mode bits - 1), and the scratch is stored to operands[4].
4887 (define_split
4888 [(set (match_operand:<DWI> 0 "memory_operand")
4889 (sign_extend:<DWI> (match_operand:DWIH 1 "register_operand")))
4890 (clobber (reg:CC FLAGS_REG))
4891 (clobber (match_operand:DWIH 2 "register_operand"))]
4892 "reload_completed"
4893 [(const_int 0)]
4894 {
4895 rtx bits = GEN_INT (<MODE_SIZE> * BITS_PER_UNIT - 1);
4896
4897 split_double_mode (<DWI>mode, &operands[0], 1, &operands[3], &operands[4]);
4898
4899 emit_move_insn (operands[3], operands[1]);
4900
4901 /* Generate a cltd if possible and doing so is profitable. */
4902 if ((optimize_function_for_size_p (cfun) || TARGET_USE_CLTD)
4903 && REGNO (operands[1]) == AX_REG
4904 && REGNO (operands[2]) == DX_REG)
4905 {
4906 emit_insn (gen_ashr<mode>3_cvt (operands[2], operands[1], bits));
4907 }
4908 else
4909 {
4910 emit_move_insn (operands[2], operands[1]);
4911 emit_insn (gen_ashr<mode>3_cvt (operands[2], operands[2], bits));
4912 }
4913 emit_move_insn (operands[4], operands[2]);
4914 DONE;
4915 })
4916
4917 ;; Peepholes for the case where the source register does die, after
4918 ;; being split with the above splitter.
;; Matched sequence:  mem0 = r1;  r2 = r1;  r2 >>= (mode bits - 1);
;; mem3 = r2,  where r1 and r2 are different hard registers, r1 is dead
;; per peep2_reg_dead_p (2, ...), r2 is dead per peep2_reg_dead_p (4, ...)
;; and r2 is not mentioned in the address of mem3.  The replacement
;; shifts r1 in place and stores it, dropping the copy into r2.
4919 (define_peephole2
4920 [(set (match_operand:DWIH 0 "memory_operand")
4921 (match_operand:DWIH 1 "general_reg_operand"))
4922 (set (match_operand:DWIH 2 "general_reg_operand") (match_dup 1))
4923 (parallel [(set (match_dup 2)
4924 (ashiftrt:DWIH (match_dup 2)
4925 (match_operand 4 "const_int_operand")))
4926 (clobber (reg:CC FLAGS_REG))])
4927 (set (match_operand:DWIH 3 "memory_operand") (match_dup 2))]
4928 "REGNO (operands[1]) != REGNO (operands[2])
4929 && INTVAL (operands[4]) == (<MODE_SIZE> * BITS_PER_UNIT - 1)
4930 && peep2_reg_dead_p (2, operands[1])
4931 && peep2_reg_dead_p (4, operands[2])
4932 && !reg_mentioned_p (operands[2], operands[3])"
4933 [(set (match_dup 0) (match_dup 1))
4934 (parallel [(set (match_dup 1) (ashiftrt:DWIH (match_dup 1) (match_dup 4)))
4935 (clobber (reg:CC FLAGS_REG))])
4936 (set (match_dup 3) (match_dup 1))])
4937
;; Matched sequence:  mem0 = ax;  dx = ax >> (mode bits - 1);  mem3 = dx,
;; i.e. the AX -> DX form produced by the splitter above.  When not
;; optimizing for size, and AX and DX are both dead afterwards and DX is
;; not mentioned in mem3, shift AX in place and store it instead.
4938 (define_peephole2
4939 [(set (match_operand:DWIH 0 "memory_operand")
4940 (match_operand:DWIH 1 "general_reg_operand"))
4941 (parallel [(set (match_operand:DWIH 2 "general_reg_operand")
4942 (ashiftrt:DWIH (match_dup 1)
4943 (match_operand 4 "const_int_operand")))
4944 (clobber (reg:CC FLAGS_REG))])
4945 (set (match_operand:DWIH 3 "memory_operand") (match_dup 2))]
4946 "/* cltd is shorter than sarl $31, %eax */
4947 !optimize_function_for_size_p (cfun)
4948 && REGNO (operands[1]) == AX_REG
4949 && REGNO (operands[2]) == DX_REG
4950 && INTVAL (operands[4]) == (<MODE_SIZE> * BITS_PER_UNIT - 1)
4951 && peep2_reg_dead_p (2, operands[1])
4952 && peep2_reg_dead_p (3, operands[2])
4953 && !reg_mentioned_p (operands[2], operands[3])"
4954 [(set (match_dup 0) (match_dup 1))
4955 (parallel [(set (match_dup 1) (ashiftrt:DWIH (match_dup 1) (match_dup 4)))
4956 (clobber (reg:CC FLAGS_REG))])
4957 (set (match_dup 3) (match_dup 1))])
4958
4959 ;; Extend to register case. Optimize case where source and destination
4960 ;; registers match and cases where we can use cltd.
;; Post-reload split of a double-word sign extension into registers.
;; split_double_mode yields the two word-sized halves of the destination
;; in operands[3] and operands[4].  Operand 1 is copied to operands[3]
;; unless they are already the same hard register.  If the AX -> DX form
;; is allowed and applicable, a single shift from AX into operands[4] is
;; emitted; otherwise operand 1 is copied to operands[4] (if different)
;; and shifted there by (mode bits - 1).
4961 (define_split
4962 [(set (match_operand:<DWI> 0 "register_operand")
4963 (sign_extend:<DWI> (match_operand:DWIH 1 "register_operand")))
4964 (clobber (reg:CC FLAGS_REG))
4965 (clobber (match_scratch:DWIH 2))]
4966 "reload_completed"
4967 [(const_int 0)]
4968 {
4969 rtx bits = GEN_INT (<MODE_SIZE> * BITS_PER_UNIT - 1);
4970
4971 split_double_mode (<DWI>mode, &operands[0], 1, &operands[3], &operands[4]);
4972
4973 if (REGNO (operands[3]) != REGNO (operands[1]))
4974 emit_move_insn (operands[3], operands[1]);
4975
4976 rtx src = operands[1];
4977 if (REGNO (operands[3]) == AX_REG)
4978 src = operands[3];
4979
4980 /* Generate a cltd if possible and doing so is profitable. */
4981 if ((optimize_function_for_size_p (cfun) || TARGET_USE_CLTD)
4982 && REGNO (src) == AX_REG
4983 && REGNO (operands[4]) == DX_REG)
4984 {
4985 emit_insn (gen_ashr<mode>3_cvt (operands[4], src, bits));
4986 DONE;
4987 }
4988
4989 if (REGNO (operands[4]) != REGNO (operands[1]))
4990 emit_move_insn (operands[4], operands[1]);
4991
4992 emit_insn (gen_ashr<mode>3_cvt (operands[4], operands[4], bits));
4993 DONE;
4994 })
4995
;; TARGET_64BIT sign extension of an SWI12-mode register or memory
;; operand to DImode, emitted as movs with the source-size suffix.
4996 (define_insn "extend<mode>di2"
4997 [(set (match_operand:DI 0 "register_operand" "=r")
4998 (sign_extend:DI
4999 (match_operand:SWI12 1 "nonimmediate_operand" "<r>m")))]
5000 "TARGET_64BIT"
5001 "movs{<imodesuffix>q|x}\t{%1, %0|%0, %1}"
5002 [(set_attr "type" "imovx")
5003 (set_attr "mode" "DI")])
5004
;; HImode -> SImode sign extension.  The output is chosen by the
;; prefix_0f attribute: value 0 (alternative 0, cpu other than k6) emits
;; cwtl/cwde, anything else emits movs{wl|x}.  The znver1_decode and modrm
;; attributes are derived from the same prefix_0f value.
5005 (define_insn "extendhisi2"
5006 [(set (match_operand:SI 0 "register_operand" "=*a,r")
5007 (sign_extend:SI (match_operand:HI 1 "nonimmediate_operand" "*0,rm")))]
5008 ""
5009 {
5010 switch (get_attr_prefix_0f (insn))
5011 {
5012 case 0:
5013 return "{cwtl|cwde}";
5014 default:
5015 return "movs{wl|x}\t{%1, %0|%0, %1}";
5016 }
5017 }
5018 [(set_attr "type" "imovx")
5019 (set_attr "mode" "SI")
5020 (set (attr "prefix_0f")
5021 ;; movsx is short decodable while cwtl is vector decoded.
5022 (if_then_else (and (eq_attr "cpu" "!k6")
5023 (eq_attr "alternative" "0"))
5024 (const_string "0")
5025 (const_string "1")))
5026 (set (attr "znver1_decode")
5027 (if_then_else (eq_attr "prefix_0f" "0")
5028 (const_string "double")
5029 (const_string "direct")))
5030 (set (attr "modrm")
5031 (if_then_else (eq_attr "prefix_0f" "0")
5032 (const_string "0")
5033 (const_string "1")))])
5034
;; TARGET_64BIT variant of extendhisi2 whose SImode result is
;; zero-extended to DImode.  Same cwtl/cwde versus movs{wl|x} selection
;; through prefix_0f; the movs form writes the %k0 view of the
;; destination.
5035 (define_insn "*extendhisi2_zext"
5036 [(set (match_operand:DI 0 "register_operand" "=*a,r")
5037 (zero_extend:DI
5038 (sign_extend:SI
5039 (match_operand:HI 1 "nonimmediate_operand" "*0,rm"))))]
5040 "TARGET_64BIT"
5041 {
5042 switch (get_attr_prefix_0f (insn))
5043 {
5044 case 0:
5045 return "{cwtl|cwde}";
5046 default:
5047 return "movs{wl|x}\t{%1, %k0|%k0, %1}";
5048 }
5049 }
5050 [(set_attr "type" "imovx")
5051 (set_attr "mode" "SI")
5052 (set (attr "prefix_0f")
5053 ;; movsx is short decodable while cwtl is vector decoded.
5054 (if_then_else (and (eq_attr "cpu" "!k6")
5055 (eq_attr "alternative" "0"))
5056 (const_string "0")
5057 (const_string "1")))
5058 (set (attr "modrm")
5059 (if_then_else (eq_attr "prefix_0f" "0")
5060 (const_string "0")
5061 (const_string "1")))])
5062
;; QImode -> SImode sign extension of a "q" register or memory operand,
;; always emitted as movs{bl|x}.
5063 (define_insn "extendqisi2"
5064 [(set (match_operand:SI 0 "register_operand" "=r")
5065 (sign_extend:SI (match_operand:QI 1 "nonimmediate_operand" "qm")))]
5066 ""
5067 "movs{bl|x}\t{%1, %0|%0, %1}"
5068 [(set_attr "type" "imovx")
5069 (set_attr "mode" "SI")])
5070
;; TARGET_64BIT variant of extendqisi2 whose SImode result is
;; zero-extended to DImode; the movs writes the %k0 view of the
;; destination.
5071 (define_insn "*extendqisi2_zext"
5072 [(set (match_operand:DI 0 "register_operand" "=r")
5073 (zero_extend:DI
5074 (sign_extend:SI (match_operand:QI 1 "nonimmediate_operand" "qm"))))]
5075 "TARGET_64BIT"
5076 "movs{bl|x}\t{%1, %k0|%k0, %1}"
5077 [(set_attr "type" "imovx")
5078 (set_attr "mode" "SI")])
5079
;; QImode -> HImode sign extension.  prefix_0f value 0 (alternative 0,
;; cpu other than k6) emits cbtw/cbw; otherwise movs{bw|x} is emitted.
;; The modrm attribute follows the same prefix_0f value.
5080 (define_insn "extendqihi2"
5081 [(set (match_operand:HI 0 "register_operand" "=*a,r")
5082 (sign_extend:HI (match_operand:QI 1 "nonimmediate_operand" "*0,qm")))]
5083 ""
5084 {
5085 switch (get_attr_prefix_0f (insn))
5086 {
5087 case 0:
5088 return "{cbtw|cbw}";
5089 default:
5090 return "movs{bw|x}\t{%1, %0|%0, %1}";
5091 }
5092 }
5093 [(set_attr "type" "imovx")
5094 (set_attr "mode" "HI")
5095 (set (attr "prefix_0f")
5096 ;; movsx is short decodable while cwtl is vector decoded.
5097 (if_then_else (and (eq_attr "cpu" "!k6")
5098 (eq_attr "alternative" "0"))
5099 (const_string "0")
5100 (const_string "1")))
5101 (set (attr "modrm")
5102 (if_then_else (eq_attr "prefix_0f" "0")
5103 (const_string "0")
5104 (const_string "1")))])
5105
;; Sign-extend, to an SWI24 mode, the QImode subreg of an extract of
;; width 8 at bit position 8 of operand 1.  The source is printed with
;; the %h operand modifier and must satisfy the "Q" constraint; the
;; destination uses the "R" constraint.
5106 (define_insn "*extendqi<SWI24:mode>_ext_1"
5107 [(set (match_operand:SWI24 0 "register_operand" "=R")
5108 (sign_extend:SWI24
5109 (subreg:QI
5110 (match_operator:SWI248 2 "extract_operator"
5111 [(match_operand 1 "int248_register_operand" "Q")
5112 (const_int 8)
5113 (const_int 8)]) 0)))]
5114 ""
5115 "movs{b<SWI24:imodesuffix>|x}\t{%h1, %0|%0, %h1}"
5116 [(set_attr "type" "imovx")
5117 (set_attr "mode" "<SWI24:MODE>")])
5118 \f
5119 ;; Conversions between float and double.
5120
5121 ;; These are all no-ops in the model used for the 80387.
5122 ;; So just emit moves.
5123
5124 ;; %%% Kill these when call knows how to work out a DFmode push earlier.
;; Post-reload split of a DFmode push of a float_extend from an x87
;; register: adjust the stack pointer by -8 explicitly, then store the
;; extended value through (mem:DF sp).
5125 (define_split
5126 [(set (match_operand:DF 0 "push_operand")
5127 (float_extend:DF (match_operand:SF 1 "fp_register_operand")))]
5128 "reload_completed"
5129 [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (const_int -8)))
5130 (set (mem:DF (reg:P SP_REG)) (float_extend:DF (match_dup 1)))])
5131
;; Post-reload split of an XFmode push of a float_extend from an x87
;; register: adjust the stack pointer by -GET_MODE_SIZE (XFmode)
;; (operand 2), then store the extended value through (mem:XF sp).
5132 (define_split
5133 [(set (match_operand:XF 0 "push_operand")
5134 (float_extend:XF (match_operand:MODEF 1 "fp_register_operand")))]
5135 "reload_completed"
5136 [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (match_dup 2)))
5137 (set (mem:XF (reg:P SP_REG)) (float_extend:XF (match_dup 1)))]
5138 "operands[2] = GEN_INT (-GET_MODE_SIZE (XFmode));")
5139
;; Expander for SFmode -> DFmode extension.  A CONST_DOUBLE source is
;; handled specially: if x87 code may be used (!TARGET_SSE2 or
;; TARGET_MIX_SSE_I387) and it is a standard 80387 constant, the constant
;; is extended at compile time and emitted as a plain move; otherwise it
;; is forced into the constant pool as an SFmode memory operand.
5140 (define_expand "extendsfdf2"
5141 [(set (match_operand:DF 0 "nonimm_ssenomem_operand")
5142 (float_extend:DF (match_operand:SF 1 "general_operand")))]
5143 "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)"
5144 {
5145 /* ??? Needed for compress_float_constant since all fp constants
5146 are TARGET_LEGITIMATE_CONSTANT_P. */
5147 if (CONST_DOUBLE_P (operands[1]))
5148 {
5149 if ((!TARGET_SSE2 || TARGET_MIX_SSE_I387)
5150 && standard_80387_constant_p (operands[1]) > 0)
5151 {
5152 operands[1] = simplify_const_unary_operation
5153 (FLOAT_EXTEND, DFmode, operands[1], SFmode);
5154 emit_move_insn_1 (operands[0], operands[1]);
5155 DONE;
5156 }
5157 operands[1] = validize_mem (force_const_mem (SFmode, operands[1]));
5158 }
5159 })
5160
;; SFmode -> DFmode extension.  Alternatives 0 and 1 are x87 moves output
;; by output_387_reg_move; alternatives 2 and 3 emit cvtss2sd from a
;; register and from memory respectively.  The "enabled" attribute keeps
;; the SSE alternatives only when TARGET_SSE2 && TARGET_SSE_MATH, and in
;; that case keeps the x87 alternatives only with TARGET_MIX_SSE_I387.
5161 (define_insn "*extendsfdf2"
5162 [(set (match_operand:DF 0 "nonimm_ssenomem_operand" "=f,m,v,v")
5163 (float_extend:DF
5164 (match_operand:SF 1 "nonimmediate_operand" "fm,f,v,m")))]
5165 "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)"
5166 {
5167 switch (which_alternative)
5168 {
5169 case 0:
5170 case 1:
5171 return output_387_reg_move (insn, operands);
5172
5173 case 2:
5174 return "%vcvtss2sd\t{%d1, %0|%0, %d1}";
5175 case 3:
5176 return "%vcvtss2sd\t{%1, %d0|%d0, %1}";
5177
5178 default:
5179 gcc_unreachable ();
5180 }
5181 }
5182 [(set_attr "type" "fmov,fmov,ssecvt,ssecvt")
5183 (set_attr "avx_partial_xmm_update" "false,false,false,true")
5184 (set_attr "prefix" "orig,orig,maybe_vex,maybe_vex")
5185 (set_attr "mode" "SF,XF,DF,DF")
5186 (set (attr "enabled")
5187 (if_then_else
5188 (match_test ("TARGET_SSE2 && TARGET_SSE_MATH"))
5189 (if_then_else
5190 (eq_attr "alternative" "0,1")
5191 (symbol_ref "TARGET_MIX_SSE_I387")
5192 (symbol_ref "true"))
5193 (if_then_else
5194 (eq_attr "alternative" "0,1")
5195 (symbol_ref "true")
5196 (symbol_ref "false"))))])
5197
5198 /* For converting SF(xmm2) to DF(xmm1), use the following code instead of
5199 cvtss2sd:
5200 unpcklps xmm2,xmm2 ; packed conversion might crash on signaling NaNs
5201 cvtps2pd xmm2,xmm1
5202 We do the conversion post reload to avoid producing 128-bit spills
5203 that might lead to an ICE on 32-bit targets. The sequence is unlikely to
5204 combine anyway. */
;; Post-reload split, used with TARGET_USE_VECTOR_FP_CONVERTS when
;; optimizing the insn for speed and the destination is either not an
;; extended SSE register or TARGET_AVX512VL is available.
;; operands[2] and operands[3] start out as the V2DF and V4SF views of the
;; destination register.  The preparation code emits the insn(s) that set
;; up the V4SF source vector; the split pattern then converts elements 0
;; and 1 of operands[3] into operands[2].
5205 (define_split
5206 [(set (match_operand:DF 0 "sse_reg_operand")
5207 (float_extend:DF
5208 (match_operand:SF 1 "nonimmediate_operand")))]
5209 "TARGET_USE_VECTOR_FP_CONVERTS
5210 && optimize_insn_for_speed_p ()
5211 && reload_completed
5212 && (!EXT_REX_SSE_REG_P (operands[0])
5213 || TARGET_AVX512VL)"
5214 [(set (match_dup 2)
5215 (float_extend:V2DF
5216 (vec_select:V2SF
5217 (match_dup 3)
5218 (parallel [(const_int 0) (const_int 1)]))))]
5219 {
5220 operands[2] = lowpart_subreg (V2DFmode, operands[0], DFmode);
5221 operands[3] = lowpart_subreg (V4SFmode, operands[0], DFmode);
5222 /* Use movss for loading from memory, unpcklps reg, reg for registers.
5223 Try to avoid move when unpacking can be done in source. */
5224 if (REG_P (operands[1]))
5225 {
5226 /* If it is unsafe to overwrite upper half of source, we need
5227 to move to destination and unpack there. */
5228 if (REGNO (operands[0]) != REGNO (operands[1])
5229 || (EXT_REX_SSE_REG_P (operands[1]) && !TARGET_AVX512VL))
5230 {
5231 rtx tmp = lowpart_subreg (SFmode, operands[0], DFmode);
5232 emit_move_insn (tmp, operands[1]);
5233 }
5234 else
5235 operands[3] = lowpart_subreg (V4SFmode, operands[1], SFmode);
5236 /* FIXME: vec_interleave_lowv4sf for AVX512VL should allow
5237 =v, v, then vbroadcastss will be only needed for AVX512F without
5238 AVX512VL. */
5239 if (!EXT_REX_SSE_REGNO_P (REGNO (operands[3])))
5240 emit_insn (gen_vec_interleave_lowv4sf (operands[3], operands[3],
5241 operands[3]));
5242 else
5243 {
5244 rtx tmp = lowpart_subreg (V16SFmode, operands[3], V4SFmode);
5245 emit_insn (gen_avx512f_vec_dupv16sf_1 (tmp, tmp));
5246 }
5247 }
5248 else
5249 emit_insn (gen_vec_setv4sf_0 (operands[3],
5250 CONST0_RTX (V4SFmode), operands[1]));
5251 })
5252
5253 ;; It's more profitable to split and then extend in the same register.
;; With TARGET_SPLIT_MEM_OPND_FOR_FP_CONVERTS and when optimizing the insn
;; for speed, turn a conversion from memory into a load into the SFmode
;; lowpart of the destination (operand 2) followed by a register-to-
;; register float_extend on that same register.
5254 (define_peephole2
5255 [(set (match_operand:DF 0 "sse_reg_operand")
5256 (float_extend:DF
5257 (match_operand:SF 1 "memory_operand")))]
5258 "TARGET_SPLIT_MEM_OPND_FOR_FP_CONVERTS
5259 && optimize_insn_for_speed_p ()"
5260 [(set (match_dup 2) (match_dup 1))
5261 (set (match_dup 0) (float_extend:DF (match_dup 2)))]
5262 "operands[2] = lowpart_subreg (SFmode, operands[0], DFmode);")
5263
5264 ;; Break partial SSE register dependency stall. This splitter should split
5265 ;; late in the pass sequence (after register rename pass), so allocated
5266 ;; registers won't change anymore
5267
;; Non-AVX only, after epilogue_completed, when optimizing the function
;; for speed and TARGET_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY is set.
;; Applies when the source is memory or a different hard register than the
;; destination.  The preparation code rewrites operand 0 to its V2DF view
;; and emits a move of the zero vector into it; the split pattern then
;; merges the converted scalar into that register through a vec_merge with
;; mask 1.
;; The register test needs no separate !TARGET_AVX check: that is already
;; the first conjunct of the condition.
5268 (define_split
5269 [(set (match_operand:DF 0 "sse_reg_operand")
5270 (float_extend:DF
5271 (match_operand:SF 1 "nonimmediate_operand")))]
5272 "!TARGET_AVX
5273 && TARGET_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY
5274 && epilogue_completed
5275 && optimize_function_for_speed_p (cfun)
5276 && (!REG_P (operands[1])
5277 || REGNO (operands[0]) != REGNO (operands[1]))
5278 && (!EXT_REX_SSE_REG_P (operands[0])
5279 || TARGET_AVX512VL)"
5280 [(set (match_dup 0)
5281 (vec_merge:V2DF
5282 (vec_duplicate:V2DF
5283 (float_extend:DF
5284 (match_dup 1)))
5285 (match_dup 0)
5286 (const_int 1)))]
5287 {
5288 operands[0] = lowpart_subreg (V2DFmode, operands[0], DFmode);
5289 emit_move_insn (operands[0], CONST0_RTX (V2DFmode));
5290 })
5291
;; Expander for HFmode -> SFmode extension.  With TARGET_AVX512FP16 the
;; float_extend pattern itself is emitted.  Otherwise the expansion emits
;; vec_setv8hf_0 with a zero V8HF vector and operand 1, converts the
;; result with vcvtph2ps into a V4SF register, and moves the SFmode
;; lowpart of that register to operand 0.
5292 (define_expand "extendhfsf2"
5293 [(set (match_operand:SF 0 "register_operand")
5294 (float_extend:SF
5295 (match_operand:HF 1 "nonimmediate_operand")))]
5296 "TARGET_AVX512FP16 || TARGET_F16C || TARGET_AVX512VL"
5297 {
5298 if (!TARGET_AVX512FP16)
5299 {
5300 rtx res = gen_reg_rtx (V4SFmode);
5301 rtx tmp = gen_reg_rtx (V8HFmode);
5302 rtx zero = force_reg (V8HFmode, CONST0_RTX (V8HFmode));
5303
5304 emit_insn (gen_vec_setv8hf_0 (tmp, zero, operands[1]));
5305 emit_insn (gen_vcvtph2ps (res, gen_lowpart (V8HImode, tmp)));
5306 emit_move_insn (operands[0], gen_lowpart (SFmode, res));
5307 DONE;
5308 }
5309 })
5310
;; Expander for HFmode -> DFmode extension; available only with
;; TARGET_AVX512FP16 and has no preparation code.
5311 (define_expand "extendhfdf2"
5312 [(set (match_operand:DF 0 "register_operand")
5313 (float_extend:DF
5314 (match_operand:HF 1 "nonimmediate_operand")))]
5315 "TARGET_AVX512FP16")
5316
;; TARGET_AVX512FP16 HFmode -> MODEF extension, emitted as
;; vcvtsh2<ssemodesuffix> with the destination register also used as the
;; first source operand.
5317 (define_insn "*extendhf<mode>2"
5318 [(set (match_operand:MODEF 0 "register_operand" "=v")
5319 (float_extend:MODEF
5320 (match_operand:HF 1 "nonimmediate_operand" "vm")))]
5321 "TARGET_AVX512FP16"
5322 "vcvtsh2<ssemodesuffix>\t{%1, %0, %0|%0, %0, %1}"
5323 [(set_attr "type" "ssecvt")
5324 (set_attr "prefix" "evex")
5325 (set_attr "mode" "<MODE>")])
5326
;; Expander for BFmode -> SFmode extension, expressed as
;; UNSPEC_CVTBFSF rather than float_extend.  Available with TARGET_SSE2
;; only when NaNs need not be honored for BFmode.
5327 (define_expand "extendbfsf2"
5328 [(set (match_operand:SF 0 "register_operand")
5329 (unspec:SF
5330 [(match_operand:BF 1 "register_operand")]
5331 UNSPEC_CVTBFSF))]
5332 "TARGET_SSE2 && !HONOR_NANS (BFmode)")
5333
5334 ;; Don't use float_extend since pslld doesn't raise
5335 ;; exceptions or turn an sNaN into a qNaN.
;; BFmode -> SFmode conversion implemented as a left shift by 16.
;; Alternative 0 (noavx) shifts the destination in place with pslld;
;; alternative 1 (avx) uses the three-operand vpslld; alternative 2 uses
;; vpslld on the %g views of the operands (mode XI) and is enabled only
;; for TARGET_AVX512F && TARGET_EVEX512 without TARGET_AVX512VL and
;; without TARGET_PREFER_AVX256.
5336 (define_insn "extendbfsf2_1"
5337 [(set (match_operand:SF 0 "register_operand" "=x,Yv,v")
5338 (unspec:SF
5339 [(match_operand:BF 1 "register_operand" " 0,Yv,v")]
5340 UNSPEC_CVTBFSF))]
5341 "TARGET_SSE2"
5342 "@
5343 pslld\t{$16, %0|%0, 16}
5344 vpslld\t{$16, %1, %0|%0, %1, 16}
5345 vpslld\t{$16, %g1, %g0|%g0, %g1, 16}"
5346 [(set_attr "isa" "noavx,avx,*")
5347 (set_attr "type" "sseishft1")
5348 (set_attr "length_immediate" "1")
5349 (set_attr "prefix_data16" "1,*,*")
5350 (set_attr "prefix" "orig,maybe_evex,evex")
5351 (set_attr "mode" "TI,TI,XI")
5352 (set_attr "memory" "none")
5353 (set (attr "enabled")
5354 (if_then_else (eq_attr "alternative" "2")
5355 (symbol_ref "TARGET_AVX512F && TARGET_EVEX512
5356 && !TARGET_AVX512VL && !TARGET_PREFER_AVX256")
5357 (const_string "*")))])
5358
;; Expander for MODEF -> XFmode extension on the x87.  A CONST_DOUBLE
;; source that is a standard 80387 constant is extended at compile time
;; and emitted as a plain move; any other CONST_DOUBLE is forced into the
;; constant pool in its original mode.
5359 (define_expand "extend<mode>xf2"
5360 [(set (match_operand:XF 0 "nonimmediate_operand")
5361 (float_extend:XF (match_operand:MODEF 1 "general_operand")))]
5362 "TARGET_80387"
5363 {
5364 /* ??? Needed for compress_float_constant since all fp constants
5365 are TARGET_LEGITIMATE_CONSTANT_P. */
5366 if (CONST_DOUBLE_P (operands[1]))
5367 {
5368 if (standard_80387_constant_p (operands[1]) > 0)
5369 {
5370 operands[1] = simplify_const_unary_operation
5371 (FLOAT_EXTEND, XFmode, operands[1], <MODE>mode);
5372 emit_move_insn_1 (operands[0], operands[1]);
5373 DONE;
5374 }
5375 operands[1] = validize_mem (force_const_mem (<MODE>mode, operands[1]));
5376 }
5377 })
5378
;; x87 MODEF -> XFmode extension; both alternatives (load into an "f"
;; register, store from an "f" register to memory) are output by
;; output_387_reg_move.
5379 (define_insn "*extend<mode>xf2_i387"
5380 [(set (match_operand:XF 0 "nonimmediate_operand" "=f,m")
5381 (float_extend:XF
5382 (match_operand:MODEF 1 "nonimmediate_operand" "fm,f")))]
5383 "TARGET_80387"
5384 "* return output_387_reg_move (insn, operands);"
5385 [(set_attr "type" "fmov")
5386 (set_attr "mode" "<MODE>,XF")])
5387
5388 ;; %%% This seems like bad news.
5389 ;; This cannot output into an f-reg because there is no way to be sure
5390 ;; of truncating in that case. Otherwise this is just like a simple move
5391 ;; insn. So we pretend we can output to a reg in order to get better
5392 ;; register preferencing, but we really use a stack slot.
5393
5394 ;; Conversion from DFmode to SFmode.
5395
;; DFmode -> SFmode truncation.  Alternatives 0 and 1 are x87 moves output
;; by output_387_reg_move; alternatives 2 and 3 emit cvtsd2ss from a
;; register and from memory respectively.  Per the "enabled" attribute:
;; with TARGET_SSE2 && TARGET_SSE_MATH the x87 alternatives require
;; TARGET_MIX_SSE_I387; otherwise the SSE alternatives are disabled.  In
;; both cases the x87 register destination (alternative 1) additionally
;; requires flag_unsafe_math_optimizations.
5396 (define_insn "truncdfsf2"
5397 [(set (match_operand:SF 0 "nonimm_ssenomem_operand" "=m,f,v,v")
5398 (float_truncate:SF
5399 (match_operand:DF 1 "register_ssemem_operand" "f,f,v,m")))]
5400 "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)"
5401 {
5402 switch (which_alternative)
5403 {
5404 case 0:
5405 case 1:
5406 return output_387_reg_move (insn, operands);
5407
5408 case 2:
5409 return "%vcvtsd2ss\t{%d1, %0|%0, %d1}";
5410 case 3:
5411 return "%vcvtsd2ss\t{%1, %d0|%d0, %1}";
5412
5413 default:
5414 gcc_unreachable ();
5415 }
5416 }
5417 [(set_attr "type" "fmov,fmov,ssecvt,ssecvt")
5418 (set_attr "avx_partial_xmm_update" "false,false,false,true")
5419 (set_attr "mode" "SF")
5420 (set (attr "enabled")
5421 (if_then_else
5422 (match_test ("TARGET_SSE2 && TARGET_SSE_MATH"))
5423 (cond [(eq_attr "alternative" "0")
5424 (symbol_ref "TARGET_MIX_SSE_I387")
5425 (eq_attr "alternative" "1")
5426 (symbol_ref "TARGET_MIX_SSE_I387
5427 && flag_unsafe_math_optimizations")
5428 ]
5429 (symbol_ref "true"))
5430 (cond [(eq_attr "alternative" "0")
5431 (symbol_ref "true")
5432 (eq_attr "alternative" "1")
5433 (symbol_ref "flag_unsafe_math_optimizations")
5434 ]
5435 (symbol_ref "false"))))])
5436
5437 /* For converting DF(xmm2) to SF(xmm1), use the following code instead of
5438 cvtsd2ss:
5439 unpcklpd xmm2,xmm2 ; packed conversion might crash on signaling NaNs
5440 cvtpd2ps xmm2,xmm1
5441 We do the conversion post reload to avoid producing 128-bit spills
5442 that might lead to an ICE on 32-bit targets. The sequence is unlikely to
5443 combine anyway. */
;; Post-reload split, used with TARGET_USE_VECTOR_FP_CONVERTS when
;; optimizing the insn for speed and the destination is either not an
;; extended SSE register or TARGET_AVX512VL is available.
;; operands[2] is the V4SF view of the destination, operands[3] a zero
;; V2SF vector and operands[4] the V2DF source vector (initially the V2DF
;; view of the destination).  The preparation code emits the insn that
;; fills operands[4]; the split pattern then truncates it to V2SF and
;; concatenates it with operands[3].
5444 (define_split
5445 [(set (match_operand:SF 0 "sse_reg_operand")
5446 (float_truncate:SF
5447 (match_operand:DF 1 "nonimmediate_operand")))]
5448 "TARGET_USE_VECTOR_FP_CONVERTS
5449 && optimize_insn_for_speed_p ()
5450 && reload_completed
5451 && (!EXT_REX_SSE_REG_P (operands[0])
5452 || TARGET_AVX512VL)"
5453 [(set (match_dup 2)
5454 (vec_concat:V4SF
5455 (float_truncate:V2SF
5456 (match_dup 4))
5457 (match_dup 3)))]
5458 {
5459 operands[2] = lowpart_subreg (V4SFmode, operands[0], SFmode);
5460 operands[3] = CONST0_RTX (V2SFmode);
5461 operands[4] = lowpart_subreg (V2DFmode, operands[0], SFmode);
5462 /* Use movsd for loading from memory, unpcklpd for registers.
5463 Try to avoid move when unpacking can be done in source, or SSE3
5464 movddup is available. */
5465 if (REG_P (operands[1]))
5466 {
5467 if ((!TARGET_SSE3 && REGNO (operands[0]) != REGNO (operands[1]))
5468 || (EXT_REX_SSE_REG_P (operands[1]) && !TARGET_AVX512VL))
5469 {
5470 rtx tmp = lowpart_subreg (DFmode, operands[0], SFmode);
5471 emit_move_insn (tmp, operands[1]);
5472 operands[1] = tmp;
5473 }
5474 else if (!TARGET_SSE3)
5475 operands[4] = lowpart_subreg (V2DFmode, operands[1], DFmode);
5476 emit_insn (gen_vec_dupv2df (operands[4], operands[1]));
5477 }
5478 else
5479 emit_insn (gen_vec_concatv2df (operands[4], operands[1],
5480 CONST0_RTX (DFmode)));
5481 })
5482
5483 ;; It's more profitable to split and then truncate in the same register.
;; With TARGET_SPLIT_MEM_OPND_FOR_FP_CONVERTS and when optimizing the insn
;; for speed, turn a truncation from memory into a load into the DFmode
;; lowpart of the destination (operand 2) followed by a register-to-
;; register float_truncate on that same register.
5484 (define_peephole2
5485 [(set (match_operand:SF 0 "sse_reg_operand")
5486 (float_truncate:SF
5487 (match_operand:DF 1 "memory_operand")))]
5488 "TARGET_SPLIT_MEM_OPND_FOR_FP_CONVERTS
5489 && optimize_insn_for_speed_p ()"
5490 [(set (match_dup 2) (match_dup 1))
5491 (set (match_dup 0) (float_truncate:SF (match_dup 2)))]
5492 "operands[2] = lowpart_subreg (DFmode, operands[0], SFmode);")
5493
5494 ;; Break partial SSE register dependency stall. This splitter should split
5495 ;; late in the pass sequence (after register rename pass), so allocated
5496 ;; registers won't change anymore
5497
;; Non-AVX only, after epilogue_completed, when optimizing the function
;; for speed and TARGET_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY is set.
;; Applies when the source is memory or a different hard register than the
;; destination.  The preparation code rewrites operand 0 to its V4SF view
;; and emits a move of the zero vector into it; the split pattern then
;; merges the truncated scalar into that register through a vec_merge with
;; mask 1.
;; The register test needs no separate !TARGET_AVX check: that is already
;; the first conjunct of the condition.
5498 (define_split
5499 [(set (match_operand:SF 0 "sse_reg_operand")
5500 (float_truncate:SF
5501 (match_operand:DF 1 "nonimmediate_operand")))]
5502 "!TARGET_AVX
5503 && TARGET_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY
5504 && epilogue_completed
5505 && optimize_function_for_speed_p (cfun)
5506 && (!REG_P (operands[1])
5507 || REGNO (operands[0]) != REGNO (operands[1]))
5508 && (!EXT_REX_SSE_REG_P (operands[0])
5509 || TARGET_AVX512VL)"
5510 [(set (match_dup 0)
5511 (vec_merge:V4SF
5512 (vec_duplicate:V4SF
5513 (float_truncate:SF
5514 (match_dup 1)))
5515 (match_dup 0)
5516 (const_int 1)))]
5517 {
5518 operands[0] = lowpart_subreg (V4SFmode, operands[0], SFmode);
5519 emit_move_insn (operands[0], CONST0_RTX (V4SFmode));
5520 })
5521
5522 ;; Conversion from XFmode to {SF,DF}mode
5523
;; x87 XFmode -> MODEF truncation, output by output_387_reg_move.
;; Alternative 0 stores to memory; alternative 1 (x87 register
;; destination) is enabled only with flag_unsafe_math_optimizations.
5524 (define_insn "truncxf<mode>2"
5525 [(set (match_operand:MODEF 0 "nonimmediate_operand" "=m,f")
5526 (float_truncate:MODEF
5527 (match_operand:XF 1 "register_operand" "f,f")))]
5528 "TARGET_80387"
5529 "* return output_387_reg_move (insn, operands);"
5530 [(set_attr "type" "fmov")
5531 (set_attr "mode" "<MODE>")
5532 (set (attr "enabled")
5533 (cond [(eq_attr "alternative" "1")
5534 (symbol_ref "flag_unsafe_math_optimizations")
5535 ]
5536 (symbol_ref "true")))])
5537
5538 ;; Conversion from {SF,DF}mode to HFmode.
5539
;; Expander for SFmode -> HFmode truncation.  With TARGET_AVX512FP16 the
;; float_truncate pattern itself is emitted.  Otherwise the expansion
;; emits vec_setv4sf_0 with a zero V4SF vector and operand 1, converts it
;; with vcvtps2ph (immediate operand 4) into the V8HI view of a V8HF
;; register, and moves the HFmode lowpart of that register to operand 0.
5540 (define_expand "truncsfhf2"
5541 [(set (match_operand:HF 0 "register_operand")
5542 (float_truncate:HF
5543 (match_operand:SF 1 "nonimmediate_operand")))]
5544 "TARGET_AVX512FP16 || TARGET_F16C || TARGET_AVX512VL"
5545 {
5546 if (!TARGET_AVX512FP16)
5547 {
5548 rtx res = gen_reg_rtx (V8HFmode);
5549 rtx tmp = gen_reg_rtx (V4SFmode);
5550 rtx zero = force_reg (V4SFmode, CONST0_RTX (V4SFmode));
5551
5552 emit_insn (gen_vec_setv4sf_0 (tmp, zero, operands[1]));
5553 emit_insn (gen_vcvtps2ph (gen_lowpart (V8HImode, res), tmp, GEN_INT (4)));
5554 emit_move_insn (operands[0], gen_lowpart (HFmode, res));
5555 DONE;
5556 }
5557 })
5558
;; Expander for DFmode -> HFmode truncation; available only with
;; TARGET_AVX512FP16 and has no preparation code.
5559 (define_expand "truncdfhf2"
5560 [(set (match_operand:HF 0 "register_operand")
5561 (float_truncate:HF
5562 (match_operand:DF 1 "nonimmediate_operand")))]
5563 "TARGET_AVX512FP16")
5564
;; TARGET_AVX512FP16 MODEF -> HFmode truncation, emitted as
;; vcvt<ssemodesuffix>2sh with the destination printed through %d0.
5565 (define_insn "*trunc<mode>hf2"
5566 [(set (match_operand:HF 0 "register_operand" "=v")
5567 (float_truncate:HF
5568 (match_operand:MODEF 1 "nonimmediate_operand" "vm")))]
5569 "TARGET_AVX512FP16"
5570 "vcvt<ssemodesuffix>2sh\t{%1, %d0|%d0, %1}"
5571 [(set_attr "type" "ssecvt")
5572 (set_attr "prefix" "evex")
5573 (set_attr "mode" "HF")])
5574
;; SFmode -> BFmode truncation with vcvtneps2bf16.  Requires either
;; TARGET_AVX512BF16 with TARGET_AVX512VL or TARGET_AVXNECONVERT, and is
;; only used when NaNs need not be honored for BFmode and
;; flag_unsafe_math_optimizations is set.  Alternative 0 (avxneconvert)
;; forces the VEX encoding with the {vex} prefix; alternative 1 is the
;; avx512bf16vl form.
5575 (define_insn "truncsfbf2"
5576 [(set (match_operand:BF 0 "register_operand" "=x, v")
5577 (float_truncate:BF
5578 (match_operand:SF 1 "register_operand" "x,v")))]
5579 "((TARGET_AVX512BF16 && TARGET_AVX512VL) || TARGET_AVXNECONVERT)
5580 && !HONOR_NANS (BFmode) && flag_unsafe_math_optimizations"
5581 "@
5582 %{vex%} vcvtneps2bf16\t{%1, %0|%0, %1}
5583 vcvtneps2bf16\t{%1, %0|%0, %1}"
5584 [(set_attr "isa" "avxneconvert,avx512bf16vl")
5585 (set_attr "prefix" "vex,evex")])
5586
5587 ;; Signed conversion to DImode.
5588
;; Expander for XFmode -> signed DImode conversion on the x87.  With
;; TARGET_FISTTP it emits fix_truncdi_i387_fisttp and finishes; otherwise
;; the fix pattern with its FLAGS_REG clobber is emitted.
5589 (define_expand "fix_truncxfdi2"
5590 [(parallel [(set (match_operand:DI 0 "nonimmediate_operand")
5591 (fix:DI (match_operand:XF 1 "register_operand")))
5592 (clobber (reg:CC FLAGS_REG))])]
5593 "TARGET_80387"
5594 {
5595 if (TARGET_FISTTP)
5596 {
5597 emit_insn (gen_fix_truncdi_i387_fisttp (operands[0], operands[1]));
5598 DONE;
5599 }
5600 })
5601
;; Expander for MODEF -> signed DImode conversion.  The fisttp form is
;; used when TARGET_FISTTP is set, unless this is a 64-bit SSE float mode
;; with TARGET_SSE_MATH.  Otherwise, for 64-bit SSE float modes, the _sse
;; insn is emitted into a register (a fresh pseudo if operand 0 is not a
;; register) and the result is copied to operand 0 when needed.  In the
;; remaining case the expander's own pattern is emitted.
5602 (define_expand "fix_trunc<mode>di2"
5603 [(parallel [(set (match_operand:DI 0 "nonimmediate_operand")
5604 (fix:DI (match_operand:MODEF 1 "register_operand")))
5605 (clobber (reg:CC FLAGS_REG))])]
5606 "TARGET_80387 || (TARGET_64BIT && SSE_FLOAT_MODE_P (<MODE>mode))"
5607 {
5608 if (TARGET_FISTTP
5609 && !(TARGET_64BIT && SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH))
5610 {
5611 emit_insn (gen_fix_truncdi_i387_fisttp (operands[0], operands[1]));
5612 DONE;
5613 }
5614 if (TARGET_64BIT && SSE_FLOAT_MODE_P (<MODE>mode))
5615 {
5616 rtx out = REG_P (operands[0]) ? operands[0] : gen_reg_rtx (DImode);
5617 emit_insn (gen_fix_trunc<mode>di_sse (out, operands[1]));
5618 if (out != operands[0])
5619 emit_move_insn (operands[0], out);
5620 DONE;
5621 }
5622 })
5623
;; TARGET_AVX512FP16 conversion of an HFmode value to an SWI48 integer,
;; covering the any_fix codes, emitted as vcvttsh2<fixsuffix>si.
5624 (define_insn "fix<fixunssuffix>_trunchf<mode>2"
5625 [(set (match_operand:SWI48 0 "register_operand" "=r")
5626 (any_fix:SWI48
5627 (match_operand:HF 1 "nonimmediate_operand" "vm")))]
5628 "TARGET_AVX512FP16"
5629 "vcvttsh2<fixsuffix>si\t{%1, %0|%0, %1}"
5630 [(set_attr "type" "sseicvt")
5631 (set_attr "prefix" "evex")
5632 (set_attr "mode" "<MODE>")])
5633
5634 ;; Signed conversion to SImode.
5635
;; Expander for XFmode -> signed SImode conversion on the x87.  With
;; TARGET_FISTTP it emits fix_truncsi_i387_fisttp and finishes; otherwise
;; the fix pattern with its FLAGS_REG clobber is emitted.
5636 (define_expand "fix_truncxfsi2"
5637 [(parallel [(set (match_operand:SI 0 "nonimmediate_operand")
5638 (fix:SI (match_operand:XF 1 "register_operand")))
5639 (clobber (reg:CC FLAGS_REG))])]
5640 "TARGET_80387"
5641 {
5642 if (TARGET_FISTTP)
5643 {
5644 emit_insn (gen_fix_truncsi_i387_fisttp (operands[0], operands[1]));
5645 DONE;
5646 }
5647 })
5648
;; Expander for MODEF -> signed SImode conversion.  The fisttp form is
;; used when TARGET_FISTTP is set, unless this is an SSE float mode with
;; TARGET_SSE_MATH.  Otherwise, for SSE float modes, the _sse insn is
;; emitted into a register (a fresh pseudo if operand 0 is not a
;; register) and the result is copied to operand 0 when needed.  In the
;; remaining case the expander's own pattern is emitted.
5649 (define_expand "fix_trunc<mode>si2"
5650 [(parallel [(set (match_operand:SI 0 "nonimmediate_operand")
5651 (fix:SI (match_operand:MODEF 1 "register_operand")))
5652 (clobber (reg:CC FLAGS_REG))])]
5653 "TARGET_80387 || SSE_FLOAT_MODE_P (<MODE>mode)"
5654 {
5655 if (TARGET_FISTTP
5656 && !(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH))
5657 {
5658 emit_insn (gen_fix_truncsi_i387_fisttp (operands[0], operands[1]));
5659 DONE;
5660 }
5661 if (SSE_FLOAT_MODE_P (<MODE>mode))
5662 {
5663 rtx out = REG_P (operands[0]) ? operands[0] : gen_reg_rtx (SImode);
5664 emit_insn (gen_fix_trunc<mode>si_sse (out, operands[1]));
5665 if (out != operands[0])
5666 emit_move_insn (operands[0], out);
5667 DONE;
5668 }
5669 })
5670
5671 ;; Signed conversion to HImode.
5672
;; Expander for X87MODEF -> signed HImode conversion on the x87.  It is
;; not available for SSE float modes when fisttp is unavailable or
;; TARGET_SSE_MATH is set.  With TARGET_FISTTP it emits
;; fix_trunchi_i387_fisttp and finishes; otherwise the expander's own
;; pattern is emitted.
5673 (define_expand "fix_trunc<mode>hi2"
5674 [(parallel [(set (match_operand:HI 0 "nonimmediate_operand")
5675 (fix:HI (match_operand:X87MODEF 1 "register_operand")))
5676 (clobber (reg:CC FLAGS_REG))])]
5677 "TARGET_80387
5678 && !(SSE_FLOAT_MODE_P (<MODE>mode) && (!TARGET_FISTTP || TARGET_SSE_MATH))"
5679 {
5680 if (TARGET_FISTTP)
5681 {
5682 emit_insn (gen_fix_trunchi_i387_fisttp (operands[0], operands[1]));
5683 DONE;
5684 }
5685 })
5686
5687 ;; Unsigned conversion to DImode
5688
;; MODEF -> unsigned DImode conversion, emitted as
;; vcvtt<ssemodesuffix>2usi.  Requires TARGET_64BIT, TARGET_AVX512F and
;; TARGET_SSE_MATH.
5689 (define_insn "fixuns_trunc<mode>di2"
5690 [(set (match_operand:DI 0 "register_operand" "=r")
5691 (unsigned_fix:DI
5692 (match_operand:MODEF 1 "nonimmediate_operand" "vm")))]
5693 "TARGET_64BIT && TARGET_AVX512F && TARGET_SSE_MATH"
5694 "vcvtt<ssemodesuffix>2usi\t{%1, %0|%0, %1}"
5695 [(set_attr "type" "sseicvt")
5696 (set_attr "prefix" "evex")
5697 (set_attr "mode" "DI")])
5698
5699 ;; Unsigned conversion to SImode.
5700
;; Expander for MODEF -> unsigned SImode conversion.  With TARGET_AVX512F
;; it emits fixuns_trunc<mode>si2_avx512f and finishes.  Otherwise it
;; FAILs when optimizing the insn for size; else it builds a vector
;; constant of 2**31 in <ssevecmode>, forces it into a register as
;; operand 2, and emits the parallel with that (use ...) and two vector
;; scratch clobbers.
5701 (define_expand "fixuns_trunc<mode>si2"
5702 [(parallel
5703 [(set (match_operand:SI 0 "register_operand")
5704 (unsigned_fix:SI
5705 (match_operand:MODEF 1 "nonimmediate_operand")))
5706 (use (match_dup 2))
5707 (clobber (scratch:<ssevecmode>))
5708 (clobber (scratch:<ssevecmode>))])]
5709 "(!TARGET_64BIT || TARGET_AVX512F) && TARGET_SSE2 && TARGET_SSE_MATH"
5710 {
5711 machine_mode mode = <MODE>mode;
5712 machine_mode vecmode = <ssevecmode>mode;
5713 REAL_VALUE_TYPE TWO31r;
5714 rtx two31;
5715
5716 if (TARGET_AVX512F)
5717 {
5718 emit_insn (gen_fixuns_trunc<mode>si2_avx512f (operands[0], operands[1]));
5719 DONE;
5720 }
5721
5722 if (optimize_insn_for_size_p ())
5723 FAIL;
5724
5725 real_ldexp (&TWO31r, &dconst1, 31);
5726 two31 = const_double_from_real_value (TWO31r, mode);
5727 two31 = ix86_build_const_vector (vecmode, true, two31);
5728 operands[2] = force_reg (vecmode, two31);
5729 })
5730
;; MODEF -> unsigned SImode conversion, emitted as
;; vcvtt<ssemodesuffix>2usi.  Requires TARGET_AVX512F and
;; TARGET_SSE_MATH.
5731 (define_insn "fixuns_trunc<mode>si2_avx512f"
5732 [(set (match_operand:SI 0 "register_operand" "=r")
5733 (unsigned_fix:SI
5734 (match_operand:MODEF 1 "nonimmediate_operand" "vm")))]
5735 "TARGET_AVX512F && TARGET_SSE_MATH"
5736 "vcvtt<ssemodesuffix>2usi\t{%1, %0|%0, %1}"
5737 [(set_attr "type" "sseicvt")
5738 (set_attr "prefix" "evex")
5739 (set_attr "mode" "SI")])
5740
;; TARGET_64BIT && TARGET_AVX512FP16: HFmode -> unsigned SImode
;; conversion whose result is zero-extended to DImode; vcvttsh2usi writes
;; the %k0 view of the destination.
5741 (define_insn "*fixuns_trunchfsi2zext"
5742 [(set (match_operand:DI 0 "register_operand" "=r")
5743 (zero_extend:DI
5744 (unsigned_fix:SI
5745 (match_operand:HF 1 "nonimmediate_operand" "vm"))))]
5746 "TARGET_64BIT && TARGET_AVX512FP16"
5747 "vcvttsh2usi\t{%1, %k0|%k0, %1}"
5748 [(set_attr "type" "sseicvt")
5749 (set_attr "prefix" "evex")
5750 (set_attr "mode" "SI")])
5751
;; TARGET_64BIT variant of fixuns_trunc<mode>si2_avx512f whose SImode
;; result is zero-extended to DImode; the instruction writes the %k0 view
;; of the destination.
5752 (define_insn "*fixuns_trunc<mode>si2_avx512f_zext"
5753 [(set (match_operand:DI 0 "register_operand" "=r")
5754 (zero_extend:DI
5755 (unsigned_fix:SI
5756 (match_operand:MODEF 1 "nonimmediate_operand" "vm"))))]
5757 "TARGET_64BIT && TARGET_AVX512F && TARGET_SSE_MATH"
5758 "vcvtt<ssemodesuffix>2usi\t{%1, %k0|%k0, %1}"
5759 [(set_attr "type" "sseicvt")
5760 (set_attr "prefix" "evex")
5761 (set_attr "mode" "SI")])
5762
;; !TARGET_64BIT SSE2 form of the unsigned SImode conversion, matching the
;; parallel generated by the fixuns_trunc<mode>si2 expander: operand 3 is
;; the floating-point source, operand 4 the used vector operand, and
;; operands 1 and 2 are vector scratch registers.  The insn is output as
;; "#" and split after reload by ix86_split_convert_uns_si_sse.
5763 (define_insn_and_split "*fixuns_trunc<mode>_1"
5764 [(set (match_operand:SI 0 "register_operand" "=&x,&x")
5765 (unsigned_fix:SI
5766 (match_operand:MODEF 3 "nonimmediate_operand" "xm,xm")))
5767 (use (match_operand:<ssevecmode> 4 "nonimmediate_operand" "m,x"))
5768 (clobber (match_scratch:<ssevecmode> 1 "=x,&x"))
5769 (clobber (match_scratch:<ssevecmode> 2 "=x,x"))]
5770 "!TARGET_64BIT && TARGET_SSE2 && TARGET_SSE_MATH
5771 && optimize_function_for_speed_p (cfun)"
5772 "#"
5773 "&& reload_completed"
5774 [(const_int 0)]
5775 {
5776 ix86_split_convert_uns_si_sse (operands);
5777 DONE;
5778 })
5779
5780 ;; Unsigned conversion to HImode.
5781 ;; Without these patterns, we'll try the unsigned SI conversion which
5782 ;; is complex for SSE, rather than the signed SI conversion, which isn't.
5783
;; Unsigned HFmode -> HImode conversion, expanded as a signed "fix" into a
;; fresh SImode pseudo (operand 2) followed by a move of that pseudo's
;; HImode subreg at offset 0 into operand 0.  Requires TARGET_AVX512FP16.
5784 (define_expand "fixuns_trunchfhi2"
5785 [(set (match_dup 2)
5786 (fix:SI (match_operand:HF 1 "nonimmediate_operand")))
5787 (set (match_operand:HI 0 "nonimmediate_operand")
5788 (subreg:HI (match_dup 2) 0))]
5789 "TARGET_AVX512FP16"
5790 "operands[2] = gen_reg_rtx (SImode);")
5791
;; Unsigned MODEF -> HImode conversion, expanded as a signed "fix" into a
;; fresh SImode pseudo (operand 2) followed by a move of that pseudo's
;; HImode subreg at offset 0 into operand 0.
;; Requires SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH.
5792 (define_expand "fixuns_trunc<mode>hi2"
5793 [(set (match_dup 2)
5794 (fix:SI (match_operand:MODEF 1 "nonimmediate_operand")))
5795 (set (match_operand:HI 0 "nonimmediate_operand")
5796 (subreg:HI (match_dup 2) 0))]
5797 "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
5798 "operands[2] = gen_reg_rtx (SImode);")
5799
5800 ;; When SSE is available, it is always faster to use it!
;; Signed truncating conversion from a MODEF value to an SWI48 integer
;; register using cvtt<ssemodesuffix>2si.  Alternative 0 reads the source
;; from a register ("v"), alternative 1 from memory ("m"); the per-CPU
;; *_decode attributes are given per alternative in that order.
;; Enabled for SSE float modes unless TARGET_FISTTP is set without
;; TARGET_SSE_MATH.  prefix_rex is "1" when the integer mode is DImode.
5801 (define_insn "fix_trunc<MODEF:mode><SWI48:mode>_sse"
5802 [(set (match_operand:SWI48 0 "register_operand" "=r,r")
5803 (fix:SWI48 (match_operand:MODEF 1 "nonimmediate_operand" "v,m")))]
5804 "SSE_FLOAT_MODE_P (<MODEF:MODE>mode)
5805 && (!TARGET_FISTTP || TARGET_SSE_MATH)"
5806 "%vcvtt<MODEF:ssemodesuffix>2si<SWI48:rex64suffix>\t{%1, %0|%0, %1}"
5807 [(set_attr "type" "sseicvt")
5808 (set_attr "prefix" "maybe_vex")
5809 (set (attr "prefix_rex")
5810 (if_then_else
5811 (match_test "<SWI48:MODE>mode == DImode")
5812 (const_string "1")
5813 (const_string "*")))
5814 (set_attr "mode" "<MODEF:MODE>")
5815 (set_attr "athlon_decode" "double,vector")
5816 (set_attr "amdfam10_decode" "double,double")
5817 (set_attr "bdver1_decode" "double,double")])
5818
5819 ;; Avoid vector decoded forms of the instruction.
;; When TARGET_AVOID_VECTOR_DECODE is set, the mode is an SSE float mode and
;; the insn is optimized for speed, rewrite a float->integer "fix" that
;; reads its source from memory: first load the memory operand into a
;; MODEF scratch register of class "x" (operand 2), then convert from that
;; register.  The peephole only applies if such a scratch is available.
5820 (define_peephole2
5821 [(match_scratch:MODEF 2 "x")
5822 (set (match_operand:SWI48 0 "register_operand")
5823 (fix:SWI48 (match_operand:MODEF 1 "memory_operand")))]
5824 "TARGET_AVOID_VECTOR_DECODE
5825 && SSE_FLOAT_MODE_P (<MODEF:MODE>mode)
5826 && optimize_insn_for_speed_p ()"
5827 [(set (match_dup 2) (match_dup 1))
5828 (set (match_dup 0) (fix:SWI48 (match_dup 2)))])
5829
;; x87 truncating conversion to an SWI248x integer in memory, of type
;; "fisttp".  The source is an "f" register whose mode is taken from the
;; operand itself (operand 1 has no mode in the pattern); an XFmode "f"
;; scratch is clobbered.  Assembly comes from
;; output_fix_trunc (insn, operands, true).
;; Enabled when the source mode is an x87 float mode and TARGET_FISTTP is
;; set, except when all of these hold: the source is an SSE float mode,
;; (TARGET_64BIT || <MODE>mode != DImode), and TARGET_SSE_MATH.
5830 (define_insn "fix_trunc<mode>_i387_fisttp"
5831 [(set (match_operand:SWI248x 0 "nonimmediate_operand" "=m")
5832 (fix:SWI248x (match_operand 1 "register_operand" "f")))
5833 (clobber (match_scratch:XF 2 "=&f"))]
5834 "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
5835 && TARGET_FISTTP
5836 && !((SSE_FLOAT_MODE_P (GET_MODE (operands[1]))
5837 && (TARGET_64BIT || <MODE>mode != DImode))
5838 && TARGET_SSE_MATH)"
5839 "* return output_fix_trunc (insn, operands, true);"
5840 [(set_attr "type" "fisttp")
5841 (set_attr "mode" "<MODE>")])
5842
5843 ;; See the comments in i386.h near OPTIMIZE_MODE_SWITCHING for the description
5844 ;; of the machinery. Please note the clobber of FLAGS_REG. In the i387 control
5845 ;; word calculation (inserted by LCM in the mode switching pass),
5846 ;; FLAGS_REG-clobbering insns can be used. Look at the
5847 ;; emit_i387_cw_initialization () function in i386.cc.
;; x87 truncating conversion for targets without FISTTP.  The pattern
;; (with a FLAGS_REG clobber) only exists while ix86_pre_reload_split ()
;; holds and is split unconditionally ("&& 1") into the
;; fix_trunc<mode>_i387 form, which takes two control-word stack slots.
5848 (define_insn_and_split "*fix_trunc<mode>_i387_1"
5849 [(set (match_operand:SWI248x 0 "nonimmediate_operand")
5850 (fix:SWI248x (match_operand 1 "register_operand")))
5851 (clobber (reg:CC FLAGS_REG))]
5852 "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
5853 && !TARGET_FISTTP
5854 && !(SSE_FLOAT_MODE_P (GET_MODE (operands[1]))
5855 && (TARGET_64BIT || <MODE>mode != DImode))
5856 && ix86_pre_reload_split ()"
5857 "#"
5858 "&& 1"
5859 [(const_int 0)]
5860 {
/* Record that an I387_TRUNC entity is used by this function.  */
5861 ix86_optimize_mode_switching[I387_TRUNC] = 1;
5862
/* HImode stack slots for the two control words passed as operands 2
   and 3 of the emitted insn.  */
5863 operands[2] = assign_386_stack_local (HImode, SLOT_CW_STORED);
5864 operands[3] = assign_386_stack_local (HImode, SLOT_CW_TRUNC);
5865
5866 emit_insn (gen_fix_trunc<mode>_i387 (operands[0], operands[1],
5867 operands[2], operands[3]));
5868 DONE;
5869 }
5870 [(set_attr "type" "fistp")
5871 (set_attr "i387_cw" "trunc")
5872 (set_attr "mode" "<MODE>")])
5873
;; x87 truncating conversion to a DImode memory destination without
;; FISTTP.  Operands 2 and 3 are HImode memory operands that are only
;; "use"d, and an XFmode "f" scratch (operand 4) is clobbered.  Assembly
;; comes from output_fix_trunc (insn, operands, false).
;; Disabled when TARGET_64BIT and the source is an SSE float mode.
5874 (define_insn "fix_truncdi_i387"
5875 [(set (match_operand:DI 0 "nonimmediate_operand" "=m")
5876 (fix:DI (match_operand 1 "register_operand" "f")))
5877 (use (match_operand:HI 2 "memory_operand" "m"))
5878 (use (match_operand:HI 3 "memory_operand" "m"))
5879 (clobber (match_scratch:XF 4 "=&f"))]
5880 "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
5881 && !TARGET_FISTTP
5882 && !(TARGET_64BIT && SSE_FLOAT_MODE_P (GET_MODE (operands[1])))"
5883 "* return output_fix_trunc (insn, operands, false);"
5884 [(set_attr "type" "fistp")
5885 (set_attr "i387_cw" "trunc")
5886 (set_attr "mode" "DI")])
5887
;; x87 truncating conversion to an SWI24 memory destination without
;; FISTTP.  Operands 2 and 3 are HImode memory operands that are only
;; "use"d; unlike fix_truncdi_i387 there is no scratch clobber.  Assembly
;; comes from output_fix_trunc (insn, operands, false).
;; Disabled when the source is an SSE float mode.
5888 (define_insn "fix_trunc<mode>_i387"
5889 [(set (match_operand:SWI24 0 "nonimmediate_operand" "=m")
5890 (fix:SWI24 (match_operand 1 "register_operand" "f")))
5891 (use (match_operand:HI 2 "memory_operand" "m"))
5892 (use (match_operand:HI 3 "memory_operand" "m"))]
5893 "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
5894 && !TARGET_FISTTP
5895 && !SSE_FLOAT_MODE_P (GET_MODE (operands[1]))"
5896 "* return output_fix_trunc (insn, operands, false);"
5897 [(set_attr "type" "fistp")
5898 (set_attr "i387_cw" "trunc")
5899 (set_attr "mode" "<MODE>")])
5900
;; fnstcw into an HImode memory operand, modelled as an UNSPEC_FSTCW.
;; Requires TARGET_80387.  The length attribute is the default address
;; length of the insn plus 2.
5901 (define_insn "x86_fnstcw_1"
5902 [(set (match_operand:HI 0 "memory_operand" "=m")
5903 (unspec:HI [(const_int 0)] UNSPEC_FSTCW))]
5904 "TARGET_80387"
5905 "fnstcw\t%0"
5906 [(set (attr "length")
5907 (symbol_ref "ix86_attr_length_address_default (insn) + 2"))
5908 (set_attr "mode" "HI")
5909 (set_attr "unit" "i387")
5910 (set_attr "bdver1_decode" "vector")])
5911 \f
5912 ;; Conversion between fixed point and floating point.
5913
5914 ;; Even though we only accept memory inputs, the backend _really_
5915 ;; wants to be able to do this between registers. Thankfully, LRA
5916 ;; will fix this up for us during register allocation.
5917
;; Signed HImode -> X87MODEF conversion using fild; the constraint only
;; accepts a memory source.  Requires TARGET_80387, and is disabled for
;; modes handled by SSE math (SSE_FLOAT_MODE_P && TARGET_SSE_MATH) unless
;; TARGET_MIX_SSE_I387 is set.
5918 (define_insn "floathi<mode>2"
5919 [(set (match_operand:X87MODEF 0 "register_operand" "=f")
5920 (float:X87MODEF (match_operand:HI 1 "nonimmediate_operand" "m")))]
5921 "TARGET_80387
5922 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
5923 || TARGET_MIX_SSE_I387)"
5924 "fild%Z1\t%1"
5925 [(set_attr "type" "fmov")
5926 (set_attr "mode" "<MODE>")
5927 (set_attr "znver1_decode" "double")
5928 (set_attr "fp_int_src" "true")])
5929
;; Signed SWI48x integer -> XFmode conversion using fild; the constraint
;; only accepts a memory source.  Requires TARGET_80387.
5930 (define_insn "float<SWI48x:mode>xf2"
5931 [(set (match_operand:XF 0 "register_operand" "=f")
5932 (float:XF (match_operand:SWI48x 1 "nonimmediate_operand" "m")))]
5933 "TARGET_80387"
5934 "fild%Z1\t%1"
5935 [(set_attr "type" "fmov")
5936 (set_attr "mode" "XF")
5937 (set_attr "znver1_decode" "double")
5938 (set_attr "fp_int_src" "true")])
5939
;; Named expander for signed SWI48x integer -> MODEF conversion.  It has no
;; preparation code and just emits the (set ... (float ...)) RTL.
;; Available either through x87 (TARGET_80387 && X87_ENABLE_FLOAT) or
;; through SSE math; the SSE route excludes DImode sources on
;; non-64-bit targets.
5940 (define_expand "float<SWI48x:mode><MODEF:mode>2"
5941 [(set (match_operand:MODEF 0 "register_operand")
5942 (float:MODEF (match_operand:SWI48x 1 "nonimmediate_operand")))]
5943 "(TARGET_80387 && X87_ENABLE_FLOAT (<MODEF:MODE>mode, <SWI48x:MODE>mode))
5944 || (SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH
5945 && ((<SWI48x:MODE>mode != DImode) || TARGET_64BIT))")
5946
;; Signed SWI48 integer -> MODEF conversion with three alternatives:
;;   0: x87 fild from memory into an "f" register;
;;   1: cvtsi2<suffix> from a general register into a "v" register;
;;   2: cvtsi2<suffix> from memory into a "v" register.
;; "enabled": when the mode is handled by SSE math, alternatives 1 and 2
;; are enabled and alternative 0 only if TARGET_MIX_SSE_I387 &&
;; X87_ENABLE_FLOAT; otherwise only alternative 0 is enabled.
;; "preferred_for_speed": alternative 1 is preferred only when
;; TARGET_INTER_UNIT_CONVERSIONS is set.
5947 (define_insn "*float<SWI48:mode><MODEF:mode>2"
5948 [(set (match_operand:MODEF 0 "register_operand" "=f,v,v")
5949 (float:MODEF
5950 (match_operand:SWI48 1 "nonimmediate_operand" "m,r,m")))]
5951 "(TARGET_80387 && X87_ENABLE_FLOAT (<MODEF:MODE>mode, <SWI48:MODE>mode))
5952 || (SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH)"
5953 "@
5954 fild%Z1\t%1
5955 %vcvtsi2<MODEF:ssemodesuffix><SWI48:rex64suffix>\t{%1, %d0|%d0, %1}
5956 %vcvtsi2<MODEF:ssemodesuffix><SWI48:rex64suffix>\t{%1, %d0|%d0, %1}"
5957 [(set_attr "type" "fmov,sseicvt,sseicvt")
5958 (set_attr "avx_partial_xmm_update" "false,true,true")
5959 (set_attr "prefix" "orig,maybe_vex,maybe_vex")
5960 (set_attr "mode" "<MODEF:MODE>")
5961 (set (attr "prefix_rex")
5962 (if_then_else
5963 (and (eq_attr "prefix" "maybe_vex")
5964 (match_test "<SWI48:MODE>mode == DImode"))
5965 (const_string "1")
5966 (const_string "*")))
5967 (set_attr "unit" "i387,*,*")
5968 (set_attr "athlon_decode" "*,double,direct")
5969 (set_attr "amdfam10_decode" "*,vector,double")
5970 (set_attr "bdver1_decode" "*,double,direct")
5971 (set_attr "znver1_decode" "double,*,*")
5972 (set_attr "fp_int_src" "true")
5973 (set (attr "enabled")
5974 (if_then_else
5975 (match_test ("SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH"))
5976 (if_then_else
5977 (eq_attr "alternative" "0")
5978 (symbol_ref "TARGET_MIX_SSE_I387
5979 && X87_ENABLE_FLOAT (<MODEF:MODE>mode,
5980 <SWI48:MODE>mode)")
5981 (symbol_ref "true"))
5982 (if_then_else
5983 (eq_attr "alternative" "0")
5984 (symbol_ref "true")
5985 (symbol_ref "false"))))
5986 (set (attr "preferred_for_speed")
5987 (cond [(eq_attr "alternative" "1")
5988 (symbol_ref "TARGET_INTER_UNIT_CONVERSIONS")]
5989 (symbol_ref "true")))])
5990
;; SWI48 integer -> HFmode conversion, instantiated over the any_float
;; code iterator (presumably float and unsigned_float -- the iterator is
;; defined outside this chunk).  Emits vcvt<floatsuffix>si2sh with a
;; register or memory source.  Requires TARGET_AVX512FP16.
5991 (define_insn "float<floatunssuffix><mode>hf2"
5992 [(set (match_operand:HF 0 "register_operand" "=v")
5993 (any_float:HF
5994 (match_operand:SWI48 1 "nonimmediate_operand" "rm")))]
5995 "TARGET_AVX512FP16"
5996 "vcvt<floatsuffix>si2sh<rex64suffix>\t{%1, %d0|%d0, %1}"
5997 [(set_attr "type" "sseicvt")
5998 (set_attr "prefix" "evex")
5999 (set_attr "mode" "HF")])
6000
;; Signed DImode -> MODEF conversion on non-64-bit targets using x87 fild
;; from memory.  Requires TARGET_80387 and
;; X87_ENABLE_FLOAT (<MODEF:MODE>mode, DImode).
6001 (define_insn "*floatdi<MODEF:mode>2_i387"
6002 [(set (match_operand:MODEF 0 "register_operand" "=f")
6003 (float:MODEF (match_operand:DI 1 "nonimmediate_operand" "m")))]
6004 "!TARGET_64BIT
6005 && TARGET_80387 && X87_ENABLE_FLOAT (<MODEF:MODE>mode, DImode)"
6006 "fild%Z1\t%1"
6007 [(set_attr "type" "fmov")
6008 (set_attr "mode" "<MODEF:MODE>")
6009 (set_attr "znver1_decode" "double")
6010 (set_attr "fp_int_src" "true")])
6011
6012 ;; Try TARGET_USE_VECTOR_CONVERTS, but not so hard as to require extra memory
6013 ;; slots when !TARGET_INTER_UNIT_MOVES_TO_VEC disables the general_regs
6014 ;; alternative in sse2_loadld.
;; After reload, replace a scalar SImode -> MODEF conversion into an SSE
;; register by a vector sequence.  Applies when TARGET_SSE2 and
;; TARGET_USE_VECTOR_CONVERTS are set, the function is optimized for speed,
;; the source is in memory or TARGET_INTER_UNIT_MOVES_TO_VEC is set, and the
;; destination is not an EXT_REX SSE register unless TARGET_AVX512VL.
6015 (define_split
6016 [(set (match_operand:MODEF 0 "sse_reg_operand")
6017 (float:MODEF (match_operand:SI 1 "nonimmediate_operand")))]
6018 "TARGET_SSE2
6019 && TARGET_USE_VECTOR_CONVERTS
6020 && optimize_function_for_speed_p (cfun)
6021 && reload_completed
6022 && (MEM_P (operands[1]) || TARGET_INTER_UNIT_MOVES_TO_VEC)
6023 && (!EXT_REX_SSE_REG_P (operands[0])
6024 || TARGET_AVX512VL)"
6025 [(const_int 0)]
6026 {
/* View the destination register both as the float vector mode and as
   V4SImode.  */
6027 operands[3] = lowpart_subreg (<ssevecmode>mode, operands[0], <MODE>mode);
6028 operands[4] = lowpart_subreg (V4SImode, operands[0], <MODE>mode);
6029
/* Load the SImode source into the V4SI view, with a zero vector as the
   other input of sse2_loadld.  */
6030 emit_insn (gen_sse2_loadld (operands[4],
6031 CONST0_RTX (V4SImode), operands[1]));
6032
/* Convert with the vector instruction that matches the float mode.  */
6033 if (<ssevecmode>mode == V4SFmode)
6034 emit_insn (gen_floatv4siv4sf2 (operands[3], operands[4]));
6035 else
6036 emit_insn (gen_sse2_cvtdq2pd (operands[3], operands[4]));
6037 DONE;
6038 })
6039
6040 ;; Avoid store forwarding (partial memory) stall penalty
6041 ;; by passing DImode value through XMM registers.
6042
;; While pseudos can still be created, turn a DImode-register -> X87MODEF
;; conversion on a 32-bit SSE2 target into the
;; floatdi<mode>2_i387_with_xmm form, supplying a DImode stack slot
;; (SLOT_FLOATxFDI_387) as its memory operand.
6043 (define_split
6044 [(set (match_operand:X87MODEF 0 "register_operand")
6045 (float:X87MODEF
6046 (match_operand:DI 1 "register_operand")))]
6047 "!TARGET_64BIT && TARGET_INTER_UNIT_MOVES_TO_VEC
6048 && TARGET_80387 && X87_ENABLE_FLOAT (<X87MODEF:MODE>mode, DImode)
6049 && TARGET_SSE2 && optimize_function_for_speed_p (cfun)
6050 && can_create_pseudo_p ()"
6051 [(const_int 0)]
6052 {
6053 rtx s = assign_386_stack_local (DImode, SLOT_FLOATxFDI_387);
6054 emit_insn (gen_floatdi<mode>2_i387_with_xmm (operands[0], operands[1], s));
6055 DONE;
6056 })
6057
;; DImode register -> X87MODEF conversion that routes the value through an
;; XMM register and a DImode memory slot (operand 2).  It has no assembly
;; of its own ("#") and is split after reload: the preparation code builds
;; the 64-bit value in V4SI scratch operand 3, then the split template
;; stores operand 3 to the memory slot and converts from that slot.
;; Alternative 0 is tagged isa "sse4" and does not need scratch operand 4
;; ("=X"); alternative 1 allocates it as an "x" register.
6058 (define_insn_and_split "floatdi<X87MODEF:mode>2_i387_with_xmm"
6059 [(set (match_operand:X87MODEF 0 "register_operand" "=f,f")
6060 (float:X87MODEF
6061 (match_operand:DI 1 "register_operand" "r,r")))
6062 (clobber (match_operand:DI 2 "memory_operand" "=m,m"))
6063 (clobber (match_scratch:V4SI 3 "=x,x"))
6064 (clobber (match_scratch:V4SI 4 "=X,x"))]
6065 "!TARGET_64BIT && TARGET_INTER_UNIT_MOVES_TO_VEC
6066 && TARGET_80387 && X87_ENABLE_FLOAT (<X87MODEF:MODE>mode, DImode)
6067 && TARGET_SSE2 && optimize_function_for_speed_p (cfun)"
6068 "#"
6069 "&& reload_completed"
6070 [(set (match_dup 2) (match_dup 3))
6071 (set (match_dup 0) (float:X87MODEF (match_dup 2)))]
6072 {
6073 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
6074 Assemble the 64-bit DImode value in an xmm register. */
6075 emit_insn (gen_sse2_loadld (operands[3], CONST0_RTX (V4SImode),
6076 gen_lowpart (SImode, operands[1])));
/* With SSE4.1 the high half is inserted directly into operand 3;
   otherwise it is loaded into operand 4 and interleaved with
   operand 3.  */
6077 if (TARGET_SSE4_1)
6078 emit_insn (gen_sse4_1_pinsrd (operands[3], operands[3],
6079 gen_highpart (SImode, operands[1]),
6080 GEN_INT (2)));
6081 else
6082 {
6083 emit_insn (gen_sse2_loadld (operands[4], CONST0_RTX (V4SImode),
6084 gen_highpart (SImode, operands[1])));
6085 emit_insn (gen_vec_interleave_lowv4si (operands[3], operands[3],
6086 operands[4]));
6087 }
/* The split template stores operand 3 as a DImode value.  */
6088 operands[3] = gen_lowpart (DImode, operands[3]);
6089 }
6090 [(set_attr "isa" "sse4,*")
6091 (set_attr "type" "multi")
6092 (set_attr "mode" "<X87MODEF:MODE>")
6093 (set_attr "unit" "i387")
6094 (set_attr "fp_int_src" "true")])
6095
6096 ;; Break partial SSE register dependency stall. This splitter should split
6097 ;; late in the pass sequence (after the register rename pass), so the allocated
6098 ;; registers won't change anymore.
6099
;; Once epilogue_completed is set, rewrite a scalar SWI48 -> MODEF
;; conversion into an SSE register as a vec_merge into the vector-mode
;; view of the destination, preceded by a move that zeroes that whole
;; vector register.  Applies for !TARGET_AVX with
;; TARGET_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY when optimizing for speed,
;; and not for EXT_REX SSE registers unless TARGET_AVX512VL.
6100 (define_split
6101 [(set (match_operand:MODEF 0 "sse_reg_operand")
6102 (float:MODEF (match_operand:SWI48 1 "nonimmediate_operand")))]
6103 "!TARGET_AVX
6104 && TARGET_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY
6105 && epilogue_completed
6106 && optimize_function_for_speed_p (cfun)
6107 && (!EXT_REX_SSE_REG_P (operands[0])
6108 || TARGET_AVX512VL)"
6109 [(set (match_dup 0)
6110 (vec_merge:<MODEF:ssevecmode>
6111 (vec_duplicate:<MODEF:ssevecmode>
6112 (float:MODEF
6113 (match_dup 1)))
6114 (match_dup 0)
6115 (const_int 1)))]
6116 {
6117 const machine_mode vmode = <MODEF:ssevecmode>mode;
6118
/* Replace operand 0 by its vector-mode view and clear it before the
   vec_merge from the template is emitted.  */
6119 operands[0] = lowpart_subreg (vmode, operands[0], <MODEF:MODE>mode);
6120 emit_move_insn (operands[0], CONST0_RTX (vmode));
6121 })
6122
;; Unsigned SWI12 integer -> MODEF conversion on non-64-bit SSE-math
;; targets: the source is first converted to SImode as an unsigned value
;; (third argument 1 of convert_to_mode) and then converted with the
;; signed floatsi<mode>2 pattern.
6123 (define_expand "floatuns<SWI12:mode><MODEF:mode>2"
6124 [(set (match_operand:MODEF 0 "register_operand")
6125 (unsigned_float:MODEF
6126 (match_operand:SWI12 1 "nonimmediate_operand")))]
6127 "!TARGET_64BIT
6128 && SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH"
6129 {
6130 operands[1] = convert_to_mode (SImode, operands[1], 1);
6131 emit_insn (gen_floatsi<MODEF:mode>2 (operands[0], operands[1]));
6132 DONE;
6133 })
6134
;; Unsigned SWI48 integer -> MODEF conversion using vcvtusi2<suffix>, with
;; a register or memory source.  The template prints operand 0 both as the
;; destination and as the extra source operand.
;; Requires TARGET_AVX512F && TARGET_SSE_MATH.
6135 (define_insn "*floatuns<SWI48:mode><MODEF:mode>2_avx512"
6136 [(set (match_operand:MODEF 0 "register_operand" "=v")
6137 (unsigned_float:MODEF
6138 (match_operand:SWI48 1 "nonimmediate_operand" "rm")))]
6139 "TARGET_AVX512F && TARGET_SSE_MATH"
6140 "vcvtusi2<MODEF:ssemodesuffix><SWI48:rex64suffix>\t{%1, %0, %0|%0, %0, %1}"
6141 [(set_attr "type" "sseicvt")
6142 (set_attr "avx_partial_xmm_update" "true")
6143 (set_attr "prefix" "evex")
6144 (set_attr "mode" "<MODEF:MODE>")])
6145
6146 ;; Avoid store forwarding (partial memory) stall penalty by extending
6147 ;; SImode value to DImode through XMM register instead of pushing two
6148 ;; SImode values to stack. Also note that fild loads from memory only.
6149
;; Unsigned SImode -> X87MODEF conversion on 32-bit SSE2 targets.  It has
;; no assembly of its own ("#"); after reload it splits into three sets:
;; zero-extend the source into the DImode "x" scratch (operand 3), store
;; that scratch to the DImode memory operand 2, then do a signed
;; DImode -> float conversion from that memory.
6150 (define_insn_and_split "floatunssi<mode>2_i387_with_xmm"
6151 [(set (match_operand:X87MODEF 0 "register_operand" "=f")
6152 (unsigned_float:X87MODEF
6153 (match_operand:SI 1 "nonimmediate_operand" "rm")))
6154 (clobber (match_operand:DI 2 "memory_operand" "=m"))
6155 (clobber (match_scratch:DI 3 "=x"))]
6156 "!TARGET_64BIT
6157 && TARGET_80387 && X87_ENABLE_FLOAT (<X87MODEF:MODE>mode, DImode)
6158 && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC"
6159 "#"
6160 "&& reload_completed"
6161 [(set (match_dup 3) (zero_extend:DI (match_dup 1)))
6162 (set (match_dup 2) (match_dup 3))
6163 (set (match_dup 0)
6164 (float:X87MODEF (match_dup 2)))]
6165 ""
6166 [(set_attr "type" "multi")
6167 (set_attr "mode" "<MODE>")])
6168
;; Named expander for unsigned SImode -> X87MODEF conversion.  Three paths:
;;  - mode not handled by SSE math: emit floatunssi<mode>2_i387_with_xmm
;;    with a DImode SLOT_TEMP stack slot;
;;  - SSE math without AVX512F: call ix86_expand_convert_uns_si<mode>_sse;
;;  - SSE math with AVX512F: fall through and emit the unsigned_float RTL
;;    from the template.
6169 (define_expand "floatunssi<mode>2"
6170 [(set (match_operand:X87MODEF 0 "register_operand")
6171 (unsigned_float:X87MODEF
6172 (match_operand:SI 1 "nonimmediate_operand")))]
6173 "(!TARGET_64BIT
6174 && TARGET_80387 && X87_ENABLE_FLOAT (<X87MODEF:MODE>mode, DImode)
6175 && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC)
6176 || ((!TARGET_64BIT || TARGET_AVX512F)
6177 && SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
6178 {
6179 if (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH))
6180 {
6181 emit_insn (gen_floatunssi<mode>2_i387_with_xmm
6182 (operands[0], operands[1],
6183 assign_386_stack_local (DImode, SLOT_TEMP)));
6184 DONE;
6185 }
6186 if (!TARGET_AVX512F)
6187 {
6188 ix86_expand_convert_uns_si<mode>_sse (operands[0], operands[1]);
6189 DONE;
6190 }
6191 })
6192
;; Named expander for unsigned DImode -> SFmode conversion on 64-bit SSE
;; targets.  Without AVX512F the work is delegated to
;; x86_emit_floatuns (operands); with AVX512F the unsigned_float RTL from
;; the template is emitted as is.
6193 (define_expand "floatunsdisf2"
6194 [(set (match_operand:SF 0 "register_operand")
6195 (unsigned_float:SF
6196 (match_operand:DI 1 "nonimmediate_operand")))]
6197 "TARGET_64BIT && TARGET_SSE && TARGET_SSE_MATH"
6198 {
6199 if (!TARGET_AVX512F)
6200 {
6201 x86_emit_floatuns (operands);
6202 DONE;
6203 }
6204 })
6205
;; Named expander for unsigned DImode -> DFmode conversion with SSE2 math.
;; Available when (TARGET_64BIT && TARGET_AVX512F) or
;; TARGET_KEEPS_VECTOR_ALIGNED_STACK.  Three paths:
;;  - non-64-bit target: call ix86_expand_convert_uns_didf_sse;
;;  - 64-bit without AVX512F: call x86_emit_floatuns (operands);
;;  - 64-bit with AVX512F: fall through and emit the unsigned_float RTL
;;    from the template.
6206 (define_expand "floatunsdidf2"
6207 [(set (match_operand:DF 0 "register_operand")
6208 (unsigned_float:DF
6209 (match_operand:DI 1 "nonimmediate_operand")))]
6210 "((TARGET_64BIT && TARGET_AVX512F)
6211 || TARGET_KEEPS_VECTOR_ALIGNED_STACK)
6212 && TARGET_SSE2 && TARGET_SSE_MATH"
6213 {
6214 if (!TARGET_64BIT)
6215 {
6216 ix86_expand_convert_uns_didf_sse (operands[0], operands[1]);
6217 DONE;
6218 }
6219 if (!TARGET_AVX512F)
6220 {
6221 x86_emit_floatuns (operands);
6222 DONE;
6223 }
6224 })
6225 \f
6226 ;; Load effective address instructions
6227
;; Load-effective-address into an SWI48 register.  If operand 1 satisfies
;; SImode_address_operand the insn is printed as "lea{l}" with a %k0
;; destination (asserted to occur only for TARGET_64BIT) and the "mode"
;; attribute is SI; otherwise the natural <MODE> form is used.
;; The insn condition is ix86_hardreg_mov_ok (operands[0], operands[1]).
6228 (define_insn "*lea<mode>"
6229 [(set (match_operand:SWI48 0 "register_operand" "=r")
6230 (match_operand:SWI48 1 "address_no_seg_operand" "Ts"))]
6231 "ix86_hardreg_mov_ok (operands[0], operands[1])"
6232 {
6233 if (SImode_address_operand (operands[1], VOIDmode))
6234 {
6235 gcc_assert (TARGET_64BIT);
6236 return "lea{l}\t{%E1, %k0|%k0, %E1}";
6237 }
6238 else
6239 return "lea{<imodesuffix>}\t{%E1, %0|%0, %E1}";
6240 }
6241 [(set_attr "type" "lea")
6242 (set (attr "mode")
6243 (if_then_else
6244 (match_operand 1 "SImode_address_operand")
6245 (const_string "SI")
6246 (const_string "<MODE>")))])
6247
;; Replace an lea by the sequence produced by ix86_split_lea_for_addr when
;; FLAGS_REG is dead at this insn and ix86_avoid_lea_for_addr says the lea
;; should be avoided.  For an SImode_address_operand the replacement is
;; done in SImode and the result is then zero-extended to DImode.
6248 (define_peephole2
6249 [(set (match_operand:SWI48 0 "register_operand")
6250 (match_operand:SWI48 1 "address_no_seg_operand"))]
6251 "ix86_hardreg_mov_ok (operands[0], operands[1])
6252 && peep2_regno_dead_p (0, FLAGS_REG)
6253 && ix86_avoid_lea_for_addr (peep2_next_insn (0), operands)"
6254 [(const_int 0)]
6255 {
6256 machine_mode mode = <MODE>mode;
6257
6258 /* Emit all operations in SImode for zero-extended addresses. */
6259 if (SImode_address_operand (operands[1], VOIDmode))
6260 mode = SImode;
6261
6262 ix86_split_lea_for_addr (peep2_next_insn (0), operands, mode);
6263
6264 /* Zero-extend return register to DImode for zero-extended addresses. */
6265 if (mode != <MODE>mode)
6266 emit_insn (gen_zero_extendsidi2 (operands[0],
6267 gen_lowpart (mode, operands[0])));
6268
6269 DONE;
6270 })
6271
6272 ;; ix86_split_lea_for_addr emits the shifts as MULT to prevent them from being
6273 ;; peephole2 optimized back into a lea. Split that into the shift during
6274 ;; the following split pass.
;; After reload, turn an in-place multiply of a general register by a
;; constant accepted by const1248_operand into a left shift by
;; exact_log2 of that constant.  Both forms clobber FLAGS_REG.
6275 (define_split
6276 [(set (match_operand:SWI48 0 "general_reg_operand")
6277 (mult:SWI48 (match_dup 0) (match_operand:SWI48 1 "const1248_operand")))
6278 (clobber (reg:CC FLAGS_REG))]
6279 "reload_completed"
6280 [(parallel [(set (match_dup 0) (ashift:SWI48 (match_dup 0) (match_dup 1)))
6281 (clobber (reg:CC FLAGS_REG))])]
6282 "operands[1] = GEN_INT (exact_log2 (INTVAL (operands[1])));")
6283 \f
6284 ;; Add instructions
6285
;; Named expander for integer addition over the SDWIM modes.  It is always
;; available (empty condition) and delegates all work to
;; ix86_expand_binary_operator with PLUS, passing TARGET_APX_NDD as the
;; last argument.
6286 (define_expand "add<mode>3"
6287 [(set (match_operand:SDWIM 0 "nonimmediate_operand")
6288 (plus:SDWIM (match_operand:SDWIM 1 "nonimmediate_operand")
6289 (match_operand:SDWIM 2 "<general_hilo_operand>")))]
6290 ""
6291 {
6292 ix86_expand_binary_operator (PLUS, <MODE>mode, operands, TARGET_APX_NDD);
6293 DONE;
6294 })
6295
;; Double-word (<DWI>) addition, kept as a single insn ("#") until reload
;; has completed and then split into two DWIH parallels: an add that also
;; sets the carry in FLAGS_REG (CCC compare), and an add that includes the
;; carry as (ltu (reg:CC FLAGS_REG) (const_int 0)).
;; split_double_mode splits the three operands into operands[0..2], used
;; by the first parallel, and operands[3..5], used by the second.
;; Alternatives 2 and 3 are tagged isa "apx_ndd".
6296 (define_insn_and_split "*add<dwi>3_doubleword"
6297 [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,r,&r,&r")
6298 (plus:<DWI>
6299 (match_operand:<DWI> 1 "nonimmediate_operand" "%0,0,ro,r")
6300 (match_operand:<DWI> 2 "x86_64_hilo_general_operand" "r<di>,o,r<di>,r")))
6301 (clobber (reg:CC FLAGS_REG))]
6302 "ix86_binary_operator_ok (PLUS, <DWI>mode, operands, TARGET_APX_NDD)"
6303 "#"
6304 "&& reload_completed"
6305 [(parallel [(set (reg:CCC FLAGS_REG)
6306 (compare:CCC
6307 (plus:DWIH (match_dup 1) (match_dup 2))
6308 (match_dup 1)))
6309 (set (match_dup 0)
6310 (plus:DWIH (match_dup 1) (match_dup 2)))])
6311 (parallel [(set (match_dup 3)
6312 (plus:DWIH
6313 (plus:DWIH
6314 (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))
6315 (match_dup 4))
6316 (match_dup 5)))
6317 (clobber (reg:CC FLAGS_REG))])]
6318 {
6319 split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]);
/* If the first-parallel half of the addend is zero, the template is
   bypassed (DONE below): that half becomes at most a move and the
   other half a plain add or move.  */
6320 if (operands[2] == const0_rtx)
6321 {
6322 /* Under NDD op0 and op1 may not equal, do not delete insn then. */
6323 bool emit_insn_deleted_note_p = true;
6324 if (!rtx_equal_p (operands[0], operands[1]))
6325 {
6326 emit_move_insn (operands[0], operands[1]);
6327 emit_insn_deleted_note_p = false;
6328 }
6329 if (operands[5] != const0_rtx)
6330 ix86_expand_binary_operator (PLUS, <MODE>mode, &operands[3],
6331 TARGET_APX_NDD);
6332 else if (!rtx_equal_p (operands[3], operands[4]))
6333 emit_move_insn (operands[3], operands[4]);
6334 else if (emit_insn_deleted_note_p)
6335 emit_note (NOTE_INSN_DELETED);
6336 DONE;
6337 }
6338 }
6339 [(set_attr "isa" "*,*,apx_ndd,apx_ndd")])
6340
;; Double-word addition where one addend (operand 2) is a DWIH value
;; zero-extended to <DWI>.  Split after reload into a carry-setting DWIH
;; add of operands 1 and 2 into operand 0, followed by an add of the carry
;; and operand 4 plus (const_int 0) into operand 3.
;; split_double_mode is called for two operands (operands 0 and 1), putting
;; the second set of halves in operands[3] and operands[4]; operand 2 is
;; already DWIH.  Alternatives 2 and 3 are tagged isa "apx_ndd".
6341 (define_insn_and_split "*add<dwi>3_doubleword_zext"
6342 [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=r,o,&r,&r")
6343 (plus:<DWI>
6344 (zero_extend:<DWI>
6345 (match_operand:DWIH 2 "nonimmediate_operand" "rm,r,rm,r"))
6346 (match_operand:<DWI> 1 "nonimmediate_operand" "0,0,r,m")))
6347 (clobber (reg:CC FLAGS_REG))]
6348 "ix86_binary_operator_ok (UNKNOWN, <DWI>mode, operands, TARGET_APX_NDD)"
6349 "#"
6350 "&& reload_completed"
6351 [(parallel [(set (reg:CCC FLAGS_REG)
6352 (compare:CCC
6353 (plus:DWIH (match_dup 1) (match_dup 2))
6354 (match_dup 1)))
6355 (set (match_dup 0)
6356 (plus:DWIH (match_dup 1) (match_dup 2)))])
6357 (parallel [(set (match_dup 3)
6358 (plus:DWIH
6359 (plus:DWIH
6360 (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))
6361 (match_dup 4))
6362 (const_int 0)))
6363 (clobber (reg:CC FLAGS_REG))])]
6364 "split_double_mode (<DWI>mode, &operands[0], 2, &operands[0], &operands[3]);"
6365 [(set_attr "isa" "*,*,apx_ndd,apx_ndd")])
6366
;; Double-word addition of operand 1 (tied to the destination) and a value
;; formed from two DWIH operands as
;; ((zero_extend op2) << op3) any_or_plus (zero_extend op4).  The insn
;; condition requires the shift count op3 to equal
;; <MODE_SIZE> * BITS_PER_UNIT.
;; Split after reload into a carry-setting add of operand 4 into operand 0,
;; followed by an add of the carry, operand 6 and operand 2 into operand 5.
;; split_double_mode splits operands 0 and 1, putting the second set of
;; halves in operands[5] and operands[6].
6367 (define_insn_and_split "*add<dwi>3_doubleword_concat"
6368 [(set (match_operand:<DWI> 0 "register_operand" "=&r")
6369 (plus:<DWI>
6370 (any_or_plus:<DWI>
6371 (ashift:<DWI>
6372 (zero_extend:<DWI>
6373 (match_operand:DWIH 2 "nonimmediate_operand" "rm"))
6374 (match_operand:QI 3 "const_int_operand"))
6375 (zero_extend:<DWI>
6376 (match_operand:DWIH 4 "nonimmediate_operand" "rm")))
6377 (match_operand:<DWI> 1 "register_operand" "0")))
6378 (clobber (reg:CC FLAGS_REG))]
6379 "INTVAL (operands[3]) == <MODE_SIZE> * BITS_PER_UNIT"
6380 "#"
6381 "&& reload_completed"
6382 [(parallel [(set (reg:CCC FLAGS_REG)
6383 (compare:CCC
6384 (plus:DWIH (match_dup 1) (match_dup 4))
6385 (match_dup 1)))
6386 (set (match_dup 0)
6387 (plus:DWIH (match_dup 1) (match_dup 4)))])
6388 (parallel [(set (match_dup 5)
6389 (plus:DWIH
6390 (plus:DWIH
6391 (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))
6392 (match_dup 6))
6393 (match_dup 2)))
6394 (clobber (reg:CC FLAGS_REG))])]
6395 "split_double_mode (<DWI>mode, &operands[0], 2, &operands[0], &operands[5]);")
6396
;; Double-word addition of a zero-extended DWIH operand 1 and a value
;; formed from two DWIH operands as
;; ((zero_extend op2) << op3) any_or_plus (zero_extend op4).  The insn
;; condition requires the shift count op3 to equal
;; <MODE_SIZE> * BITS_PER_UNIT.
;; Split after reload into: move operand 4 into operand 0, move operand 2
;; into operand 5, a carry-setting add of operand 1 into operand 0, then an
;; add of the carry into operand 5.
;; split_double_mode splits only operand 0, putting its second half in
;; operands[5].
6397 (define_insn_and_split "*add<dwi>3_doubleword_concat_zext"
6398 [(set (match_operand:<DWI> 0 "register_operand" "=&r")
6399 (plus:<DWI>
6400 (any_or_plus:<DWI>
6401 (ashift:<DWI>
6402 (zero_extend:<DWI>
6403 (match_operand:DWIH 2 "nonimmediate_operand" "rm"))
6404 (match_operand:QI 3 "const_int_operand"))
6405 (zero_extend:<DWI>
6406 (match_operand:DWIH 4 "nonimmediate_operand" "rm")))
6407 (zero_extend:<DWI>
6408 (match_operand:DWIH 1 "nonimmediate_operand" "rm"))))
6409 (clobber (reg:CC FLAGS_REG))]
6410 "INTVAL (operands[3]) == <MODE_SIZE> * BITS_PER_UNIT"
6411 "#"
6412 "&& reload_completed"
6413 [(set (match_dup 0) (match_dup 4))
6414 (set (match_dup 5) (match_dup 2))
6415 (parallel [(set (reg:CCC FLAGS_REG)
6416 (compare:CCC
6417 (plus:DWIH (match_dup 0) (match_dup 1))
6418 (match_dup 0)))
6419 (set (match_dup 0)
6420 (plus:DWIH (match_dup 0) (match_dup 1)))])
6421 (parallel [(set (match_dup 5)
6422 (plus:DWIH
6423 (plus:DWIH
6424 (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))
6425 (match_dup 5))
6426 (const_int 0)))
6427 (clobber (reg:CC FLAGS_REG))])]
6428 "split_double_mode (<DWI>mode, &operands[0], 1, &operands[0], &operands[5]);")
6429
;; Single-word SWI48 addition with eight alternatives; alternatives 4-7
;; are the APX NDD forms (see the "isa" attribute) and print a separate
;; destination operand.
;; "type": alternative 3 is "lea" and is output as "#"; otherwise an
;; operand 2 matching incdec_operand gives "incdec" (inc/dec), and
;; everything else is "alu" (add, or sub when x86_maybe_negate_const_int
;; returns true).
;; In alternative 2 operand 2 is tied to the destination, so operands 1
;; and 2 are swapped before printing.
6430 (define_insn "*add<mode>_1"
6431 [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r,r,r,r,r,r")
6432 (plus:SWI48
6433 (match_operand:SWI48 1 "nonimmediate_operand" "%0,0,r,r,rm,r,m,r")
6434 (match_operand:SWI48 2 "x86_64_general_operand" "re,BM,0,le,r,e,je,BM")))
6435 (clobber (reg:CC FLAGS_REG))]
6436 "ix86_binary_operator_ok (PLUS, <MODE>mode, operands, TARGET_APX_NDD)"
6437 {
6438 bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
6439 switch (get_attr_type (insn))
6440 {
6441 case TYPE_LEA:
6442 return "#";
6443
6444 case TYPE_INCDEC:
6445 if (operands[2] == const1_rtx)
6446 return use_ndd ? "inc{<imodesuffix>}\t{%1, %0|%0, %1}"
6447 : "inc{<imodesuffix>}\t%0";
6448 else
6449 {
6450 gcc_assert (operands[2] == constm1_rtx);
6451 return use_ndd ? "dec{<imodesuffix>}\t{%1, %0|%0, %1}"
6452 : "dec{<imodesuffix>}\t%0";
6453 }
6454
6455 default:
6456 /* For most processors, ADD is faster than LEA. This alternative
6457 was added to use ADD as much as possible. */
6458 if (which_alternative == 2)
6459 std::swap (operands[1], operands[2]);
6460
6461 if (x86_maybe_negate_const_int (&operands[2], <MODE>mode))
6462 return use_ndd ? "sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
6463 : "sub{<imodesuffix>}\t{%2, %0|%0, %2}";
6464
6465 return use_ndd ? "add{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
6466 : "add{<imodesuffix>}\t{%2, %0|%0, %2}";
6467 }
6468 }
6469 [(set_attr "isa" "*,*,*,*,apx_ndd,apx_ndd,apx_ndd,apx_ndd")
6470 (set (attr "type")
6471 (cond [(eq_attr "alternative" "3")
6472 (const_string "lea")
6473 (match_operand:SWI48 2 "incdec_operand")
6474 (const_string "incdec")
6475 ]
6476 (const_string "alu")))
6477 (set (attr "length_immediate")
6478 (if_then_else
6479 (and (eq_attr "type" "alu") (match_operand 2 "const128_operand"))
6480 (const_string "1")
6481 (const_string "*")))
6482 (set_attr "mode" "<MODE>")])
6483
6484 ;; It may seem that a nonimmediate operand is the proper one for operand 1.
6485 ;; The addsi_1 pattern allows a nonimmediate operand at that place and
6486 ;; we take care in ix86_binary_operator_ok to not allow two memory
6487 ;; operands, so proper swapping will be done in reload. This allows
6488 ;; patterns constructed from addsi_1 to match.
6489
;; SImode addition whose result is zero-extended into a DImode register
;; (TARGET_64BIT only).  The destination is printed through %k0.
;; Five alternatives; alternatives 3 and 4 are the APX NDD forms.
;; "type": alternative 2 is "lea" and is output as "#"; otherwise an
;; operand 2 matching incdec_operand gives "incdec", and everything else
;; is "alu".  In alternative 1 operand 2 is tied to the destination, so
;; operands 1 and 2 are swapped before printing.
6490 (define_insn "addsi_1_zext"
6491 [(set (match_operand:DI 0 "register_operand" "=r,r,r,r,r")
6492 (zero_extend:DI
6493 (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0,r,r,r,rm")
6494 (match_operand:SI 2 "x86_64_general_operand" "rBMe,0,le,rBMe,re"))))
6495 (clobber (reg:CC FLAGS_REG))]
6496 "TARGET_64BIT
6497 && ix86_binary_operator_ok (PLUS, SImode, operands, TARGET_APX_NDD)"
6498 {
6499 bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
6500 switch (get_attr_type (insn))
6501 {
6502 case TYPE_LEA:
6503 return "#";
6504
6505 case TYPE_INCDEC:
6506 if (operands[2] == const1_rtx)
6507 return use_ndd ? "inc{l}\t{%1, %k0|%k0, %1}"
6508 : "inc{l}\t%k0";
6509 else
6510 {
6511 gcc_assert (operands[2] == constm1_rtx);
6512 return use_ndd ? "dec{l}\t{%1, %k0|%k0, %1}"
6513 : "dec{l}\t%k0";
6514 }
6515
6516 default:
6517 /* For most processors, ADD is faster than LEA. This alternative
6518 was added to use ADD as much as possible. */
6519 if (which_alternative == 1)
6520 std::swap (operands[1], operands[2]);
6521
/* NOTE(review): the AT&T halves of the two NDD templates below are
   spelled "%2 ,%1" whereas *add<mode>_1 and *addhi_1 use "%2, %1".
   Presumably harmless to the assembler -- confirm before
   normalizing.  */
6522 if (x86_maybe_negate_const_int (&operands[2], SImode))
6523 return use_ndd ? "sub{l}\t{%2 ,%1, %k0|%k0, %1, %2}"
6524 : "sub{l}\t{%2, %k0|%k0, %2}";
6525
6526 return use_ndd ? "add{l}\t{%2 ,%1, %k0|%k0, %1, %2}"
6527 : "add{l}\t{%2, %k0|%k0, %2}";
6528 }
6529 }
6530 [(set_attr "isa" "*,*,*,apx_ndd,apx_ndd")
6531 (set (attr "type")
6532 (cond [(eq_attr "alternative" "2")
6533 (const_string "lea")
6534 (match_operand:SI 2 "incdec_operand")
6535 (const_string "incdec")
6536 ]
6537 (const_string "alu")))
6538 (set (attr "length_immediate")
6539 (if_then_else
6540 (and (eq_attr "type" "alu") (match_operand 2 "const128_operand"))
6541 (const_string "1")
6542 (const_string "*")))
6543 (set_attr "mode" "SI")])
6544
;; HImode addition with six alternatives; alternatives 4 and 5 are the
;; APX NDD forms.
;; "type": alternative 3 is "lea" (output as "#", "mode" attribute SI);
;; otherwise an operand 2 matching incdec_operand gives "incdec", and
;; everything else is "alu".  In alternative 2 operand 2 is tied to the
;; destination, so operands 1 and 2 are swapped before printing.
6545 (define_insn "*addhi_1"
6546 [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r,r,Yp,r,r")
6547 (plus:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0,r,Yp,rm,r")
6548 (match_operand:HI 2 "general_operand" "rn,m,0,ln,rn,m")))
6549 (clobber (reg:CC FLAGS_REG))]
6550 "ix86_binary_operator_ok (PLUS, HImode, operands, TARGET_APX_NDD)"
6551 {
6552 bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
6553 switch (get_attr_type (insn))
6554 {
6555 case TYPE_LEA:
6556 return "#";
6557
6558 case TYPE_INCDEC:
6559 if (operands[2] == const1_rtx)
6560 return use_ndd ? "inc{w}\t{%1, %0|%0, %1}" : "inc{w}\t%0";
6561 else
6562 {
6563 gcc_assert (operands[2] == constm1_rtx);
6564 return use_ndd ? "dec{w}\t{%1, %0|%0, %1}" : "dec{w}\t%0";
6565 }
6566
6567 default:
6568 /* For most processors, ADD is faster than LEA. This alternative
6569 was added to use ADD as much as possible. */
6570 if (which_alternative == 2)
6571 std::swap (operands[1], operands[2]);
6572
6573 if (x86_maybe_negate_const_int (&operands[2], HImode))
6574 return use_ndd ? "sub{w}\t{%2, %1, %0|%0, %1, %2}"
6575 : "sub{w}\t{%2, %0|%0, %2}";
6576
6577 return use_ndd ? "add{w}\t{%2, %1, %0|%0, %1, %2}"
6578 : "add{w}\t{%2, %0|%0, %2}";
6579 }
6580 }
6581 [(set_attr "isa" "*,*,*,*,apx_ndd,apx_ndd")
6582 (set (attr "type")
6583 (cond [(eq_attr "alternative" "3")
6584 (const_string "lea")
6585 (match_operand:HI 2 "incdec_operand")
6586 (const_string "incdec")
6587 ]
6588 (const_string "alu")))
6589 (set (attr "length_immediate")
6590 (if_then_else
6591 (and (eq_attr "type" "alu") (match_operand 2 "const128_operand"))
6592 (const_string "1")
6593 (const_string "*")))
6594 (set_attr "mode" "HI,HI,HI,SI,HI,HI")])
6595
;; QImode addition with eight alternatives; alternatives 6 and 7 are the
;; APX NDD forms.  Alternatives 3-5 have "mode" attribute SI: for them
;; "widen" is true and the non-NDD templates print the 32-bit form of the
;; instruction on %k0.
;; "type": alternative 5 is "lea" (output as "#"); otherwise an operand 2
;; matching incdec_operand gives "incdec", and everything else is "alu".
;; In alternatives 2 and 4 operand 2 is tied to the destination, so
;; operands 1 and 2 are swapped before printing.
6596 (define_insn "*addqi_1"
6597 [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,q,r,r,Yp,r,r")
6598 (plus:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,q,0,r,Yp,rm,r")
6599 (match_operand:QI 2 "general_operand" "qn,m,0,rn,0,ln,rn,m")))
6600 (clobber (reg:CC FLAGS_REG))]
6601 "ix86_binary_operator_ok (PLUS, QImode, operands, TARGET_APX_NDD)"
6602 {
6603 bool widen = (get_attr_mode (insn) != MODE_QI);
6604 bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
6605 switch (get_attr_type (insn))
6606 {
6607 case TYPE_LEA:
6608 return "#";
6609
6610 case TYPE_INCDEC:
/* The first "else" below pairs with "if (use_ndd)"; the second one
   pairs with the const1_rtx test.  */
6611 if (operands[2] == const1_rtx)
6612 if (use_ndd)
6613 return "inc{b}\t{%1, %0|%0, %1}";
6614 else
6615 return widen ? "inc{l}\t%k0" : "inc{b}\t%0";
6616 else
6617 {
6618 gcc_assert (operands[2] == constm1_rtx);
6619 if (use_ndd)
6620 return "dec{b}\t{%1, %0|%0, %1}";
6621 else
6622 return widen ? "dec{l}\t%k0" : "dec{b}\t%0";
6623 }
6624
6625 default:
6626 /* For most processors, ADD is faster than LEA. These alternatives
6627 were added to use ADD as much as possible. */
6628 if (which_alternative == 2 || which_alternative == 4)
6629 std::swap (operands[1], operands[2]);
6630
6631 if (x86_maybe_negate_const_int (&operands[2], QImode))
6632 {
6633 if (use_ndd)
6634 return "sub{b}\t{%2, %1, %0|%0, %1, %2}";
6635 else
6636 return widen ? "sub{l}\t{%2, %k0|%k0, %2}"
6637 : "sub{b}\t{%2, %0|%0, %2}";
6638 }
6639 if (use_ndd)
6640 return "add{b}\t{%2, %1, %0|%0, %1, %2}";
6641 else
6642 return widen ? "add{l}\t{%k2, %k0|%k0, %k2}"
6643 : "add{b}\t{%2, %0|%0, %2}";
6644 }
6645 }
6646 [(set_attr "isa" "*,*,*,*,*,*,apx_ndd,apx_ndd")
6647 (set (attr "type")
6648 (cond [(eq_attr "alternative" "5")
6649 (const_string "lea")
6650 (match_operand:QI 2 "incdec_operand")
6651 (const_string "incdec")
6652 ]
6653 (const_string "alu")))
6654 (set (attr "length_immediate")
6655 (if_then_else
6656 (and (eq_attr "type" "alu") (match_operand 2 "const128_operand"))
6657 (const_string "1")
6658 (const_string "*")))
6659 (set_attr "mode" "QI,QI,QI,SI,SI,SI,QI,QI")
6660 ;; Potential partial reg stall on alternatives 3 and 4.
6661 (set (attr "preferred_for_speed")
6662 (cond [(eq_attr "alternative" "3,4")
6663 (symbol_ref "!TARGET_PARTIAL_REG_STALL")]
6664 (symbol_ref "true")))])
6665
6666 ;; Alternative 1 is needed to work around LRA limitation, see PR82524.
;; Addition into the low part (strict_low_part) of register operand 0, for
;; QImode and HImode.  Alternative 0 has the destination tied to operand 1
;; and is output directly.  Alternative 1 outputs "#" and is split after
;; reload into a low-part move of operand 1 followed by the tied form of
;; the addition, unless operand 0 already equals one of the sources.
(define_insn_and_split "*add<mode>_1_slp"
  [(set (strict_low_part (match_operand:SWI12 0 "register_operand" "+<r>,&<r>"))
        (plus:SWI12 (match_operand:SWI12 1 "nonimmediate_operand" "%0,!<r>")
                    (match_operand:SWI12 2 "general_operand" "<r>mn,<r>mn")))
   (clobber (reg:CC FLAGS_REG))]
  "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
{
  if (which_alternative)
    return "#";

  switch (get_attr_type (insn))
    {
    case TYPE_INCDEC:
      if (operands[2] == const1_rtx)
        return "inc{<imodesuffix>}\t%0";
      else
        {
          gcc_assert (operands[2] == constm1_rtx);
          return "dec{<imodesuffix>}\t%0";
        }

    default:
      /* Pass the mode this instance actually operates in (QImode or
         HImode), as the other add patterns do, rather than a fixed
         QImode.  A true return means the constant is to be printed
         with the opposite operation.  */
      if (x86_maybe_negate_const_int (&operands[2], <MODE>mode))
        return "sub{<imodesuffix>}\t{%2, %0|%0, %2}";

      return "add{<imodesuffix>}\t{%2, %0|%0, %2}";
    }
}
  "&& reload_completed
   && !(rtx_equal_p (operands[0], operands[1])
        || rtx_equal_p (operands[0], operands[2]))"
  [(set (strict_low_part (match_dup 0)) (match_dup 1))
   (parallel
     [(set (strict_low_part (match_dup 0))
           (plus:SWI12 (match_dup 0) (match_dup 2)))
      (clobber (reg:CC FLAGS_REG))])]
  ""
  [(set (attr "type")
     (if_then_else (match_operand:SWI12 2 "incdec_operand")
        (const_string "incdec")
        (const_string "alu")))
   (set_attr "mode" "<MODE>")])
6709
6710 ;; Alternative 1 is needed to work around LRA limitation, see PR82524.
;; Add the byte extracted at bit position 8, width 8, of operand 2
;; (printed with %h2) into the low byte (strict_low_part) of operand 0.
;; Alternative 0 has operand 1 tied to the destination.  Alternative 1
;; outputs "#" and is split after reload, when operand 0 differs from
;; operand 1, into a low-part move of operand 1 followed by the tied add.
(define_insn_and_split "*addqi_ext<mode>_1_slp"
  [(set (strict_low_part (match_operand:QI 0 "register_operand" "+Q,&Q"))
        (plus:QI
          (subreg:QI
            (match_operator:SWI248 3 "extract_operator"
              [(match_operand 2 "int248_register_operand" "Q,Q")
               (const_int 8)
               (const_int 8)]) 0)
          (match_operand:QI 1 "nonimmediate_operand" "0,!qm")))
   (clobber (reg:CC FLAGS_REG))]
  "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
  "@
   add{b}\t{%h2, %0|%0, %h2}
   #"
  "&& reload_completed
   && !rtx_equal_p (operands[0], operands[1])"
  [(set (strict_low_part (match_dup 0)) (match_dup 1))
   (parallel
     [(set (strict_low_part (match_dup 0))
           (plus:QI
             (subreg:QI
               (match_op_dup 3
                 [(match_dup 2) (const_int 8) (const_int 8)]) 0)
             (match_dup 0)))
      (clobber (reg:CC FLAGS_REG))])]
  ""
  [(set_attr "type" "alu")
   (set_attr "mode" "QI")])
6739
;; Sum of two extracted bytes (position 8, width 8, of operands 1 and 2)
;; stored into the low byte (strict_low_part) of operand 0.  Always output
;; as "#"; after reload it is split into a low-part move of the byte
;; extracted from operand 2, followed by an add of the byte extracted from
;; operand 1 into that low part.
(define_insn_and_split "*addqi_ext<mode>_2_slp"
  [(set (strict_low_part (match_operand:QI 0 "register_operand" "+&Q"))
        (plus:QI
          (subreg:QI
            (match_operator:SWI248 3 "extract_operator"
              [(match_operand 1 "int248_register_operand" "Q")
               (const_int 8)
               (const_int 8)]) 0)
          (subreg:QI
            (match_operator:SWI248 4 "extract_operator"
              [(match_operand 2 "int248_register_operand" "Q")
               (const_int 8)
               (const_int 8)]) 0)))
   (clobber (reg:CC FLAGS_REG))]
  "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
  "#"
  "&& reload_completed"
  [(set (strict_low_part (match_dup 0))
        (subreg:QI
          (match_op_dup 4
            [(match_dup 2) (const_int 8) (const_int 8)]) 0))
   (parallel
     [(set (strict_low_part (match_dup 0))
           (plus:QI
             (subreg:QI
               (match_op_dup 3
                 [(match_dup 1) (const_int 8) (const_int 8)]) 0)
             (match_dup 0)))
      (clobber (reg:CC FLAGS_REG))])]
  ""
  [(set_attr "type" "alu")
   (set_attr "mode" "QI")])
6772
6773 ;; Split non destructive adds if we cannot use lea.
;; After reload, when ix86_avoid_lea_for_add accepts the insn, rewrite
;; op0 = op1 + op2 as a copy of op1 into op0 followed by op0 = op0 + op2.
(define_split
  [(set (match_operand:SWI48 0 "register_operand")
        (plus:SWI48 (match_operand:SWI48 1 "register_operand")
                    (match_operand:SWI48 2 "x86_64_nonmemory_operand")))
   (clobber (reg:CC FLAGS_REG))]
  "reload_completed && ix86_avoid_lea_for_add (insn, operands)"
  [(set (match_dup 0) (match_dup 1))
   (parallel [(set (match_dup 0) (plus:SWI48 (match_dup 0) (match_dup 2)))
              (clobber (reg:CC FLAGS_REG))])])
6783
6784 ;; Split non destructive adds if we cannot use lea.
;; Zero-extending variant of the split above, for TARGET_64BIT: copy op1
;; into the SImode low part of op0 (operand 3), then perform the
;; zero-extended addition on that low part.
(define_split
  [(set (match_operand:DI 0 "register_operand")
        (zero_extend:DI
          (plus:SI (match_operand:SI 1 "register_operand")
                   (match_operand:SI 2 "x86_64_nonmemory_operand"))))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT
   && reload_completed && ix86_avoid_lea_for_add (insn, operands)"
  [(set (match_dup 3) (match_dup 1))
   (parallel [(set (match_dup 0)
                   (zero_extend:DI (plus:SI (match_dup 3) (match_dup 2))))
              (clobber (reg:CC FLAGS_REG))])]
{
  /* Operand 3 is the SImode view of the DImode destination.  */
  operands[3] = gen_lowpart (SImode, operands[0]);
})
6798
6799 ;; Convert add to the lea pattern to avoid flags dependency.
;; After reload, when ix86_lea_for_add_ok accepts the insn, replace the
;; flag-clobbering add by a plain plus in <LEAMODE> with no clobber.
(define_split
  [(set (match_operand:SWI 0 "register_operand")
        (plus:SWI (match_operand:SWI 1 "register_operand")
                  (match_operand:SWI 2 "<nonmemory_operand>")))
   (clobber (reg:CC FLAGS_REG))]
  "reload_completed && ix86_lea_for_add_ok (insn, operands)"
  [(set (match_dup 0)
        (plus:<LEAMODE> (match_dup 1) (match_dup 2)))]
{
  /* When the operation mode differs from the mode used for the lea,
     re-express all three operands in <LEAMODE>.  */
  if (<MODE>mode != <LEAMODE>mode)
    for (int opno = 0; opno < 3; opno++)
      operands[opno] = gen_lowpart (<LEAMODE>mode, operands[opno]);
})
6816
6817 ;; Convert add to the lea pattern to avoid flags dependency.
;; Zero-extending variant for TARGET_64BIT: drop the flags clobber and
;; keep the zero-extended SImode sum as the whole pattern.
(define_split
  [(set (match_operand:DI 0 "register_operand")
        (zero_extend:DI
          (plus:SI (match_operand:SI 1 "register_operand")
                   (match_operand:SI 2 "x86_64_nonmemory_operand"))))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT && reload_completed && ix86_lea_for_add_ok (insn, operands)"
  [(set (match_dup 0)
        (zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))])
6827
;; Addition that both stores the sum in operand 0 and sets the flags from
;; comparing that sum with zero (CCGOCmode).  Alternatives 3 and 4 are
;; enabled for isa apx_ndd and use the three-operand templates.
(define_insn "*add<mode>_2"
  [(set (reg FLAGS_REG)
        (compare
          (plus:SWI
            (match_operand:SWI 1 "nonimmediate_operand" "%0,0,<r>,rm,r")
            (match_operand:SWI 2 "<general_operand>" "<r><i>,<m>,0,r<i>,<m>"))
          (const_int 0)))
   (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>,<r>,r,r")
        (plus:SWI (match_dup 1) (match_dup 2)))]
  "ix86_match_ccmode (insn, CCGOCmode)
   && ix86_binary_operator_ok (PLUS, <MODE>mode, operands, TARGET_APX_NDD)"
{
  const bool ndd_form = get_attr_isa (insn) == ISA_APX_NDD;

  if (get_attr_type (insn) == TYPE_INCDEC)
    {
      if (operands[2] == const1_rtx)
        {
          if (ndd_form)
            return "inc{<imodesuffix>}\t{%1, %0|%0, %1}";
          return "inc{<imodesuffix>}\t%0";
        }

      gcc_assert (operands[2] == constm1_rtx);
      if (ndd_form)
        return "dec{<imodesuffix>}\t{%1, %0|%0, %1}";
      return "dec{<imodesuffix>}\t%0";
    }

  /* In alternative 2 operand 2 is the one tied to the destination, so
     exchange the sources before printing.  */
  if (which_alternative == 2)
    std::swap (operands[1], operands[2]);

  /* A true return selects the sub mnemonic for the constant.  */
  if (x86_maybe_negate_const_int (&operands[2], <MODE>mode))
    {
      if (ndd_form)
        return "sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}";
      return "sub{<imodesuffix>}\t{%2, %0|%0, %2}";
    }

  if (ndd_form)
    return "add{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}";
  return "add{<imodesuffix>}\t{%2, %0|%0, %2}";
}
  [(set_attr "isa" "*,*,*,apx_ndd,apx_ndd")
   (set (attr "type")
     (if_then_else (match_operand:SWI 2 "incdec_operand")
        (const_string "incdec")
        (const_string "alu")))
   (set (attr "length_immediate")
      (if_then_else
        (and (eq_attr "type" "alu") (match_operand 2 "const128_operand"))
        (const_string "1")
        (const_string "*")))
   (set_attr "mode" "<MODE>")])
6877
6878 ;; See comment for addsi_1_zext why we do use nonimmediate_operand
;; SImode addition that sets the flags from comparing the sum with zero
;; (CCGOCmode) and stores the zero-extended sum in DImode operand 0.
;; Only for TARGET_64BIT.  Alternatives 2 and 3 are enabled for isa
;; apx_ndd and use the three-operand templates.
(define_insn "*addsi_2_zext"
  [(set (reg FLAGS_REG)
        (compare
          (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0,r,r,rm")
                   (match_operand:SI 2 "x86_64_general_operand" "rBMe,0,rBMe,re"))
          (const_int 0)))
   (set (match_operand:DI 0 "register_operand" "=r,r,r,r")
        (zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))]
  "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode)
   && ix86_binary_operator_ok (PLUS, SImode, operands, TARGET_APX_NDD)"
{
  const bool ndd_form = get_attr_isa (insn) == ISA_APX_NDD;

  if (get_attr_type (insn) == TYPE_INCDEC)
    {
      if (operands[2] == const1_rtx)
        {
          if (ndd_form)
            return "inc{l}\t{%1, %k0|%k0, %1}";
          return "inc{l}\t%k0";
        }

      gcc_assert (operands[2] == constm1_rtx);
      if (ndd_form)
        return "dec{l}\t{%1, %k0|%k0, %1}";
      return "dec{l}\t%k0";
    }

  /* In alternative 1 operand 2 is the one tied to the destination.  */
  if (which_alternative == 1)
    std::swap (operands[1], operands[2]);

  /* A true return selects the sub mnemonic for the constant.  */
  if (x86_maybe_negate_const_int (&operands[2], SImode))
    {
      if (ndd_form)
        return "sub{l}\t{%2, %1, %k0|%k0, %1, %2}";
      return "sub{l}\t{%2, %k0|%k0, %2}";
    }

  if (ndd_form)
    return "add{l}\t{%2, %1, %k0|%k0, %1, %2}";
  return "add{l}\t{%2, %k0|%k0, %2}";
}
  [(set_attr "isa" "*,*,apx_ndd,apx_ndd")
   (set (attr "type")
     (if_then_else (match_operand:SI 2 "incdec_operand")
        (const_string "incdec")
        (const_string "alu")))
   (set (attr "length_immediate")
      (if_then_else
        (and (eq_attr "type" "alu") (match_operand 2 "const128_operand"))
        (const_string "1")
        (const_string "*")))
   (set_attr "mode" "SI")])
6927
;; Flags-only addition: the flags are set (CCZmode) from comparing the
;; negation of operand 2 with operand 1, and operand 0 is just a clobbered
;; scratch register.  Two memory sources are rejected.  Alternatives 2
;; and 3 are enabled for isa apx_ndd.
(define_insn "*add<mode>_3"
  [(set (reg FLAGS_REG)
        (compare
          (neg:SWI (match_operand:SWI 2 "<general_operand>" "<g>,0,<g>,re"))
          (match_operand:SWI 1 "nonimmediate_operand" "%0,<r>,r,rm")))
   (clobber (match_scratch:SWI 0 "=<r>,<r>,r,r"))]
  "ix86_match_ccmode (insn, CCZmode)
   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
{
  const bool ndd_form = get_attr_isa (insn) == ISA_APX_NDD;

  if (get_attr_type (insn) == TYPE_INCDEC)
    {
      if (operands[2] == const1_rtx)
        {
          if (ndd_form)
            return "inc{<imodesuffix>}\t{%1, %0|%0, %1}";
          return "inc{<imodesuffix>}\t%0";
        }

      gcc_assert (operands[2] == constm1_rtx);
      if (ndd_form)
        return "dec{<imodesuffix>}\t{%1, %0|%0, %1}";
      return "dec{<imodesuffix>}\t%0";
    }

  /* In alternative 1 operand 2 is the one tied to the scratch.  */
  if (which_alternative == 1)
    std::swap (operands[1], operands[2]);

  /* A true return selects the sub mnemonic for the constant.  */
  if (x86_maybe_negate_const_int (&operands[2], <MODE>mode))
    {
      if (ndd_form)
        return "sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}";
      return "sub{<imodesuffix>}\t{%2, %0|%0, %2}";
    }

  if (ndd_form)
    return "add{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}";
  return "add{<imodesuffix>}\t{%2, %0|%0, %2}";
}
  [(set_attr "isa" "*,*,apx_ndd,apx_ndd")
   (set (attr "type")
     (if_then_else (match_operand:SWI 2 "incdec_operand")
        (const_string "incdec")
        (const_string "alu")))
   (set (attr "length_immediate")
      (if_then_else
        (and (eq_attr "type" "alu") (match_operand 2 "const128_operand"))
        (const_string "1")
        (const_string "*")))
   (set_attr "mode" "<MODE>")])
6974
6975 ;; See comment for addsi_1_zext why we do use nonimmediate_operand
;; SImode addition for TARGET_64BIT whose flags are set (CCZmode) from
;; comparing the negation of operand 2 with operand 1, and whose
;; zero-extended sum is stored in DImode operand 0.  Alternatives 2 and 3
;; are enabled for isa apx_ndd.
(define_insn "*addsi_3_zext"
  [(set (reg FLAGS_REG)
        (compare
          (neg:SI (match_operand:SI 2 "x86_64_general_operand" "rBMe,0,rBMe,re"))
          (match_operand:SI 1 "nonimmediate_operand" "%0,r,r,rm")))
   (set (match_operand:DI 0 "register_operand" "=r,r,r,r")
        (zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))]
  "TARGET_64BIT && ix86_match_ccmode (insn, CCZmode)
   && ix86_binary_operator_ok (PLUS, SImode, operands, TARGET_APX_NDD)"
{
  const bool ndd_form = get_attr_isa (insn) == ISA_APX_NDD;

  if (get_attr_type (insn) == TYPE_INCDEC)
    {
      if (operands[2] == const1_rtx)
        {
          if (ndd_form)
            return "inc{l}\t{%1, %k0|%k0, %1}";
          return "inc{l}\t%k0";
        }

      gcc_assert (operands[2] == constm1_rtx);
      if (ndd_form)
        return "dec{l}\t{%1, %k0|%k0, %1}";
      return "dec{l}\t%k0";
    }

  /* In alternative 1 operand 2 is the one tied to the destination.  */
  if (which_alternative == 1)
    std::swap (operands[1], operands[2]);

  /* A true return selects the sub mnemonic for the constant.  */
  if (x86_maybe_negate_const_int (&operands[2], SImode))
    {
      if (ndd_form)
        return "sub{l}\t{%2, %1, %k0|%k0, %1, %2}";
      return "sub{l}\t{%2, %k0|%k0, %2}";
    }

  if (ndd_form)
    return "add{l}\t{%2, %1, %k0|%k0, %1, %2}";
  return "add{l}\t{%2, %k0|%k0, %2}";
}
  [(set_attr "isa" "*,*,apx_ndd,apx_ndd")
   (set (attr "type")
     (if_then_else (match_operand:SI 2 "incdec_operand")
        (const_string "incdec")
        (const_string "alu")))
   (set (attr "length_immediate")
      (if_then_else
        (and (eq_attr "type" "alu") (match_operand 2 "const128_operand"))
        (const_string "1")
        (const_string "*")))
   (set_attr "mode" "SI")])
7021
7022 ; For comparisons against 1, -1 and 128, we may generate better code
7023 ; by converting cmp to add, inc or dec as done by peephole2. This pattern
7024 ; is matched then. We can't accept general immediate, because for
7025 ; case of overflows, the result is messed up.
7026 ; Also carry flag is reversed compared to cmp, so this conversion is valid
7027 ; only for comparisons not depending on it.
7028
;; DImode compare of operand 1 against immediate operand 2 (CCGCmode,
;; TARGET_64BIT only); operand 0 is a clobbered scratch.  The RTL is a
;; compare, so the output is inverted relative to the add patterns:
;; inc for -1, dec for 1, add when the constant gets negated, else sub.
;; Alternative 1 is enabled for isa apx_ndd.
(define_insn "*adddi_4"
  [(set (reg FLAGS_REG)
        (compare
          (match_operand:DI 1 "nonimmediate_operand" "0,rm")
          (match_operand:DI 2 "x86_64_immediate_operand" "e,e")))
   (clobber (match_scratch:DI 0 "=r,r"))]
  "TARGET_64BIT
   && ix86_match_ccmode (insn, CCGCmode)"
{
  const bool ndd_form = get_attr_isa (insn) == ISA_APX_NDD;

  if (get_attr_type (insn) == TYPE_INCDEC)
    {
      if (operands[2] == constm1_rtx)
        {
          if (ndd_form)
            return "inc{q}\t{%1, %0|%0, %1}";
          return "inc{q}\t%0";
        }

      gcc_assert (operands[2] == const1_rtx);
      if (ndd_form)
        return "dec{q}\t{%1, %0|%0, %1}";
      return "dec{q}\t%0";
    }

  /* A true return selects the add mnemonic for the constant.  */
  if (x86_maybe_negate_const_int (&operands[2], DImode))
    {
      if (ndd_form)
        return "add{q}\t{%2, %1, %0|%0, %1, %2}";
      return "add{q}\t{%2, %0|%0, %2}";
    }

  if (ndd_form)
    return "sub{q}\t{%2, %1, %0|%0, %1, %2}";
  return "sub{q}\t{%2, %0|%0, %2}";
}
  [(set_attr "isa" "*,apx_ndd")
   (set (attr "type")
     (if_then_else (match_operand:DI 2 "incdec_operand")
        (const_string "incdec")
        (const_string "alu")))
   (set (attr "length_immediate")
      (if_then_else
        (and (eq_attr "type" "alu") (match_operand 2 "const128_operand"))
        (const_string "1")
        (const_string "*")))
   (set_attr "mode" "DI")])
7070
7071 ; For comparisons against 1, -1 and 128, we may generate better code
7072 ; by converting cmp to add, inc or dec as done by peephole2. This pattern
7073 ; is matched then. We can't accept general immediate, because for
7074 ; case of overflows, the result is messed up.
7075 ; Also carry flag is reversed compared to cmp, so this conversion is valid
7076 ; only for comparisons not depending on it.
7077
;; QImode/HImode/SImode counterpart of the DImode compare-as-add pattern:
;; compare of operand 1 against const_int operand 2 (CCGCmode), with
;; operand 0 a clobbered scratch.  The output is inverted relative to the
;; add patterns: inc for -1, dec for 1, add when the constant gets
;; negated, else sub.  Alternative 1 is enabled for isa apx_ndd.
(define_insn "*add<mode>_4"
  [(set (reg FLAGS_REG)
        (compare
          (match_operand:SWI124 1 "nonimmediate_operand" "0,rm")
          (match_operand:SWI124 2 "const_int_operand")))
   (clobber (match_scratch:SWI124 0 "=<r>,r"))]
  "ix86_match_ccmode (insn, CCGCmode)"
{
  const bool ndd_form = get_attr_isa (insn) == ISA_APX_NDD;

  if (get_attr_type (insn) == TYPE_INCDEC)
    {
      if (operands[2] == constm1_rtx)
        {
          if (ndd_form)
            return "inc{<imodesuffix>}\t{%1, %0|%0, %1}";
          return "inc{<imodesuffix>}\t%0";
        }

      gcc_assert (operands[2] == const1_rtx);
      if (ndd_form)
        return "dec{<imodesuffix>}\t{%1, %0|%0, %1}";
      return "dec{<imodesuffix>}\t%0";
    }

  /* A true return selects the add mnemonic for the constant.  */
  if (x86_maybe_negate_const_int (&operands[2], <MODE>mode))
    {
      if (ndd_form)
        return "add{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}";
      return "add{<imodesuffix>}\t{%2, %0|%0, %2}";
    }

  if (ndd_form)
    return "sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}";
  return "sub{<imodesuffix>}\t{%2, %0|%0, %2}";
}
  [(set_attr "isa" "*,apx_ndd")
   (set (attr "type")
     (if_then_else (match_operand:<MODE> 2 "incdec_operand")
        (const_string "incdec")
        (const_string "alu")))
   (set (attr "length_immediate")
      (if_then_else
        (and (eq_attr "type" "alu") (match_operand 2 "const128_operand"))
        (const_string "1")
        (const_string "*")))
   (set_attr "mode" "<MODE>")])
7120
;; Flags-only addition: the flags are set (CCGOCmode) from comparing
;; operand 1 + operand 2 with zero, and the sum itself only lands in a
;; clobbered scratch (operand 0).  Two memory sources are rejected.
;; Alternatives 2 and 3 are enabled for isa apx_ndd.
(define_insn "*add<mode>_5"
  [(set (reg FLAGS_REG)
        (compare
          (plus:SWI
            (match_operand:SWI 1 "nonimmediate_operand" "%0,<r>,r,rm")
            (match_operand:SWI 2 "<general_operand>" "<g>,0,<g>,re"))
          (const_int 0)))
   (clobber (match_scratch:SWI 0 "=<r>,<r>,r,r"))]
  "ix86_match_ccmode (insn, CCGOCmode)
   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
{
  const bool ndd_form = get_attr_isa (insn) == ISA_APX_NDD;

  if (get_attr_type (insn) == TYPE_INCDEC)
    {
      if (operands[2] == const1_rtx)
        {
          if (ndd_form)
            return "inc{<imodesuffix>}\t{%1, %0|%0, %1}";
          return "inc{<imodesuffix>}\t%0";
        }

      gcc_assert (operands[2] == constm1_rtx);
      if (ndd_form)
        return "dec{<imodesuffix>}\t{%1, %0|%0, %1}";
      return "dec{<imodesuffix>}\t%0";
    }

  /* In alternative 1 operand 2 is the one tied to the scratch.  */
  if (which_alternative == 1)
    std::swap (operands[1], operands[2]);

  /* A true return selects the sub mnemonic for the constant.  */
  if (x86_maybe_negate_const_int (&operands[2], <MODE>mode))
    {
      if (ndd_form)
        return "sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}";
      return "sub{<imodesuffix>}\t{%2, %0|%0, %2}";
    }

  if (ndd_form)
    return "add{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}";
  return "add{<imodesuffix>}\t{%2, %0|%0, %2}";
}
  [(set_attr "isa" "*,*,apx_ndd,apx_ndd")
   (set (attr "type")
     (if_then_else (match_operand:SWI 2 "incdec_operand")
        (const_string "incdec")
        (const_string "alu")))
   (set (attr "length_immediate")
      (if_then_else
        (and (eq_attr "type" "alu") (match_operand 2 "const128_operand"))
        (const_string "1")
        (const_string "*")))
   (set_attr "mode" "<MODE>")])
7169
;; Add the byte extracted at bit position 8, width 8, of operand 2
;; (printed with %h2) to QImode operand 1, which is tied to the
;; destination operand 0.  Clobbers the flags.
(define_insn "*addqi_ext<mode>_0"
  [(set (match_operand:QI 0 "nonimmediate_operand" "=QBn")
        (plus:QI
          (subreg:QI
            (match_operator:SWI248 3 "extract_operator"
              [(match_operand 2 "int248_register_operand" "Q")
               (const_int 8)
               (const_int 8)]) 0)
          (match_operand:QI 1 "nonimmediate_operand" "0")))
   (clobber (reg:CC FLAGS_REG))]
  ""
  "add{b}\t{%h2, %0|%0, %h2}"
  [(set_attr "addr" "gpr8")
   (set_attr "type" "alu")
   (set_attr "mode" "QI")])
7185
;; Sum of two extracted bytes (position 8, width 8, of operands 1 and 2)
;; into QImode register operand 0, which is earlyclobbered.  Always output
;; as "#"; after reload it is split into a move of the byte extracted from
;; operand 2 into operand 0, followed by an add of the byte extracted from
;; operand 1.
(define_insn_and_split "*addqi_ext2<mode>_0"
  [(set (match_operand:QI 0 "register_operand" "=&Q")
        (plus:QI
          (subreg:QI
            (match_operator:SWI248 3 "extract_operator"
              [(match_operand 1 "int248_register_operand" "Q")
               (const_int 8)
               (const_int 8)]) 0)
          (subreg:QI
            (match_operator:SWI248 4 "extract_operator"
              [(match_operand 2 "int248_register_operand" "Q")
               (const_int 8)
               (const_int 8)]) 0)))
   (clobber (reg:CC FLAGS_REG))]
  ""
  "#"
  "&& reload_completed"
  [(set (match_dup 0)
        (subreg:QI
          (match_op_dup 4
            [(match_dup 2) (const_int 8) (const_int 8)]) 0))
   (parallel
     [(set (match_dup 0)
           (plus:QI
             (subreg:QI
               (match_op_dup 3
                 [(match_dup 1) (const_int 8) (const_int 8)]) 0)
             (match_dup 0)))
      (clobber (reg:CC FLAGS_REG))])]
  ""
  [(set_attr "type" "alu")
   (set_attr "mode" "QI")])
7218
;; Expander: add const_int operand 2 to the byte at bit position 8,
;; width 8, of HImode operand 1, and store the result into the same bit
;; field of HImode operand 0, clobbering the flags.
(define_expand "addqi_ext_1"
  [(parallel
     [(set (zero_extract:HI (match_operand:HI 0 "register_operand")
                            (const_int 8)
                            (const_int 8))
           (subreg:HI
             (plus:QI
               (subreg:QI
                 (zero_extract:HI (match_operand:HI 1 "register_operand")
                                  (const_int 8)
                                  (const_int 8)) 0)
               (match_operand:QI 2 "const_int_operand")) 0))
      (clobber (reg:CC FLAGS_REG))])])
7232
7233 ;; Alternative 1 is needed to work around LRA limitation, see PR82524.
;; Add QImode operand 2 to the byte at bit position 8, width 8, of
;; operand 1 and store it into the same bit field of operand 0 (printed
;; with %h0).  Alternative 0 has operand 1 tied to operand 0.
;; Alternative 1 outputs "#" and is split after reload, when the two
;; registers differ, into a bit-field copy followed by the tied add.
(define_insn_and_split "*addqi_ext<mode>_1"
  [(set (zero_extract:SWI248
          (match_operand 0 "int248_register_operand" "+Q,&Q")
          (const_int 8)
          (const_int 8))
        (subreg:SWI248
          (plus:QI
            (subreg:QI
              (match_operator:SWI248 3 "extract_operator"
                [(match_operand 1 "int248_register_operand" "0,!Q")
                 (const_int 8)
                 (const_int 8)]) 0)
            (match_operand:QI 2 "general_operand" "QnBn,QnBn")) 0))
   (clobber (reg:CC FLAGS_REG))]
  ""
{
  /* Only the tied alternative is printed directly.  */
  if (which_alternative != 0)
    return "#";

  if (get_attr_type (insn) != TYPE_INCDEC)
    return "add{b}\t{%2, %h0|%h0, %2}";

  if (operands[2] == const1_rtx)
    return "inc{b}\t%h0";

  gcc_assert (operands[2] == constm1_rtx);
  return "dec{b}\t%h0";
}
  "reload_completed
   && !rtx_equal_p (operands[0], operands[1])"
  [(set (zero_extract:SWI248
          (match_dup 0) (const_int 8) (const_int 8))
        (zero_extract:SWI248
          (match_dup 1) (const_int 8) (const_int 8)))
   (parallel
     [(set (zero_extract:SWI248
             (match_dup 0) (const_int 8) (const_int 8))
           (subreg:SWI248
             (plus:QI
               (subreg:QI
                 (match_op_dup 3
                   [(match_dup 0) (const_int 8) (const_int 8)]) 0)
               (match_dup 2)) 0))
      (clobber (reg:CC FLAGS_REG))])]
  ""
  [(set_attr "addr" "gpr8")
   (set (attr "type")
     (if_then_else (match_operand:QI 2 "incdec_operand")
        (const_string "incdec")
        (const_string "alu")))
   (set_attr "mode" "QI")])
7291
7292 ;; Alternative 1 is needed to work around LRA limitation, see PR82524.
;; plusminus operation between the bytes at bit position 8, width 8, of
;; operands 1 and 2, stored into the same bit field of operand 0.
;; Alternative 0 has operand 1 tied to operand 0.  Alternative 1 outputs
;; "#" and is split after reload into a bit-field copy of operand 1
;; followed by the tied operation, unless operand 0 equals operand 1, or
;; equals operand 2 when the code is PLUS.
(define_insn_and_split "*<insn>qi_ext<mode>_2"
  [(set (zero_extract:SWI248
          (match_operand 0 "int248_register_operand" "+Q,&Q")
          (const_int 8)
          (const_int 8))
        (subreg:SWI248
          (plusminus:QI
            (subreg:QI
              (match_operator:SWI248 3 "extract_operator"
                [(match_operand 1 "int248_register_operand" "<comm>0,!Q")
                 (const_int 8)
                 (const_int 8)]) 0)
            (subreg:QI
              (match_operator:SWI248 4 "extract_operator"
                [(match_operand 2 "int248_register_operand" "Q,Q")
                 (const_int 8)
                 (const_int 8)]) 0)) 0))
   (clobber (reg:CC FLAGS_REG))]
  ""
  "@
   <insn>{b}\t{%h2, %h0|%h0, %h2}
   #"
  "reload_completed
   && !(rtx_equal_p (operands[0], operands[1])
        || (<CODE> == PLUS && rtx_equal_p (operands[0], operands[2])))"
  [(set (zero_extract:SWI248
          (match_dup 0) (const_int 8) (const_int 8))
        (zero_extract:SWI248
          (match_dup 1) (const_int 8) (const_int 8)))
   (parallel
     [(set (zero_extract:SWI248
             (match_dup 0) (const_int 8) (const_int 8))
           (subreg:SWI248
             (plusminus:QI
               (subreg:QI
                 (match_op_dup 3
                   [(match_dup 0) (const_int 8) (const_int 8)]) 0)
               (subreg:QI
                 (match_op_dup 4
                   [(match_dup 2) (const_int 8) (const_int 8)]) 0)) 0))
      (clobber (reg:CC FLAGS_REG))])]
  ""
  [(set_attr "type" "alu")
   (set_attr "mode" "QI")])
7337
7338 ;; Like DWI, but use POImode instead of OImode.
;; Maps each integer mode to the mode listed next to it; TI maps to POI.
(define_mode_attr DPWI
  [(QI "HI")
   (HI "SI")
   (SI "DI")
   (DI "TI")
   (TI "POI")])
7340
7341 ;; Add with jump on overflow.
;; Expander for a signed addition with overflow check: operand 0 receives
;; operand 1 + operand 2, the CCO flags are set by comparing the sum
;; computed in the wider <DPWI> mode against the sign-extended narrow sum,
;; and the following jump goes to label operand 3 when the
;; (eq (reg:CCO) 0) test holds.
(define_expand "addv<mode>4"
  [(parallel [(set (reg:CCO FLAGS_REG)
                   (eq:CCO
                     (plus:<DPWI>
                       (sign_extend:<DPWI>
                         (match_operand:SWIDWI 1 "nonimmediate_operand"))
                       (match_dup 4))
                     (sign_extend:<DPWI>
                       (plus:SWIDWI (match_dup 1)
                                    (match_operand:SWIDWI 2
                                      "<general_hilo_operand>")))))
              (set (match_operand:SWIDWI 0 "register_operand")
                   (plus:SWIDWI (match_dup 1) (match_dup 2)))])
   (set (pc) (if_then_else
               (eq (reg:CCO FLAGS_REG) (const_int 0))
               (label_ref (match_operand 3))
               (pc)))]
  ""
{
  ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);

  /* Operand 4 is the wide-mode form of operand 2: a constant is used
     as is, anything else is wrapped in a sign_extend.  */
  operands[4] = (CONST_SCALAR_INT_P (operands[2])
                 ? operands[2]
                 : gen_rtx_SIGN_EXTEND (<DPWI>mode, operands[2]));
})
7367
;; Overflow-checking add of two non-constant operands: sets the CCO flags
;; and stores the sum in operand 0.  Alternatives 2 and 3 are enabled for
;; isa apx_ndd and use the three-operand template.
(define_insn "*addv<mode>4"
  [(set (reg:CCO FLAGS_REG)
        (eq:CCO (plus:<DWI>
                   (sign_extend:<DWI>
                      (match_operand:SWI 1 "nonimmediate_operand" "%0,0,rm,r"))
                   (sign_extend:<DWI>
                      (match_operand:SWI 2 "<general_sext_operand>" "<r>We,m,rWe,m")))
                (sign_extend:<DWI>
                   (plus:SWI (match_dup 1) (match_dup 2)))))
   (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>,r,r")
        (plus:SWI (match_dup 1) (match_dup 2)))]
  "ix86_binary_operator_ok (PLUS, <MODE>mode, operands, TARGET_APX_NDD)"
  "@
   add{<imodesuffix>}\t{%2, %0|%0, %2}
   add{<imodesuffix>}\t{%2, %0|%0, %2}
   add{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
   add{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "*,*,apx_ndd,apx_ndd")
   (set_attr "type" "alu")
   (set_attr "mode" "<MODE>")])
7388
;; Overflow-checking add of an immediate: operand 2 is the immediate in
;; the operation mode and operand 3 the same value as a <DWI> const_int;
;; the insn condition requires both to be equal CONST_INTs.
;; length_immediate is 1 for values in [-128, 127], otherwise 4 when
;; <MODE_SIZE> is 8 and <MODE_SIZE> for the other modes.
(define_insn "addv<mode>4_1"
  [(set (reg:CCO FLAGS_REG)
        (eq:CCO (plus:<DWI>
                   (sign_extend:<DWI>
                      (match_operand:SWI 1 "nonimmediate_operand" "0,rm"))
                   (match_operand:<DWI> 3 "const_int_operand"))
                (sign_extend:<DWI>
                   (plus:SWI
                     (match_dup 1)
                     (match_operand:SWI 2 "x86_64_immediate_operand" "<i>,<i>")))))
   (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,r")
        (plus:SWI (match_dup 1) (match_dup 2)))]
  "ix86_binary_operator_ok (PLUS, <MODE>mode, operands, TARGET_APX_NDD)
   && CONST_INT_P (operands[2])
   && INTVAL (operands[2]) == INTVAL (operands[3])"
  "@
   add{<imodesuffix>}\t{%2, %0|%0, %2}
   add{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "*,apx_ndd")
   (set_attr "type" "alu")
   (set_attr "mode" "<MODE>")
   (set (attr "length_immediate")
        (cond [(match_test "IN_RANGE (INTVAL (operands[2]), -128, 127)")
                  (const_string "1")
               (match_test "<MODE_SIZE> == 8")
                  (const_string "4")]
              (const_string "<MODE_SIZE>")))])
7416
7417 ;; Quad word integer modes as mode attribute.
;; Maps SI to TI and DI to POI.
(define_mode_attr QPWI
  [(SI "TI")
   (DI "POI")])
7419
;; Overflow-checking addition in the double-word mode <DWI>.  Always
;; output as "#".  After reload split_double_mode divides operands 0-2
;; into word-sized halves, with the second set of halves placed in
;; operands 3-5.  The split then emits two insns: an add on operands 0-2
;; that sets the carry (CCC), and an add on operands 3-5 that also adds
;; the carry and sets the CCO flags.
(define_insn_and_split "*addv<dwi>4_doubleword"
  [(set (reg:CCO FLAGS_REG)
        (eq:CCO
          (plus:<QPWI>
            (sign_extend:<QPWI>
              (match_operand:<DWI> 1 "nonimmediate_operand" "%0,0,ro,r"))
            (sign_extend:<QPWI>
              (match_operand:<DWI> 2 "nonimmediate_operand" "r,o,r,o")))
          (sign_extend:<QPWI>
            (plus:<DWI> (match_dup 1) (match_dup 2)))))
   (set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,r,&r,&r")
        (plus:<DWI> (match_dup 1) (match_dup 2)))]
  "ix86_binary_operator_ok (PLUS, <DWI>mode, operands, TARGET_APX_NDD)"
  "#"
  "&& reload_completed"
  [(parallel [(set (reg:CCC FLAGS_REG)
                   (compare:CCC
                     (plus:DWIH (match_dup 1) (match_dup 2))
                     (match_dup 1)))
              (set (match_dup 0)
                   (plus:DWIH (match_dup 1) (match_dup 2)))])
   (parallel [(set (reg:CCO FLAGS_REG)
                   (eq:CCO
                     (plus:<DWI>
                       (plus:<DWI>
                         (ltu:<DWI> (reg:CC FLAGS_REG) (const_int 0))
                         (sign_extend:<DWI> (match_dup 4)))
                       (sign_extend:<DWI> (match_dup 5)))
                     (sign_extend:<DWI>
                       (plus:DWIH
                         (plus:DWIH
                           (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))
                           (match_dup 4))
                         (match_dup 5)))))
              (set (match_dup 3)
                   (plus:DWIH
                     (plus:DWIH
                       (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))
                       (match_dup 4))
                     (match_dup 5)))])]
{
  /* Three operands are split; the second halves go to operands 3-5.  */
  split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]);
}
  [(set_attr "isa" "*,*,apx_ndd,apx_ndd")])
7464
;; Overflow-checking double-word addition of a constant: operand 2 is the
;; constant in <DWI> mode and operand 3 the same constant in <QPWI> mode
;; (the insn condition requires rtx_equal_p).  Always output as "#".
;; After reload the operands are split into halves as in the pattern
;; above.  When the first half of the constant is zero, the split emits
;; only an optional move plus one addv<mode>4_1 on operands 3-5;
;; otherwise it emits the carry-setting add followed by the add with
;; carry that sets CCO.
(define_insn_and_split "*addv<dwi>4_doubleword_1"
  [(set (reg:CCO FLAGS_REG)
        (eq:CCO
          (plus:<QPWI>
            (sign_extend:<QPWI>
              (match_operand:<DWI> 1 "nonimmediate_operand" "%0,rm"))
            (match_operand:<QPWI> 3 "const_scalar_int_operand" "n,n"))
          (sign_extend:<QPWI>
            (plus:<DWI>
              (match_dup 1)
              (match_operand:<DWI> 2 "x86_64_hilo_general_operand" "<di>,<di>")))))
   (set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,&r")
        (plus:<DWI> (match_dup 1) (match_dup 2)))]
  "ix86_binary_operator_ok (PLUS, <DWI>mode, operands, TARGET_APX_NDD)
   && CONST_SCALAR_INT_P (operands[2])
   && rtx_equal_p (operands[2], operands[3])"
  "#"
  "&& reload_completed"
  [(parallel [(set (reg:CCC FLAGS_REG)
                   (compare:CCC
                     (plus:DWIH (match_dup 1) (match_dup 2))
                     (match_dup 1)))
              (set (match_dup 0)
                   (plus:DWIH (match_dup 1) (match_dup 2)))])
   (parallel [(set (reg:CCO FLAGS_REG)
                   (eq:CCO
                     (plus:<DWI>
                       (plus:<DWI>
                         (ltu:<DWI> (reg:CC FLAGS_REG) (const_int 0))
                         (sign_extend:<DWI> (match_dup 4)))
                       (match_dup 5))
                     (sign_extend:<DWI>
                       (plus:DWIH
                         (plus:DWIH
                           (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))
                           (match_dup 4))
                         (match_dup 5)))))
              (set (match_dup 3)
                   (plus:DWIH
                     (plus:DWIH
                       (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))
                       (match_dup 4))
                     (match_dup 5)))])]
{
  split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]);

  /* With a zero first half there is nothing to add there: copy
     operand 1 to operand 0 if they differ, then do a single
     overflow-checking add on operands 3-5.  */
  if (operands[2] == const0_rtx)
    {
      rtx first_dest = operands[0];
      rtx first_src = operands[1];
      if (!rtx_equal_p (first_dest, first_src))
        emit_move_insn (first_dest, first_src);

      rtx second_add = gen_addv<mode>4_1 (operands[3], operands[4],
                                          operands[5], operands[5]);
      emit_insn (second_add);
      DONE;
    }
}
  [(set_attr "isa" "*,apx_ndd")])
7520
;; Add with carry-in that sets the CCO flags: operand 0 receives
;; carry + operand 1 + operand 2, where the carry is read from flags
;; register operand 3 through the carry-flag operators 4 and 5.
;; Alternatives 2 and 3 are enabled for isa apx_ndd.
(define_insn "*addv<mode>4_overflow_1"
  [(set (reg:CCO FLAGS_REG)
        (eq:CCO
          (plus:<DWI>
            (plus:<DWI>
              (match_operator:<DWI> 4 "ix86_carry_flag_operator"
                [(match_operand 3 "flags_reg_operand") (const_int 0)])
              (sign_extend:<DWI>
                (match_operand:SWI 1 "nonimmediate_operand" "%0,0,rm,r")))
            (sign_extend:<DWI>
              (match_operand:SWI 2 "<general_sext_operand>" "rWe,m,rWe,m")))
          (sign_extend:<DWI>
            (plus:SWI
              (plus:SWI
                (match_operator:SWI 5 "ix86_carry_flag_operator"
                  [(match_dup 3) (const_int 0)])
                (match_dup 1))
              (match_dup 2)))))
   (set (match_operand:SWI 0 "nonimmediate_operand" "=rm,r,r,r")
        (plus:SWI
          (plus:SWI
            (match_op_dup 5 [(match_dup 3) (const_int 0)])
            (match_dup 1))
          (match_dup 2)))]
  "ix86_binary_operator_ok (PLUS, <MODE>mode, operands, TARGET_APX_NDD)"
  "@
   adc{<imodesuffix>}\t{%2, %0|%0, %2}
   adc{<imodesuffix>}\t{%2, %0|%0, %2}
   adc{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
   adc{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "*,*,apx_ndd,apx_ndd")
   (set_attr "type" "alu")
   (set_attr "mode" "<MODE>")])
7554
;; Add with carry-in of an immediate that sets the CCO flags: operand 0
;; receives carry + operand 1 + operand 2.  Operand 2 is the immediate in
;; the operation mode and operand 6 the same value as a <DWI> const_int;
;; the insn condition requires both to be equal CONST_INTs.
;; Alternative 1 is enabled for isa apx_ndd.
;; length_immediate follows the same rule as addv<mode>4_1: 1 for values
;; in [-128, 127], otherwise 4 when <MODE_SIZE> is 8 and <MODE_SIZE> for
;; the other modes, so HImode reports a 2-byte immediate instead of 4.
(define_insn "*addv<mode>4_overflow_2"
  [(set (reg:CCO FLAGS_REG)
        (eq:CCO
          (plus:<DWI>
            (plus:<DWI>
              (match_operator:<DWI> 4 "ix86_carry_flag_operator"
                [(match_operand 3 "flags_reg_operand") (const_int 0)])
              (sign_extend:<DWI>
                (match_operand:SWI 1 "nonimmediate_operand" "%0,rm")))
            (match_operand:<DWI> 6 "const_int_operand" "n,n"))
          (sign_extend:<DWI>
            (plus:SWI
              (plus:SWI
                (match_operator:SWI 5 "ix86_carry_flag_operator"
                  [(match_dup 3) (const_int 0)])
                (match_dup 1))
              (match_operand:SWI 2 "x86_64_immediate_operand" "e,e")))))
   (set (match_operand:SWI 0 "nonimmediate_operand" "=rm,r")
        (plus:SWI
          (plus:SWI
            (match_op_dup 5 [(match_dup 3) (const_int 0)])
            (match_dup 1))
          (match_dup 2)))]
  "ix86_binary_operator_ok (PLUS, <MODE>mode, operands, TARGET_APX_NDD)
   && CONST_INT_P (operands[2])
   && INTVAL (operands[2]) == INTVAL (operands[6])"
  "@
   adc{<imodesuffix>}\t{%2, %0|%0, %2}
   adc{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "*,apx_ndd")
   (set_attr "type" "alu")
   (set_attr "mode" "<MODE>")
   (set (attr "length_immediate")
        (cond [(match_test "IN_RANGE (INTVAL (operands[2]), -128, 127)")
                  (const_string "1")
               (match_test "<MODE_SIZE> == 8")
                  (const_string "4")]
              (const_string "<MODE_SIZE>")))])
7591
;; Expander for an unsigned addition with overflow check: operand 0
;; receives operand 1 + operand 2, the carry (CCC) is set by comparing
;; the sum with operand 1, and the following jump goes to label
;; operand 3 when the ltu test on the flags holds.
(define_expand "uaddv<mode>4"
  [(parallel [(set (reg:CCC FLAGS_REG)
                   (compare:CCC
                     (plus:SWIDWI
                       (match_operand:SWIDWI 1 "nonimmediate_operand")
                       (match_operand:SWIDWI 2 "<general_hilo_operand>"))
                     (match_dup 1)))
              (set (match_operand:SWIDWI 0 "register_operand")
                   (plus:SWIDWI (match_dup 1) (match_dup 2)))])
   (set (pc) (if_then_else
               (ltu (reg:CCC FLAGS_REG) (const_int 0))
               (label_ref (match_operand 3))
               (pc)))]
  ""
{
  ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);
})
7607
7608 ;; The lea patterns for modes less than 32 bits need to be matched by
7609 ;; several insns converted to real lea by splitters.
7610
;; SWI12-mode "register + register + immediate".  The insn is output as
;; "#" and split after reload into the same sum in SImode (type "lea");
;; all four operands are replaced by their SImode low parts.  The pattern
;; is available when TARGET_PARTIAL_REG_STALL is off or the function is
;; optimized for size.
7611 (define_insn_and_split "*lea<mode>_general_1"
7612 [(set (match_operand:SWI12 0 "register_operand" "=r")
7613 (plus:SWI12
7614 (plus:SWI12 (match_operand:SWI12 1 "register_no_SP_operand" "l")
7615 (match_operand:SWI12 2 "register_operand" "r"))
7616 (match_operand:SWI12 3 "immediate_operand" "i")))]
7617 "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
7618 "#"
7619 "&& reload_completed"
7620 [(set (match_dup 0)
7621 (plus:SI
7622 (plus:SI (match_dup 1) (match_dup 2))
7623 (match_dup 3)))]
7624 {
7625 operands[0] = gen_lowpart (SImode, operands[0]);
7626 operands[1] = gen_lowpart (SImode, operands[1]);
7627 operands[2] = gen_lowpart (SImode, operands[2]);
7628 operands[3] = gen_lowpart (SImode, operands[3]);
7629 }
7630 [(set_attr "type" "lea")
7631 (set_attr "mode" "SI")])
7632
;; SWI12-mode "register * scale + (register or immediate)", where the
;; scale (operand 2) must satisfy const248_operand.  Split after reload
;; into the same expression in SImode (type "lea").  Operands 0, 1 and 3
;; are converted to SImode low parts; the scale constant is reused as is.
7633 (define_insn_and_split "*lea<mode>_general_2"
7634 [(set (match_operand:SWI12 0 "register_operand" "=r")
7635 (plus:SWI12
7636 (mult:SWI12 (match_operand:SWI12 1 "register_no_SP_operand" "l")
7637 (match_operand 2 "const248_operand" "n"))
7638 (match_operand:SWI12 3 "nonmemory_operand" "ri")))]
7639 "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
7640 "#"
7641 "&& reload_completed"
7642 [(set (match_dup 0)
7643 (plus:SI
7644 (mult:SI (match_dup 1) (match_dup 2))
7645 (match_dup 3)))]
7646 {
7647 operands[0] = gen_lowpart (SImode, operands[0]);
7648 operands[1] = gen_lowpart (SImode, operands[1]);
7649 operands[3] = gen_lowpart (SImode, operands[3]);
7650 }
7651 [(set_attr "type" "lea")
7652 (set_attr "mode" "SI")])
7653
;; Same shape as *lea<mode>_general_2, but the scaling is written as a
;; left shift whose count (operand 2) must satisfy const123_operand.
;; Split after reload into the SImode form, keeping the ashift; operands
;; 0, 1 and 3 are converted to SImode low parts.
7654 (define_insn_and_split "*lea<mode>_general_2b"
7655 [(set (match_operand:SWI12 0 "register_operand" "=r")
7656 (plus:SWI12
7657 (ashift:SWI12 (match_operand:SWI12 1 "register_no_SP_operand" "l")
7658 (match_operand 2 "const123_operand" "n"))
7659 (match_operand:SWI12 3 "nonmemory_operand" "ri")))]
7660 "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
7661 "#"
7662 "&& reload_completed"
7663 [(set (match_dup 0)
7664 (plus:SI
7665 (ashift:SI (match_dup 1) (match_dup 2))
7666 (match_dup 3)))]
7667 {
7668 operands[0] = gen_lowpart (SImode, operands[0]);
7669 operands[1] = gen_lowpart (SImode, operands[1]);
7670 operands[3] = gen_lowpart (SImode, operands[3]);
7671 }
7672 [(set_attr "type" "lea")
7673 (set_attr "mode" "SI")])
7674
;; SWI12-mode "register * scale + register + immediate", with the scale
;; (operand 2) satisfying const248_operand.  Split after reload into the
;; same expression in SImode (type "lea"); operands 0, 1, 3 and 4 are
;; converted to SImode low parts.
7675 (define_insn_and_split "*lea<mode>_general_3"
7676 [(set (match_operand:SWI12 0 "register_operand" "=r")
7677 (plus:SWI12
7678 (plus:SWI12
7679 (mult:SWI12 (match_operand:SWI12 1 "register_no_SP_operand" "l")
7680 (match_operand 2 "const248_operand" "n"))
7681 (match_operand:SWI12 3 "register_operand" "r"))
7682 (match_operand:SWI12 4 "immediate_operand" "i")))]
7683 "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
7684 "#"
7685 "&& reload_completed"
7686 [(set (match_dup 0)
7687 (plus:SI
7688 (plus:SI
7689 (mult:SI (match_dup 1) (match_dup 2))
7690 (match_dup 3))
7691 (match_dup 4)))]
7692 {
7693 operands[0] = gen_lowpart (SImode, operands[0]);
7694 operands[1] = gen_lowpart (SImode, operands[1]);
7695 operands[3] = gen_lowpart (SImode, operands[3]);
7696 operands[4] = gen_lowpart (SImode, operands[4]);
7697 }
7698 [(set_attr "type" "lea")
7699 (set_attr "mode" "SI")])
7700
;; Same shape as *lea<mode>_general_3, but the scaling is written as a
;; left shift whose count (operand 2) must satisfy const123_operand.
;; Split after reload into the SImode form, keeping the ashift; operands
;; 0, 1, 3 and 4 are converted to SImode low parts.
7701 (define_insn_and_split "*lea<mode>_general_3b"
7702 [(set (match_operand:SWI12 0 "register_operand" "=r")
7703 (plus:SWI12
7704 (plus:SWI12
7705 (ashift:SWI12 (match_operand:SWI12 1 "register_no_SP_operand" "l")
7706 (match_operand 2 "const123_operand" "n"))
7707 (match_operand:SWI12 3 "register_operand" "r"))
7708 (match_operand:SWI12 4 "immediate_operand" "i")))]
7709 "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
7710 "#"
7711 "&& reload_completed"
7712 [(set (match_dup 0)
7713 (plus:SI
7714 (plus:SI
7715 (ashift:SI (match_dup 1) (match_dup 2))
7716 (match_dup 3))
7717 (match_dup 4)))]
7718 {
7719 operands[0] = gen_lowpart (SImode, operands[0]);
7720 operands[1] = gen_lowpart (SImode, operands[1]);
7721 operands[3] = gen_lowpart (SImode, operands[3]);
7722 operands[4] = gen_lowpart (SImode, operands[4]);
7723 }
7724 [(set_attr "type" "lea")
7725 (set_attr "mode" "SI")])
7726
;; SWI12-mode any_or of a left-shifted register with a constant.  The insn
;; condition only accepts a constant (operand 3) whose unsigned value is
;; below 1 << operand 2, i.e. one confined to the bit positions the shift
;; leaves zero.  After reload the insn is split into the SImode sum
;; "operand 1 * (1 << shift count) + operand 3" (type "lea"): operands 0
;; and 1 become SImode low parts and operand 2 is replaced by the
;; multiplier 1 << shift count.
7727 (define_insn_and_split "*lea<mode>_general_4"
7728 [(set (match_operand:SWI12 0 "register_operand" "=r")
7729 (any_or:SWI12
7730 (ashift:SWI12
7731 (match_operand:SWI12 1 "register_no_SP_operand" "l")
7732 (match_operand 2 "const_0_to_3_operand"))
7733 (match_operand 3 "const_int_operand")))]
7734 "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
7735 && ((unsigned HOST_WIDE_INT) INTVAL (operands[3])
7736 < (HOST_WIDE_INT_1U << INTVAL (operands[2])))"
7737 "#"
7738 "&& reload_completed"
7739 [(set (match_dup 0)
7740 (plus:SI
7741 (mult:SI (match_dup 1) (match_dup 2))
7742 (match_dup 3)))]
7743 {
7744 operands[0] = gen_lowpart (SImode, operands[0]);
7745 operands[1] = gen_lowpart (SImode, operands[1]);
7746 operands[2] = GEN_INT (1 << INTVAL (operands[2]));
7747 }
7748 [(set_attr "type" "lea")
7749 (set_attr "mode" "SI")])
7750
;; SWI48-mode counterpart of the SWI12 pattern of the same name: any_or of
;; a left-shifted register with a constant whose unsigned value is below
;; 1 << operand 2.  After reload it is split into
;; "operand 1 * (1 << shift count) + operand 3" in the original mode
;; (type "lea"); only operand 2 is rewritten, to the multiplier.
;; There is no TARGET_PARTIAL_REG_STALL test in this variant.
7751 (define_insn_and_split "*lea<mode>_general_4"
7752 [(set (match_operand:SWI48 0 "register_operand" "=r")
7753 (any_or:SWI48
7754 (ashift:SWI48
7755 (match_operand:SWI48 1 "register_no_SP_operand" "l")
7756 (match_operand 2 "const_0_to_3_operand"))
7757 (match_operand 3 "const_int_operand")))]
7758 "(unsigned HOST_WIDE_INT) INTVAL (operands[3])
7759 < (HOST_WIDE_INT_1U << INTVAL (operands[2]))"
7760 "#"
7761 "&& reload_completed"
7762 [(set (match_dup 0)
7763 (plus:SWI48
7764 (mult:SWI48 (match_dup 1) (match_dup 2))
7765 (match_dup 3)))]
7766 "operands[2] = GEN_INT (1 << INTVAL (operands[2]));"
7767 [(set_attr "type" "lea")
7768 (set_attr "mode" "<MODE>")])
7769 \f
7770 ;; Subtract instructions
7771
;; Named expander for subtraction, operand 0 = operand 1 - operand 2, in
;; the SDWIM modes.  All the work is delegated to
;; ix86_expand_binary_operator (called with MINUS and TARGET_APX_NDD);
;; DONE means the RTL template above is not emitted by the expander itself.
7772 (define_expand "sub<mode>3"
7773 [(set (match_operand:SDWIM 0 "nonimmediate_operand")
7774 (minus:SDWIM (match_operand:SDWIM 1 "nonimmediate_operand")
7775 (match_operand:SDWIM 2 "<general_hilo_operand>")))]
7776 ""
7777 {
7778 ix86_expand_binary_operator (MINUS, <MODE>mode, operands, TARGET_APX_NDD);
7779 DONE;
7780 }) 
7781
;; Double-word (<DWI>) subtraction, split after reload into two DWIH-mode
;; insns: a subtract of operands 1 and 2 that also sets the flags from
;; comparing them, then a subtract on operands 3/4/5 that additionally
;; subtracts the borrow (the LTU test on the flags).  split_double_mode
;; rewrites operands 0-2 and fills operands 3-5 with the matching halves;
;; since the second insn consumes the borrow of the first, operands 0-2
;; are presumably the low halves and 3-5 the high halves.
;; Special case: when operand 2 of the first half is const0_rtx, that half
;; is reduced to a move (skipped if source and destination are equal) and
;; the other half becomes an ordinary subtraction produced by
;; ix86_expand_binary_operator on operands[3..5].
7782 (define_insn_and_split "*sub<dwi>3_doubleword"
7783 [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,r,&r,&r")
7784 (minus:<DWI>
7785 (match_operand:<DWI> 1 "nonimmediate_operand" "0,0,ro,r")
7786 (match_operand:<DWI> 2 "x86_64_hilo_general_operand" "r<di>,o,r<di>,o")))
7787 (clobber (reg:CC FLAGS_REG))]
7788 "ix86_binary_operator_ok (MINUS, <MODE>mode, operands, TARGET_APX_NDD)"
7789 "#"
7790 "&& reload_completed"
7791 [(parallel [(set (reg:CC FLAGS_REG)
7792 (compare:CC (match_dup 1) (match_dup 2)))
7793 (set (match_dup 0)
7794 (minus:DWIH (match_dup 1) (match_dup 2)))])
7795 (parallel [(set (match_dup 3)
7796 (minus:DWIH
7797 (minus:DWIH
7798 (match_dup 4)
7799 (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0)))
7800 (match_dup 5)))
7801 (clobber (reg:CC FLAGS_REG))])]
7802 {
7803 split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]);
7804 if (operands[2] == const0_rtx)
7805 {
7806 if (!rtx_equal_p (operands[0], operands[1]))
7807 emit_move_insn (operands[0], operands[1]);
7808 ix86_expand_binary_operator (MINUS, <MODE>mode, &operands[3],
7809 TARGET_APX_NDD);
7810 DONE;
7811 }
7812 }
7813 [(set_attr "isa" "*,*,apx_ndd,apx_ndd")])
7814
;; Double-word subtraction whose subtrahend is a zero-extended DWIH value.
;; Split after reload into a DWIH subtract of operand 2 that also sets the
;; flags, followed by a subtract on the other half (operands 3 and 4) that
;; removes only the borrow: its second subtrahend is (const_int 0).
;; split_double_mode is asked to split just two operands here (0 and 1);
;; operand 2 is already DWIH-mode and is used unchanged.
7815 (define_insn_and_split "*sub<dwi>3_doubleword_zext"
7816 [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=r,o,&r,&r")
7817 (minus:<DWI>
7818 (match_operand:<DWI> 1 "nonimmediate_operand" "0,0,r,o")
7819 (zero_extend:<DWI>
7820 (match_operand:DWIH 2 "nonimmediate_operand" "rm,r,rm,r"))))
7821 (clobber (reg:CC FLAGS_REG))]
7822 "ix86_binary_operator_ok (UNKNOWN, <DWI>mode, operands, TARGET_APX_NDD)"
7823 "#"
7824 "&& reload_completed"
7825 [(parallel [(set (reg:CC FLAGS_REG)
7826 (compare:CC (match_dup 1) (match_dup 2)))
7827 (set (match_dup 0)
7828 (minus:DWIH (match_dup 1) (match_dup 2)))])
7829 (parallel [(set (match_dup 3)
7830 (minus:DWIH
7831 (minus:DWIH
7832 (match_dup 4)
7833 (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0)))
7834 (const_int 0)))
7835 (clobber (reg:CC FLAGS_REG))])]
7836 "split_double_mode (<DWI>mode, &operands[0], 2, &operands[0], &operands[3]);"
7837 [(set_attr "isa" "*,*,apx_ndd,apx_ndd")])
7838
;; Plain SWI-mode subtraction, operand 0 = operand 1 - operand 2, emitted
;; as "sub" with the flags register clobbered.  Alternatives 0 and 1 are
;; the two-operand form with operand 1 tied to operand 0; alternatives 2
;; and 3 are the three-operand form, enabled only for the apx_ndd isa.
7839 (define_insn "*sub<mode>_1"
7840 [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>,r,r")
7841 (minus:SWI
7842 (match_operand:SWI 1 "nonimmediate_operand" "0,0,rm,r")
7843 (match_operand:SWI 2 "<general_operand>" "<r><i>,<m>,r<i>,<m>")))
7844 (clobber (reg:CC FLAGS_REG))]
7845 "ix86_binary_operator_ok (MINUS, <MODE>mode, operands, TARGET_APX_NDD)"
7846 "@
7847 sub{<imodesuffix>}\t{%2, %0|%0, %2}
7848 sub{<imodesuffix>}\t{%2, %0|%0, %2}
7849 sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
7850 sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
7851 [(set_attr "isa" "*,*,apx_ndd,apx_ndd")
7852 (set_attr "type" "alu")
7853 (set_attr "mode" "<MODE>")])
7854
;; SImode subtraction whose result is zero-extended into a DImode register;
;; requires TARGET_64BIT.  Emitted as "subl" with the destination printed
;; through the %k modifier.  Alternatives 1 and 2 are three-operand forms
;; enabled only for the apx_ndd isa.
7855 (define_insn "*subsi_1_zext"
7856 [(set (match_operand:DI 0 "register_operand" "=r,r,r")
7857 (zero_extend:DI
7858 (minus:SI (match_operand:SI 1 "nonimmediate_operand" "0,r,rm")
7859 (match_operand:SI 2 "x86_64_general_operand" "rBMe,rBMe,re"))))
7860 (clobber (reg:CC FLAGS_REG))]
7861 "TARGET_64BIT
7862 && ix86_binary_operator_ok (MINUS, SImode, operands, TARGET_APX_NDD)"
7863 "@
7864 sub{l}\t{%2, %k0|%k0, %2}
7865 sub{l}\t{%2, %1, %k0|%k0, %1, %2}
7866 sub{l}\t{%2, %1, %k0|%k0, %1, %2}"
7867 [(set_attr "isa" "*,apx_ndd,apx_ndd")
7868 (set_attr "type" "alu")
7869 (set_attr "mode" "SI")])
7870
7871 ;; Alternative 1 is needed to work around LRA limitation, see PR82524.
;; SWI12-mode subtraction into the strict_low_part of a register.
;; Alternative 0 has operand 1 tied to operand 0 and emits "sub" directly.
;; Alternative 1 is output as "#"; after reload, if operands 0 and 1 still
;; differ, it is split into a copy of operand 1 into the low part of
;; operand 0 followed by the in-place subtraction of operand 2.
7872 (define_insn_and_split "*sub<mode>_1_slp"
7873 [(set (strict_low_part (match_operand:SWI12 0 "register_operand" "+<r>,&<r>"))
7874 (minus:SWI12 (match_operand:SWI12 1 "register_operand" "0,!<r>")
7875 (match_operand:SWI12 2 "general_operand" "<r>mn,<r>mn")))
7876 (clobber (reg:CC FLAGS_REG))]
7877 "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
7878 "@
7879 sub{<imodesuffix>}\t{%2, %0|%0, %2}
7880 #"
7881 "&& reload_completed
7882 && !(rtx_equal_p (operands[0], operands[1]))"
7883 [(set (strict_low_part (match_dup 0)) (match_dup 1))
7884 (parallel
7885 [(set (strict_low_part (match_dup 0))
7886 (minus:SWI12 (match_dup 0) (match_dup 2)))
7887 (clobber (reg:CC FLAGS_REG))])]
7888 ""
7889 [(set_attr "type" "alu")
7890 (set_attr "mode" "<MODE>")])
7891
7892 ;; Alternative 1 is needed to work around LRA limitation, see PR82524.
;; QImode subtraction into the strict_low_part of operand 0, where the
;; subtrahend is the QImode subreg of an 8-bit extract at bit position 8
;; of operand 2 (printed with the %h modifier).  Alternative 0 ties
;; operand 1 to operand 0 and emits "subb".  Alternative 1 is output as
;; "#"; after reload, if operands 0 and 1 differ, it is split into a copy
;; of operand 1 into the low part of operand 0 followed by the in-place
;; subtraction.
7893 (define_insn_and_split "*subqi_ext<mode>_1_slp"
7894 [(set (strict_low_part (match_operand:QI 0 "register_operand" "+Q,&Q"))
7895 (minus:QI
7896 (match_operand:QI 1 "nonimmediate_operand" "0,!qm")
7897 (subreg:QI
7898 (match_operator:SWI248 3 "extract_operator"
7899 [(match_operand 2 "int248_register_operand" "Q,Q")
7900 (const_int 8)
7901 (const_int 8)]) 0)))
7902 (clobber (reg:CC FLAGS_REG))]
7903 "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
7904 "@
7905 sub{b}\t{%h2, %0|%0, %h2}
7906 #"
7907 "&& reload_completed
7908 && !rtx_equal_p (operands[0], operands[1])"
7909 [(set (strict_low_part (match_dup 0)) (match_dup 1))
7910 (parallel
7911 [(set (strict_low_part (match_dup 0))
7912 (minus:QI
7913 (match_dup 0)
7914 (subreg:QI
7915 (match_op_dup 3
7916 [(match_dup 2) (const_int 8) (const_int 8)]) 0)))
7917 (clobber (reg:CC FLAGS_REG))])]
7918 ""
7919 [(set_attr "type" "alu")
7920 (set_attr "mode" "QI")])
7921
;; QImode subtraction into the strict_low_part of operand 0 where both
;; inputs are QImode subregs of 8-bit extracts at bit position 8 (of
;; operands 1 and 2).  Always output as "#" and split after reload into
;; two insns: copy the extract of operand 1 into the low part of operand 0,
;; then subtract the extract of operand 2 from it in place.  Operand 0 is
;; earlyclobbered ("+&Q").
7922 (define_insn_and_split "*subqi_ext<mode>_2_slp"
7923 [(set (strict_low_part (match_operand:QI 0 "register_operand" "+&Q"))
7924 (minus:QI
7925 (subreg:QI
7926 (match_operator:SWI248 3 "extract_operator"
7927 [(match_operand 1 "int248_register_operand" "Q")
7928 (const_int 8)
7929 (const_int 8)]) 0)
7930 (subreg:QI
7931 (match_operator:SWI248 4 "extract_operator"
7932 [(match_operand 2 "int248_register_operand" "Q")
7933 (const_int 8)
7934 (const_int 8)]) 0)))
7935 (clobber (reg:CC FLAGS_REG))]
7936 "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
7937 "#"
7938 "&& reload_completed"
7939 [(set (strict_low_part (match_dup 0))
7940 (subreg:QI
7941 (match_op_dup 3
7942 [(match_dup 1) (const_int 8) (const_int 8)]) 0))
7943 (parallel
7944 [(set (strict_low_part (match_dup 0))
7945 (minus:QI
7946 (match_dup 0)
7947 (subreg:QI
7948 (match_op_dup 4
7949 [(match_dup 2) (const_int 8) (const_int 8)]) 0)))
7950 (clobber (reg:CC FLAGS_REG))])]
7951 ""
7952 [(set_attr "type" "alu")
7953 (set_attr "mode" "QI")])
7954
;; SWI-mode subtraction that also sets the flags from comparing the
;; difference with zero.  Only valid when ix86_match_ccmode accepts the
;; insn for CCGOCmode.  Alternatives 2 and 3 are three-operand forms
;; enabled only for the apx_ndd isa.
7955 (define_insn "*sub<mode>_2"
7956 [(set (reg FLAGS_REG)
7957 (compare
7958 (minus:SWI
7959 (match_operand:SWI 1 "nonimmediate_operand" "0,0,rm,r")
7960 (match_operand:SWI 2 "<general_operand>" "<r><i>,<m>,r<i>,<m>"))
7961 (const_int 0)))
7962 (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>,r,r")
7963 (minus:SWI (match_dup 1) (match_dup 2)))]
7964 "ix86_match_ccmode (insn, CCGOCmode)
7965 && ix86_binary_operator_ok (MINUS, <MODE>mode, operands, TARGET_APX_NDD)"
7966 "@
7967 sub{<imodesuffix>}\t{%2, %0|%0, %2}
7968 sub{<imodesuffix>}\t{%2, %0|%0, %2}
7969 sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
7970 sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
7971 [(set_attr "isa" "*,*,apx_ndd,apx_ndd")
7972 (set_attr "type" "alu")
7973 (set_attr "mode" "<MODE>")])
7974
;; SImode subtraction that sets the flags from comparing the difference
;; with zero and stores the zero-extended result in a DImode register.
;; Requires TARGET_64BIT and a CCGOCmode-compatible flags user.
;; Alternatives 1 and 2 are three-operand forms enabled only for the
;; apx_ndd isa.
7975 (define_insn "*subsi_2_zext"
7976 [(set (reg FLAGS_REG)
7977 (compare
7978 (minus:SI (match_operand:SI 1 "nonimmediate_operand" "0,r,rm")
7979 (match_operand:SI 2 "x86_64_general_operand" "rBMe,rBMe,re"))
7980 (const_int 0)))
7981 (set (match_operand:DI 0 "register_operand" "=r,r,r")
7982 (zero_extend:DI
7983 (minus:SI (match_dup 1)
7984 (match_dup 2))))]
7985 "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode)
7986 && ix86_binary_operator_ok (MINUS, SImode, operands, TARGET_APX_NDD)"
7987 "@
7988 sub{l}\t{%2, %k0|%k0, %2}
7989 sub{l}\t{%2, %1, %k0|%k0, %1, %2}
7990 sub{l}\t{%2, %1, %k0|%k0, %1, %2}"
7991 [(set_attr "isa" "*,apx_ndd,apx_ndd")
7992 (set_attr "type" "alu")
7993 (set_attr "mode" "SI")])
7994
;; QImode subtraction, operand 0 = operand 1 - (8-bit extract at bit
;; position 8 of operand 2), with operand 1 tied to operand 0.  Emitted as
;; "subb" with the subtrahend printed through the %h modifier; the flags
;; register is clobbered.
7995 (define_insn "*subqi_ext<mode>_0"
7996 [(set (match_operand:QI 0 "nonimmediate_operand" "=QBn")
7997 (minus:QI
7998 (match_operand:QI 1 "nonimmediate_operand" "0")
7999 (subreg:QI
8000 (match_operator:SWI248 3 "extract_operator"
8001 [(match_operand 2 "int248_register_operand" "Q")
8002 (const_int 8)
8003 (const_int 8)]) 0)))
8004 (clobber (reg:CC FLAGS_REG))]
8005 ""
8006 "sub{b}\t{%h2, %0|%0, %h2}"
8007 [(set_attr "addr" "gpr8")
8008 (set_attr "type" "alu")
8009 (set_attr "mode" "QI")])
8010
;; QImode subtraction of two 8-bit extracts (bit position 8 of operands 1
;; and 2) into register operand 0.  Always output as "#" and split after
;; reload into a move of the first extract into operand 0 followed by an
;; in-place subtraction of the second extract.  Operand 0 is
;; earlyclobbered ("=&Q").
8011 (define_insn_and_split "*subqi_ext2<mode>_0"
8012 [(set (match_operand:QI 0 "register_operand" "=&Q")
8013 (minus:QI
8014 (subreg:QI
8015 (match_operator:SWI248 3 "extract_operator"
8016 [(match_operand 1 "int248_register_operand" "Q")
8017 (const_int 8)
8018 (const_int 8)]) 0)
8019 (subreg:QI
8020 (match_operator:SWI248 4 "extract_operator"
8021 [(match_operand 2 "int248_register_operand" "Q")
8022 (const_int 8)
8023 (const_int 8)]) 0)))
8024 (clobber (reg:CC FLAGS_REG))]
8025 ""
8026 "#"
8027 "&& reload_completed"
8028 [(set (match_dup 0)
8029 (subreg:QI
8030 (match_op_dup 3
8031 [(match_dup 1) (const_int 8) (const_int 8)]) 0))
8032 (parallel
8033 [(set (match_dup 0)
8034 (minus:QI
8035 (match_dup 0)
8036 (subreg:QI
8037 (match_op_dup 4
8038 [(match_dup 2) (const_int 8) (const_int 8)]) 0)))
8039 (clobber (reg:CC FLAGS_REG))])]
8040 ""
8041 [(set_attr "type" "alu")
8042 (set_attr "mode" "QI")])
8043
8044 ;; Alternative 1 is needed to work around LRA limitation, see PR82524.
;; Subtraction whose destination is the 8-bit field at bit position 8 of
;; operand 0 (a zero_extract): field = (8-bit extract of operand 1) -
;; operand 2, computed in QImode.  Alternative 0 ties operand 1 to
;; operand 0 and emits "subb" on the %h form of operand 0.  Alternative 1
;; is output as "#"; after reload, if operands 0 and 1 differ, it is split
;; into a copy of the field from operand 1 to operand 0 followed by the
;; in-place subtraction.
8045 (define_insn_and_split "*subqi_ext<mode>_1"
8046 [(set (zero_extract:SWI248
8047 (match_operand 0 "int248_register_operand" "+Q,&Q")
8048 (const_int 8)
8049 (const_int 8))
8050 (subreg:SWI248
8051 (minus:QI
8052 (subreg:QI
8053 (match_operator:SWI248 3 "extract_operator"
8054 [(match_operand 1 "int248_register_operand" "0,!Q")
8055 (const_int 8)
8056 (const_int 8)]) 0)
8057 (match_operand:QI 2 "general_operand" "QnBn,QnBn")) 0))
8058 (clobber (reg:CC FLAGS_REG))]
8059 ""
8060 "@
8061 sub{b}\t{%2, %h0|%h0, %2}
8062 #"
8063 "reload_completed
8064 && !(rtx_equal_p (operands[0], operands[1]))"
8065 [(set (zero_extract:SWI248
8066 (match_dup 0) (const_int 8) (const_int 8))
8067 (zero_extract:SWI248
8068 (match_dup 1) (const_int 8) (const_int 8)))
8069 (parallel
8070 [(set (zero_extract:SWI248
8071 (match_dup 0) (const_int 8) (const_int 8))
8072 (subreg:SWI248
8073 (minus:QI
8074 (subreg:QI
8075 (match_op_dup 3
8076 [(match_dup 0) (const_int 8) (const_int 8)]) 0)
8077 (match_dup 2)) 0))
8078 (clobber (reg:CC FLAGS_REG))])]
8079 ""
8080 [(set_attr "addr" "gpr8")
8081 (set_attr "type" "alu")
8082 (set_attr "mode" "QI")])
8083
8084 ;; Subtract with jump on overflow.
;; Expander: computes operand 0 = operand 1 - operand 2 and sets the CCO
;; flags by comparing the <DPWI>-mode difference of the extended inputs
;; with the sign-extended narrow difference, then emits a jump to label
;; operand 3 on the EQ test of those flags.
;; Operand 4 is created in the preparation code: it is operand 2 itself
;; when operand 2 is a constant scalar integer, otherwise
;; (sign_extend:<DPWI> operand 2).
8085 (define_expand "subv<mode>4"
8086 [(parallel [(set (reg:CCO FLAGS_REG)
8087 (eq:CCO
8088 (minus:<DPWI>
8089 (sign_extend:<DPWI>
8090 (match_operand:SWIDWI 1 "nonimmediate_operand"))
8091 (match_dup 4))
8092 (sign_extend:<DPWI>
8093 (minus:SWIDWI (match_dup 1)
8094 (match_operand:SWIDWI 2
8095 "<general_hilo_operand>")))))
8096 (set (match_operand:SWIDWI 0 "register_operand")
8097 (minus:SWIDWI (match_dup 1) (match_dup 2)))])
8098 (set (pc) (if_then_else
8099 (eq (reg:CCO FLAGS_REG) (const_int 0))
8100 (label_ref (match_operand 3))
8101 (pc)))]
8102 ""
8103 {
8104 ix86_fixup_binary_operands_no_copy (MINUS, <MODE>mode, operands,
8105 TARGET_APX_NDD);
8106 if (CONST_SCALAR_INT_P (operands[2]))
8107 operands[4] = operands[2];
8108 else
8109 operands[4] = gen_rtx_SIGN_EXTEND (<DPWI>mode, operands[2]);
8110 })
8111
;; Subtraction that also sets the overflow flags (CCO), for a
;; non-constant-extended operand 2: the flags compare the <DWI> difference
;; of the sign-extended inputs with the sign-extended SWI difference.
;; Emitted as "sub".  Alternatives 2 and 3 are three-operand forms enabled
;; only for the apx_ndd isa.
8112 (define_insn "*subv<mode>4"
8113 [(set (reg:CCO FLAGS_REG)
8114 (eq:CCO (minus:<DWI>
8115 (sign_extend:<DWI>
8116 (match_operand:SWI 1 "nonimmediate_operand" "0,0,rm,r"))
8117 (sign_extend:<DWI>
8118 (match_operand:SWI 2 "<general_sext_operand>" "<r>We,m,rWe,m")))
8119 (sign_extend:<DWI>
8120 (minus:SWI (match_dup 1) (match_dup 2)))))
8121 (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>,r,r")
8122 (minus:SWI (match_dup 1) (match_dup 2)))]
8123 "ix86_binary_operator_ok (MINUS, <MODE>mode, operands, TARGET_APX_NDD)"
8124 "@
8125 sub{<imodesuffix>}\t{%2, %0|%0, %2}
8126 sub{<imodesuffix>}\t{%2, %0|%0, %2}
8127 sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
8128 sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
8129 [(set_attr "isa" "*,*,apx_ndd,apx_ndd")
8130 (set_attr "type" "alu")
8131 (set_attr "mode" "<MODE>")])
8132
;; Overflow-setting subtraction of an immediate.  Operand 3 is a
;; <DWI>-mode const_int used in the wide difference; the insn condition
;; requires operand 2 to be a CONST_INT with the same INTVAL as operand 3.
;; Alternative 1 is the three-operand form enabled only for the apx_ndd
;; isa.  length_immediate is 1 for immediates in [-128, 127], 4 when
;; <MODE_SIZE> is 8, and <MODE_SIZE> otherwise.
;; The pattern is named (no leading '*'); *subv<dwi>4_doubleword_1 in this
;; file calls the generated gen_subv<mode>4_1.
8133 (define_insn "subv<mode>4_1"
8134 [(set (reg:CCO FLAGS_REG)
8135 (eq:CCO (minus:<DWI>
8136 (sign_extend:<DWI>
8137 (match_operand:SWI 1 "nonimmediate_operand" "0,rm"))
8138 (match_operand:<DWI> 3 "const_int_operand"))
8139 (sign_extend:<DWI>
8140 (minus:SWI
8141 (match_dup 1)
8142 (match_operand:SWI 2 "x86_64_immediate_operand" "<i>,<i>")))))
8143 (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,r")
8144 (minus:SWI (match_dup 1) (match_dup 2)))]
8145 "ix86_binary_operator_ok (MINUS, <MODE>mode, operands, TARGET_APX_NDD)
8146 && CONST_INT_P (operands[2])
8147 && INTVAL (operands[2]) == INTVAL (operands[3])"
8148 "@
8149 sub{<imodesuffix>}\t{%2, %0|%0, %2}
8150 sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
8151 [(set_attr "isa" "*,apx_ndd")
8152 (set_attr "type" "alu")
8153 (set_attr "mode" "<MODE>")
8154 (set (attr "length_immediate")
8155 (cond [(match_test "IN_RANGE (INTVAL (operands[2]), -128, 127)")
8156 (const_string "1")
8157 (match_test "<MODE_SIZE> == 8")
8158 (const_string "4")]
8159 (const_string "<MODE_SIZE>")))])
8160
;; Double-word (<DWI>) subtraction that sets the overflow flags (CCO).
;; Split after reload into two DWIH-mode insns: a subtract of operands 1
;; and 2 that sets the CC flags from comparing them, then a subtract on
;; operands 3/4/5 that also subtracts the borrow (LTU on the flags) and
;; sets CCO from the wide-versus-narrow comparison of that second
;; subtraction.  split_double_mode rewrites operands 0-2 and fills
;; operands 3-5 with the other halves.
8161 (define_insn_and_split "*subv<dwi>4_doubleword"
8162 [(set (reg:CCO FLAGS_REG)
8163 (eq:CCO
8164 (minus:<QPWI>
8165 (sign_extend:<QPWI>
8166 (match_operand:<DWI> 1 "nonimmediate_operand" "0,0,ro,r"))
8167 (sign_extend:<QPWI>
8168 (match_operand:<DWI> 2 "nonimmediate_operand" "r,o,r,o")))
8169 (sign_extend:<QPWI>
8170 (minus:<DWI> (match_dup 1) (match_dup 2)))))
8171 (set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,r,&r,&r")
8172 (minus:<DWI> (match_dup 1) (match_dup 2)))]
8173 "ix86_binary_operator_ok (MINUS, <MODE>mode, operands, TARGET_APX_NDD)"
8174 "#"
8175 "&& reload_completed"
8176 [(parallel [(set (reg:CC FLAGS_REG)
8177 (compare:CC (match_dup 1) (match_dup 2)))
8178 (set (match_dup 0)
8179 (minus:DWIH (match_dup 1) (match_dup 2)))])
8180 (parallel [(set (reg:CCO FLAGS_REG)
8181 (eq:CCO
8182 (minus:<DWI>
8183 (minus:<DWI>
8184 (sign_extend:<DWI> (match_dup 4))
8185 (ltu:<DWI> (reg:CC FLAGS_REG) (const_int 0)))
8186 (sign_extend:<DWI> (match_dup 5)))
8187 (sign_extend:<DWI>
8188 (minus:DWIH
8189 (minus:DWIH
8190 (match_dup 4)
8191 (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0)))
8192 (match_dup 5)))))
8193 (set (match_dup 3)
8194 (minus:DWIH
8195 (minus:DWIH
8196 (match_dup 4)
8197 (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0)))
8198 (match_dup 5)))])]
8199 {
8200 split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]);
8201 }
8202 [(set_attr "isa" "*,*,apx_ndd,apx_ndd")])
8203
;; Constant-subtrahend variant of *subv<dwi>4_doubleword.  Operand 3 is
;; the <QPWI>-mode constant used in the wide difference; the insn
;; condition requires operand 2 to be a constant scalar integer that is
;; rtx_equal_p to operand 3.  After reload it is split like the
;; non-constant variant, except that operand 5 appears in the wide
;; difference without a sign_extend.  The split_double_mode call
;; overwrites operands 3-5, so the original operand 3 is not used by the
;; split.
;; Special case: when operand 2 of the first half is const0_rtx, that half
;; becomes a plain move (skipped if source and destination are equal) and
;; the other half is emitted directly through gen_subv<mode>4_1, passing
;; operands[5] as both the narrow and the wide constant.
8204 (define_insn_and_split "*subv<dwi>4_doubleword_1"
8205 [(set (reg:CCO FLAGS_REG)
8206 (eq:CCO
8207 (minus:<QPWI>
8208 (sign_extend:<QPWI>
8209 (match_operand:<DWI> 1 "nonimmediate_operand" "0,ro"))
8210 (match_operand:<QPWI> 3 "const_scalar_int_operand"))
8211 (sign_extend:<QPWI>
8212 (minus:<DWI>
8213 (match_dup 1)
8214 (match_operand:<DWI> 2 "x86_64_hilo_general_operand" "<di>,<di>")))))
8215 (set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,&r")
8216 (minus:<DWI> (match_dup 1) (match_dup 2)))]
8217 "ix86_binary_operator_ok (MINUS, <MODE>mode, operands, TARGET_APX_NDD)
8218 && CONST_SCALAR_INT_P (operands[2])
8219 && rtx_equal_p (operands[2], operands[3])"
8220 "#"
8221 "&& reload_completed"
8222 [(parallel [(set (reg:CC FLAGS_REG)
8223 (compare:CC (match_dup 1) (match_dup 2)))
8224 (set (match_dup 0)
8225 (minus:DWIH (match_dup 1) (match_dup 2)))])
8226 (parallel [(set (reg:CCO FLAGS_REG)
8227 (eq:CCO
8228 (minus:<DWI>
8229 (minus:<DWI>
8230 (sign_extend:<DWI> (match_dup 4))
8231 (ltu:<DWI> (reg:CC FLAGS_REG) (const_int 0)))
8232 (match_dup 5))
8233 (sign_extend:<DWI>
8234 (minus:DWIH
8235 (minus:DWIH
8236 (match_dup 4)
8237 (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0)))
8238 (match_dup 5)))))
8239 (set (match_dup 3)
8240 (minus:DWIH
8241 (minus:DWIH
8242 (match_dup 4)
8243 (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0)))
8244 (match_dup 5)))])]
8245 {
8246 split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]);
8247 if (operands[2] == const0_rtx)
8248 {
8249 if (!rtx_equal_p (operands[0], operands[1]))
8250 emit_move_insn (operands[0], operands[1]);
8251 emit_insn (gen_subv<mode>4_1 (operands[3], operands[4], operands[5],
8252 operands[5]));
8253 DONE;
8254 }
8255 }
8256 [(set_attr "isa" "*,apx_ndd")])
8257
;; Subtract-with-borrow that also sets the overflow flags (CCO):
;; operand 0 = operand 1 - borrow - operand 2, emitted as "sbb".  The
;; flags compare the <DWI> difference of the sign-extended inputs with the
;; sign-extended SWI difference.  Alternatives 2 and 3 are three-operand
;; forms enabled only for the apx_ndd isa.
;; Operand 1 deliberately has no '%' constraint modifier: '%' declares
;; operand 1 and the following operand interchangeable, which is only
;; valid for commutative operations and would be wrong for MINUS.
8258 (define_insn "*subv<mode>4_overflow_1"
8259 [(set (reg:CCO FLAGS_REG)
8260 (eq:CCO
8261 (minus:<DWI>
8262 (minus:<DWI>
8263 (sign_extend:<DWI>
8264 (match_operand:SWI 1 "nonimmediate_operand" "0,0,rm,r"))
8265 (match_operator:<DWI> 4 "ix86_carry_flag_operator"
8266 [(match_operand 3 "flags_reg_operand") (const_int 0)]))
8267 (sign_extend:<DWI>
8268 (match_operand:SWI 2 "<general_sext_operand>" "rWe,m,rWe,m")))
8269 (sign_extend:<DWI>
8270 (minus:SWI
8271 (minus:SWI
8272 (match_dup 1)
8273 (match_operator:SWI 5 "ix86_carry_flag_operator"
8274 [(match_dup 3) (const_int 0)]))
8275 (match_dup 2)))))
8276 (set (match_operand:SWI 0 "nonimmediate_operand" "=rm,r,r,r")
8277 (minus:SWI
8278 (minus:SWI
8279 (match_dup 1)
8280 (match_op_dup 5 [(match_dup 3) (const_int 0)]))
8281 (match_dup 2)))]
8282 "ix86_binary_operator_ok (MINUS, <MODE>mode, operands, TARGET_APX_NDD)"
8283 "@
8284 sbb{<imodesuffix>}\t{%2, %0|%0, %2}
8285 sbb{<imodesuffix>}\t{%2, %0|%0, %2}
8286 sbb{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
8287 sbb{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
8288 [(set_attr "isa" "*,*,apx_ndd,apx_ndd")
8289 (set_attr "type" "alu")
8290 (set_attr "mode" "<MODE>")])
8291
;; Subtract-with-borrow of an immediate that also sets the overflow flags
;; (CCO): operand 0 = operand 1 - borrow - operand 2, emitted as "sbb".
;; Operand 6 is a <DWI>-mode const_int used in the wide difference; the
;; insn condition requires operand 2 to be a CONST_INT with the same
;; INTVAL as operand 6.  Alternative 1 is the three-operand form enabled
;; only for the apx_ndd isa.  length_immediate is 1 for immediates in
;; [-128, 127] and 4 otherwise.
;; Operand 1 deliberately has no '%' constraint modifier: '%' marks a
;; commutative operand pair, and MINUS is not commutative.
8292 (define_insn "*subv<mode>4_overflow_2"
8293 [(set (reg:CCO FLAGS_REG)
8294 (eq:CCO
8295 (minus:<DWI>
8296 (minus:<DWI>
8297 (sign_extend:<DWI>
8298 (match_operand:SWI 1 "nonimmediate_operand" "0,rm"))
8299 (match_operator:<DWI> 4 "ix86_carry_flag_operator"
8300 [(match_operand 3 "flags_reg_operand") (const_int 0)]))
8301 (match_operand:<DWI> 6 "const_int_operand" "n,n"))
8302 (sign_extend:<DWI>
8303 (minus:SWI
8304 (minus:SWI
8305 (match_dup 1)
8306 (match_operator:SWI 5 "ix86_carry_flag_operator"
8307 [(match_dup 3) (const_int 0)]))
8308 (match_operand:SWI 2 "x86_64_immediate_operand" "e,e")))))
8309 (set (match_operand:SWI 0 "nonimmediate_operand" "=rm,r")
8310 (minus:SWI
8311 (minus:SWI
8312 (match_dup 1)
8313 (match_op_dup 5 [(match_dup 3) (const_int 0)]))
8314 (match_dup 2)))]
8315 "ix86_binary_operator_ok (MINUS, <MODE>mode, operands, TARGET_APX_NDD)
8316 && CONST_INT_P (operands[2])
8317 && INTVAL (operands[2]) == INTVAL (operands[6])"
8318 "@
8319 sbb{<imodesuffix>}\t{%2, %0|%0, %2}
8320 sbb{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
8321 [(set_attr "isa" "*,apx_ndd")
8322 (set_attr "type" "alu")
8323 (set_attr "mode" "<MODE>")
8324 (set (attr "length_immediate")
8325 (if_then_else (match_test "IN_RANGE (INTVAL (operands[2]), -128, 127)")
8326 (const_string "1")
8327 (const_string "4")))])
8328
;; Unsigned subtract with jump on borrow.  Emits a parallel that computes
;; operand 0 = operand 1 - operand 2 and sets the CC flags from comparing
;; operand 1 with operand 2, followed by a jump to label operand 3 taken
;; when the LTU condition holds on those flags.
8329 (define_expand "usubv<mode>4"
8330 [(parallel [(set (reg:CC FLAGS_REG)
8331 (compare:CC
8332 (match_operand:SWI 1 "nonimmediate_operand")
8333 (match_operand:SWI 2 "<general_operand>")))
8334 (set (match_operand:SWI 0 "register_operand")
8335 (minus:SWI (match_dup 1) (match_dup 2)))])
8336 (set (pc) (if_then_else
8337 (ltu (reg:CC FLAGS_REG) (const_int 0))
8338 (label_ref (match_operand 3))
8339 (pc)))]
8340 ""
8341 "ix86_fixup_binary_operands_no_copy (MINUS, <MODE>mode, operands,
8342 TARGET_APX_NDD);")
8343
;; SWI-mode subtraction that also sets the flags from comparing operand 1
;; with operand 2 (rather than comparing the difference with zero, as
;; *sub<mode>_2 does).  Only valid when ix86_match_ccmode accepts the insn
;; for CCmode.  Alternatives 2 and 3 are three-operand forms enabled only
;; for the apx_ndd isa.
;; The output constraints match *sub<mode>_1 and *sub<mode>_2: a
;; destination is a register or memory, never an immediate.
8344 (define_insn "*sub<mode>_3"
8345 [(set (reg FLAGS_REG)
8346 (compare (match_operand:SWI 1 "nonimmediate_operand" "0,0,rm,r")
8347 (match_operand:SWI 2 "<general_operand>" "<r><i>,<m>,r<i>,<m>")))
8348 (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>,r,r")
8349 (minus:SWI (match_dup 1) (match_dup 2)))]
8350 "ix86_match_ccmode (insn, CCmode)
8351 && ix86_binary_operator_ok (MINUS, <MODE>mode, operands, TARGET_APX_NDD)"
8352 "@
8353 sub{<imodesuffix>}\t{%2, %0|%0, %2}
8354 sub{<imodesuffix>}\t{%2, %0|%0, %2}
8355 sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
8356 sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
8357 [(set_attr "isa" "*,*,apx_ndd,apx_ndd")
8358 (set_attr "type" "alu")
8359 (set_attr "mode" "<MODE>")])
8360
;; When a flag-setting subtract leaves its register result unused (the
;; insn carries a REG_UNUSED note for operand 0), replace it by the
;; compare alone, dropping the write to operand 0.
8361 (define_peephole2
8362 [(parallel
8363 [(set (reg:CC FLAGS_REG)
8364 (compare:CC (match_operand:SWI 0 "general_reg_operand")
8365 (match_operand:SWI 1 "general_gr_operand")))
8366 (set (match_dup 0)
8367 (minus:SWI (match_dup 0) (match_dup 1)))])]
8368 "find_regno_note (peep2_next_insn (0), REG_UNUSED, REGNO (operands[0])) != 0"
8369 [(set (reg:CC FLAGS_REG)
8370 (compare:CC (match_dup 0) (match_dup 1)))])
8371
;; Turn the three-insn sequence
;;   reg = mem1;  flags, reg = reg - mem2;  mem1 = reg
;; into
;;   reg = mem2;  flags, mem1 = mem1 - reg
;; i.e. a subtract performed directly on the memory destination.
;; Applied only when TARGET_READ_MODIFY_WRITE is set or the insn is
;; optimized for size, the register is dead after the sequence, and the
;; register is not mentioned in either memory operand.
8372 (define_peephole2
8373 [(set (match_operand:SWI 0 "general_reg_operand")
8374 (match_operand:SWI 1 "memory_operand"))
8375 (parallel [(set (reg:CC FLAGS_REG)
8376 (compare:CC (match_dup 0)
8377 (match_operand:SWI 2 "memory_operand")))
8378 (set (match_dup 0)
8379 (minus:SWI (match_dup 0) (match_dup 2)))])
8380 (set (match_dup 1) (match_dup 0))]
8381 "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
8382 && peep2_reg_dead_p (3, operands[0])
8383 && !reg_overlap_mentioned_p (operands[0], operands[1])
8384 && !reg_overlap_mentioned_p (operands[0], operands[2])"
8385 [(set (match_dup 0) (match_dup 2))
8386 (parallel [(set (reg:CC FLAGS_REG)
8387 (compare:CC (match_dup 1) (match_dup 0)))
8388 (set (match_dup 1)
8389 (minus:SWI (match_dup 1) (match_dup 0)))])])
8390
8391 ;; decl %eax; cmpl $-1, %eax; jne .Lxx; can be optimized into
8392 ;; subl $1, %eax; jnc .Lxx;
;; Matches: reg = reg + -1 (flags clobbered); CCZ compare of reg with -1;
;; conditional jump on a bt_comparison_operator test of those flags.
;; Replaces it by a single flag-setting "reg = reg - 1" (flags from
;; comparing reg with 1) and a jump on the carry condition: GEU when the
;; original test was NE, LTU otherwise.  Requires the flags register to be
;; dead after the jump.
8393 (define_peephole2
8394 [(parallel
8395 [(set (match_operand:SWI 0 "general_reg_operand")
8396 (plus:SWI (match_dup 0) (const_int -1)))
8397 (clobber (reg FLAGS_REG))])
8398 (set (reg:CCZ FLAGS_REG)
8399 (compare:CCZ (match_dup 0) (const_int -1)))
8400 (set (pc)
8401 (if_then_else (match_operator 1 "bt_comparison_operator"
8402 [(reg:CCZ FLAGS_REG) (const_int 0)])
8403 (match_operand 2)
8404 (pc)))]
8405 "peep2_regno_dead_p (3, FLAGS_REG)"
8406 [(parallel
8407 [(set (reg:CC FLAGS_REG)
8408 (compare:CC (match_dup 0) (const_int 1)))
8409 (set (match_dup 0)
8410 (minus:SWI (match_dup 0) (const_int 1)))])
8411 (set (pc)
8412 (if_then_else (match_dup 3)
8413 (match_dup 2)
8414 (pc)))]
8415 {
8416 rtx cc = gen_rtx_REG (CCmode, FLAGS_REG);
8417 operands[3] = gen_rtx_fmt_ee (GET_CODE (operands[1]) == NE
8418 ? GEU : LTU, VOIDmode, cc, const0_rtx);
8419 })
8420
8421 ;; Help combine use borrow flag to test for -1 after dec (add $-1).
;; Conditional move that selects between "operand 2 - 1" and operand 3
;; according to a bt_comparison_operator test of operand 2 against zero.
;; Requires TARGET_CMOVE.  Split after reload into a flag-setting
;; "operand 0 = operand 2 - 1" (flags from comparing operand 2 with 1)
;; followed by a conditional move keyed on the carry condition: GEU when
;; the original test was NE, LTU otherwise.
;; NOTE(review): the new condition is built on a CCCmode flags register
;; while the first split insn sets (reg:CC FLAGS_REG); the peephole2 above
;; uses CCmode for the same construction -- confirm this is intended.
8422 (define_insn_and_split "*dec_cmov<mode>"
8423 [(set (match_operand:SWI248 0 "register_operand" "=r")
8424 (if_then_else:SWI248
8425 (match_operator 1 "bt_comparison_operator"
8426 [(match_operand:SWI248 2 "register_operand" "0") (const_int 0)])
8427 (plus:SWI248 (match_dup 2) (const_int -1))
8428 (match_operand:SWI248 3 "nonimmediate_operand" "rm")))
8429 (clobber (reg:CC FLAGS_REG))]
8430 "TARGET_CMOVE"
8431 "#"
8432 "&& reload_completed"
8433 [(parallel [(set (reg:CC FLAGS_REG)
8434 (compare:CC (match_dup 2) (const_int 1)))
8435 (set (match_dup 0) (minus:SWI248 (match_dup 2) (const_int 1)))])
8436 (set (match_dup 0)
8437 (if_then_else:SWI248 (match_dup 4) (match_dup 0) (match_dup 3)))]
8438 {
8439 rtx cc = gen_rtx_REG (CCCmode, FLAGS_REG);
8440 operands[4] = gen_rtx_fmt_ee (GET_CODE (operands[1]) == NE
8441 ? GEU : LTU, VOIDmode, cc, const0_rtx);
8442 })
8443
;; SImode subtraction that sets the flags from comparing operand 1 with
;; operand 2 and stores the zero-extended difference in a DImode register.
;; Requires TARGET_64BIT and a CCmode-compatible flags user.  Alternatives
;; 1 and 2 are three-operand forms enabled only for the apx_ndd isa.
;; NOTE(review): alternative 0 prints the destination as %1 whereas
;; *subsi_1_zext and *subsi_2_zext print %k0; operand 1 is tied to
;; operand 0 by the "0" constraint in that alternative, so both should
;; name the same register -- confirm the difference is only cosmetic.
8444 (define_insn "*subsi_3_zext"
8445 [(set (reg FLAGS_REG)
8446 (compare (match_operand:SI 1 "nonimmediate_operand" "0,r,rm")
8447 (match_operand:SI 2 "x86_64_general_operand" "rBMe,rBMe,re")))
8448 (set (match_operand:DI 0 "register_operand" "=r,r,r")
8449 (zero_extend:DI
8450 (minus:SI (match_dup 1)
8451 (match_dup 2))))]
8452 "TARGET_64BIT && ix86_match_ccmode (insn, CCmode)
8453 && ix86_binary_operator_ok (MINUS, SImode, operands, TARGET_APX_NDD)"
8454 "@
8455 sub{l}\t{%2, %1|%1, %2}
8456 sub{l}\t{%2, %1, %k0|%k0, %1, %2}
8457 sub{l}\t{%2, %1, %k0|%k0, %1, %2}"
8458 [(set_attr "isa" "*,apx_ndd,apx_ndd")
8459 (set_attr "type" "alu")
8460 (set_attr "mode" "SI")])
8461 \f
8462 ;; Add with carry and subtract with borrow
8463
;; Add with carry: operand 0 = carry + operand 1 + operand 2, where the
;; carry is an ix86_carry_flag_operator test (operand 4) of the flags
;; register (operand 3).  Emitted as "adc"; the flags are clobbered.
;; Alternatives 2 and 3 are three-operand forms enabled only for the
;; apx_ndd isa.  The insn is marked use_carry = 1.
8464 (define_insn "@add<mode>3_carry"
8465 [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>,r,r")
8466 (plus:SWI
8467 (plus:SWI
8468 (match_operator:SWI 4 "ix86_carry_flag_operator"
8469 [(match_operand 3 "flags_reg_operand") (const_int 0)])
8470 (match_operand:SWI 1 "nonimmediate_operand" "%0,0,rm,r"))
8471 (match_operand:SWI 2 "<general_operand>" "<r><i>,<m>,r<i>,<m>")))
8472 (clobber (reg:CC FLAGS_REG))]
8473 "ix86_binary_operator_ok (PLUS, <MODE>mode, operands, TARGET_APX_NDD)"
8474 "@
8475 adc{<imodesuffix>}\t{%2, %0|%0, %2}
8476 adc{<imodesuffix>}\t{%2, %0|%0, %2}
8477 adc{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
8478 adc{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
8479 [(set_attr "isa" "*,*,apx_ndd,apx_ndd")
8480 (set_attr "type" "alu")
8481 (set_attr "use_carry" "1")
8482 (set_attr "pent_pair" "pu")
8483 (set_attr "mode" "<MODE>")])
8484
;; Turn the three-insn sequence
;;   reg = mem1;  reg = carry + reg + mem2;  mem1 = reg
;; into
;;   reg = mem2;  mem1 = carry + mem1 + reg
;; i.e. an add-with-carry performed directly on the memory destination.
;; Applied only when TARGET_READ_MODIFY_WRITE is set or the insn is
;; optimized for size, the register is dead after the sequence, and the
;; register is not mentioned in either memory operand.
8485 (define_peephole2
8486 [(set (match_operand:SWI 0 "general_reg_operand")
8487 (match_operand:SWI 1 "memory_operand"))
8488 (parallel [(set (match_dup 0)
8489 (plus:SWI
8490 (plus:SWI
8491 (match_operator:SWI 4 "ix86_carry_flag_operator"
8492 [(match_operand 3 "flags_reg_operand")
8493 (const_int 0)])
8494 (match_dup 0))
8495 (match_operand:SWI 2 "memory_operand")))
8496 (clobber (reg:CC FLAGS_REG))])
8497 (set (match_dup 1) (match_dup 0))]
8498 "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
8499 && peep2_reg_dead_p (3, operands[0])
8500 && !reg_overlap_mentioned_p (operands[0], operands[1])
8501 && !reg_overlap_mentioned_p (operands[0], operands[2])"
8502 [(set (match_dup 0) (match_dup 2))
8503 (parallel [(set (match_dup 1)
8504 (plus:SWI (plus:SWI (match_op_dup 4
8505 [(match_dup 3) (const_int 0)])
8506 (match_dup 1))
8507 (match_dup 0)))
8508 (clobber (reg:CC FLAGS_REG))])])
8509
;; Four-insn variant of the load / carry-add / store peephole: the sum in
;; operand 0 is first copied to register operand 5, and operand 5 is what
;; gets stored back to memory operand 1.
;; The replacement loads operand 2 into operand 0 and performs the carry
;; add directly on memory operand 1.  In addition to the target/size
;; check, it requires peep2_reg_dead_p (3, operands[0]),
;; peep2_reg_dead_p (4, operands[5]), and that neither operand 0 nor
;; operand 5 overlaps the memory operands tested in the condition.
8510 (define_peephole2
8511 [(set (match_operand:SWI 0 "general_reg_operand")
8512 (match_operand:SWI 1 "memory_operand"))
8513 (parallel [(set (match_dup 0)
8514 (plus:SWI
8515 (plus:SWI
8516 (match_operator:SWI 4 "ix86_carry_flag_operator"
8517 [(match_operand 3 "flags_reg_operand")
8518 (const_int 0)])
8519 (match_dup 0))
8520 (match_operand:SWI 2 "memory_operand")))
8521 (clobber (reg:CC FLAGS_REG))])
8522 (set (match_operand:SWI 5 "general_reg_operand") (match_dup 0))
8523 (set (match_dup 1) (match_dup 5))]
8524 "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
8525 && peep2_reg_dead_p (3, operands[0])
8526 && peep2_reg_dead_p (4, operands[5])
8527 && !reg_overlap_mentioned_p (operands[0], operands[1])
8528 && !reg_overlap_mentioned_p (operands[0], operands[2])
8529 && !reg_overlap_mentioned_p (operands[5], operands[1])"
8530 [(set (match_dup 0) (match_dup 2))
8531 (parallel [(set (match_dup 1)
8532 (plus:SWI (plus:SWI (match_op_dup 4
8533 [(match_dup 3) (const_int 0)])
8534 (match_dup 1))
8535 (match_dup 0)))
8536 (clobber (reg:CC FLAGS_REG))])])
8537
;; operand 0 = carry + operand 1, emitted as ADC with immediate 0.
;; Operand 1 is tied to operand 0 by its "0" constraint; when operand 0 is
;; a MEM the condition also requires operands 0 and 1 to be rtx_equal_p.
;; The flags register is clobbered.
8538 (define_insn "*add<mode>3_carry_0"
8539 [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
8540 (plus:SWI
8541 (match_operator:SWI 2 "ix86_carry_flag_operator"
8542 [(reg FLAGS_REG) (const_int 0)])
8543 (match_operand:SWI 1 "nonimmediate_operand" "0")))
8544 (clobber (reg:CC FLAGS_REG))]
8545 "!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[1])"
8546 "adc{<imodesuffix>}\t{$0, %0|%0, 0}"
8547 [(set_attr "type" "alu")
8548 (set_attr "use_carry" "1")
8549 (set_attr "pent_pair" "pu")
8550 (set_attr "mode" "<MODE>")])
8551
;; operand 0 = (operator 2) + operand 1, where operator 2 is an
;; ix86_carry_flag_unset_operator on the flags register; emitted as SBB
;; with immediate -1.
;; NOTE(review): presumably operator 2 evaluates to the complement of the
;; carry flag, which makes x + !CF equal to x - (-1) - CF -- confirm
;; against the predicate definition.
;; Operand 1 is tied to operand 0; a MEM destination must be rtx_equal_p
;; to operand 1.  The flags register is clobbered.
8552 (define_insn "*add<mode>3_carry_0r"
8553 [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
8554 (plus:SWI
8555 (match_operator:SWI 2 "ix86_carry_flag_unset_operator"
8556 [(reg FLAGS_REG) (const_int 0)])
8557 (match_operand:SWI 1 "nonimmediate_operand" "0")))
8558 (clobber (reg:CC FLAGS_REG))]
8559 "!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[1])"
8560 "sbb{<imodesuffix>}\t{$-1, %0|%0, -1}"
8561 [(set_attr "type" "alu")
8562 (set_attr "use_carry" "1")
8563 (set_attr "pent_pair" "pu")
8564 (set_attr "mode" "<MODE>")])
8565
;; SImode carry add (carry + operand 1 + operand 2) for TARGET_64BIT whose
;; result is zero-extended into DImode register operand 0; emitted as
;; ADC{l} on the %k0 form of the destination.  The flags register is
;; clobbered.  Alternative 0 ties operand 1 to operand 0; alternatives 1
;; and 2 are three-operand forms restricted to the apx_ndd isa.
8566 (define_insn "*addsi3_carry_zext"
8567 [(set (match_operand:DI 0 "register_operand" "=r,r,r")
8568 (zero_extend:DI
8569 (plus:SI
8570 (plus:SI (match_operator:SI 3 "ix86_carry_flag_operator"
8571 [(reg FLAGS_REG) (const_int 0)])
8572 (match_operand:SI 1 "nonimmediate_operand" "%0,r,rm"))
8573 (match_operand:SI 2 "x86_64_general_operand" "rBMe,rBMe,re"))))
8574 (clobber (reg:CC FLAGS_REG))]
8575 "TARGET_64BIT
8576 && ix86_binary_operator_ok (PLUS, SImode, operands, TARGET_APX_NDD)"
8577 "@
8578 adc{l}\t{%2, %k0|%k0, %2}
8579 adc{l}\t{%2, %1, %k0|%k0, %1, %2}
8580 adc{l}\t{%2, %1, %k0|%k0, %1, %2}"
8581 [(set_attr "isa" "*,apx_ndd,apx_ndd")
8582 (set_attr "type" "alu")
8583 (set_attr "use_carry" "1")
8584 (set_attr "pent_pair" "pu")
8585 (set_attr "mode" "SI")])
8586
;; SImode carry + operand 1 for TARGET_64BIT, zero-extended into DImode
;; register operand 0; emitted as ADC{l} with immediate 0.
;; Alternative 0 ties operand 1 to operand 0; alternative 1 takes a
;; register or memory source and is restricted to the apx_ndd isa.
;; The flags register is clobbered.
8587 (define_insn "*addsi3_carry_zext_0"
8588 [(set (match_operand:DI 0 "register_operand" "=r,r")
8589 (zero_extend:DI
8590 (plus:SI (match_operator:SI 2 "ix86_carry_flag_operator"
8591 [(reg FLAGS_REG) (const_int 0)])
8592 (match_operand:SI 1 "nonimmediate_operand" "0,rm"))))
8593 (clobber (reg:CC FLAGS_REG))]
8594 "TARGET_64BIT"
8595 "@
8596 adc{l}\t{$0, %k0|%k0, 0}
8597 adc{l}\t{$0, %1, %k0|%k0, %1, 0}"
8598 [(set_attr "isa" "*,apx_ndd")
8599 (set_attr "type" "alu")
8600 (set_attr "use_carry" "1")
8601 (set_attr "pent_pair" "pu")
8602 (set_attr "mode" "SI")])
8603
;; SImode (operator 2) + operand 1 for TARGET_64BIT, zero-extended into
;; DImode register operand 0, where operator 2 is an
;; ix86_carry_flag_unset_operator; emitted as SBB{l} with immediate -1.
;; NOTE(review): presumably operator 2 is the complement of the carry
;; flag, so x + !CF == x - (-1) - CF -- confirm against the predicate.
;; Alternative 1 is the three-operand form, restricted to apx_ndd.
8604 (define_insn "*addsi3_carry_zext_0r"
8605 [(set (match_operand:DI 0 "register_operand" "=r,r")
8606 (zero_extend:DI
8607 (plus:SI (match_operator:SI 2 "ix86_carry_flag_unset_operator"
8608 [(reg FLAGS_REG) (const_int 0)])
8609 (match_operand:SI 1 "nonimmediate_operand" "0,rm"))))
8610 (clobber (reg:CC FLAGS_REG))]
8611 "TARGET_64BIT"
8612 "@
8613 sbb{l}\t{$-1, %k0|%k0, -1}
8614 sbb{l}\t{$-1, %1, %k0|%k0, %1, -1}"
8615 [(set_attr "isa" "*,apx_ndd")
8616 (set_attr "type" "alu")
8617 (set_attr "use_carry" "1")
8618 (set_attr "pent_pair" "pu")
8619 (set_attr "mode" "SI")])
8620
8621 ;; There is no point in generating the ADCX instruction. ADC is shorter and faster.
8622
;; Add with carry-in that also sets the flags in CCCmode.
;; The first set compares the zero-extended <MODE>mode sum
;; (carry + operand 1 + operand 2) with the <DWI>mode sum of zero-extended
;; operand 2 and the carry (operator 4); the second set stores the
;; <MODE>mode sum in operand 0.  Operators 4 and 5 are the <DWI>mode and
;; <MODE>mode carry tests on the same flags operand 3.
;; Alternatives 0 and 1 tie operand 1 to operand 0; alternatives 2 and 3
;; are three-operand forms restricted to the apx_ndd isa.
8623 (define_insn "addcarry<mode>"
8624 [(set (reg:CCC FLAGS_REG)
8625 (compare:CCC
8626 (zero_extend:<DWI>
8627 (plus:SWI48
8628 (plus:SWI48
8629 (match_operator:SWI48 5 "ix86_carry_flag_operator"
8630 [(match_operand 3 "flags_reg_operand") (const_int 0)])
8631 (match_operand:SWI48 1 "nonimmediate_operand" "%0,0,rm,r"))
8632 (match_operand:SWI48 2 "nonimmediate_operand" "r,rm,r,m")))
8633 (plus:<DWI>
8634 (zero_extend:<DWI> (match_dup 2))
8635 (match_operator:<DWI> 4 "ix86_carry_flag_operator"
8636 [(match_dup 3) (const_int 0)]))))
8637 (set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r,r")
8638 (plus:SWI48 (plus:SWI48 (match_op_dup 5
8639 [(match_dup 3) (const_int 0)])
8640 (match_dup 1))
8641 (match_dup 2)))]
8642 "ix86_binary_operator_ok (PLUS, <MODE>mode, operands, TARGET_APX_NDD)"
8643 "@
8644 adc{<imodesuffix>}\t{%2, %0|%0, %2}
8645 adc{<imodesuffix>}\t{%2, %0|%0, %2}
8646 adc{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
8647 adc{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
8648 [(set_attr "isa" "*,*,apx_ndd,apx_ndd")
8649 (set_attr "type" "alu")
8650 (set_attr "use_carry" "1")
8651 (set_attr "pent_pair" "pu")
8652 (set_attr "mode" "<MODE>")])
8653
;; Matches a flag-setting carry add of register operand 0 and memory
;; operand 1 into operand 0, followed by a store of operand 0 back to
;; operand 1.  The replacement is a single flag-setting carry add with the
;; roles swapped: memory operand 1 is the destination and first addend,
;; register operand 0 the second addend.
;; Requires TARGET_READ_MODIFY_WRITE or optimizing for size,
;; peep2_reg_dead_p (2, operands[0]), and that operand 0 does not overlap
;; operand 1.
8654 (define_peephole2
8655 [(parallel [(set (reg:CCC FLAGS_REG)
8656 (compare:CCC
8657 (zero_extend:<DWI>
8658 (plus:SWI48
8659 (plus:SWI48
8660 (match_operator:SWI48 4 "ix86_carry_flag_operator"
8661 [(match_operand 2 "flags_reg_operand")
8662 (const_int 0)])
8663 (match_operand:SWI48 0 "general_reg_operand"))
8664 (match_operand:SWI48 1 "memory_operand")))
8665 (plus:<DWI>
8666 (zero_extend:<DWI> (match_dup 1))
8667 (match_operator:<DWI> 3 "ix86_carry_flag_operator"
8668 [(match_dup 2) (const_int 0)]))))
8669 (set (match_dup 0)
8670 (plus:SWI48 (plus:SWI48 (match_op_dup 4
8671 [(match_dup 2) (const_int 0)])
8672 (match_dup 0))
8673 (match_dup 1)))])
8674 (set (match_dup 1) (match_dup 0))]
8675 "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
8676 && peep2_reg_dead_p (2, operands[0])
8677 && !reg_overlap_mentioned_p (operands[0], operands[1])"
8678 [(parallel [(set (reg:CCC FLAGS_REG)
8679 (compare:CCC
8680 (zero_extend:<DWI>
8681 (plus:SWI48
8682 (plus:SWI48
8683 (match_op_dup 4
8684 [(match_dup 2) (const_int 0)])
8685 (match_dup 1))
8686 (match_dup 0)))
8687 (plus:<DWI>
8688 (zero_extend:<DWI> (match_dup 0))
8689 (match_op_dup 3
8690 [(match_dup 2) (const_int 0)]))))
8691 (set (match_dup 1)
8692 (plus:SWI48 (plus:SWI48 (match_op_dup 4
8693 [(match_dup 2) (const_int 0)])
8694 (match_dup 1))
8695 (match_dup 0)))])])
8696
;; Matches a load of memory operand 1 into register operand 0, a
;; flag-setting carry add of operand 0 and memory operand 2 into operand 0,
;; and a store of operand 0 back to operand 1.
;; The replacement loads operand 2 into operand 0 and performs the
;; flag-setting carry add with memory operand 1 as destination and first
;; addend.  Requires TARGET_READ_MODIFY_WRITE or optimizing for size,
;; peep2_reg_dead_p (3, operands[0]), and that operand 0 overlaps neither
;; memory operand.
8697 (define_peephole2
8698 [(set (match_operand:SWI48 0 "general_reg_operand")
8699 (match_operand:SWI48 1 "memory_operand"))
8700 (parallel [(set (reg:CCC FLAGS_REG)
8701 (compare:CCC
8702 (zero_extend:<DWI>
8703 (plus:SWI48
8704 (plus:SWI48
8705 (match_operator:SWI48 5 "ix86_carry_flag_operator"
8706 [(match_operand 3 "flags_reg_operand")
8707 (const_int 0)])
8708 (match_dup 0))
8709 (match_operand:SWI48 2 "memory_operand")))
8710 (plus:<DWI>
8711 (zero_extend:<DWI> (match_dup 2))
8712 (match_operator:<DWI> 4 "ix86_carry_flag_operator"
8713 [(match_dup 3) (const_int 0)]))))
8714 (set (match_dup 0)
8715 (plus:SWI48 (plus:SWI48 (match_op_dup 5
8716 [(match_dup 3) (const_int 0)])
8717 (match_dup 0))
8718 (match_dup 2)))])
8719 (set (match_dup 1) (match_dup 0))]
8720 "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
8721 && peep2_reg_dead_p (3, operands[0])
8722 && !reg_overlap_mentioned_p (operands[0], operands[1])
8723 && !reg_overlap_mentioned_p (operands[0], operands[2])"
8724 [(set (match_dup 0) (match_dup 2))
8725 (parallel [(set (reg:CCC FLAGS_REG)
8726 (compare:CCC
8727 (zero_extend:<DWI>
8728 (plus:SWI48
8729 (plus:SWI48
8730 (match_op_dup 5
8731 [(match_dup 3) (const_int 0)])
8732 (match_dup 1))
8733 (match_dup 0)))
8734 (plus:<DWI>
8735 (zero_extend:<DWI> (match_dup 0))
8736 (match_op_dup 4
8737 [(match_dup 3) (const_int 0)]))))
8738 (set (match_dup 1)
8739 (plus:SWI48 (plus:SWI48 (match_op_dup 5
8740 [(match_dup 3) (const_int 0)])
8741 (match_dup 1))
8742 (match_dup 0)))])])
8743
;; Variant of the carry-add / store peephole in which two insns sit
;; between the add and the store: QImode register operand 5 is set from an
;; LTU test of the CCC flags, and register operand 6 is set to the zero
;; extension of operand 5.
;; The replacement performs the flag-setting carry add on memory operand 1
;; and then re-emits the same LTU set and zero extension after it.
;; Requires peep2_reg_dead_p (4, operands[0]) and that operands 0, 5 and 6
;; do not overlap in the pairings tested in the condition.
8744 (define_peephole2
8745 [(parallel [(set (reg:CCC FLAGS_REG)
8746 (compare:CCC
8747 (zero_extend:<DWI>
8748 (plus:SWI48
8749 (plus:SWI48
8750 (match_operator:SWI48 4 "ix86_carry_flag_operator"
8751 [(match_operand 2 "flags_reg_operand")
8752 (const_int 0)])
8753 (match_operand:SWI48 0 "general_reg_operand"))
8754 (match_operand:SWI48 1 "memory_operand")))
8755 (plus:<DWI>
8756 (zero_extend:<DWI> (match_dup 1))
8757 (match_operator:<DWI> 3 "ix86_carry_flag_operator"
8758 [(match_dup 2) (const_int 0)]))))
8759 (set (match_dup 0)
8760 (plus:SWI48 (plus:SWI48 (match_op_dup 4
8761 [(match_dup 2) (const_int 0)])
8762 (match_dup 0))
8763 (match_dup 1)))])
8764 (set (match_operand:QI 5 "general_reg_operand")
8765 (ltu:QI (reg:CCC FLAGS_REG) (const_int 0)))
8766 (set (match_operand:SWI48 6 "general_reg_operand")
8767 (zero_extend:SWI48 (match_dup 5)))
8768 (set (match_dup 1) (match_dup 0))]
8769 "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
8770 && peep2_reg_dead_p (4, operands[0])
8771 && !reg_overlap_mentioned_p (operands[0], operands[1])
8772 && !reg_overlap_mentioned_p (operands[0], operands[5])
8773 && !reg_overlap_mentioned_p (operands[5], operands[1])
8774 && !reg_overlap_mentioned_p (operands[0], operands[6])
8775 && !reg_overlap_mentioned_p (operands[6], operands[1])"
8776 [(parallel [(set (reg:CCC FLAGS_REG)
8777 (compare:CCC
8778 (zero_extend:<DWI>
8779 (plus:SWI48
8780 (plus:SWI48
8781 (match_op_dup 4
8782 [(match_dup 2) (const_int 0)])
8783 (match_dup 1))
8784 (match_dup 0)))
8785 (plus:<DWI>
8786 (zero_extend:<DWI> (match_dup 0))
8787 (match_op_dup 3
8788 [(match_dup 2) (const_int 0)]))))
8789 (set (match_dup 1)
8790 (plus:SWI48 (plus:SWI48 (match_op_dup 4
8791 [(match_dup 2) (const_int 0)])
8792 (match_dup 1))
8793 (match_dup 0)))])
8794 (set (match_dup 5) (ltu:QI (reg:CCC FLAGS_REG) (const_int 0)))
8795 (set (match_dup 6) (zero_extend:SWI48 (match_dup 5)))])
8796
;; Expander for an add without carry-in that produces CCCmode flags: the
;; flags are set from comparing (operand 1 + operand 2) with operand 1,
;; and the sum is stored in operand 0.
8797 (define_expand "addcarry<mode>_0"
8798 [(parallel
8799 [(set (reg:CCC FLAGS_REG)
8800 (compare:CCC
8801 (plus:SWI48
8802 (match_operand:SWI48 1 "nonimmediate_operand")
8803 (match_operand:SWI48 2 "x86_64_general_operand"))
8804 (match_dup 1)))
8805 (set (match_operand:SWI48 0 "nonimmediate_operand")
8806 (plus:SWI48 (match_dup 1) (match_dup 2)))])]
8807 "ix86_binary_operator_ok (PLUS, <MODE>mode, operands, TARGET_APX_NDD)")
8808
;; Flag-setting carry add with an immediate second addend.
;; Operand 2 is the <MODE>mode immediate used in the sum; operand 6 is the
;; <DWI>mode constant that appears in the comparison.  The insn condition
;; accepts the pattern only when operand 2 is a CONST_INT and operand 6
;; equals operand 2 zero-extended to <DWI>mode (a CONST_INT when <MODE> is
;; SImode or operand 2 is non-negative, otherwise a two-element
;; CONST_WIDE_INT with a zero high element).
;; Alternative 1 is the three-operand form, restricted to apx_ndd.
;; length_immediate is 1 when operand 2 is in [-128, 127], else 4.
8809 (define_insn "*addcarry<mode>_1"
8810 [(set (reg:CCC FLAGS_REG)
8811 (compare:CCC
8812 (zero_extend:<DWI>
8813 (plus:SWI48
8814 (plus:SWI48
8815 (match_operator:SWI48 5 "ix86_carry_flag_operator"
8816 [(match_operand 3 "flags_reg_operand") (const_int 0)])
8817 (match_operand:SWI48 1 "nonimmediate_operand" "%0,rm"))
8818 (match_operand:SWI48 2 "x86_64_immediate_operand" "e,e")))
8819 (plus:<DWI>
8820 (match_operand:<DWI> 6 "const_scalar_int_operand")
8821 (match_operator:<DWI> 4 "ix86_carry_flag_operator"
8822 [(match_dup 3) (const_int 0)]))))
8823 (set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r")
8824 (plus:SWI48 (plus:SWI48 (match_op_dup 5
8825 [(match_dup 3) (const_int 0)])
8826 (match_dup 1))
8827 (match_dup 2)))]
8828 "ix86_binary_operator_ok (PLUS, <MODE>mode, operands, TARGET_APX_NDD)
8829 && CONST_INT_P (operands[2])
8830 /* Check that operands[6] is operands[2] zero extended from
8831 <MODE>mode to <DWI>mode. */
8832 && ((<MODE>mode == SImode || INTVAL (operands[2]) >= 0)
8833 ? (CONST_INT_P (operands[6])
8834 && UINTVAL (operands[6]) == (UINTVAL (operands[2])
8835 & GET_MODE_MASK (<MODE>mode)))
8836 : (CONST_WIDE_INT_P (operands[6])
8837 && CONST_WIDE_INT_NUNITS (operands[6]) == 2
8838 && ((unsigned HOST_WIDE_INT) CONST_WIDE_INT_ELT (operands[6], 0)
8839 == UINTVAL (operands[2]))
8840 && CONST_WIDE_INT_ELT (operands[6], 1) == 0))"
8841 "@
8842 adc{<imodesuffix>}\t{%2, %0|%0, %2}
8843 adc{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
8844 [(set_attr "isa" "*,apx_ndd")
8845 (set_attr "type" "alu")
8846 (set_attr "use_carry" "1")
8847 (set_attr "pent_pair" "pu")
8848 (set_attr "mode" "<MODE>")
8849 (set (attr "length_immediate")
8850 (if_then_else (match_test "IN_RANGE (INTVAL (operands[2]), -128, 127)")
8851 (const_string "1")
8852 (const_string "4")))])
8853
;; operand 0 = operand 1 - carry - operand 2, emitted as SBB.  The carry is
;; operator 4 (an ix86_carry_flag_operator) applied to flags register
;; operand 3 and zero.  The flags register is clobbered.
;; Alternatives 0 and 1 tie operand 1 to operand 0; alternatives 2 and 3
;; are three-operand forms restricted to the apx_ndd isa.
8854 (define_insn "@sub<mode>3_carry"
8855 [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>,r,r")
8856 (minus:SWI
8857 (minus:SWI
8858 (match_operand:SWI 1 "nonimmediate_operand" "0,0,rm,r")
8859 (match_operator:SWI 4 "ix86_carry_flag_operator"
8860 [(match_operand 3 "flags_reg_operand") (const_int 0)]))
8861 (match_operand:SWI 2 "<general_operand>" "<r><i>,<m>,r<i>,<m>")))
8862 (clobber (reg:CC FLAGS_REG))]
8863 "ix86_binary_operator_ok (MINUS, <MODE>mode, operands, TARGET_APX_NDD)"
8864 "@
8865 sbb{<imodesuffix>}\t{%2, %0|%0, %2}
8866 sbb{<imodesuffix>}\t{%2, %0|%0, %2}
8867 sbb{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
8868 sbb{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
8869 [(set_attr "isa" "*,*,apx_ndd,apx_ndd")
8870 (set_attr "type" "alu")
8871 (set_attr "use_carry" "1")
8872 (set_attr "pent_pair" "pu")
8873 (set_attr "mode" "<MODE>")])
8874
;; Matches the three-insn sequence
;;   operand 0 (reg) = operand 1 (mem)
;;   operand 0 = operand 0 - carry - operand 2 (mem)   [flags clobbered]
;;   operand 1 = operand 0
;; and replaces it with a load of operand 2 into operand 0 followed by a
;; carry subtract whose destination and minuend are memory operand 1.
;; Requires TARGET_READ_MODIFY_WRITE or optimizing for size,
;; peep2_reg_dead_p (3, operands[0]), and that operand 0 overlaps neither
;; memory operand.
8875 (define_peephole2
8876 [(set (match_operand:SWI 0 "general_reg_operand")
8877 (match_operand:SWI 1 "memory_operand"))
8878 (parallel [(set (match_dup 0)
8879 (minus:SWI
8880 (minus:SWI
8881 (match_dup 0)
8882 (match_operator:SWI 4 "ix86_carry_flag_operator"
8883 [(match_operand 3 "flags_reg_operand")
8884 (const_int 0)]))
8885 (match_operand:SWI 2 "memory_operand")))
8886 (clobber (reg:CC FLAGS_REG))])
8887 (set (match_dup 1) (match_dup 0))]
8888 "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
8889 && peep2_reg_dead_p (3, operands[0])
8890 && !reg_overlap_mentioned_p (operands[0], operands[1])
8891 && !reg_overlap_mentioned_p (operands[0], operands[2])"
8892 [(set (match_dup 0) (match_dup 2))
8893 (parallel [(set (match_dup 1)
8894 (minus:SWI (minus:SWI (match_dup 1)
8895 (match_op_dup 4
8896 [(match_dup 3) (const_int 0)]))
8897 (match_dup 0)))
8898 (clobber (reg:CC FLAGS_REG))])])
8899
;; Four-insn variant of the load / carry-subtract / store peephole: the
;; difference in operand 0 is first copied to register operand 5, and
;; operand 5 is what gets stored back to memory operand 1.
;; The replacement loads operand 2 into operand 0 and performs the carry
;; subtract directly on memory operand 1.  In addition to the target/size
;; check, it requires peep2_reg_dead_p (3, operands[0]),
;; peep2_reg_dead_p (4, operands[5]), and that neither operand 0 nor
;; operand 5 overlaps the memory operands tested in the condition.
8900 (define_peephole2
8901 [(set (match_operand:SWI 0 "general_reg_operand")
8902 (match_operand:SWI 1 "memory_operand"))
8903 (parallel [(set (match_dup 0)
8904 (minus:SWI
8905 (minus:SWI
8906 (match_dup 0)
8907 (match_operator:SWI 4 "ix86_carry_flag_operator"
8908 [(match_operand 3 "flags_reg_operand")
8909 (const_int 0)]))
8910 (match_operand:SWI 2 "memory_operand")))
8911 (clobber (reg:CC FLAGS_REG))])
8912 (set (match_operand:SWI 5 "general_reg_operand") (match_dup 0))
8913 (set (match_dup 1) (match_dup 5))]
8914 "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
8915 && peep2_reg_dead_p (3, operands[0])
8916 && peep2_reg_dead_p (4, operands[5])
8917 && !reg_overlap_mentioned_p (operands[0], operands[1])
8918 && !reg_overlap_mentioned_p (operands[0], operands[2])
8919 && !reg_overlap_mentioned_p (operands[5], operands[1])"
8920 [(set (match_dup 0) (match_dup 2))
8921 (parallel [(set (match_dup 1)
8922 (minus:SWI (minus:SWI (match_dup 1)
8923 (match_op_dup 4
8924 [(match_dup 3) (const_int 0)]))
8925 (match_dup 0)))
8926 (clobber (reg:CC FLAGS_REG))])])
8927
;; operand 0 = operand 1 - carry, emitted as SBB with immediate 0.
;; Operand 1 is tied to operand 0 by its "0" constraint; when operand 0 is
;; a MEM the condition also requires operands 0 and 1 to be rtx_equal_p.
;; The flags register is clobbered.
8928 (define_insn "*sub<mode>3_carry_0"
8929 [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
8930 (minus:SWI
8931 (match_operand:SWI 1 "nonimmediate_operand" "0")
8932 (match_operator:SWI 2 "ix86_carry_flag_operator"
8933 [(reg FLAGS_REG) (const_int 0)])))
8934 (clobber (reg:CC FLAGS_REG))]
8935 "!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[1])"
8936 "sbb{<imodesuffix>}\t{$0, %0|%0, 0}"
8937 [(set_attr "type" "alu")
8938 (set_attr "use_carry" "1")
8939 (set_attr "pent_pair" "pu")
8940 (set_attr "mode" "<MODE>")])
8941
;; operand 0 = operand 1 - (operator 2), where operator 2 is an
;; ix86_carry_flag_unset_operator on the flags register; emitted as ADC
;; with immediate -1.
;; NOTE(review): presumably operator 2 evaluates to the complement of the
;; carry flag, which makes x - !CF equal to x + (-1) + CF -- confirm
;; against the predicate definition.
;; Operand 1 is tied to operand 0; a MEM destination must be rtx_equal_p
;; to operand 1.  The flags register is clobbered.
8942 (define_insn "*sub<mode>3_carry_0r"
8943 [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
8944 (minus:SWI
8945 (match_operand:SWI 1 "nonimmediate_operand" "0")
8946 (match_operator:SWI 2 "ix86_carry_flag_unset_operator"
8947 [(reg FLAGS_REG) (const_int 0)])))
8948 (clobber (reg:CC FLAGS_REG))]
8949 "!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[1])"
8950 "adc{<imodesuffix>}\t{$-1, %0|%0, -1}"
8951 [(set_attr "type" "alu")
8952 (set_attr "use_carry" "1")
8953 (set_attr "pent_pair" "pu")
8954 (set_attr "mode" "<MODE>")])
8955
;; SImode carry subtract (operand 1 - carry - operand 2) for TARGET_64BIT
;; whose result is zero-extended into DImode register operand 0; emitted
;; as SBB{l} on the %k0 form of the destination.  The flags register is
;; clobbered.  Alternative 0 ties operand 1 to operand 0; alternatives 1
;; and 2 are three-operand forms restricted to the apx_ndd isa.
8956 (define_insn "*subsi3_carry_zext"
8957 [(set (match_operand:DI 0 "register_operand" "=r,r,r")
8958 (zero_extend:DI
8959 (minus:SI
8960 (minus:SI
8961 (match_operand:SI 1 "nonimmediate_operand" "0,r,rm")
8962 (match_operator:SI 3 "ix86_carry_flag_operator"
8963 [(reg FLAGS_REG) (const_int 0)]))
8964 (match_operand:SI 2 "x86_64_general_operand" "rBMe,rBMe,re"))))
8965 (clobber (reg:CC FLAGS_REG))]
8966 "TARGET_64BIT
8967 && ix86_binary_operator_ok (MINUS, SImode, operands, TARGET_APX_NDD)"
8968 "@
8969 sbb{l}\t{%2, %k0|%k0, %2}
8970 sbb{l}\t{%2, %1, %k0|%k0, %1, %2}
8971 sbb{l}\t{%2, %1, %k0|%k0, %1, %2}"
8972 [(set_attr "isa" "*,apx_ndd,apx_ndd")
8973 (set_attr "type" "alu")
8974 (set_attr "use_carry" "1")
8975 (set_attr "pent_pair" "pu")
8976 (set_attr "mode" "SI")])
8977
;; SImode operand 1 - carry for TARGET_64BIT, zero-extended into DImode
;; register operand 0; emitted as SBB{l} with immediate 0.
;; Single alternative: operand 1 is a register tied to operand 0.
;; The flags register is clobbered.
8978 (define_insn "*subsi3_carry_zext_0"
8979 [(set (match_operand:DI 0 "register_operand" "=r")
8980 (zero_extend:DI
8981 (minus:SI
8982 (match_operand:SI 1 "register_operand" "0")
8983 (match_operator:SI 2 "ix86_carry_flag_operator"
8984 [(reg FLAGS_REG) (const_int 0)]))))
8985 (clobber (reg:CC FLAGS_REG))]
8986 "TARGET_64BIT"
8987 "sbb{l}\t{$0, %k0|%k0, 0}"
8988 [(set_attr "type" "alu")
8989 (set_attr "use_carry" "1")
8990 (set_attr "pent_pair" "pu")
8991 (set_attr "mode" "SI")])
8992
;; SImode operand 1 - (operator 2) for TARGET_64BIT, zero-extended into
;; DImode register operand 0, where operator 2 is an
;; ix86_carry_flag_unset_operator; emitted as ADC{l} with immediate -1.
;; NOTE(review): presumably operator 2 is the complement of the carry
;; flag, so x - !CF == x + (-1) + CF -- confirm against the predicate.
;; Single alternative: operand 1 is a register tied to operand 0.
8993 (define_insn "*subsi3_carry_zext_0r"
8994 [(set (match_operand:DI 0 "register_operand" "=r")
8995 (zero_extend:DI
8996 (minus:SI
8997 (match_operand:SI 1 "register_operand" "0")
8998 (match_operator:SI 2 "ix86_carry_flag_unset_operator"
8999 [(reg FLAGS_REG) (const_int 0)]))))
9000 (clobber (reg:CC FLAGS_REG))]
9001 "TARGET_64BIT"
9002 "adc{l}\t{$-1, %k0|%k0, -1}"
9003 [(set_attr "type" "alu")
9004 (set_attr "use_carry" "1")
9005 (set_attr "pent_pair" "pu")
9006 (set_attr "mode" "SI")])
9007
;; SBB used only for its CCCmode flags result: the flags are set from
;; comparing zero-extended operand 1 with the <DWI>mode sum of the carry
;; (an LTU test of the flags register) and zero-extended operand 2.
;; The arithmetic result is not kept: operand 0 is a clobbered scratch
;; register, and operand 1 is tied to it by the "0" constraint.
9008 (define_insn "@sub<mode>3_carry_ccc"
9009 [(set (reg:CCC FLAGS_REG)
9010 (compare:CCC
9011 (zero_extend:<DWI> (match_operand:DWIH 1 "register_operand" "0"))
9012 (plus:<DWI>
9013 (ltu:<DWI> (reg:CC FLAGS_REG) (const_int 0))
9014 (zero_extend:<DWI>
9015 (match_operand:DWIH 2 "x86_64_sext_operand" "rmWe")))))
9016 (clobber (match_scratch:DWIH 0 "=r"))]
9017 ""
9018 "sbb{<imodesuffix>}\t{%2, %0|%0, %2}"
9019 [(set_attr "type" "alu")
9020 (set_attr "mode" "<MODE>")])
9021
;; Immediate form of the flags-only SBB: operand 2 is a <DWI>mode constant
;; accepted by x86_64_dwzext_immediate_operand rather than a zero-extended
;; <MODE>mode operand.  The output code derives operand 3, the <MODE>mode
;; subreg of operand 2 at byte offset 0, and prints that as the SBB source.
;; Operand 0 is a clobbered scratch register tied to operand 1.
9022 (define_insn "*sub<mode>3_carry_ccc_1"
9023 [(set (reg:CCC FLAGS_REG)
9024 (compare:CCC
9025 (zero_extend:<DWI> (match_operand:DWIH 1 "register_operand" "0"))
9026 (plus:<DWI>
9027 (ltu:<DWI> (reg:CC FLAGS_REG) (const_int 0))
9028 (match_operand:<DWI> 2 "x86_64_dwzext_immediate_operand" "Wf"))))
9029 (clobber (match_scratch:DWIH 0 "=r"))]
9030 ""
9031 {
9032 operands[3] = simplify_subreg (<MODE>mode, operands[2], <DWI>mode, 0);
9033 return "sbb{<imodesuffix>}\t{%3, %0|%0, %3}";
9034 }
9035 [(set_attr "type" "alu")
9036 (set_attr "mode" "<MODE>")])
9037
9038 ;; The sign flag is set from the
9039 ;; (compare (match_dup 1) (plus:DWIH (ltu:DWIH ...) (match_dup 2)))
9040 ;; result, the overflow flag likewise, but the overflow flag is also
9041 ;; set if the (plus:DWIH (ltu:DWIH ...) (match_dup 2)) overflows.
;; The flags result is therefore expressed as an UNSPEC_SBB in CCGZmode
;; over operand 1, operand 2 and the carry (an LTU test of the flags
;; register).  The arithmetic result is discarded: operand 0 is a
;; clobbered scratch register tied to operand 1.
9042 (define_insn "@sub<mode>3_carry_ccgz"
9043 [(set (reg:CCGZ FLAGS_REG)
9044 (unspec:CCGZ [(match_operand:DWIH 1 "register_operand" "0")
9045 (match_operand:DWIH 2 "x86_64_general_operand" "rBMe")
9046 (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))]
9047 UNSPEC_SBB))
9048 (clobber (match_scratch:DWIH 0 "=r"))]
9049 ""
9050 "sbb{<imodesuffix>}\t{%2, %0|%0, %2}"
9051 [(set_attr "type" "alu")
9052 (set_attr "mode" "<MODE>")])
9053
;; Subtract with borrow-in that also sets the flags in CCCmode.
;; The first set compares zero-extended operand 1 with the <DWI>mode sum
;; of the carry (operator 4) and zero-extended operand 2; the second set
;; stores operand 1 - carry - operand 2 in operand 0.  Operators 4 and 5
;; are the <DWI>mode and <MODE>mode carry tests on flags operand 3.
;; Alternatives 0 and 1 tie operand 1 to operand 0; alternatives 2 and 3
;; are three-operand forms restricted to the apx_ndd isa.
9054 (define_insn "subborrow<mode>"
9055 [(set (reg:CCC FLAGS_REG)
9056 (compare:CCC
9057 (zero_extend:<DWI>
9058 (match_operand:SWI48 1 "nonimmediate_operand" "0,0,r,rm"))
9059 (plus:<DWI>
9060 (match_operator:<DWI> 4 "ix86_carry_flag_operator"
9061 [(match_operand 3 "flags_reg_operand") (const_int 0)])
9062 (zero_extend:<DWI>
9063 (match_operand:SWI48 2 "nonimmediate_operand" "r,rm,rm,r")))))
9064 (set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r,r")
9065 (minus:SWI48 (minus:SWI48
9066 (match_dup 1)
9067 (match_operator:SWI48 5 "ix86_carry_flag_operator"
9068 [(match_dup 3) (const_int 0)]))
9069 (match_dup 2)))]
9070 "ix86_binary_operator_ok (MINUS, <MODE>mode, operands, TARGET_APX_NDD)"
9071 "@
9072 sbb{<imodesuffix>}\t{%2, %0|%0, %2}
9073 sbb{<imodesuffix>}\t{%2, %0|%0, %2}
9074 sbb{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
9075 sbb{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
9076 [(set_attr "isa" "*,*,apx_ndd,apx_ndd")
9077 (set_attr "type" "alu")
9078 (set_attr "use_carry" "1")
9079 (set_attr "pent_pair" "pu")
9080 (set_attr "mode" "<MODE>")])
9081
;; Matches a load of memory operand 1 into register operand 0, a
;; flag-setting borrow subtract of memory operand 2 from operand 0, and a
;; store of operand 0 back to operand 1.
;; The replacement loads operand 2 into operand 0 and performs the
;; flag-setting borrow subtract with memory operand 1 as destination and
;; minuend.  Requires TARGET_READ_MODIFY_WRITE or optimizing for size,
;; peep2_reg_dead_p (3, operands[0]), and that operand 0 overlaps neither
;; memory operand.
9082 (define_peephole2
9083 [(set (match_operand:SWI48 0 "general_reg_operand")
9084 (match_operand:SWI48 1 "memory_operand"))
9085 (parallel [(set (reg:CCC FLAGS_REG)
9086 (compare:CCC
9087 (zero_extend:<DWI> (match_dup 0))
9088 (plus:<DWI>
9089 (match_operator:<DWI> 4 "ix86_carry_flag_operator"
9090 [(match_operand 3 "flags_reg_operand") (const_int 0)])
9091 (zero_extend:<DWI>
9092 (match_operand:SWI48 2 "memory_operand")))))
9093 (set (match_dup 0)
9094 (minus:SWI48
9095 (minus:SWI48
9096 (match_dup 0)
9097 (match_operator:SWI48 5 "ix86_carry_flag_operator"
9098 [(match_dup 3) (const_int 0)]))
9099 (match_dup 2)))])
9100 (set (match_dup 1) (match_dup 0))]
9101 "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
9102 && peep2_reg_dead_p (3, operands[0])
9103 && !reg_overlap_mentioned_p (operands[0], operands[1])
9104 && !reg_overlap_mentioned_p (operands[0], operands[2])"
9105 [(set (match_dup 0) (match_dup 2))
9106 (parallel [(set (reg:CCC FLAGS_REG)
9107 (compare:CCC
9108 (zero_extend:<DWI> (match_dup 1))
9109 (plus:<DWI> (match_op_dup 4
9110 [(match_dup 3) (const_int 0)])
9111 (zero_extend:<DWI> (match_dup 0)))))
9112 (set (match_dup 1)
9113 (minus:SWI48 (minus:SWI48 (match_dup 1)
9114 (match_op_dup 5
9115 [(match_dup 3) (const_int 0)]))
9116 (match_dup 0)))])])
9117
;; Matches two register loads from memory (operand 6 from operand 7,
;; operand 8 from operand 9), a register-register flag-setting borrow
;; subtract (operand 0 minus operand 2), and a store of operand 0 to
;; memory operand 1.
;; The condition accepts either load order: the register loaded from
;; operand 1's memory must be the minuend (operand 0) and the other loaded
;; register the subtrahend (operand 2).
;; The replacement loads the subtrahend's memory into operand 0 and does
;; the borrow subtract on memory operand 1.  The preparation code sets
;; operands[9] to operands[7] when the loads were in the second order, so
;; operand 9 always names the subtrahend's memory.
9118 (define_peephole2
9119 [(set (match_operand:SWI48 6 "general_reg_operand")
9120 (match_operand:SWI48 7 "memory_operand"))
9121 (set (match_operand:SWI48 8 "general_reg_operand")
9122 (match_operand:SWI48 9 "memory_operand"))
9123 (parallel [(set (reg:CCC FLAGS_REG)
9124 (compare:CCC
9125 (zero_extend:<DWI>
9126 (match_operand:SWI48 0 "general_reg_operand"))
9127 (plus:<DWI>
9128 (match_operator:<DWI> 4 "ix86_carry_flag_operator"
9129 [(match_operand 3 "flags_reg_operand") (const_int 0)])
9130 (zero_extend:<DWI>
9131 (match_operand:SWI48 2 "general_reg_operand")))))
9132 (set (match_dup 0)
9133 (minus:SWI48
9134 (minus:SWI48
9135 (match_dup 0)
9136 (match_operator:SWI48 5 "ix86_carry_flag_operator"
9137 [(match_dup 3) (const_int 0)]))
9138 (match_dup 2)))])
9139 (set (match_operand:SWI48 1 "memory_operand") (match_dup 0))]
9140 "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
9141 && peep2_reg_dead_p (4, operands[0])
9142 && peep2_reg_dead_p (3, operands[2])
9143 && !reg_overlap_mentioned_p (operands[0], operands[1])
9144 && !reg_overlap_mentioned_p (operands[2], operands[1])
9145 && !reg_overlap_mentioned_p (operands[6], operands[9])
9146 && (rtx_equal_p (operands[6], operands[0])
9147 ? (rtx_equal_p (operands[7], operands[1])
9148 && rtx_equal_p (operands[8], operands[2]))
9149 : (rtx_equal_p (operands[8], operands[0])
9150 && rtx_equal_p (operands[9], operands[1])
9151 && rtx_equal_p (operands[6], operands[2])))"
9152 [(set (match_dup 0) (match_dup 9))
9153 (parallel [(set (reg:CCC FLAGS_REG)
9154 (compare:CCC
9155 (zero_extend:<DWI> (match_dup 1))
9156 (plus:<DWI> (match_op_dup 4
9157 [(match_dup 3) (const_int 0)])
9158 (zero_extend:<DWI> (match_dup 0)))))
9159 (set (match_dup 1)
9160 (minus:SWI48 (minus:SWI48 (match_dup 1)
9161 (match_op_dup 5
9162 [(match_dup 3) (const_int 0)]))
9163 (match_dup 0)))])]
9164 {
9165 if (!rtx_equal_p (operands[6], operands[0]))
9166 operands[9] = operands[7];
9167 })
9168
;; Variant of the two-load / borrow-subtract / store peephole in which two
;; insns sit between the subtract and the store: QImode register operand
;; 10 is set from an LTU test of the CCC flags, and register operand 11 is
;; set to the zero extension of operand 10.
;; The replacement loads the subtrahend's memory into operand 0, does the
;; borrow subtract on memory operand 1, and then re-emits the LTU set and
;; zero extension.  As in the condition, either load order is accepted;
;; the preparation code sets operands[9] to operands[7] for the second
;; order.  Requires peep2_reg_dead_p (6, operands[0]) and
;; peep2_reg_dead_p (3, operands[2]) plus the non-overlap checks listed.
9169 (define_peephole2
9170 [(set (match_operand:SWI48 6 "general_reg_operand")
9171 (match_operand:SWI48 7 "memory_operand"))
9172 (set (match_operand:SWI48 8 "general_reg_operand")
9173 (match_operand:SWI48 9 "memory_operand"))
9174 (parallel [(set (reg:CCC FLAGS_REG)
9175 (compare:CCC
9176 (zero_extend:<DWI>
9177 (match_operand:SWI48 0 "general_reg_operand"))
9178 (plus:<DWI>
9179 (match_operator:<DWI> 4 "ix86_carry_flag_operator"
9180 [(match_operand 3 "flags_reg_operand") (const_int 0)])
9181 (zero_extend:<DWI>
9182 (match_operand:SWI48 2 "general_reg_operand")))))
9183 (set (match_dup 0)
9184 (minus:SWI48
9185 (minus:SWI48
9186 (match_dup 0)
9187 (match_operator:SWI48 5 "ix86_carry_flag_operator"
9188 [(match_dup 3) (const_int 0)]))
9189 (match_dup 2)))])
9190 (set (match_operand:QI 10 "general_reg_operand")
9191 (ltu:QI (reg:CCC FLAGS_REG) (const_int 0)))
9192 (set (match_operand:SWI48 11 "general_reg_operand")
9193 (zero_extend:SWI48 (match_dup 10)))
9194 (set (match_operand:SWI48 1 "memory_operand") (match_dup 0))]
9195 "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
9196 && peep2_reg_dead_p (6, operands[0])
9197 && peep2_reg_dead_p (3, operands[2])
9198 && !reg_overlap_mentioned_p (operands[0], operands[1])
9199 && !reg_overlap_mentioned_p (operands[2], operands[1])
9200 && !reg_overlap_mentioned_p (operands[6], operands[9])
9201 && !reg_overlap_mentioned_p (operands[0], operands[10])
9202 && !reg_overlap_mentioned_p (operands[10], operands[1])
9203 && !reg_overlap_mentioned_p (operands[0], operands[11])
9204 && !reg_overlap_mentioned_p (operands[11], operands[1])
9205 && (rtx_equal_p (operands[6], operands[0])
9206 ? (rtx_equal_p (operands[7], operands[1])
9207 && rtx_equal_p (operands[8], operands[2]))
9208 : (rtx_equal_p (operands[8], operands[0])
9209 && rtx_equal_p (operands[9], operands[1])
9210 && rtx_equal_p (operands[6], operands[2])))"
9211 [(set (match_dup 0) (match_dup 9))
9212 (parallel [(set (reg:CCC FLAGS_REG)
9213 (compare:CCC
9214 (zero_extend:<DWI> (match_dup 1))
9215 (plus:<DWI> (match_op_dup 4
9216 [(match_dup 3) (const_int 0)])
9217 (zero_extend:<DWI> (match_dup 0)))))
9218 (set (match_dup 1)
9219 (minus:SWI48 (minus:SWI48 (match_dup 1)
9220 (match_op_dup 5
9221 [(match_dup 3) (const_int 0)]))
9222 (match_dup 0)))])
9223 (set (match_dup 10) (ltu:QI (reg:CCC FLAGS_REG) (const_int 0)))
9224 (set (match_dup 11) (zero_extend:SWI48 (match_dup 10)))]
9225 {
9226 if (!rtx_equal_p (operands[6], operands[0]))
9227 operands[9] = operands[7];
9228 })
9229
;; Expander for a subtract without borrow-in that also sets the flags:
;; the flags are set in CCmode from comparing operand 1 with operand 2,
;; and operand 1 - operand 2 is stored in register operand 0.
9230 (define_expand "subborrow<mode>_0"
9231 [(parallel
9232 [(set (reg:CC FLAGS_REG)
9233 (compare:CC
9234 (match_operand:SWI48 1 "nonimmediate_operand")
9235 (match_operand:SWI48 2 "<general_operand>")))
9236 (set (match_operand:SWI48 0 "register_operand")
9237 (minus:SWI48 (match_dup 1) (match_dup 2)))])]
9238 "ix86_binary_operator_ok (MINUS, <MODE>mode, operands, TARGET_APX_NDD)")
9239
;; Expander taking five operands: operand 0 receives the sum of operands 2
;; and 3 plus the carry-in operand 4, and operand 1 receives the carry-out.
;; When operand 4 is const0_rtx the addcarry<mode>_0 pattern is emitted;
;; otherwise ix86_expand_carry is called on operand 4 and addcarry<mode>
;; is emitted with LTU tests of the CCCmode flags register as its carry
;; operators.  In both cases the carry-out is then read as a QImode LTU
;; test of the CCCmode flags into a fresh pseudo and zero-extended into
;; operand 1.
9240 (define_expand "uaddc<mode>5"
9241 [(match_operand:SWI48 0 "register_operand")
9242 (match_operand:SWI48 1 "register_operand")
9243 (match_operand:SWI48 2 "register_operand")
9244 (match_operand:SWI48 3 "register_operand")
9245 (match_operand:SWI48 4 "nonmemory_operand")]
9246 ""
9247 {
9248 rtx cf = gen_rtx_REG (CCCmode, FLAGS_REG), pat, pat2;
9249 if (operands[4] == const0_rtx)
9250 emit_insn (gen_addcarry<mode>_0 (operands[0], operands[2], operands[3]));
9251 else
9252 {
9253 ix86_expand_carry (operands[4]);
9254 pat = gen_rtx_LTU (<DWI>mode, cf, const0_rtx);
9255 pat2 = gen_rtx_LTU (<MODE>mode, cf, const0_rtx);
9256 emit_insn (gen_addcarry<mode> (operands[0], operands[2], operands[3],
9257 cf, pat, pat2));
9258 }
9259 rtx cc = gen_reg_rtx (QImode);
9260 pat = gen_rtx_LTU (QImode, cf, const0_rtx);
9261 emit_insn (gen_rtx_SET (cc, pat));
9262 emit_insn (gen_zero_extendqi<mode>2 (operands[1], cc));
9263 DONE;
9264 })
9265
;; Expander taking five operands: operand 0 receives operand 2 minus
;; operand 3 minus the borrow-in operand 4, and operand 1 receives the
;; borrow-out.
;; When operand 4 is const0_rtx the subborrow<mode>_0 pattern is emitted
;; and the flags register is referenced in CCmode; otherwise
;; ix86_expand_carry is called on operand 4, subborrow<mode> is emitted,
;; and the flags register is referenced in CCCmode.  In both cases the
;; borrow-out is then read as a QImode LTU test of that flags rtx into a
;; fresh pseudo and zero-extended into operand 1.
9266 (define_expand "usubc<mode>5"
9267 [(match_operand:SWI48 0 "register_operand")
9268 (match_operand:SWI48 1 "register_operand")
9269 (match_operand:SWI48 2 "register_operand")
9270 (match_operand:SWI48 3 "register_operand")
9271 (match_operand:SWI48 4 "nonmemory_operand")]
9272 ""
9273 {
9274 rtx cf, pat, pat2;
9275 if (operands[4] == const0_rtx)
9276 {
9277 cf = gen_rtx_REG (CCmode, FLAGS_REG);
9278 emit_insn (gen_subborrow<mode>_0 (operands[0], operands[2],
9279 operands[3]));
9280 }
9281 else
9282 {
9283 cf = gen_rtx_REG (CCCmode, FLAGS_REG);
9284 ix86_expand_carry (operands[4]);
9285 pat = gen_rtx_LTU (<DWI>mode, cf, const0_rtx);
9286 pat2 = gen_rtx_LTU (<MODE>mode, cf, const0_rtx);
9287 emit_insn (gen_subborrow<mode> (operands[0], operands[2], operands[3],
9288 cf, pat, pat2));
9289 }
9290 rtx cc = gen_reg_rtx (QImode);
9291 pat = gen_rtx_LTU (QImode, cf, const0_rtx);
9292 emit_insn (gen_rtx_SET (cc, pat));
9293 emit_insn (gen_zero_extendqi<mode>2 (operands[1], cc));
9294 DONE;
9295 })
9296
;; Mode iterator over the CC and CCC flag modes; used for the source
;; flags-register references in the *setcc_qi_* splitters.
9297 (define_mode_iterator CC_CCC [CC CCC])
9298
9299 ;; Pre-reload splitter to optimize
9300 ;; *setcc_qi followed by *addqi3_cconly_overflow_1 with the same QI
9301 ;; operand and no intervening flags modifications into nothing.
;; The pattern is valid only while ix86_pre_reload_split () holds, has "#"
;; as its output template, and always splits; the split emits a
;; NOTE_INSN_DELETED note and no insns.
9302 (define_insn_and_split "*setcc_qi_addqi3_cconly_overflow_1_<mode>"
9303 [(set (reg:CCC FLAGS_REG)
9304 (compare:CCC (neg:QI (geu:QI (reg:CC_CCC FLAGS_REG) (const_int 0)))
9305 (ltu:QI (reg:CC_CCC FLAGS_REG) (const_int 0))))]
9306 "ix86_pre_reload_split ()"
9307 "#"
9308 "&& 1"
9309 [(const_int 0)]
9310 "emit_note (NOTE_INSN_DELETED); DONE;")
9311
9312 ;; Set the carry flag from the carry flag.
;; The source and destination are both the CCCmode flags register, so the
;; pre-reload split replaces the insn with a NOTE_INSN_DELETED note.
9313 (define_insn_and_split "*setccc"
9314 [(set (reg:CCC FLAGS_REG)
9315 (reg:CCC FLAGS_REG))]
9316 "ix86_pre_reload_split ()"
9317 "#"
9318 "&& 1"
9319 [(const_int 0)]
9320 "emit_note (NOTE_INSN_DELETED); DONE;")
9321
9322 ;; Set the carry flag from the carry flag.
;; Here the source is written as a CCCmode LTU test of the flags register
;; (in CC or CCC mode) against zero.  The pre-reload split replaces the
;; insn with a NOTE_INSN_DELETED note.
9323 (define_insn_and_split "*setcc_qi_negqi_ccc_1_<mode>"
9324 [(set (reg:CCC FLAGS_REG)
9325 (ltu:CCC (reg:CC_CCC FLAGS_REG) (const_int 0)))]
9326 "ix86_pre_reload_split ()"
9327 "#"
9328 "&& 1"
9329 [(const_int 0)]
9330 "emit_note (NOTE_INSN_DELETED); DONE;")
9331
9332 ;; Set the carry flag from the carry flag.
;; Here the source is an UNSPEC_CC_NE over a QImode LTU test of the flags
;; register (in CC or CCC mode) and zero.  The pre-reload split replaces
;; the insn with a NOTE_INSN_DELETED note.
9333 (define_insn_and_split "*setcc_qi_negqi_ccc_2_<mode>"
9334 [(set (reg:CCC FLAGS_REG)
9335 (unspec:CCC [(ltu:QI (reg:CC_CCC FLAGS_REG) (const_int 0))
9336 (const_int 0)] UNSPEC_CC_NE))]
9337 "ix86_pre_reload_split ()"
9338 "#"
9339 "&& 1"
9340 [(const_int 0)]
9341 "emit_note (NOTE_INSN_DELETED); DONE;")
9342 \f
9343 ;; Overflow setting add instructions
9344
;; Expander for a QImode add used only for its CCCmode flags: the flags
;; are set from comparing (operand 0 + operand 1) with operand 0, and the
;; sum goes to a clobbered QImode scratch.  Rejected when both operands
;; are MEMs.
9345 (define_expand "addqi3_cconly_overflow"
9346 [(parallel
9347 [(set (reg:CCC FLAGS_REG)
9348 (compare:CCC
9349 (plus:QI
9350 (match_operand:QI 0 "nonimmediate_operand")
9351 (match_operand:QI 1 "general_operand"))
9352 (match_dup 0)))
9353 (clobber (scratch:QI))])]
9354 "!(MEM_P (operands[0]) && MEM_P (operands[1]))")
9355
;; ADD used only for its CCCmode flags: the flags are set from comparing
;; (operand 1 + operand 2) with operand 1, and the sum is written to
;; scratch operand 0, which is clobbered.  Rejected when operands 1 and 2
;; are both MEMs.  Alternative 0 ties operand 1 to the scratch;
;; alternatives 1 and 2 are three-operand forms restricted to apx_ndd.
9356 (define_insn "*add<mode>3_cconly_overflow_1"
9357 [(set (reg:CCC FLAGS_REG)
9358 (compare:CCC
9359 (plus:SWI
9360 (match_operand:SWI 1 "nonimmediate_operand" "%0,r,rm")
9361 (match_operand:SWI 2 "<general_operand>" "<g>,<g>,re"))
9362 (match_dup 1)))
9363 (clobber (match_scratch:SWI 0 "=<r>,r,r"))]
9364 "!(MEM_P (operands[1]) && MEM_P (operands[2]))"
9365 "@
9366 add{<imodesuffix>}\t{%2, %0|%0, %2}
9367 add{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
9368 add{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
9369 [(set_attr "isa" "*,apx_ndd,apx_ndd")
9370 (set_attr "type" "alu")
9371 (set_attr "mode" "<MODE>")])
9372
;; Add that produces both the sum and the carry: FLAGS_REG (CCCmode) is set
;; from comparing operand 1 + operand 2 against operand 1, and operand 0
;; receives the sum.  Alternatives 0 and 1 are two-operand adds with
;; operand 1 tied to the destination; alternatives 2 and 3 emit the
;; three-operand form and are restricted to the apx_ndd isa.  Operand
;; validity is delegated to ix86_binary_operator_ok.
9373 (define_insn "@add<mode>3_cc_overflow_1"
9374 [(set (reg:CCC FLAGS_REG)
9375 (compare:CCC
9376 (plus:SWI
9377 (match_operand:SWI 1 "nonimmediate_operand" "%0,0,rm,r")
9378 (match_operand:SWI 2 "<general_operand>" "<r><i>,<m>,r<i>,<m>"))
9379 (match_dup 1)))
9380 (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>,r,r")
9381 (plus:SWI (match_dup 1) (match_dup 2)))]
9382 "ix86_binary_operator_ok (PLUS, <MODE>mode, operands, TARGET_APX_NDD)"
9383 "@
9384 add{<imodesuffix>}\t{%2, %0|%0, %2}
9385 add{<imodesuffix>}\t{%2, %0|%0, %2}
9386 add{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
9387 add{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
9388 [(set_attr "isa" "*,*,apx_ndd,apx_ndd")
9389 (set_attr "type" "alu")
9390 (set_attr "mode" "<MODE>")])
9391
;; Peephole: a carry-setting add of memory operand 1 into register
;; operand 0, followed by a store of that register back to operand 1, is
;; replaced by a single carry-setting add whose destination is the memory
;; operand (operand 1 = operand 1 + operand 0).  Applied only when
;; TARGET_READ_MODIFY_WRITE is set or the insn is optimized for size, the
;; register passes peep2_reg_dead_p (2, ...), and the register is not
;; mentioned in the memory operand.
9392 (define_peephole2
9393 [(parallel [(set (reg:CCC FLAGS_REG)
9394 (compare:CCC
9395 (plus:SWI (match_operand:SWI 0 "general_reg_operand")
9396 (match_operand:SWI 1 "memory_operand"))
9397 (match_dup 0)))
9398 (set (match_dup 0) (plus:SWI (match_dup 0) (match_dup 1)))])
9399 (set (match_dup 1) (match_dup 0))]
9400 "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
9401 && peep2_reg_dead_p (2, operands[0])
9402 && !reg_overlap_mentioned_p (operands[0], operands[1])"
9403 [(parallel [(set (reg:CCC FLAGS_REG)
9404 (compare:CCC
9405 (plus:SWI (match_dup 1) (match_dup 0))
9406 (match_dup 1)))
9407 (set (match_dup 1) (plus:SWI (match_dup 1) (match_dup 0)))])])
9408
;; Peephole: the three-insn sequence "load memory operand 1 into register
;; operand 0; carry-setting add of memory operand 2 into that register;
;; store the register back to operand 1" is replaced by "load operand 2
;; into the register; carry-setting add of the register into memory
;; operand 1".  Applied only when TARGET_READ_MODIFY_WRITE is set or the
;; insn is optimized for size, the register passes
;; peep2_reg_dead_p (3, ...), and the register is mentioned in neither
;; memory operand.
9409 (define_peephole2
9410 [(set (match_operand:SWI 0 "general_reg_operand")
9411 (match_operand:SWI 1 "memory_operand"))
9412 (parallel [(set (reg:CCC FLAGS_REG)
9413 (compare:CCC
9414 (plus:SWI (match_dup 0)
9415 (match_operand:SWI 2 "memory_operand"))
9416 (match_dup 0)))
9417 (set (match_dup 0) (plus:SWI (match_dup 0) (match_dup 2)))])
9418 (set (match_dup 1) (match_dup 0))]
9419 "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
9420 && peep2_reg_dead_p (3, operands[0])
9421 && !reg_overlap_mentioned_p (operands[0], operands[1])
9422 && !reg_overlap_mentioned_p (operands[0], operands[2])"
9423 [(set (match_dup 0) (match_dup 2))
9424 (parallel [(set (reg:CCC FLAGS_REG)
9425 (compare:CCC
9426 (plus:SWI (match_dup 1) (match_dup 0))
9427 (match_dup 1)))
9428 (set (match_dup 1) (plus:SWI (match_dup 1) (match_dup 0)))])])
9429
;; 64-bit-only SImode add that sets FLAGS_REG (CCCmode) from comparing
;; operand 1 + operand 2 against operand 1 and stores the zero-extended
;; sum in DImode register operand 0.  The templates print operand 0 with
;; the %k modifier.  Alternatives 1 and 2 emit the three-operand form and
;; are restricted to the apx_ndd isa.
9430 (define_insn "*addsi3_zext_cc_overflow_1"
9431 [(set (reg:CCC FLAGS_REG)
9432 (compare:CCC
9433 (plus:SI
9434 (match_operand:SI 1 "nonimmediate_operand" "%0,r,rm")
9435 (match_operand:SI 2 "x86_64_general_operand" "rBMe,rBMe,re"))
9436 (match_dup 1)))
9437 (set (match_operand:DI 0 "register_operand" "=r,r,r")
9438 (zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))]
9439 "TARGET_64BIT
9440 && ix86_binary_operator_ok (PLUS, SImode, operands, TARGET_APX_NDD)"
9441 "@
9442 add{l}\t{%2, %k0|%k0, %2}
9443 add{l}\t{%2, %1, %k0|%k0, %1, %2}
9444 add{l}\t{%2, %1, %k0|%k0, %1, %2}"
9445 [(set_attr "isa" "*,apx_ndd,apx_ndd")
9446 (set_attr "type" "alu")
9447 (set_attr "mode" "SI")])
9448
;; Carry-only add like the *add<mode>3_cconly_overflow_1 pattern, except
;; that the sum is compared against operand 2 (match_dup 2) rather than
;; operand 1.  The sum goes to a scratch (operand 0); alternatives 1 and 2
;; emit the three-operand form and are restricted to the apx_ndd isa.
9449 (define_insn "*add<mode>3_cconly_overflow_2"
9450 [(set (reg:CCC FLAGS_REG)
9451 (compare:CCC
9452 (plus:SWI
9453 (match_operand:SWI 1 "nonimmediate_operand" "%0,r,rm")
9454 (match_operand:SWI 2 "<general_operand>" "<g>,<g>,re"))
9455 (match_dup 2)))
9456 (clobber (match_scratch:SWI 0 "=<r>,r,r"))]
9457 "!(MEM_P (operands[1]) && MEM_P (operands[2]))"
9458 "@
9459 add{<imodesuffix>}\t{%2, %0|%0, %2}
9460 add{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
9461 add{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
9462 [(set_attr "isa" "*,apx_ndd,apx_ndd")
9463 (set_attr "type" "alu")
9464 (set_attr "mode" "<MODE>")])
9465
;; Sum-and-carry add like the @add<mode>3_cc_overflow_1 pattern, except
;; that the sum is compared against operand 2 (match_dup 2) rather than
;; operand 1.  Operand 0 receives the sum; alternatives 2 and 3 emit the
;; three-operand form and are restricted to the apx_ndd isa.
9466 (define_insn "*add<mode>3_cc_overflow_2"
9467 [(set (reg:CCC FLAGS_REG)
9468 (compare:CCC
9469 (plus:SWI
9470 (match_operand:SWI 1 "nonimmediate_operand" "%0,0,rm,r")
9471 (match_operand:SWI 2 "<general_operand>" "<r><i>,<m>,r<i>,<m>"))
9472 (match_dup 2)))
9473 (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>,r,r")
9474 (plus:SWI (match_dup 1) (match_dup 2)))]
9475 "ix86_binary_operator_ok (PLUS, <MODE>mode, operands, TARGET_APX_NDD)"
9476 "@
9477 add{<imodesuffix>}\t{%2, %0|%0, %2}
9478 add{<imodesuffix>}\t{%2, %0|%0, %2}
9479 add{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
9480 add{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
9481 [(set_attr "isa" "*,*,apx_ndd,apx_ndd")
9482 (set_attr "type" "alu")
9483 (set_attr "mode" "<MODE>")])
9484
;; 64-bit-only zero-extending SImode add like the
;; *addsi3_zext_cc_overflow_1 pattern, except that the sum is compared
;; against operand 2 (match_dup 2) rather than operand 1.  Alternatives 1
;; and 2 emit the three-operand form and are restricted to the apx_ndd isa.
9485 (define_insn "*addsi3_zext_cc_overflow_2"
9486 [(set (reg:CCC FLAGS_REG)
9487 (compare:CCC
9488 (plus:SI
9489 (match_operand:SI 1 "nonimmediate_operand" "%0,r,rm")
9490 (match_operand:SI 2 "x86_64_general_operand" "rBMe,rBMe,re"))
9491 (match_dup 2)))
9492 (set (match_operand:DI 0 "register_operand" "=r,r,r")
9493 (zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))]
9494 "TARGET_64BIT
9495 && ix86_binary_operator_ok (PLUS, SImode, operands, TARGET_APX_NDD)"
9496 "@
9497 add{l}\t{%2, %k0|%k0, %2}
9498 add{l}\t{%2, %1, %k0|%k0, %1, %2}
9499 add{l}\t{%2, %1, %k0|%k0, %1, %2}"
9500 [(set_attr "isa" "*,apx_ndd,apx_ndd")
9501 (set_attr "type" "alu")
9502 (set_attr "mode" "SI")])
9503
;; Double-word (<DWI>) add that sets FLAGS_REG (CCCmode) from comparing the
;; sum against operand 1 and stores the sum in operand 0.  It has no
;; assembler output of its own ("#") and is split once reload has completed
;; into two DWIH parallels: a carry-setting add of operands 1 and 2 into
;; operand 0, then an add of operands 4 and 5 plus the carry (LTU test of
;; FLAGS_REG) into operand 3 that again sets CCC.
;; The preparation code calls split_double_mode on operands 0..2, writing
;; the halves to operands[0..2] and operands[3..5] (presumably low and high
;; halves respectively -- confirm against split_double_mode).  When
;; operands[2] is const0_rtx after that call, it instead copies operand 1
;; to operand 0 if they differ, emits gen_addcarry<mode>_0 on operands
;; 3..5, and finishes with DONE.  Otherwise operands[6] is set to the
;; <DWI> zero-extension of operands[5], folded with
;; simplify_unary_operation when operands[5] is a CONST_INT.
9504 (define_insn_and_split "*add<dwi>3_doubleword_cc_overflow_1"
9505 [(set (reg:CCC FLAGS_REG)
9506 (compare:CCC
9507 (plus:<DWI>
9508 (match_operand:<DWI> 1 "nonimmediate_operand" "%0,0,ro,r")
9509 (match_operand:<DWI> 2 "x86_64_hilo_general_operand" "r<di>,o,r<di>,o"))
9510 (match_dup 1)))
9511 (set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,r,&r,&r")
9512 (plus:<DWI> (match_dup 1) (match_dup 2)))]
9513 "ix86_binary_operator_ok (PLUS, <DWI>mode, operands, TARGET_APX_NDD)"
9514 "#"
9515 "&& reload_completed"
9516 [(parallel [(set (reg:CCC FLAGS_REG)
9517 (compare:CCC
9518 (plus:DWIH (match_dup 1) (match_dup 2))
9519 (match_dup 1)))
9520 (set (match_dup 0)
9521 (plus:DWIH (match_dup 1) (match_dup 2)))])
9522 (parallel [(set (reg:CCC FLAGS_REG)
9523 (compare:CCC
9524 (zero_extend:<DWI>
9525 (plus:DWIH
9526 (plus:DWIH
9527 (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))
9528 (match_dup 4))
9529 (match_dup 5)))
9530 (plus:<DWI>
9531 (match_dup 6)
9532 (ltu:<DWI> (reg:CC FLAGS_REG) (const_int 0)))))
9533 (set (match_dup 3)
9534 (plus:DWIH
9535 (plus:DWIH (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))
9536 (match_dup 4))
9537 (match_dup 5)))])]
9538 {
9539 split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]);
9540 if (operands[2] == const0_rtx)
9541 {
9542 if (!rtx_equal_p (operands[0], operands[1]))
9543 emit_move_insn (operands[0], operands[1]);
9544 emit_insn (gen_addcarry<mode>_0 (operands[3], operands[4], operands[5]));
9545 DONE;
9546 }
9547 if (CONST_INT_P (operands[5]))
9548 operands[6] = simplify_unary_operation (ZERO_EXTEND, <DWI>mode,
9549 operands[5], <MODE>mode);
9550 else
9551 operands[6] = gen_rtx_ZERO_EXTEND (<DWI>mode, operands[5]);
9552 }
9553 [(set_attr "isa" "*,*,apx_ndd,apx_ndd")])
9554
9555 ;; x == 0 with zero flag test can be done also as x < 1U with carry flag
9556 ;; test, where the latter is preferable if we have some carry consuming
9557 ;; instruction.
9558 ;; For x != 0, we need to use x < 1U with negation of carry, i.e.
9559 ;; + (1 - CF).
;; Matches (operand 3 == 0) + operand 1 + operand 2 before reload and
;; always splits it ("&& 1") into a CCmode compare of operand 3 with 1
;; followed by an add of operand 1, operand 2 and the carry (LTU test of
;; FLAGS_REG).
9560 (define_insn_and_split "*add<mode>3_eq"
9561 [(set (match_operand:SWI 0 "nonimmediate_operand")
9562 (plus:SWI
9563 (plus:SWI
9564 (eq:SWI (match_operand 3 "int_nonimmediate_operand") (const_int 0))
9565 (match_operand:SWI 1 "nonimmediate_operand"))
9566 (match_operand:SWI 2 "<general_operand>")))
9567 (clobber (reg:CC FLAGS_REG))]
9568 "ix86_binary_operator_ok (PLUS, <MODE>mode, operands, TARGET_APX_NDD)
9569 && ix86_pre_reload_split ()"
9570 "#"
9571 "&& 1"
9572 [(set (reg:CC FLAGS_REG)
9573 (compare:CC (match_dup 3) (const_int 1)))
9574 (parallel [(set (match_dup 0)
9575 (plus:SWI
9576 (plus:SWI (ltu:SWI (reg:CC FLAGS_REG) (const_int 0))
9577 (match_dup 1))
9578 (match_dup 2)))
9579 (clobber (reg:CC FLAGS_REG))])])
9580
;; Matches (operand 3 != 0) + operand 1 + constant operand 2 before reload
;; and always splits it into a CCmode compare of operand 3 with 1 followed
;; by operand 1 - carry - C', where the preparation code replaces operand 2
;; with C' = ~INTVAL (operands[2]) (built in SImode when <MODE> is DImode,
;; otherwise in <MODE>mode).  The insn condition requires operand 2 to be
;; a CONST_INT and, for DImode, rejects the value -0x80000000.
9581 (define_insn_and_split "*add<mode>3_ne"
9582 [(set (match_operand:SWI 0 "nonimmediate_operand")
9583 (plus:SWI
9584 (plus:SWI
9585 (ne:SWI (match_operand 3 "int_nonimmediate_operand") (const_int 0))
9586 (match_operand:SWI 1 "nonimmediate_operand"))
9587 (match_operand:SWI 2 "<immediate_operand>")))
9588 (clobber (reg:CC FLAGS_REG))]
9589 "CONST_INT_P (operands[2])
9590 && (<MODE>mode != DImode
9591 || INTVAL (operands[2]) != HOST_WIDE_INT_C (-0x80000000))
9592 && ix86_binary_operator_ok (PLUS, <MODE>mode, operands, TARGET_APX_NDD)
9593 && ix86_pre_reload_split ()"
9594 "#"
9595 "&& 1"
9596 [(set (reg:CC FLAGS_REG)
9597 (compare:CC (match_dup 3) (const_int 1)))
9598 (parallel [(set (match_dup 0)
9599 (minus:SWI
9600 (minus:SWI (match_dup 1)
9601 (ltu:SWI (reg:CC FLAGS_REG) (const_int 0)))
9602 (match_dup 2)))
9603 (clobber (reg:CC FLAGS_REG))])]
9604 {
9605 operands[2] = gen_int_mode (~INTVAL (operands[2]),
9606 <MODE>mode == DImode ? SImode : <MODE>mode);
9607 })
9608
;; Matches (operand 2 == 0) + operand 1 before reload and always splits it
;; into a CCmode compare of operand 2 with 1 followed by carry + operand 1.
;; The preparation code forces operand 1 into a register when it does not
;; satisfy nonimmediate_operand.
9609 (define_insn_and_split "*add<mode>3_eq_0"
9610 [(set (match_operand:SWI 0 "nonimmediate_operand")
9611 (plus:SWI
9612 (eq:SWI (match_operand 2 "int_nonimmediate_operand") (const_int 0))
9613 (match_operand:SWI 1 "<general_operand>")))
9614 (clobber (reg:CC FLAGS_REG))]
9615 "ix86_unary_operator_ok (PLUS, <MODE>mode, operands)
9616 && ix86_pre_reload_split ()"
9617 "#"
9618 "&& 1"
9619 [(set (reg:CC FLAGS_REG)
9620 (compare:CC (match_dup 2) (const_int 1)))
9621 (parallel [(set (match_dup 0)
9622 (plus:SWI (ltu:SWI (reg:CC FLAGS_REG) (const_int 0))
9623 (match_dup 1)))
9624 (clobber (reg:CC FLAGS_REG))])]
9625 {
9626 if (!nonimmediate_operand (operands[1], <MODE>mode))
9627 operands[1] = force_reg (<MODE>mode, operands[1]);
9628 })
9629
;; Matches (operand 2 != 0) + operand 1 before reload and always splits it
;; into a CCmode compare of operand 2 with 1 followed by
;; operand 1 - carry - (-1).  The preparation code forces operand 1 into a
;; register when it does not satisfy nonimmediate_operand.
9630 (define_insn_and_split "*add<mode>3_ne_0"
9631 [(set (match_operand:SWI 0 "nonimmediate_operand")
9632 (plus:SWI
9633 (ne:SWI (match_operand 2 "int_nonimmediate_operand") (const_int 0))
9634 (match_operand:SWI 1 "<general_operand>")))
9635 (clobber (reg:CC FLAGS_REG))]
9636 "ix86_unary_operator_ok (PLUS, <MODE>mode, operands)
9637 && ix86_pre_reload_split ()"
9638 "#"
9639 "&& 1"
9640 [(set (reg:CC FLAGS_REG)
9641 (compare:CC (match_dup 2) (const_int 1)))
9642 (parallel [(set (match_dup 0)
9643 (minus:SWI (minus:SWI
9644 (match_dup 1)
9645 (ltu:SWI (reg:CC FLAGS_REG) (const_int 0)))
9646 (const_int -1)))
9647 (clobber (reg:CC FLAGS_REG))])]
9648 {
9649 if (!nonimmediate_operand (operands[1], <MODE>mode))
9650 operands[1] = force_reg (<MODE>mode, operands[1]);
9651 })
9652
;; Matches operand 1 - (operand 3 == 0) - operand 2 before reload and
;; always splits it into a CCmode compare of operand 3 with 1 followed by
;; operand 1 - carry - operand 2, with the carry read as an LTU test of
;; FLAGS_REG.
9653 (define_insn_and_split "*sub<mode>3_eq"
9654 [(set (match_operand:SWI 0 "nonimmediate_operand")
9655 (minus:SWI
9656 (minus:SWI
9657 (match_operand:SWI 1 "nonimmediate_operand")
9658 (eq:SWI (match_operand 3 "int_nonimmediate_operand")
9659 (const_int 0)))
9660 (match_operand:SWI 2 "<general_operand>")))
9661 (clobber (reg:CC FLAGS_REG))]
9662 "ix86_binary_operator_ok (MINUS, <MODE>mode, operands, TARGET_APX_NDD)
9663 && ix86_pre_reload_split ()"
9664 "#"
9665 "&& 1"
9666 [(set (reg:CC FLAGS_REG)
9667 (compare:CC (match_dup 3) (const_int 1)))
9668 (parallel [(set (match_dup 0)
9669 (minus:SWI
9670 (minus:SWI (match_dup 1)
9671 (ltu:SWI (reg:CC FLAGS_REG) (const_int 0)))
9672 (match_dup 2)))
9673 (clobber (reg:CC FLAGS_REG))])])
9674
;; Matches operand 1 - (operand 3 != 0) + constant operand 2 before reload
;; and always splits it into a CCmode compare of operand 3 with 1 followed
;; by carry + operand 1 + C', where the preparation code replaces operand 2
;; with C' = INTVAL (operands[2]) - 1 (built in SImode when <MODE> is
;; DImode, otherwise in <MODE>mode).  The insn condition requires operand 2
;; to be a CONST_INT and, for DImode, rejects the value -0x80000000.
9675 (define_insn_and_split "*sub<mode>3_ne"
9676 [(set (match_operand:SWI 0 "nonimmediate_operand")
9677 (plus:SWI
9678 (minus:SWI
9679 (match_operand:SWI 1 "nonimmediate_operand")
9680 (ne:SWI (match_operand 3 "int_nonimmediate_operand")
9681 (const_int 0)))
9682 (match_operand:SWI 2 "<immediate_operand>")))
9683 (clobber (reg:CC FLAGS_REG))]
9684 "CONST_INT_P (operands[2])
9685 && (<MODE>mode != DImode
9686 || INTVAL (operands[2]) != HOST_WIDE_INT_C (-0x80000000))
9687 && ix86_binary_operator_ok (MINUS, <MODE>mode, operands, TARGET_APX_NDD)
9688 && ix86_pre_reload_split ()"
9689 "#"
9690 "&& 1"
9691 [(set (reg:CC FLAGS_REG)
9692 (compare:CC (match_dup 3) (const_int 1)))
9693 (parallel [(set (match_dup 0)
9694 (plus:SWI
9695 (plus:SWI (ltu:SWI (reg:CC FLAGS_REG) (const_int 0))
9696 (match_dup 1))
9697 (match_dup 2)))
9698 (clobber (reg:CC FLAGS_REG))])]
9699 {
9700 operands[2] = gen_int_mode (INTVAL (operands[2]) - 1,
9701 <MODE>mode == DImode ? SImode : <MODE>mode);
9702 })
9703
;; Matches operand 1 - (operand 3 == 0) + constant operand 2 before reload
;; and always splits it into a CCmode compare of operand 3 with 1 followed
;; by operand 1 - carry - C', where the preparation code replaces operand 2
;; with C' = -INTVAL (operands[2]) (built in SImode when <MODE> is DImode,
;; otherwise in <MODE>mode).  The insn condition requires operand 2 to be
;; a CONST_INT and, for DImode, rejects the value -0x80000000.
9704 (define_insn_and_split "*sub<mode>3_eq_1"
9705 [(set (match_operand:SWI 0 "nonimmediate_operand")
9706 (plus:SWI
9707 (minus:SWI
9708 (match_operand:SWI 1 "nonimmediate_operand")
9709 (eq:SWI (match_operand 3 "int_nonimmediate_operand")
9710 (const_int 0)))
9711 (match_operand:SWI 2 "<immediate_operand>")))
9712 (clobber (reg:CC FLAGS_REG))]
9713 "CONST_INT_P (operands[2])
9714 && (<MODE>mode != DImode
9715 || INTVAL (operands[2]) != HOST_WIDE_INT_C (-0x80000000))
9716 && ix86_binary_operator_ok (MINUS, <MODE>mode, operands, TARGET_APX_NDD)
9717 && ix86_pre_reload_split ()"
9718 "#"
9719 "&& 1"
9720 [(set (reg:CC FLAGS_REG)
9721 (compare:CC (match_dup 3) (const_int 1)))
9722 (parallel [(set (match_dup 0)
9723 (minus:SWI
9724 (minus:SWI (match_dup 1)
9725 (ltu:SWI (reg:CC FLAGS_REG) (const_int 0)))
9726 (match_dup 2)))
9727 (clobber (reg:CC FLAGS_REG))])]
9728 {
9729 operands[2] = gen_int_mode (-INTVAL (operands[2]),
9730 <MODE>mode == DImode ? SImode : <MODE>mode);
9731 })
9732
;; Matches operand 1 - (operand 2 == 0) before reload and always splits it
;; into a CCmode compare of operand 2 with 1 followed by operand 1 - carry.
;; The preparation code forces operand 1 into a register when it does not
;; satisfy nonimmediate_operand.
9733 (define_insn_and_split "*sub<mode>3_eq_0"
9734 [(set (match_operand:SWI 0 "nonimmediate_operand")
9735 (minus:SWI
9736 (match_operand:SWI 1 "<general_operand>")
9737 (eq:SWI (match_operand 2 "int_nonimmediate_operand") (const_int 0))))
9738 (clobber (reg:CC FLAGS_REG))]
9739 "ix86_unary_operator_ok (MINUS, <MODE>mode, operands)
9740 && ix86_pre_reload_split ()"
9741 "#"
9742 "&& 1"
9743 [(set (reg:CC FLAGS_REG)
9744 (compare:CC (match_dup 2) (const_int 1)))
9745 (parallel [(set (match_dup 0)
9746 (minus:SWI (match_dup 1)
9747 (ltu:SWI (reg:CC FLAGS_REG) (const_int 0))))
9748 (clobber (reg:CC FLAGS_REG))])]
9749 {
9750 if (!nonimmediate_operand (operands[1], <MODE>mode))
9751 operands[1] = force_reg (<MODE>mode, operands[1]);
9752 })
9753
;; Matches operand 1 - (operand 2 != 0) before reload and always splits it
;; into a CCmode compare of operand 2 with 1 followed by
;; carry + operand 1 + (-1).  The preparation code forces operand 1 into a
;; register when it does not satisfy nonimmediate_operand.
9754 (define_insn_and_split "*sub<mode>3_ne_0"
9755 [(set (match_operand:SWI 0 "nonimmediate_operand")
9756 (minus:SWI
9757 (match_operand:SWI 1 "<general_operand>")
9758 (ne:SWI (match_operand 2 "int_nonimmediate_operand") (const_int 0))))
9759 (clobber (reg:CC FLAGS_REG))]
9760 "ix86_unary_operator_ok (MINUS, <MODE>mode, operands)
9761 && ix86_pre_reload_split ()"
9762 "#"
9763 "&& 1"
9764 [(set (reg:CC FLAGS_REG)
9765 (compare:CC (match_dup 2) (const_int 1)))
9766 (parallel [(set (match_dup 0)
9767 (plus:SWI (plus:SWI
9768 (ltu:SWI (reg:CC FLAGS_REG) (const_int 0))
9769 (match_dup 1))
9770 (const_int -1)))
9771 (clobber (reg:CC FLAGS_REG))])]
9772 {
9773 if (!nonimmediate_operand (operands[1], <MODE>mode))
9774 operands[1] = force_reg (<MODE>mode, operands[1]);
9775 })
9776
9777 ;; The patterns that match these are at the end of this file.
9778
;; XFmode add/subtract expander (plusminus iterator) on register operands;
;; available only when TARGET_80387 is set.
9779 (define_expand "<insn>xf3"
9780 [(set (match_operand:XF 0 "register_operand")
9781 (plusminus:XF
9782 (match_operand:XF 1 "register_operand")
9783 (match_operand:XF 2 "register_operand")))]
9784 "TARGET_80387")
9785
;; HFmode add/subtract expander (plusminus iterator); operand 2 may be a
;; register or memory.  Available only when TARGET_AVX512FP16 is set.
9786 (define_expand "<insn>hf3"
9787 [(set (match_operand:HF 0 "register_operand")
9788 (plusminus:HF
9789 (match_operand:HF 1 "register_operand")
9790 (match_operand:HF 2 "nonimmediate_operand")))]
9791 "TARGET_AVX512FP16")
9792
;; Add/subtract expander for the MODEF modes (plusminus iterator).  Enabled
;; either for x87 arithmetic (TARGET_80387 with X87_ENABLE_ARITH for the
;; mode) or when the mode is an SSE float mode and TARGET_SSE_MATH is set.
9793 (define_expand "<insn><mode>3"
9794 [(set (match_operand:MODEF 0 "register_operand")
9795 (plusminus:MODEF
9796 (match_operand:MODEF 1 "register_operand")
9797 (match_operand:MODEF 2 "nonimmediate_operand")))]
9798 "(TARGET_80387 && X87_ENABLE_ARITH (<MODE>mode))
9799 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)")
9800 \f
9801 ;; Multiply instructions
9802
;; Integer multiply expander for the SWIM248 modes: register operand 1
;; times general operand 2 into register operand 0, with FLAGS_REG
;; clobbered.  No enabling condition is given.
9803 (define_expand "mul<mode>3"
9804 [(parallel [(set (match_operand:SWIM248 0 "register_operand")
9805 (mult:SWIM248
9806 (match_operand:SWIM248 1 "register_operand")
9807 (match_operand:SWIM248 2 "<general_operand>")))
9808 (clobber (reg:CC FLAGS_REG))])])
9809
;; QImode multiply expander: register operand 1 times register-or-memory
;; operand 2 into register operand 0, with FLAGS_REG clobbered.  Enabled
;; only when TARGET_QIMODE_MATH is set.
9810 (define_expand "mulqi3"
9811 [(parallel [(set (match_operand:QI 0 "register_operand")
9812 (mult:QI
9813 (match_operand:QI 1 "register_operand")
9814 (match_operand:QI 2 "nonimmediate_operand")))
9815 (clobber (reg:CC FLAGS_REG))])]
9816 "TARGET_QIMODE_MATH")
9817
9818 ;; On AMDFAM10
9819 ;; IMUL reg32/64, reg32/64, imm8 Direct
9820 ;; IMUL reg32/64, mem32/64, imm8 VectorPath
9821 ;; IMUL reg32/64, reg32/64, imm32 Direct
9822 ;; IMUL reg32/64, mem32/64, imm32 VectorPath
9823 ;; IMUL reg32/64, reg32/64 Direct
9824 ;; IMUL reg32/64, mem32/64 Direct
9825 ;;
9826 ;; On BDVER1, all above IMULs use DirectPath
9827 ;;
9828 ;; On AMDFAM10
9829 ;; IMUL reg16, reg16, imm8 VectorPath
9830 ;; IMUL reg16, mem16, imm8 VectorPath
9831 ;; IMUL reg16, reg16, imm16 VectorPath
9832 ;; IMUL reg16, mem16, imm16 VectorPath
9833 ;; IMUL reg16, reg16 Direct
9834 ;; IMUL reg16, mem16 Direct
9835 ;;
9836 ;; On BDVER1, all HI MULs use DoublePath
9837
;; imul for the SWIM248 modes, clobbering FLAGS_REG.  Alternatives 0 and 1
;; emit the three-operand form (operand 2 constrained by K and <i>
;; respectively); alternative 2 emits the two-operand form with operand 1
;; tied to the destination.  Both inputs being memory is rejected.  The
;; athlon_decode, amdfam10_decode and bdver1_decode attributes are chosen
;; per alternative, per mode (HImode is special-cased) and by whether
;; operand 1 is a memory operand.
9838 (define_insn "*mul<mode>3_1"
9839 [(set (match_operand:SWIM248 0 "register_operand" "=r,r,r")
9840 (mult:SWIM248
9841 (match_operand:SWIM248 1 "nonimmediate_operand" "%rm,rm,0")
9842 (match_operand:SWIM248 2 "<general_operand>" "K,<i>,<m>r")))
9843 (clobber (reg:CC FLAGS_REG))]
9844 "!(MEM_P (operands[1]) && MEM_P (operands[2]))"
9845 "@
9846 imul{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
9847 imul{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
9848 imul{<imodesuffix>}\t{%2, %0|%0, %2}"
9849 [(set_attr "type" "imul")
9850 (set_attr "prefix_0f" "0,0,1")
9851 (set (attr "athlon_decode")
9852 (cond [(eq_attr "cpu" "athlon")
9853 (const_string "vector")
9854 (eq_attr "alternative" "1")
9855 (const_string "vector")
9856 (and (eq_attr "alternative" "2")
9857 (ior (match_test "<MODE>mode == HImode")
9858 (match_operand 1 "memory_operand")))
9859 (const_string "vector")]
9860 (const_string "direct")))
9861 (set (attr "amdfam10_decode")
9862 (cond [(and (eq_attr "alternative" "0,1")
9863 (ior (match_test "<MODE>mode == HImode")
9864 (match_operand 1 "memory_operand")))
9865 (const_string "vector")]
9866 (const_string "direct")))
9867 (set (attr "bdver1_decode")
9868 (if_then_else
9869 (match_test "<MODE>mode == HImode")
9870 (const_string "double")
9871 (const_string "direct")))
9872 (set_attr "mode" "<MODE>")])
9873
;; 64-bit-only SImode imul whose product is zero-extended into DImode
;; register operand 0 (printed with the %k modifier); FLAGS_REG is
;; clobbered.  Alternatives 0 and 1 emit the three-operand form,
;; alternative 2 the two-operand form with operand 1 tied to the
;; destination.  Both inputs being memory is rejected.
9874 (define_insn "*mulsi3_1_zext"
9875 [(set (match_operand:DI 0 "register_operand" "=r,r,r")
9876 (zero_extend:DI
9877 (mult:SI (match_operand:SI 1 "nonimmediate_operand" "%rm,rm,0")
9878 (match_operand:SI 2 "x86_64_general_operand" "K,e,BMr"))))
9879 (clobber (reg:CC FLAGS_REG))]
9880 "TARGET_64BIT
9881 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
9882 "@
9883 imul{l}\t{%2, %1, %k0|%k0, %1, %2}
9884 imul{l}\t{%2, %1, %k0|%k0, %1, %2}
9885 imul{l}\t{%2, %k0|%k0, %2}"
9886 [(set_attr "type" "imul")
9887 (set_attr "prefix_0f" "0,0,1")
9888 (set (attr "athlon_decode")
9889 (cond [(eq_attr "cpu" "athlon")
9890 (const_string "vector")
9891 (eq_attr "alternative" "1")
9892 (const_string "vector")
9893 (and (eq_attr "alternative" "2")
9894 (match_operand 1 "memory_operand"))
9895 (const_string "vector")]
9896 (const_string "direct")))
9897 (set (attr "amdfam10_decode")
9898 (cond [(and (eq_attr "alternative" "0,1")
9899 (match_operand 1 "memory_operand"))
9900 (const_string "vector")]
9901 (const_string "direct")))
9902 (set_attr "bdver1_decode" "direct")
9903 (set_attr "mode" "SI")])
9904
9905 ;; On AMDFAM10 and BDVER1
9906 ;; MUL reg8 Direct
9907 ;; MUL mem8 Direct
9908
;; QImode multiply emitted as one-operand "mul{b}": the destination uses
;; constraint "a", operand 1 is tied to it, and operand 2 (constraint "qm")
;; is the only operand printed.  FLAGS_REG is clobbered.  Enabled only with
;; TARGET_QIMODE_MATH and when the inputs are not both memory.
9909 (define_insn "*mulqi3_1"
9910 [(set (match_operand:QI 0 "register_operand" "=a")
9911 (mult:QI (match_operand:QI 1 "nonimmediate_operand" "%0")
9912 (match_operand:QI 2 "nonimmediate_operand" "qm")))
9913 (clobber (reg:CC FLAGS_REG))]
9914 "TARGET_QIMODE_MATH
9915 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
9916 "mul{b}\t%2"
9917 [(set_attr "type" "imul")
9918 (set_attr "length_immediate" "0")
9919 (set (attr "athlon_decode")
9920 (if_then_else (eq_attr "cpu" "athlon")
9921 (const_string "vector")
9922 (const_string "direct")))
9923 (set_attr "amdfam10_decode" "direct")
9924 (set_attr "bdver1_decode" "direct")
9925 (set_attr "mode" "QI")])
9926
9927 ;; Multiply with jump on overflow.
;; Expander for the SWI248 modes.  It emits a parallel that sets FLAGS_REG
;; (CCOmode) from an EQ between the <DWI> product of sign-extended
;; operand 1 and operand 4 and the sign-extension of the <MODE> product,
;; and stores the <MODE> product in operand 0.  That is followed by a
;; conditional jump to the label in operand 3, taken on
;; (eq (reg:CCO FLAGS_REG) (const_int 0)).  The preparation code sets
;; operand 4 to operand 2 itself when operand 2 is a CONST_INT, and to
;; its <DWI> sign-extension otherwise.
9928 (define_expand "mulv<mode>4"
9929 [(parallel [(set (reg:CCO FLAGS_REG)
9930 (eq:CCO (mult:<DWI>
9931 (sign_extend:<DWI>
9932 (match_operand:SWI248 1 "register_operand"))
9933 (match_dup 4))
9934 (sign_extend:<DWI>
9935 (mult:SWI248 (match_dup 1)
9936 (match_operand:SWI248 2
9937 "<general_operand>")))))
9938 (set (match_operand:SWI248 0 "register_operand")
9939 (mult:SWI248 (match_dup 1) (match_dup 2)))])
9940 (set (pc) (if_then_else
9941 (eq (reg:CCO FLAGS_REG) (const_int 0))
9942 (label_ref (match_operand 3))
9943 (pc)))]
9944 ""
9945 {
9946 if (CONST_INT_P (operands[2]))
9947 operands[4] = operands[2];
9948 else
9949 operands[4] = gen_rtx_SIGN_EXTEND (<DWI>mode, operands[2]);
9950 })
9951
;; Overflow-checking imul for the SWI48 modes: FLAGS_REG (CCOmode) is set
;; from an EQ between the <DWI> product of the sign-extended operands and
;; the sign-extension of the <MODE> product, and operand 0 receives the
;; <MODE> product.  Alternative 0 emits the three-operand form (operand 2
;; constrained by "We"); alternative 1 emits the two-operand form with
;; operand 1 tied to the destination.  Both inputs being memory is
;; rejected.
9952 (define_insn "*mulv<mode>4"
9953 [(set (reg:CCO FLAGS_REG)
9954 (eq:CCO (mult:<DWI>
9955 (sign_extend:<DWI>
9956 (match_operand:SWI48 1 "nonimmediate_operand" "%rm,0"))
9957 (sign_extend:<DWI>
9958 (match_operand:SWI48 2 "x86_64_sext_operand" "We,mr")))
9959 (sign_extend:<DWI>
9960 (mult:SWI48 (match_dup 1) (match_dup 2)))))
9961 (set (match_operand:SWI48 0 "register_operand" "=r,r")
9962 (mult:SWI48 (match_dup 1) (match_dup 2)))]
9963 "!(MEM_P (operands[1]) && MEM_P (operands[2]))"
9964 "@
9965 imul{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
9966 imul{<imodesuffix>}\t{%2, %0|%0, %2}"
9967 [(set_attr "type" "imul")
9968 (set_attr "prefix_0f" "0,1")
9969 (set (attr "athlon_decode")
9970 (cond [(eq_attr "cpu" "athlon")
9971 (const_string "vector")
9972 (eq_attr "alternative" "0")
9973 (const_string "vector")
9974 (and (eq_attr "alternative" "1")
9975 (match_operand 1 "memory_operand"))
9976 (const_string "vector")]
9977 (const_string "direct")))
9978 (set (attr "amdfam10_decode")
9979 (cond [(and (eq_attr "alternative" "1")
9980 (match_operand 1 "memory_operand"))
9981 (const_string "vector")]
9982 (const_string "direct")))
9983 (set_attr "bdver1_decode" "direct")
9984 (set_attr "mode" "<MODE>")])
9985
;; HImode overflow-checking multiply: FLAGS_REG (CCOmode) is set from an EQ
;; between the SImode product of the sign-extended operands and the
;; sign-extension of the HImode product, and operand 0 receives the HImode
;; product.  Only the two-operand "imul{w}" form is provided, with
;; operand 1 tied to the destination; both inputs being memory is
;; rejected.
9986 (define_insn "*mulvhi4"
9987 [(set (reg:CCO FLAGS_REG)
9988 (eq:CCO (mult:SI
9989 (sign_extend:SI
9990 (match_operand:HI 1 "nonimmediate_operand" "%0"))
9991 (sign_extend:SI
9992 (match_operand:HI 2 "nonimmediate_operand" "mr")))
9993 (sign_extend:SI
9994 (mult:HI (match_dup 1) (match_dup 2)))))
9995 (set (match_operand:HI 0 "register_operand" "=r")
9996 (mult:HI (match_dup 1) (match_dup 2)))]
9997 "!(MEM_P (operands[1]) && MEM_P (operands[2]))"
9998 "imul{w}\t{%2, %0|%0, %2}"
9999 [(set_attr "type" "imul")
10000 (set_attr "prefix_0f" "1")
10001 (set_attr "athlon_decode" "vector")
10002 (set_attr "amdfam10_decode" "direct")
10003 (set_attr "bdver1_decode" "double")
10004 (set_attr "mode" "HI")])
10005
;; Overflow-checking imul by an immediate for the SWI248 modes.  The <DWI>
;; side of the comparison multiplies sign-extended operand 1 by the
;; const_int operand 3, and the <MODE> side multiplies operand 1 by
;; immediate operand 2.  The insn condition requires operand 2 to be a
;; CONST_INT whose INTVAL equals that of operand 3.  A single
;; three-operand imul template serves both alternatives.  length_immediate
;; is 1 for alternative 0; otherwise it is 4 when <MODE_SIZE> is 8 and
;; <MODE_SIZE> in the remaining cases.
10006 (define_insn "*mulv<mode>4_1"
10007 [(set (reg:CCO FLAGS_REG)
10008 (eq:CCO (mult:<DWI>
10009 (sign_extend:<DWI>
10010 (match_operand:SWI248 1 "nonimmediate_operand" "rm,rm"))
10011 (match_operand:<DWI> 3 "const_int_operand" "K,i"))
10012 (sign_extend:<DWI>
10013 (mult:SWI248 (match_dup 1)
10014 (match_operand:SWI248 2
10015 "<immediate_operand>" "K,<i>")))))
10016 (set (match_operand:SWI248 0 "register_operand" "=r,r")
10017 (mult:SWI248 (match_dup 1) (match_dup 2)))]
10018 "!(MEM_P (operands[1]) && MEM_P (operands[2]))
10019 && CONST_INT_P (operands[2])
10020 && INTVAL (operands[2]) == INTVAL (operands[3])"
10021 "imul{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
10022 [(set_attr "type" "imul")
10023 (set (attr "prefix_0f")
10024 (if_then_else
10025 (match_test "<MODE>mode == HImode")
10026 (const_string "0")
10027 (const_string "*")))
10028 (set (attr "athlon_decode")
10029 (cond [(eq_attr "cpu" "athlon")
10030 (const_string "vector")
10031 (eq_attr "alternative" "1")
10032 (const_string "vector")]
10033 (const_string "direct")))
10034 (set (attr "amdfam10_decode")
10035 (cond [(ior (match_test "<MODE>mode == HImode")
10036 (match_operand 1 "memory_operand"))
10037 (const_string "vector")]
10038 (const_string "direct")))
10039 (set (attr "bdver1_decode")
10040 (if_then_else
10041 (match_test "<MODE>mode == HImode")
10042 (const_string "double")
10043 (const_string "direct")))
10044 (set_attr "mode" "<MODE>")
10045 (set (attr "length_immediate")
10046 (cond [(eq_attr "alternative" "0")
10047 (const_string "1")
10048 (match_test "<MODE_SIZE> == 8")
10049 (const_string "4")]
10050 (const_string "<MODE_SIZE>")))])
10051
;; Zero-extending counterpart of the mulv<mode>4 expander for the SWI248
;; modes.  It emits a parallel that sets FLAGS_REG (CCOmode) from an EQ
;; between the <DWI> product of the zero-extended operands and the
;; zero-extension of the <MODE> product, stores the <MODE> product in
;; operand 0, and clobbers a <MODE> scratch.  That is followed by a
;; conditional jump to the label in operand 3, taken on
;; (eq (reg:CCO FLAGS_REG) (const_int 0)).  The preparation code forces
;; operand 1 into a register when both inputs are memory.
10052 (define_expand "umulv<mode>4"
10053 [(parallel [(set (reg:CCO FLAGS_REG)
10054 (eq:CCO (mult:<DWI>
10055 (zero_extend:<DWI>
10056 (match_operand:SWI248 1
10057 "nonimmediate_operand"))
10058 (zero_extend:<DWI>
10059 (match_operand:SWI248 2
10060 "nonimmediate_operand")))
10061 (zero_extend:<DWI>
10062 (mult:SWI248 (match_dup 1) (match_dup 2)))))
10063 (set (match_operand:SWI248 0 "register_operand")
10064 (mult:SWI248 (match_dup 1) (match_dup 2)))
10065 (clobber (scratch:SWI248))])
10066 (set (pc) (if_then_else
10067 (eq (reg:CCO FLAGS_REG) (const_int 0))
10068 (label_ref (match_operand 3))
10069 (pc)))]
10070 ""
10071 {
10072 if (MEM_P (operands[1]) && MEM_P (operands[2]))
10073 operands[1] = force_reg (<MODE>mode, operands[1]);
10074 })
10075
;; Insn behind the umulv<mode>4 expander, emitted as a one-operand "mul":
;; the destination uses constraint "a", operand 1 is tied to it, operand 2
;; ("rm") is the only operand printed, and a scratch with constraint "d"
;; is clobbered.  FLAGS_REG (CCOmode) is set from the zero-extended
;; product comparison.  Both inputs being memory is rejected.
10076 (define_insn "*umulv<mode>4"
10077 [(set (reg:CCO FLAGS_REG)
10078 (eq:CCO (mult:<DWI>
10079 (zero_extend:<DWI>
10080 (match_operand:SWI248 1 "nonimmediate_operand" "%0"))
10081 (zero_extend:<DWI>
10082 (match_operand:SWI248 2 "nonimmediate_operand" "rm")))
10083 (zero_extend:<DWI>
10084 (mult:SWI248 (match_dup 1) (match_dup 2)))))
10085 (set (match_operand:SWI248 0 "register_operand" "=a")
10086 (mult:SWI248 (match_dup 1) (match_dup 2)))
10087 (clobber (match_scratch:SWI248 3 "=d"))]
10088 "!(MEM_P (operands[1]) && MEM_P (operands[2]))"
10089 "mul{<imodesuffix>}\t%2"
10090 [(set_attr "type" "imul")
10091 (set_attr "length_immediate" "0")
10092 (set (attr "athlon_decode")
10093 (if_then_else (eq_attr "cpu" "athlon")
10094 (const_string "vector")
10095 (const_string "double")))
10096 (set_attr "amdfam10_decode" "double")
10097 (set_attr "bdver1_decode" "direct")
10098 (set_attr "mode" "<MODE>")])
10099
;; QImode expander covering both extension kinds through the any_extend
;; iterator.  It emits a parallel that sets FLAGS_REG (CCOmode) from an EQ
;; between the HImode product of the extended operands and the extension
;; of the QImode product, and stores the QImode product in operand 0.
;; That is followed by a conditional jump to the label in operand 3, taken
;; on (eq (reg:CCO FLAGS_REG) (const_int 0)).  Enabled only with
;; TARGET_QIMODE_MATH; the preparation code forces operand 1 into a
;; register when both inputs are memory.
10100 (define_expand "<u>mulvqi4"
10101 [(parallel [(set (reg:CCO FLAGS_REG)
10102 (eq:CCO (mult:HI
10103 (any_extend:HI
10104 (match_operand:QI 1 "nonimmediate_operand"))
10105 (any_extend:HI
10106 (match_operand:QI 2 "nonimmediate_operand")))
10107 (any_extend:HI
10108 (mult:QI (match_dup 1) (match_dup 2)))))
10109 (set (match_operand:QI 0 "register_operand")
10110 (mult:QI (match_dup 1) (match_dup 2)))])
10111 (set (pc) (if_then_else
10112 (eq (reg:CCO FLAGS_REG) (const_int 0))
10113 (label_ref (match_operand 3))
10114 (pc)))]
10115 "TARGET_QIMODE_MATH"
10116 {
10117 if (MEM_P (operands[1]) && MEM_P (operands[2]))
10118 operands[1] = force_reg (QImode, operands[1]);
10119 })
10120
;; Insn behind the <u>mulvqi4 expander, emitted as one-operand
;; "<sgnprefix>mul{b}": the destination uses constraint "a", operand 1 is
;; tied to it, and operand 2 ("qm") is the only operand printed.
;; FLAGS_REG (CCOmode) is set from the extended product comparison.
;; Enabled only with TARGET_QIMODE_MATH and when the inputs are not both
;; memory.
10121 (define_insn "*<u>mulvqi4"
10122 [(set (reg:CCO FLAGS_REG)
10123 (eq:CCO (mult:HI
10124 (any_extend:HI
10125 (match_operand:QI 1 "nonimmediate_operand" "%0"))
10126 (any_extend:HI
10127 (match_operand:QI 2 "nonimmediate_operand" "qm")))
10128 (any_extend:HI
10129 (mult:QI (match_dup 1) (match_dup 2)))))
10130 (set (match_operand:QI 0 "register_operand" "=a")
10131 (mult:QI (match_dup 1) (match_dup 2)))]
10132 "TARGET_QIMODE_MATH
10133 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
10134 "<sgnprefix>mul{b}\t%2"
10135 [(set_attr "type" "imul")
10136 (set_attr "length_immediate" "0")
10137 (set (attr "athlon_decode")
10138 (if_then_else (eq_attr "cpu" "athlon")
10139 (const_string "vector")
10140 (const_string "direct")))
10141 (set_attr "amdfam10_decode" "direct")
10142 (set_attr "bdver1_decode" "direct")
10143 (set_attr "mode" "QI")])
10144
;; Widening multiply expander: two DWIH operands are extended (any_extend
;; iterator) to <DWI> and multiplied into <DWI> register operand 0, with
;; FLAGS_REG clobbered.  No enabling condition is given.
10145 (define_expand "<u>mul<mode><dwi>3"
10146 [(parallel [(set (match_operand:<DWI> 0 "register_operand")
10147 (mult:<DWI>
10148 (any_extend:<DWI>
10149 (match_operand:DWIH 1 "register_operand"))
10150 (any_extend:<DWI>
10151 (match_operand:DWIH 2 "nonimmediate_operand"))))
10152 (clobber (reg:CC FLAGS_REG))])])
10153
;; Widening QImode-to-HImode multiply expander: both QImode operands are
;; extended (any_extend iterator) to HImode and multiplied into HImode
;; register operand 0, with FLAGS_REG clobbered.  Enabled only with
;; TARGET_QIMODE_MATH.
10154 (define_expand "<u>mulqihi3"
10155 [(parallel [(set (match_operand:HI 0 "register_operand")
10156 (mult:HI
10157 (any_extend:HI
10158 (match_operand:QI 1 "register_operand"))
10159 (any_extend:HI
10160 (match_operand:QI 2 "nonimmediate_operand"))))
10161 (clobber (reg:CC FLAGS_REG))])]
10162 "TARGET_QIMODE_MATH")
10163
;; BMI2 "mulx": operand 0 receives the DWIH product of operands 2 and 3 and
;; operand 1 receives their umul_highpart.  Operand 2 uses constraint "d";
;; operand 3 may be a register or memory.  The pattern contains no
;; FLAGS_REG clobber.  Enabled only with TARGET_BMI2.
10164 (define_insn "*bmi2_umul<mode><dwi>3_1"
10165 [(set (match_operand:DWIH 0 "register_operand" "=r")
10166 (mult:DWIH
10167 (match_operand:DWIH 2 "register_operand" "%d")
10168 (match_operand:DWIH 3 "nonimmediate_operand" "rm")))
10169 (set (match_operand:DWIH 1 "register_operand" "=r")
10170 (umul_highpart:DWIH (match_dup 2) (match_dup 3)))]
10171 "TARGET_BMI2"
10172 "mulx\t{%3, %0, %1|%1, %0, %3}"
10173 [(set_attr "type" "imulx")
10174 (set_attr "prefix" "vex")
10175 (set_attr "mode" "<MODE>")])
10176
10177 ;; Tweak *bmi2_umul<mode><dwi>3_1 to eliminate following mov.
;; Peephole: a mulx-style parallel (low product in operand 0, high part in
;; operand 1) followed by a register move from operand 5 to operand 4 is
;; rewritten so that the multiply writes operand 4 directly and the move
;; disappears.  Operand 5 must be the same register as one of the two
;; results, the other result must not be the move destination, and
;; operand 5 must pass peep2_reg_dead_p (2, ...).  The preparation code
;; replaces whichever result register equals operand 5 with operand 4.
10178 (define_peephole2
10179 [(parallel [(set (match_operand:DWIH 0 "general_reg_operand")
10180 (mult:DWIH (match_operand:DWIH 2 "register_operand")
10181 (match_operand:DWIH 3 "nonimmediate_operand")))
10182 (set (match_operand:DWIH 1 "general_reg_operand")
10183 (umul_highpart:DWIH (match_dup 2) (match_dup 3)))])
10184 (set (match_operand:DWIH 4 "general_reg_operand")
10185 (match_operand:DWIH 5 "general_reg_operand"))]
10186 "TARGET_BMI2
10187 && ((REGNO (operands[5]) == REGNO (operands[0])
10188 && REGNO (operands[1]) != REGNO (operands[4]))
10189 || (REGNO (operands[5]) == REGNO (operands[1])
10190 && REGNO (operands[0]) != REGNO (operands[4])))
10191 && peep2_reg_dead_p (2, operands[5])"
10192 [(parallel [(set (match_dup 0) (mult:DWIH (match_dup 2) (match_dup 3)))
10193 (set (match_dup 1)
10194 (umul_highpart:DWIH (match_dup 2) (match_dup 3)))])]
10195 {
10196 if (REGNO (operands[5]) == REGNO (operands[0]))
10197 operands[0] = operands[4];
10198 else
10199 operands[1] = operands[4];
10200 })
10201
;; Unsigned widening multiply of two DWIH operands into a <DWI> register,
;; clobbering FLAGS_REG.  Alternative 0 (isa bmi2, operand 1 constrained
;; to "d") has the template "#", so it emits no assembly of its own and
;; relies on being split.  Alternative 1 emits a one-operand "mul", with
;; operand 1 constrained to "a" and the result to "A".  Both inputs being
;; memory is rejected.
10202 (define_insn "*umul<mode><dwi>3_1"
10203 [(set (match_operand:<DWI> 0 "register_operand" "=r,A")
10204 (mult:<DWI>
10205 (zero_extend:<DWI>
10206 (match_operand:DWIH 1 "register_operand" "%d,a"))
10207 (zero_extend:<DWI>
10208 (match_operand:DWIH 2 "nonimmediate_operand" "rm,rm"))))
10209 (clobber (reg:CC FLAGS_REG))]
10210 "!(MEM_P (operands[1]) && MEM_P (operands[2]))"
10211 "@
10212 #
10213 mul{<imodesuffix>}\t%2"
10214 [(set_attr "isa" "bmi2,*")
10215 (set_attr "type" "imulx,imul")
10216 (set_attr "length_immediate" "*,0")
10217 (set (attr "athlon_decode")
10218 (cond [(eq_attr "alternative" "1")
10219 (if_then_else (eq_attr "cpu" "athlon")
10220 (const_string "vector")
10221 (const_string "double"))]
10222 (const_string "*")))
10223 (set_attr "amdfam10_decode" "*,double")
10224 (set_attr "bdver1_decode" "*,direct")
10225 (set_attr "prefix" "vex,orig")
10226 (set_attr "mode" "<MODE>")])
10227
10228 ;; Convert mul to the mulx pattern to avoid flags dependency.
;; Splits the unsigned widening multiply (with its FLAGS_REG clobber) into
;; the flag-free mulx-style parallel once reload has completed, provided
;; TARGET_BMI2 is set and operand 1 is DX_REG.  The preparation code calls
;; split_double_mode to split the <DWI> destination (operand 0) into
;; operands 3 and 4, which receive the DWIH product and its umul_highpart
;; respectively (presumably the low and high halves -- confirm against
;; split_double_mode).  Only operands 1..4 are referenced by the
;; replacement template, so nothing else is set up here.
10229 (define_split
10230 [(set (match_operand:<DWI> 0 "register_operand")
10231 (mult:<DWI>
10232 (zero_extend:<DWI>
10233 (match_operand:DWIH 1 "register_operand"))
10234 (zero_extend:<DWI>
10235 (match_operand:DWIH 2 "nonimmediate_operand"))))
10236 (clobber (reg:CC FLAGS_REG))]
10237 "TARGET_BMI2 && reload_completed
10238 && REGNO (operands[1]) == DX_REG"
10239 [(parallel [(set (match_dup 3)
10240 (mult:DWIH (match_dup 1) (match_dup 2)))
10241 (set (match_dup 4)
10242 (umul_highpart:DWIH (match_dup 1) (match_dup 2)))])]
10243 {
10244 split_double_mode (<DWI>mode, &operands[0], 1, &operands[3], &operands[4]);
10247 })
10248
;; Signed widening multiply of two DWIH operands into a <DWI> register,
;; emitted as a one-operand "imul": operand 1 uses constraint "a", the
;; result uses "A", and operand 2 ("rm") is the only operand printed.
;; FLAGS_REG is clobbered; both inputs being memory is rejected.
10249 (define_insn "*mul<mode><dwi>3_1"
10250 [(set (match_operand:<DWI> 0 "register_operand" "=A")
10251 (mult:<DWI>
10252 (sign_extend:<DWI>
10253 (match_operand:DWIH 1 "register_operand" "%a"))
10254 (sign_extend:<DWI>
10255 (match_operand:DWIH 2 "nonimmediate_operand" "rm"))))
10256 (clobber (reg:CC FLAGS_REG))]
10257 "!(MEM_P (operands[1]) && MEM_P (operands[2]))"
10258 "imul{<imodesuffix>}\t%2"
10259 [(set_attr "type" "imul")
10260 (set_attr "length_immediate" "0")
10261 (set (attr "athlon_decode")
10262 (if_then_else (eq_attr "cpu" "athlon")
10263 (const_string "vector")
10264 (const_string "double")))
10265 (set_attr "amdfam10_decode" "double")
10266 (set_attr "bdver1_decode" "direct")
10267 (set_attr "mode" "<MODE>")])
10268
;; QImode x QImode -> HImode widening multiply, for each extension code in
;; any_extend.  Operand 1 is tied to the output register ("%0" against
;; "=a"); operand 2 may be a "q" register or memory.  Only enabled under
;; TARGET_QIMODE_MATH, and never with both inputs in memory.  Emitted as
;; the byte form of <sgnprefix>mul on operand 2.
10269 (define_insn "*<u>mulqihi3_1"
10270 [(set (match_operand:HI 0 "register_operand" "=a")
10271 (mult:HI
10272 (any_extend:HI
10273 (match_operand:QI 1 "register_operand" "%0"))
10274 (any_extend:HI
10275 (match_operand:QI 2 "nonimmediate_operand" "qm"))))
10276 (clobber (reg:CC FLAGS_REG))]
10277 "TARGET_QIMODE_MATH
10278 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
10279 "<sgnprefix>mul{b}\t%2"
10280 [(set_attr "type" "imul")
10281 (set_attr "length_immediate" "0")
10282 (set (attr "athlon_decode")
10283 (if_then_else (eq_attr "cpu" "athlon")
10284 (const_string "vector")
10285 (const_string "direct")))
10286 (set_attr "amdfam10_decode" "direct")
10287 (set_attr "bdver1_decode" "direct")
10288 (set_attr "mode" "QI")])
10289
10290 ;; Widening multiplication peephole2s to tweak register allocation.
10291 ;; mov imm,%rdx; mov %rdi,%rax; mulq %rdx -> mov imm,%rax; mulq %rdi
;; Matched sequence:
;;   op0 = op1 (immediate); op2 = op3; op4 = zext (op2) * zext (op0)
;; Replacement:
;;   op2 = op1 (immediate); op4 = zext (op2) * zext (op3)
;; i.e. the immediate is loaded straight into op2 and the register copy
;; disappears.  The condition requires that op3 is not AX_REG, that op0
;; differs from both op2 and op3, and that op0 is either the destination
;; op4, DX_REG, or reported dead by peep2_reg_dead_p (3, ...), since the
;; replacement no longer sets op0.
10292 (define_peephole2
10293 [(set (match_operand:DWIH 0 "general_reg_operand")
10294 (match_operand:DWIH 1 "immediate_operand"))
10295 (set (match_operand:DWIH 2 "general_reg_operand")
10296 (match_operand:DWIH 3 "general_reg_operand"))
10297 (parallel [(set (match_operand:<DWI> 4 "general_reg_operand")
10298 (mult:<DWI> (zero_extend:<DWI> (match_dup 2))
10299 (zero_extend:<DWI> (match_dup 0))))
10300 (clobber (reg:CC FLAGS_REG))])]
10301 "REGNO (operands[3]) != AX_REG
10302 && REGNO (operands[0]) != REGNO (operands[2])
10303 && REGNO (operands[0]) != REGNO (operands[3])
10304 && (REGNO (operands[0]) == REGNO (operands[4])
10305 || REGNO (operands[0]) == DX_REG
10306 || peep2_reg_dead_p (3, operands[0]))"
10307 [(set (match_dup 2) (match_dup 1))
10308 (parallel [(set (match_dup 4)
10309 (mult:<DWI> (zero_extend:<DWI> (match_dup 2))
10310 (zero_extend:<DWI> (match_dup 3))))
10311 (clobber (reg:CC FLAGS_REG))])])
10312
10313 ;; mov imm,%rax; mov %rdi,%rdx; mulx %rax -> mov imm,%rdx; mulx %rdi
;; Matched sequence:
;;   op0 = op1 (immediate); op2 = op3;
;;   { op4 = op2 * op0; op5 = umul_highpart (op2, op0) }
;; Replacement:
;;   op2 = op1 (immediate);
;;   { op4 = op2 * op3; op5 = umul_highpart (op2, op3) }
;; The condition requires that op3 is not DX_REG and that op0 differs from
;; op2 and op3.  In addition, each of op0 and op2 must be one of the two
;; destinations (op4 or op5) or be reported dead by
;; peep2_reg_dead_p (3, ...), because their final values change.
10314 (define_peephole2
10315 [(set (match_operand:DWIH 0 "general_reg_operand")
10316 (match_operand:DWIH 1 "immediate_operand"))
10317 (set (match_operand:DWIH 2 "general_reg_operand")
10318 (match_operand:DWIH 3 "general_reg_operand"))
10319 (parallel [(set (match_operand:DWIH 4 "general_reg_operand")
10320 (mult:DWIH (match_dup 2) (match_dup 0)))
10321 (set (match_operand:DWIH 5 "general_reg_operand")
10322 (umul_highpart:DWIH (match_dup 2) (match_dup 0)))])]
10323 "REGNO (operands[3]) != DX_REG
10324 && REGNO (operands[0]) != REGNO (operands[2])
10325 && REGNO (operands[0]) != REGNO (operands[3])
10326 && (REGNO (operands[0]) == REGNO (operands[4])
10327 || REGNO (operands[0]) == REGNO (operands[5])
10328 || peep2_reg_dead_p (3, operands[0]))
10329 && (REGNO (operands[2]) == REGNO (operands[4])
10330 || REGNO (operands[2]) == REGNO (operands[5])
10331 || peep2_reg_dead_p (3, operands[2]))"
10332 [(set (match_dup 2) (match_dup 1))
10333 (parallel [(set (match_dup 4)
10334 (mult:DWIH (match_dup 2) (match_dup 3)))
10335 (set (match_dup 5)
10336 (umul_highpart:DWIH (match_dup 2) (match_dup 3)))])])
10337
10338 ;; Highpart multiplication patterns
;; Named pattern computing any_mul_highpart of two DWIH operands.  The
;; result is constrained to "d" and operand 1 to "a" (commutative with
;; operand 2 via "%").  Scratch operand 3 is tied to operand 1 ("=1") and
;; clobbered, so the register holding operand 1 does not survive the insn.
;; There is no insn condition.  Emitted as <sgnprefix>mul on operand 2.
10339 (define_insn "<s>mul<mode>3_highpart"
10340 [(set (match_operand:DWIH 0 "register_operand" "=d")
10341 (any_mul_highpart:DWIH
10342 (match_operand:DWIH 1 "register_operand" "%a")
10343 (match_operand:DWIH 2 "nonimmediate_operand" "rm")))
10344 (clobber (match_scratch:DWIH 3 "=1"))
10345 (clobber (reg:CC FLAGS_REG))]
10346 ""
10347 "<sgnprefix>mul{<imodesuffix>}\t%2"
10348 [(set_attr "type" "imul")
10349 (set_attr "length_immediate" "0")
10350 (set (attr "athlon_decode")
10351 (if_then_else (eq_attr "cpu" "athlon")
10352 (const_string "vector")
10353 (const_string "double")))
10354 (set_attr "amdfam10_decode" "double")
10355 (set_attr "bdver1_decode" "direct")
10356 (set_attr "mode" "<MODE>")])
10357
;; SImode any_mul_highpart whose result is zero-extended to DImode.
;; TARGET_64BIT only.  Same register constraints as the plain highpart
;; pattern: result in "d", operand 1 in "a", and an SImode scratch tied to
;; operand 1 that is clobbered.  Emitted as the 32-bit <sgnprefix>mul.
10358 (define_insn "*<s>mulsi3_highpart_zext"
10359 [(set (match_operand:DI 0 "register_operand" "=d")
10360 (zero_extend:DI
10361 (any_mul_highpart:SI
10362 (match_operand:SI 1 "register_operand" "%a")
10363 (match_operand:SI 2 "nonimmediate_operand" "rm"))))
10364 (clobber (match_scratch:SI 3 "=1"))
10365 (clobber (reg:CC FLAGS_REG))]
10366 "TARGET_64BIT"
10367 "<sgnprefix>mul{l}\t%2"
10368 [(set_attr "type" "imul")
10369 (set_attr "length_immediate" "0")
10370 (set (attr "athlon_decode")
10371 (if_then_else (eq_attr "cpu" "athlon")
10372 (const_string "vector")
10373 (const_string "double")))
10374 (set_attr "amdfam10_decode" "double")
10375 (set_attr "bdver1_decode" "direct")
10376 (set_attr "mode" "SI")])
10377
;; DImode high-part multiply written out explicitly: both DImode inputs
;; are extended (any_extend) to TImode, multiplied, shifted right by 64
;; and truncated back to DImode.  TARGET_64BIT only.  Both inputs use the
;; nonimmediate_operand predicate, but the insn condition rejects the case
;; where both are memory, and operand 1 is constrained to "a".  The DImode
;; scratch tied to operand 1 ("=1") is clobbered.
10378 (define_insn "*<s>muldi3_highpart_1"
10379 [(set (match_operand:DI 0 "register_operand" "=d")
10380 (truncate:DI
10381 (lshiftrt:TI
10382 (mult:TI
10383 (any_extend:TI
10384 (match_operand:DI 1 "nonimmediate_operand" "%a"))
10385 (any_extend:TI
10386 (match_operand:DI 2 "nonimmediate_operand" "rm")))
10387 (const_int 64))))
10388 (clobber (match_scratch:DI 3 "=1"))
10389 (clobber (reg:CC FLAGS_REG))]
10390 "TARGET_64BIT
10391 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
10392 "<sgnprefix>mul{q}\t%2"
10393 [(set_attr "type" "imul")
10394 (set_attr "length_immediate" "0")
10395 (set (attr "athlon_decode")
10396 (if_then_else (eq_attr "cpu" "athlon")
10397 (const_string "vector")
10398 (const_string "double")))
10399 (set_attr "amdfam10_decode" "double")
10400 (set_attr "bdver1_decode" "direct")
10401 (set_attr "mode" "DI")])
10402
;; SImode high-part multiply in the explicit extend/mult/shift/truncate
;; form (shift count 32), with the SImode result zero-extended to DImode.
;; TARGET_64BIT only; both inputs may not be memory at once.
;; NOTE(review): this pattern has the same name as an earlier pattern in
;; this file that uses the any_mul_highpart form; the leading "*" suggests
;; the name is not used to generate a gen_* function -- confirm.
10403 (define_insn "*<s>mulsi3_highpart_zext"
10404 [(set (match_operand:DI 0 "register_operand" "=d")
10405 (zero_extend:DI (truncate:SI
10406 (lshiftrt:DI
10407 (mult:DI (any_extend:DI
10408 (match_operand:SI 1 "nonimmediate_operand" "%a"))
10409 (any_extend:DI
10410 (match_operand:SI 2 "nonimmediate_operand" "rm")))
10411 (const_int 32)))))
10412 (clobber (match_scratch:SI 3 "=1"))
10413 (clobber (reg:CC FLAGS_REG))]
10414 "TARGET_64BIT
10415 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
10416 "<sgnprefix>mul{l}\t%2"
10417 [(set_attr "type" "imul")
10418 (set_attr "length_immediate" "0")
10419 (set (attr "athlon_decode")
10420 (if_then_else (eq_attr "cpu" "athlon")
10421 (const_string "vector")
10422 (const_string "double")))
10423 (set_attr "amdfam10_decode" "double")
10424 (set_attr "bdver1_decode" "direct")
10425 (set_attr "mode" "SI")])
10426
;; SImode high-part multiply in the explicit form: inputs extended to
;; DImode, multiplied, shifted right by 32 and truncated to SImode.  There
;; is no TARGET_64BIT requirement here; the only condition is that the two
;; inputs are not both memory.  Result in "d", operand 1 in "a", and the
;; tied scratch (operand 3) is clobbered.
10427 (define_insn "*<s>mulsi3_highpart_1"
10428 [(set (match_operand:SI 0 "register_operand" "=d")
10429 (truncate:SI
10430 (lshiftrt:DI
10431 (mult:DI
10432 (any_extend:DI
10433 (match_operand:SI 1 "nonimmediate_operand" "%a"))
10434 (any_extend:DI
10435 (match_operand:SI 2 "nonimmediate_operand" "rm")))
10436 (const_int 32))))
10437 (clobber (match_scratch:SI 3 "=1"))
10438 (clobber (reg:CC FLAGS_REG))]
10439 "!(MEM_P (operands[1]) && MEM_P (operands[2]))"
10440 "<sgnprefix>mul{l}\t%2"
10441 [(set_attr "type" "imul")
10442 (set_attr "length_immediate" "0")
10443 (set (attr "athlon_decode")
10444 (if_then_else (eq_attr "cpu" "athlon")
10445 (const_string "vector")
10446 (const_string "double")))
10447 (set_attr "amdfam10_decode" "double")
10448 (set_attr "bdver1_decode" "direct")
10449 (set_attr "mode" "SI")])
10450
10451 ;; Highpart multiplication peephole2s to tweak register allocation.
10452 ;; mov imm,%rdx; mov %rdi,%rax; imulq %rdx -> mov imm,%rax; imulq %rdi
;; Matched sequence:
;;   op0 = op1 (immediate); op2 = op3;
;;   { op4 = any_mul_highpart (op2, op0); clobber op2 }
;; Replacement:
;;   op2 = op1 (immediate);
;;   { op4 = any_mul_highpart (op2, op3); clobber op2 }
;; The condition requires that op3 is not AX_REG, that op0 differs from
;; op2 and op3, and that op0 is either the destination op4 or reported
;; dead by peep2_reg_dead_p (3, ...).
10453 (define_peephole2
10454 [(set (match_operand:SWI48 0 "general_reg_operand")
10455 (match_operand:SWI48 1 "immediate_operand"))
10456 (set (match_operand:SWI48 2 "general_reg_operand")
10457 (match_operand:SWI48 3 "general_reg_operand"))
10458 (parallel [(set (match_operand:SWI48 4 "general_reg_operand")
10459 (any_mul_highpart:SWI48 (match_dup 2) (match_dup 0)))
10460 (clobber (match_dup 2))
10461 (clobber (reg:CC FLAGS_REG))])]
10462 "REGNO (operands[3]) != AX_REG
10463 && REGNO (operands[0]) != REGNO (operands[2])
10464 && REGNO (operands[0]) != REGNO (operands[3])
10465 && (REGNO (operands[0]) == REGNO (operands[4])
10466 || peep2_reg_dead_p (3, operands[0]))"
10467 [(set (match_dup 2) (match_dup 1))
10468 (parallel [(set (match_dup 4)
10469 (any_mul_highpart:SWI48 (match_dup 2) (match_dup 3)))
10470 (clobber (match_dup 2))
10471 (clobber (reg:CC FLAGS_REG))])])
10472
;; SImode variant of the highpart peephole in which the high-part result
;; is zero-extended into a DImode destination (op4).  TARGET_64BIT only.
;; The rewrite is the same: the immediate is loaded into op2 and op3
;; becomes the second multiplicand, removing the op2 = op3 copy.  Compared
;; with the SWI48 form, the condition additionally requires op2 and op3 to
;; be different registers.
10473 (define_peephole2
10474 [(set (match_operand:SI 0 "general_reg_operand")
10475 (match_operand:SI 1 "immediate_operand"))
10476 (set (match_operand:SI 2 "general_reg_operand")
10477 (match_operand:SI 3 "general_reg_operand"))
10478 (parallel [(set (match_operand:DI 4 "general_reg_operand")
10479 (zero_extend:DI
10480 (any_mul_highpart:SI (match_dup 2) (match_dup 0))))
10481 (clobber (match_dup 2))
10482 (clobber (reg:CC FLAGS_REG))])]
10483 "TARGET_64BIT
10484 && REGNO (operands[3]) != AX_REG
10485 && REGNO (operands[0]) != REGNO (operands[2])
10486 && REGNO (operands[2]) != REGNO (operands[3])
10487 && REGNO (operands[0]) != REGNO (operands[3])
10488 && (REGNO (operands[0]) == REGNO (operands[4])
10489 || peep2_reg_dead_p (3, operands[0]))"
10490 [(set (match_dup 2) (match_dup 1))
10491 (parallel [(set (match_dup 4)
10492 (zero_extend:DI
10493 (any_mul_highpart:SI (match_dup 2) (match_dup 3))))
10494 (clobber (match_dup 2))
10495 (clobber (reg:CC FLAGS_REG))])])
10496
10497 ;; The patterns that match these are at the end of this file.
10498
;; XFmode multiply expander; all three operands must be registers.
;; Available when TARGET_80387 holds.  There is no preparation code, so
;; the RTL template is emitted as written.
10499 (define_expand "mulxf3"
10500 [(set (match_operand:XF 0 "register_operand")
10501 (mult:XF (match_operand:XF 1 "register_operand")
10502 (match_operand:XF 2 "register_operand")))]
10503 "TARGET_80387")
10504
;; HFmode multiply expander, available under TARGET_AVX512FP16.  Operand 2
;; may be a register or memory.
10505 (define_expand "mulhf3"
10506 [(set (match_operand:HF 0 "register_operand")
10507 (mult:HF (match_operand:HF 1 "register_operand")
10508 (match_operand:HF 2 "nonimmediate_operand")))]
10509 "TARGET_AVX512FP16")
10510
;; Multiply expander for the MODEF modes.  Enabled either when x87
;; arithmetic is enabled for the mode (TARGET_80387 with
;; X87_ENABLE_ARITH) or when the mode is an SSE float mode and
;; TARGET_SSE_MATH holds.
10511 (define_expand "mul<mode>3"
10512 [(set (match_operand:MODEF 0 "register_operand")
10513 (mult:MODEF (match_operand:MODEF 1 "register_operand")
10514 (match_operand:MODEF 2 "nonimmediate_operand")))]
10515 "(TARGET_80387 && X87_ENABLE_ARITH (<MODE>mode))
10516 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)")
10517 \f
10518 ;; Divide instructions
10519
10520 ;; The patterns that match these are at the end of this file.
10521
;; XFmode divide expander; all three operands must be registers.
;; Available when TARGET_80387 holds.
10522 (define_expand "divxf3"
10523 [(set (match_operand:XF 0 "register_operand")
10524 (div:XF (match_operand:XF 1 "register_operand")
10525 (match_operand:XF 2 "register_operand")))]
10526 "TARGET_80387")
10527
10528 /* There is no more precision loss than Newton-Raphson approximation
10529 when using HFmode rcp/rsqrt, so do the transformation directly under
10530 TARGET_RECIP_DIV and fast-math. */
;; HFmode divide expander, available under TARGET_AVX512FP16.
;; When TARGET_RECIP_DIV is set, the insn is optimized for speed, and
;; finite-math-only, non-trapping math and unsafe-math-optimizations are
;; all in effect, the division is expanded as operand 1 multiplied by the
;; result of gen_rcphf2 on operand 2 (forced into a register first), and
;; the expander finishes with DONE.  Otherwise the preparation code does
;; nothing and the div:HF template is emitted.
10531 (define_expand "divhf3"
10532 [(set (match_operand:HF 0 "register_operand")
10533 (div:HF (match_operand:HF 1 "register_operand")
10534 (match_operand:HF 2 "nonimmediate_operand")))]
10535 "TARGET_AVX512FP16"
10536 {
10537 if (TARGET_RECIP_DIV
10538 && optimize_insn_for_speed_p ()
10539 && flag_finite_math_only && !flag_trapping_math
10540 && flag_unsafe_math_optimizations)
10541 {
10542 rtx op = gen_reg_rtx (HFmode);
10543 operands[2] = force_reg (HFmode, operands[2]);
10544 emit_insn (gen_rcphf2 (op, operands[2]));
10545 emit_insn (gen_mulhf3 (operands[0], operands[1], op));
10546 DONE;
10547 }
10548 })
10549
;; Divide expander for the MODEF modes, enabled under the same x87 / SSE
;; math conditions as the MODEF multiply expander.
;; For SFmode only, when SSE math and TARGET_RECIP_DIV are enabled, the
;; insn is optimized for speed, and finite-math-only, non-trapping math
;; and unsafe-math-optimizations are all in effect, the expansion is
;; delegated to ix86_emit_swdivsf and finishes with DONE.  In every other
;; case the div template is emitted unchanged.
10550 (define_expand "div<mode>3"
10551 [(set (match_operand:MODEF 0 "register_operand")
10552 (div:MODEF (match_operand:MODEF 1 "register_operand")
10553 (match_operand:MODEF 2 "nonimmediate_operand")))]
10554 "(TARGET_80387 && X87_ENABLE_ARITH (<MODE>mode))
10555 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
10556 {
10557 if (<MODE>mode == SFmode
10558 && TARGET_SSE && TARGET_SSE_MATH
10559 && TARGET_RECIP_DIV
10560 && optimize_insn_for_speed_p ()
10561 && flag_finite_math_only && !flag_trapping_math
10562 && flag_unsafe_math_optimizations)
10563 {
10564 ix86_emit_swdivsf (operands[0], operands[1],
10565 operands[2], SFmode);
10566 DONE;
10567 }
10568 })
10569 \f
10570 ;; Divmod instructions.
10571
;; any_div iterates over the signed and unsigned division codes;
;; paired_mod maps each of them to the matching remainder code, so one
;; pattern can describe both the div/mod and the udiv/umod pair.
10572 (define_code_iterator any_div [div udiv])
10573 (define_code_attr paired_mod [(div "mod") (udiv "umod")])
10574
;; Combined quotient/remainder expander for the SWIM248 modes.
;; Operand 0 receives the quotient of operand 1 by operand 2 and operand 3
;; the remainder, in a single parallel that also clobbers FLAGS_REG.
;; There is no condition and no preparation code.
10575 (define_expand "<u>divmod<mode>4"
10576 [(parallel [(set (match_operand:SWIM248 0 "register_operand")
10577 (any_div:SWIM248
10578 (match_operand:SWIM248 1 "register_operand")
10579 (match_operand:SWIM248 2 "nonimmediate_operand")))
10580 (set (match_operand:SWIM248 3 "register_operand")
10581 (<paired_mod>:SWIM248 (match_dup 1) (match_dup 2)))
10582 (clobber (reg:CC FLAGS_REG))])])
10583
10584 ;; Split with 8bit unsigned divide:
10585 ;; if (dividend and divisor are in [0-255])
10586 ;; use 8bit unsigned integer divide
10587 ;; else
10588 ;; use original integer divide
;; Operand roles: 0 = quotient, 1 = remainder, 2 = dividend, 3 = divisor.
;; The split needs new pseudos (can_create_pseudo_p) and is gated on
;; TARGET_USE_8BIT_IDIV, TARGET_QIMODE_MATH and not optimizing the insn
;; for size.  All code generation is done by ix86_split_idivmod, which
;; receives the mode, the operand array and <u_bool>; the (const_int 0)
;; template is a placeholder because the preparation code ends in DONE.
10589 (define_split
10590 [(set (match_operand:SWI48 0 "register_operand")
10591 (any_div:SWI48 (match_operand:SWI48 2 "register_operand")
10592 (match_operand:SWI48 3 "nonimmediate_operand")))
10593 (set (match_operand:SWI48 1 "register_operand")
10594 (<paired_mod>:SWI48 (match_dup 2) (match_dup 3)))
10595 (clobber (reg:CC FLAGS_REG))]
10596 "TARGET_USE_8BIT_IDIV
10597 && TARGET_QIMODE_MATH
10598 && can_create_pseudo_p ()
10599 && !optimize_insn_for_size_p ()"
10600 [(const_int 0)]
10601 "ix86_split_idivmod (<MODE>mode, operands, <u_bool>); DONE;")
10602
;; 8-bit divide split for the SImode form whose quotient (operand 0) is
;; zero-extended to DImode; the remainder (operand 1) stays SImode.
;; TARGET_64BIT only, otherwise gated like the SWI48 split.  Delegates to
;; ix86_split_idivmod with SImode and finishes with DONE.
10603 (define_split
10604 [(set (match_operand:DI 0 "register_operand")
10605 (zero_extend:DI
10606 (any_div:SI (match_operand:SI 2 "register_operand")
10607 (match_operand:SI 3 "nonimmediate_operand"))))
10608 (set (match_operand:SI 1 "register_operand")
10609 (<paired_mod>:SI (match_dup 2) (match_dup 3)))
10610 (clobber (reg:CC FLAGS_REG))]
10611 "TARGET_64BIT
10612 && TARGET_USE_8BIT_IDIV
10613 && TARGET_QIMODE_MATH
10614 && can_create_pseudo_p ()
10615 && !optimize_insn_for_size_p ()"
10616 [(const_int 0)]
10617 "ix86_split_idivmod (SImode, operands, <u_bool>); DONE;")
10618
;; 8-bit divide split for the SImode form whose remainder (operand 1) is
;; zero-extended to DImode; the quotient (operand 0) stays SImode.  The
;; remainder set comes first in this pattern.  TARGET_64BIT only,
;; otherwise gated like the SWI48 split.  Delegates to ix86_split_idivmod
;; with SImode and finishes with DONE.
10619 (define_split
10620 [(set (match_operand:DI 1 "register_operand")
10621 (zero_extend:DI
10622 (<paired_mod>:SI (match_operand:SI 2 "register_operand")
10623 (match_operand:SI 3 "nonimmediate_operand"))))
10624 (set (match_operand:SI 0 "register_operand")
10625 (any_div:SI (match_dup 2) (match_dup 3)))
10626 (clobber (reg:CC FLAGS_REG))]
10627 "TARGET_64BIT
10628 && TARGET_USE_8BIT_IDIV
10629 && TARGET_QIMODE_MATH
10630 && can_create_pseudo_p ()
10631 && !optimize_insn_for_size_p ()"
10632 [(const_int 0)]
10633 "ix86_split_idivmod (SImode, operands, <u_bool>); DONE;")
10634
;; Signed div/mod for SWI48 carrying an UNSPEC_DIV_ALREADY_SPLIT element.
;; Because of that extra element, the define_splits in this file that
;; lack it should not match this form (presumably this prevents the 8-bit
;; divide split from being applied twice -- confirm).
;; Quotient: operand 0 ("=a", tied to dividend operand 2).  Remainder:
;; operand 1 ("=&d", earlyclobber).  Output is "#", so the insn is always
;; split.  After reload it becomes:
;;   1. operand 1 = operand 4 >> (bitsize - 1), arithmetic shift;
;;   2. the div/mod parallel with (use operand 1).
;; Operand 4 is operand 2 itself when optimizing the function for size or
;; when TARGET_USE_CLTD is set; otherwise operand 2 is first copied into
;; operand 1 and the shift is done in place there.
10635 (define_insn_and_split "divmod<mode>4_1"
10636 [(set (match_operand:SWI48 0 "register_operand" "=a")
10637 (div:SWI48 (match_operand:SWI48 2 "register_operand" "0")
10638 (match_operand:SWI48 3 "nonimmediate_operand" "rm")))
10639 (set (match_operand:SWI48 1 "register_operand" "=&d")
10640 (mod:SWI48 (match_dup 2) (match_dup 3)))
10641 (unspec [(const_int 0)] UNSPEC_DIV_ALREADY_SPLIT)
10642 (clobber (reg:CC FLAGS_REG))]
10643 ""
10644 "#"
10645 "reload_completed"
10646 [(parallel [(set (match_dup 1)
10647 (ashiftrt:SWI48 (match_dup 4) (match_dup 5)))
10648 (clobber (reg:CC FLAGS_REG))])
10649 (parallel [(set (match_dup 0)
10650 (div:SWI48 (match_dup 2) (match_dup 3)))
10651 (set (match_dup 1)
10652 (mod:SWI48 (match_dup 2) (match_dup 3)))
10653 (use (match_dup 1))
10654 (clobber (reg:CC FLAGS_REG))])]
10655 {
10656 operands[5] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode)-1);
10657
10658 if (optimize_function_for_size_p (cfun) || TARGET_USE_CLTD)
10659 operands[4] = operands[2];
10660 else
10661 {
10662 /* Avoid use of cltd in favor of a mov+shift. */
10663 emit_move_insn (operands[1], operands[2]);
10664 operands[4] = operands[1];
10665 }
10666 }
10667 [(set_attr "type" "multi")
10668 (set_attr "mode" "<MODE>")])
10669
;; Unsigned counterpart of the UNSPEC_DIV_ALREADY_SPLIT div/mod pattern
;; for SWI48.  After reload it splits into a load of zero into operand 1
;; (the "=&d" remainder register) followed by the udiv/umod parallel with
;; (use operand 1).  No preparation code is needed.
10670 (define_insn_and_split "udivmod<mode>4_1"
10671 [(set (match_operand:SWI48 0 "register_operand" "=a")
10672 (udiv:SWI48 (match_operand:SWI48 2 "register_operand" "0")
10673 (match_operand:SWI48 3 "nonimmediate_operand" "rm")))
10674 (set (match_operand:SWI48 1 "register_operand" "=&d")
10675 (umod:SWI48 (match_dup 2) (match_dup 3)))
10676 (unspec [(const_int 0)] UNSPEC_DIV_ALREADY_SPLIT)
10677 (clobber (reg:CC FLAGS_REG))]
10678 ""
10679 "#"
10680 "reload_completed"
10681 [(set (match_dup 1) (const_int 0))
10682 (parallel [(set (match_dup 0)
10683 (udiv:SWI48 (match_dup 2) (match_dup 3)))
10684 (set (match_dup 1)
10685 (umod:SWI48 (match_dup 2) (match_dup 3)))
10686 (use (match_dup 1))
10687 (clobber (reg:CC FLAGS_REG))])]
10688 ""
10689 [(set_attr "type" "multi")
10690 (set_attr "mode" "<MODE>")])
10691
;; Signed SImode div/mod with the quotient zero-extended to DImode
;; (operand 0) and an SImode remainder (operand 1), carrying the
;; UNSPEC_DIV_ALREADY_SPLIT element.  TARGET_64BIT only.
;; After reload it splits into an arithmetic right shift by 31 that writes
;; operand 1, followed by the zero-extending div/mod parallel with
;; (use operand 1).  The shift source is operand 2 when optimizing the
;; function for size or TARGET_USE_CLTD is set; otherwise operand 2 is
;; copied into operand 1 first and shifted in place.
10692 (define_insn_and_split "divmodsi4_zext_1"
10693 [(set (match_operand:DI 0 "register_operand" "=a")
10694 (zero_extend:DI
10695 (div:SI (match_operand:SI 2 "register_operand" "0")
10696 (match_operand:SI 3 "nonimmediate_operand" "rm"))))
10697 (set (match_operand:SI 1 "register_operand" "=&d")
10698 (mod:SI (match_dup 2) (match_dup 3)))
10699 (unspec [(const_int 0)] UNSPEC_DIV_ALREADY_SPLIT)
10700 (clobber (reg:CC FLAGS_REG))]
10701 "TARGET_64BIT"
10702 "#"
10703 "&& reload_completed"
10704 [(parallel [(set (match_dup 1)
10705 (ashiftrt:SI (match_dup 4) (match_dup 5)))
10706 (clobber (reg:CC FLAGS_REG))])
10707 (parallel [(set (match_dup 0)
10708 (zero_extend:DI (div:SI (match_dup 2) (match_dup 3))))
10709 (set (match_dup 1)
10710 (mod:SI (match_dup 2) (match_dup 3)))
10711 (use (match_dup 1))
10712 (clobber (reg:CC FLAGS_REG))])]
10713 {
10714 operands[5] = GEN_INT (GET_MODE_BITSIZE (SImode)-1);
10715
10716 if (optimize_function_for_size_p (cfun) || TARGET_USE_CLTD)
10717 operands[4] = operands[2];
10718 else
10719 {
10720 /* Avoid use of cltd in favor of a mov+shift. */
10721 emit_move_insn (operands[1], operands[2]);
10722 operands[4] = operands[1];
10723 }
10724 }
10725 [(set_attr "type" "multi")
10726 (set_attr "mode" "SI")])
10727
;; Unsigned SImode div/mod with the quotient zero-extended to DImode,
;; carrying the UNSPEC_DIV_ALREADY_SPLIT element.  TARGET_64BIT only.
;; After reload it splits into a load of zero into operand 1 followed by
;; the zero-extending udiv/umod parallel with (use operand 1).
10728 (define_insn_and_split "udivmodsi4_zext_1"
10729 [(set (match_operand:DI 0 "register_operand" "=a")
10730 (zero_extend:DI
10731 (udiv:SI (match_operand:SI 2 "register_operand" "0")
10732 (match_operand:SI 3 "nonimmediate_operand" "rm"))))
10733 (set (match_operand:SI 1 "register_operand" "=&d")
10734 (umod:SI (match_dup 2) (match_dup 3)))
10735 (unspec [(const_int 0)] UNSPEC_DIV_ALREADY_SPLIT)
10736 (clobber (reg:CC FLAGS_REG))]
10737 "TARGET_64BIT"
10738 "#"
10739 "&& reload_completed"
10740 [(set (match_dup 1) (const_int 0))
10741 (parallel [(set (match_dup 0)
10742 (zero_extend:DI (udiv:SI (match_dup 2) (match_dup 3))))
10743 (set (match_dup 1)
10744 (umod:SI (match_dup 2) (match_dup 3)))
10745 (use (match_dup 1))
10746 (clobber (reg:CC FLAGS_REG))])]
10747 ""
10748 [(set_attr "type" "multi")
10749 (set_attr "mode" "SI")])
10750
;; Signed SImode div/mod with the remainder zero-extended to DImode
;; (operand 1, "=&d") and an SImode quotient (operand 0, "=a"), carrying
;; the UNSPEC_DIV_ALREADY_SPLIT element.  TARGET_64BIT only.
;; Operand 1 is DImode here, so the split builds operand 6 as its SImode
;; low part and uses that for the sign-fill shift and the (use ...).
;; The shift source (operand 4) is operand 2 when optimizing the function
;; for size or TARGET_USE_CLTD is set; otherwise operand 2 is copied into
;; operand 6 first and shifted in place.
10751 (define_insn_and_split "divmodsi4_zext_2"
10752 [(set (match_operand:DI 1 "register_operand" "=&d")
10753 (zero_extend:DI
10754 (mod:SI (match_operand:SI 2 "register_operand" "0")
10755 (match_operand:SI 3 "nonimmediate_operand" "rm"))))
10756 (set (match_operand:SI 0 "register_operand" "=a")
10757 (div:SI (match_dup 2) (match_dup 3)))
10758 (unspec [(const_int 0)] UNSPEC_DIV_ALREADY_SPLIT)
10759 (clobber (reg:CC FLAGS_REG))]
10760 "TARGET_64BIT"
10761 "#"
10762 "&& reload_completed"
10763 [(parallel [(set (match_dup 6)
10764 (ashiftrt:SI (match_dup 4) (match_dup 5)))
10765 (clobber (reg:CC FLAGS_REG))])
10766 (parallel [(set (match_dup 1)
10767 (zero_extend:DI (mod:SI (match_dup 2) (match_dup 3))))
10768 (set (match_dup 0)
10769 (div:SI (match_dup 2) (match_dup 3)))
10770 (use (match_dup 6))
10771 (clobber (reg:CC FLAGS_REG))])]
10772 {
10773 operands[5] = GEN_INT (GET_MODE_BITSIZE (SImode)-1);
10774 operands[6] = gen_lowpart (SImode, operands[1]);
10775
10776 if (optimize_function_for_size_p (cfun) || TARGET_USE_CLTD)
10777 operands[4] = operands[2];
10778 else
10779 {
10780 /* Avoid use of cltd in favor of a mov+shift. */
10781 emit_move_insn (operands[6], operands[2]);
10782 operands[4] = operands[6];
10783 }
10784 }
10785 [(set_attr "type" "multi")
10786 (set_attr "mode" "SI")])
10787
;; Unsigned SImode div/mod with the remainder zero-extended to DImode
;; (operand 1), carrying the UNSPEC_DIV_ALREADY_SPLIT element.
;; TARGET_64BIT only.  The split sets operand 4, built as the SImode low
;; part of operand 1, to zero and then emits the udiv/umod parallel with
;; (use operand 4).
10788 (define_insn_and_split "udivmodsi4_zext_2"
10789 [(set (match_operand:DI 1 "register_operand" "=&d")
10790 (zero_extend:DI
10791 (umod:SI (match_operand:SI 2 "register_operand" "0")
10792 (match_operand:SI 3 "nonimmediate_operand" "rm"))))
10793 (set (match_operand:SI 0 "register_operand" "=a")
10794 (udiv:SI (match_dup 2) (match_dup 3)))
10795 (unspec [(const_int 0)] UNSPEC_DIV_ALREADY_SPLIT)
10796 (clobber (reg:CC FLAGS_REG))]
10797 "TARGET_64BIT"
10798 "#"
10799 "&& reload_completed"
10800 [(set (match_dup 4) (const_int 0))
10801 (parallel [(set (match_dup 1)
10802 (zero_extend:DI (umod:SI (match_dup 2) (match_dup 3))))
10803 (set (match_dup 0)
10804 (udiv:SI (match_dup 2) (match_dup 3)))
10805 (use (match_dup 4))
10806 (clobber (reg:CC FLAGS_REG))])]
10807 "operands[4] = gen_lowpart (SImode, operands[1]);"
10808 [(set_attr "type" "multi")
10809 (set_attr "mode" "SI")])
10810
;; Signed div/mod for the SWIM248 modes, without the
;; UNSPEC_DIV_ALREADY_SPLIT element.  Always output as "#" and split after
;; reload into an arithmetic right shift by bitsize-1 that writes operand
;; 1, followed by the div/mod parallel with (use operand 1).
;; The shift reads operand 2 directly only when the mode is not HImode and
;; either the function is optimized for size or TARGET_USE_CLTD is set.
;; In every other case, which includes HImode always, operand 2 is first
;; copied into operand 1 and shifted in place.
10811 (define_insn_and_split "*divmod<mode>4"
10812 [(set (match_operand:SWIM248 0 "register_operand" "=a")
10813 (div:SWIM248 (match_operand:SWIM248 2 "register_operand" "0")
10814 (match_operand:SWIM248 3 "nonimmediate_operand" "rm")))
10815 (set (match_operand:SWIM248 1 "register_operand" "=&d")
10816 (mod:SWIM248 (match_dup 2) (match_dup 3)))
10817 (clobber (reg:CC FLAGS_REG))]
10818 ""
10819 "#"
10820 "reload_completed"
10821 [(parallel [(set (match_dup 1)
10822 (ashiftrt:SWIM248 (match_dup 4) (match_dup 5)))
10823 (clobber (reg:CC FLAGS_REG))])
10824 (parallel [(set (match_dup 0)
10825 (div:SWIM248 (match_dup 2) (match_dup 3)))
10826 (set (match_dup 1)
10827 (mod:SWIM248 (match_dup 2) (match_dup 3)))
10828 (use (match_dup 1))
10829 (clobber (reg:CC FLAGS_REG))])]
10830 {
10831 operands[5] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode)-1);
10832
10833 if (<MODE>mode != HImode
10834 && (optimize_function_for_size_p (cfun) || TARGET_USE_CLTD))
10835 operands[4] = operands[2];
10836 else
10837 {
10838 /* Avoid use of cltd in favor of a mov+shift. */
10839 emit_move_insn (operands[1], operands[2]);
10840 operands[4] = operands[1];
10841 }
10842 }
10843 [(set_attr "type" "multi")
10844 (set_attr "mode" "<MODE>")])
10845
;; Unsigned div/mod for the SWIM248 modes.  Always output as "#" and split
;; after reload into a load of zero into operand 1 (the "=&d" remainder
;; register) followed by the udiv/umod parallel with (use operand 1).
10846 (define_insn_and_split "*udivmod<mode>4"
10847 [(set (match_operand:SWIM248 0 "register_operand" "=a")
10848 (udiv:SWIM248 (match_operand:SWIM248 2 "register_operand" "0")
10849 (match_operand:SWIM248 3 "nonimmediate_operand" "rm")))
10850 (set (match_operand:SWIM248 1 "register_operand" "=&d")
10851 (umod:SWIM248 (match_dup 2) (match_dup 3)))
10852 (clobber (reg:CC FLAGS_REG))]
10853 ""
10854 "#"
10855 "reload_completed"
10856 [(set (match_dup 1) (const_int 0))
10857 (parallel [(set (match_dup 0)
10858 (udiv:SWIM248 (match_dup 2) (match_dup 3)))
10859 (set (match_dup 1)
10860 (umod:SWIM248 (match_dup 2) (match_dup 3)))
10861 (use (match_dup 1))
10862 (clobber (reg:CC FLAGS_REG))])]
10863 ""
10864 [(set_attr "type" "multi")
10865 (set_attr "mode" "<MODE>")])
10866
10867 ;; Optimize division or modulo by constant power of 2, if the constant
10868 ;; materializes only after expansion.
;; Unsigned div/mod by a constant (operand 3) whose exact_log2 is in the
;; range 1..31.  Operand 0 (quotient) is tied to the dividend operand 2.
;; After reload it splits into three insns, with v = exact_log2 of the
;; constant:
;;   operand 1 = operand 2               (copy the dividend)
;;   operand 0 = operand 2 >> v          (logical shift: quotient)
;;   operand 1 = operand 1 & (2^v - 1)   (mask: remainder)
10869 (define_insn_and_split "*udivmod<mode>4_pow2"
10870 [(set (match_operand:SWI48 0 "register_operand" "=r")
10871 (udiv:SWI48 (match_operand:SWI48 2 "register_operand" "0")
10872 (match_operand:SWI48 3 "const_int_operand")))
10873 (set (match_operand:SWI48 1 "register_operand" "=r")
10874 (umod:SWI48 (match_dup 2) (match_dup 3)))
10875 (clobber (reg:CC FLAGS_REG))]
10876 "IN_RANGE (exact_log2 (UINTVAL (operands[3])), 1, 31)"
10877 "#"
10878 "&& reload_completed"
10879 [(set (match_dup 1) (match_dup 2))
10880 (parallel [(set (match_dup 0) (lshiftrt:<MODE> (match_dup 2) (match_dup 4)))
10881 (clobber (reg:CC FLAGS_REG))])
10882 (parallel [(set (match_dup 1) (and:<MODE> (match_dup 1) (match_dup 5)))
10883 (clobber (reg:CC FLAGS_REG))])]
10884 {
10885 int v = exact_log2 (UINTVAL (operands[3]));
10886 operands[4] = GEN_INT (v);
10887 operands[5] = GEN_INT ((HOST_WIDE_INT_1U << v) - 1);
10888 }
10889 [(set_attr "type" "multi")
10890 (set_attr "mode" "<MODE>")])
10891
;; Signed SImode div/mod with the quotient zero-extended to DImode, in the
;; form without the UNSPEC_DIV_ALREADY_SPLIT element.  TARGET_64BIT only.
;; After reload it splits into an arithmetic right shift by 31 that writes
;; operand 1, followed by the zero-extending div/mod parallel with
;; (use operand 1).  The shift source is operand 2 when optimizing the
;; function for size or TARGET_USE_CLTD is set; otherwise operand 2 is
;; copied into operand 1 first and shifted in place.
10892 (define_insn_and_split "*divmodsi4_zext_1"
10893 [(set (match_operand:DI 0 "register_operand" "=a")
10894 (zero_extend:DI
10895 (div:SI (match_operand:SI 2 "register_operand" "0")
10896 (match_operand:SI 3 "nonimmediate_operand" "rm"))))
10897 (set (match_operand:SI 1 "register_operand" "=&d")
10898 (mod:SI (match_dup 2) (match_dup 3)))
10899 (clobber (reg:CC FLAGS_REG))]
10900 "TARGET_64BIT"
10901 "#"
10902 "&& reload_completed"
10903 [(parallel [(set (match_dup 1)
10904 (ashiftrt:SI (match_dup 4) (match_dup 5)))
10905 (clobber (reg:CC FLAGS_REG))])
10906 (parallel [(set (match_dup 0)
10907 (zero_extend:DI (div:SI (match_dup 2) (match_dup 3))))
10908 (set (match_dup 1)
10909 (mod:SI (match_dup 2) (match_dup 3)))
10910 (use (match_dup 1))
10911 (clobber (reg:CC FLAGS_REG))])]
10912 {
10913 operands[5] = GEN_INT (GET_MODE_BITSIZE (SImode)-1);
10914
10915 if (optimize_function_for_size_p (cfun) || TARGET_USE_CLTD)
10916 operands[4] = operands[2];
10917 else
10918 {
10919 /* Avoid use of cltd in favor of a mov+shift. */
10920 emit_move_insn (operands[1], operands[2]);
10921 operands[4] = operands[1];
10922 }
10923 }
10924 [(set_attr "type" "multi")
10925 (set_attr "mode" "SI")])
10926
;; Unsigned SImode div/mod with the quotient zero-extended to DImode, in
;; the form without the UNSPEC_DIV_ALREADY_SPLIT element.  TARGET_64BIT
;; only.  After reload it splits into a load of zero into operand 1
;; followed by the zero-extending udiv/umod parallel with (use operand 1).
10927 (define_insn_and_split "*udivmodsi4_zext_1"
10928 [(set (match_operand:DI 0 "register_operand" "=a")
10929 (zero_extend:DI
10930 (udiv:SI (match_operand:SI 2 "register_operand" "0")
10931 (match_operand:SI 3 "nonimmediate_operand" "rm"))))
10932 (set (match_operand:SI 1 "register_operand" "=&d")
10933 (umod:SI (match_dup 2) (match_dup 3)))
10934 (clobber (reg:CC FLAGS_REG))]
10935 "TARGET_64BIT"
10936 "#"
10937 "&& reload_completed"
10938 [(set (match_dup 1) (const_int 0))
10939 (parallel [(set (match_dup 0)
10940 (zero_extend:DI (udiv:SI (match_dup 2) (match_dup 3))))
10941 (set (match_dup 1)
10942 (umod:SI (match_dup 2) (match_dup 3)))
10943 (use (match_dup 1))
10944 (clobber (reg:CC FLAGS_REG))])]
10945 ""
10946 [(set_attr "type" "multi")
10947 (set_attr "mode" "SI")])
10948
;; Unsigned SImode div/mod by a constant whose exact_log2 is in 1..31,
;; with the quotient zero-extended to DImode (operand 0).  TARGET_64BIT
;; only.  After reload, with v = exact_log2 of the constant:
;;   operand 1 = operand 2                      (copy the dividend)
;;   operand 0 = zero_extend (operand 2 >> v)   (quotient)
;;   operand 1 = operand 1 & (2^v - 1)          (remainder)
10949 (define_insn_and_split "*udivmodsi4_pow2_zext_1"
10950 [(set (match_operand:DI 0 "register_operand" "=r")
10951 (zero_extend:DI
10952 (udiv:SI (match_operand:SI 2 "register_operand" "0")
10953 (match_operand:SI 3 "const_int_operand"))))
10954 (set (match_operand:SI 1 "register_operand" "=r")
10955 (umod:SI (match_dup 2) (match_dup 3)))
10956 (clobber (reg:CC FLAGS_REG))]
10957 "TARGET_64BIT
10958 && IN_RANGE (exact_log2 (UINTVAL (operands[3])), 1, 31)"
10959 "#"
10960 "&& reload_completed"
10961 [(set (match_dup 1) (match_dup 2))
10962 (parallel [(set (match_dup 0)
10963 (zero_extend:DI (lshiftrt:SI (match_dup 2) (match_dup 4))))
10964 (clobber (reg:CC FLAGS_REG))])
10965 (parallel [(set (match_dup 1) (and:SI (match_dup 1) (match_dup 5)))
10966 (clobber (reg:CC FLAGS_REG))])]
10967 {
10968 int v = exact_log2 (UINTVAL (operands[3]));
10969 operands[4] = GEN_INT (v);
10970 operands[5] = GEN_INT ((HOST_WIDE_INT_1U << v) - 1);
10971 }
10972 [(set_attr "type" "multi")
10973 (set_attr "mode" "SI")])
10974
;; Signed SImode div/mod with the remainder zero-extended to DImode
;; (operand 1), in the form without the UNSPEC_DIV_ALREADY_SPLIT element.
;; TARGET_64BIT only.  Operand 6 is built as the SImode low part of
;; operand 1 and is used for the sign-fill shift and the (use ...).
;; The shift source (operand 4) is operand 2 when optimizing the function
;; for size or TARGET_USE_CLTD is set; otherwise operand 2 is copied into
;; operand 6 first and shifted in place.
10975 (define_insn_and_split "*divmodsi4_zext_2"
10976 [(set (match_operand:DI 1 "register_operand" "=&d")
10977 (zero_extend:DI
10978 (mod:SI (match_operand:SI 2 "register_operand" "0")
10979 (match_operand:SI 3 "nonimmediate_operand" "rm"))))
10980 (set (match_operand:SI 0 "register_operand" "=a")
10981 (div:SI (match_dup 2) (match_dup 3)))
10982 (clobber (reg:CC FLAGS_REG))]
10983 "TARGET_64BIT"
10984 "#"
10985 "&& reload_completed"
10986 [(parallel [(set (match_dup 6)
10987 (ashiftrt:SI (match_dup 4) (match_dup 5)))
10988 (clobber (reg:CC FLAGS_REG))])
10989 (parallel [(set (match_dup 1)
10990 (zero_extend:DI (mod:SI (match_dup 2) (match_dup 3))))
10991 (set (match_dup 0)
10992 (div:SI (match_dup 2) (match_dup 3)))
10993 (use (match_dup 6))
10994 (clobber (reg:CC FLAGS_REG))])]
10995 {
10996 operands[5] = GEN_INT (GET_MODE_BITSIZE (SImode)-1);
10997 operands[6] = gen_lowpart (SImode, operands[1]);
10998
10999 if (optimize_function_for_size_p (cfun) || TARGET_USE_CLTD)
11000 operands[4] = operands[2];
11001 else
11002 {
11003 /* Avoid use of cltd in favor of a mov+shift. */
11004 emit_move_insn (operands[6], operands[2]);
11005 operands[4] = operands[6];
11006 }
11007 }
11008 [(set_attr "type" "multi")
11009 (set_attr "mode" "SI")])
11010
;; Unsigned SImode div/mod with the remainder zero-extended to DImode
;; (operand 1), in the form without the UNSPEC_DIV_ALREADY_SPLIT element.
;; TARGET_64BIT only.  The split sets operand 4, built as the SImode low
;; part of operand 1, to zero and then emits the udiv/umod parallel with
;; (use operand 4).
11011 (define_insn_and_split "*udivmodsi4_zext_2"
11012 [(set (match_operand:DI 1 "register_operand" "=&d")
11013 (zero_extend:DI
11014 (umod:SI (match_operand:SI 2 "register_operand" "0")
11015 (match_operand:SI 3 "nonimmediate_operand" "rm"))))
11016 (set (match_operand:SI 0 "register_operand" "=a")
11017 (udiv:SI (match_dup 2) (match_dup 3)))
11018 (clobber (reg:CC FLAGS_REG))]
11019 "TARGET_64BIT"
11020 "#"
11021 "&& reload_completed"
11022 [(set (match_dup 4) (const_int 0))
11023 (parallel [(set (match_dup 1)
11024 (zero_extend:DI (umod:SI (match_dup 2) (match_dup 3))))
11025 (set (match_dup 0)
11026 (udiv:SI (match_dup 2) (match_dup 3)))
11027 (use (match_dup 4))
11028 (clobber (reg:CC FLAGS_REG))])]
11029 "operands[4] = gen_lowpart (SImode, operands[1]);"
11030 [(set_attr "type" "multi")
11031 (set_attr "mode" "SI")])
11032
;; Unsigned SImode div/mod by a constant whose exact_log2 is in 1..31,
;; with the remainder zero-extended to DImode (operand 1).  TARGET_64BIT
;; only.  After reload, with v = exact_log2 of the constant:
;;   operand 1 = operand 2                               (as written)
;;   operand 0 = operand 2 >> v                          (quotient)
;;   operand 1 = zero_extend (operand 1 & (2^v - 1))     (remainder)
;; NOTE(review): operand 1 is DImode while operand 2 is SImode, and the
;; template reuses both via match_dup in the first set and inside and:SI;
;; confirm how the mode difference is intended to be handled.
11033 (define_insn_and_split "*udivmodsi4_pow2_zext_2"
11034 [(set (match_operand:DI 1 "register_operand" "=r")
11035 (zero_extend:DI
11036 (umod:SI (match_operand:SI 2 "register_operand" "0")
11037 (match_operand:SI 3 "const_int_operand"))))
11038 (set (match_operand:SI 0 "register_operand" "=r")
11039 (udiv:SI (match_dup 2) (match_dup 3)))
11040 (clobber (reg:CC FLAGS_REG))]
11041 "TARGET_64BIT
11042 && IN_RANGE (exact_log2 (UINTVAL (operands[3])), 1, 31)"
11043 "#"
11044 "&& reload_completed"
11045 [(set (match_dup 1) (match_dup 2))
11046 (parallel [(set (match_dup 0) (lshiftrt:SI (match_dup 2) (match_dup 4)))
11047 (clobber (reg:CC FLAGS_REG))])
11048 (parallel [(set (match_dup 1)
11049 (zero_extend:DI (and:SI (match_dup 1) (match_dup 5))))
11050 (clobber (reg:CC FLAGS_REG))])]
11051 {
11052 int v = exact_log2 (UINTVAL (operands[3]));
11053 operands[4] = GEN_INT (v);
11054 operands[5] = GEN_INT ((HOST_WIDE_INT_1U << v) - 1);
11055 }
11056 [(set_attr "type" "multi")
11057 (set_attr "mode" "SI")])
11058
;; The div/mod form that carries a (use ...) of operand 4, and the one
;; that emits an actual <sgnprefix>div instruction on operand 3.
;; Operand 4 is tied to operand 1 ("1"), so the value already present in
;; the "d" remainder register is an input; the div/mod splits in this file
;; produce this shape after setting that register up.
11059 (define_insn "*<u>divmod<mode>4_noext"
11060 [(set (match_operand:SWIM248 0 "register_operand" "=a")
11061 (any_div:SWIM248
11062 (match_operand:SWIM248 2 "register_operand" "0")
11063 (match_operand:SWIM248 3 "nonimmediate_operand" "rm")))
11064 (set (match_operand:SWIM248 1 "register_operand" "=d")
11065 (<paired_mod>:SWIM248 (match_dup 2) (match_dup 3)))
11066 (use (match_operand:SWIM248 4 "register_operand" "1"))
11067 (clobber (reg:CC FLAGS_REG))]
11068 ""
11069 "<sgnprefix>div{<imodesuffix>}\t%3"
11070 [(set_attr "type" "idiv")
11071 (set_attr "mode" "<MODE>")])
11072
;; SImode divide instruction with (use operand 4), where the quotient is
;; zero-extended to DImode (operand 0).  TARGET_64BIT only.  Operand 4 is
;; tied to the remainder register (operand 1).  Emitted as the 32-bit
;; <sgnprefix>div on operand 3.
11073 (define_insn "*<u>divmodsi4_noext_zext_1"
11074 [(set (match_operand:DI 0 "register_operand" "=a")
11075 (zero_extend:DI
11076 (any_div:SI (match_operand:SI 2 "register_operand" "0")
11077 (match_operand:SI 3 "nonimmediate_operand" "rm"))))
11078 (set (match_operand:SI 1 "register_operand" "=d")
11079 (<paired_mod>:SI (match_dup 2) (match_dup 3)))
11080 (use (match_operand:SI 4 "register_operand" "1"))
11081 (clobber (reg:CC FLAGS_REG))]
11082 "TARGET_64BIT"
11083 "<sgnprefix>div{l}\t%3"
11084 [(set_attr "type" "idiv")
11085 (set_attr "mode" "SI")])
11086
;; SImode divide instruction with (use operand 4), where the remainder is
;; zero-extended to DImode (operand 1) and the quotient stays SImode.
;; TARGET_64BIT only.  Operand 4 (SImode) is tied to operand 1 via the "1"
;; constraint.  Emitted as the 32-bit <sgnprefix>div on operand 3.
11087 (define_insn "*<u>divmodsi4_noext_zext_2"
11088 [(set (match_operand:DI 1 "register_operand" "=d")
11089 (zero_extend:DI
11090 (<paired_mod>:SI (match_operand:SI 2 "register_operand" "0")
11091 (match_operand:SI 3 "nonimmediate_operand" "rm"))))
11092 (set (match_operand:SI 0 "register_operand" "=a")
11093 (any_div:SI (match_dup 2) (match_dup 3)))
11094 (use (match_operand:SI 4 "register_operand" "1"))
11095 (clobber (reg:CC FLAGS_REG))]
11096 "TARGET_64BIT"
11097 "<sgnprefix>div{l}\t%3"
11098 [(set_attr "type" "idiv")
11099 (set_attr "mode" "SI")])
11100
11101 ;; Avoid sign-extension (using cdq) for constant numerators.
;; Signed SImode div/mod whose dividend (operand 2) is a constant.  Not
;; used when the function is optimized for size.  Both outputs are
;; earlyclobber ("=&a", "=&d").  After reload it splits into:
;;   operand 0 = operand 2 (the constant)
;;   operand 1 = operand 4, which is constm1_rtx when the constant is
;;               negative and const0_rtx otherwise
;;   the div/mod parallel, dividing operand 0 by operand 3, with
;;   (use operand 1).
11102 (define_insn_and_split "*divmodsi4_const"
11103 [(set (match_operand:SI 0 "register_operand" "=&a")
11104 (div:SI (match_operand:SI 2 "const_int_operand")
11105 (match_operand:SI 3 "nonimmediate_operand" "rm")))
11106 (set (match_operand:SI 1 "register_operand" "=&d")
11107 (mod:SI (match_dup 2) (match_dup 3)))
11108 (clobber (reg:CC FLAGS_REG))]
11109 "!optimize_function_for_size_p (cfun)"
11110 "#"
11111 "&& reload_completed"
11112 [(set (match_dup 0) (match_dup 2))
11113 (set (match_dup 1) (match_dup 4))
11114 (parallel [(set (match_dup 0)
11115 (div:SI (match_dup 0) (match_dup 3)))
11116 (set (match_dup 1)
11117 (mod:SI (match_dup 0) (match_dup 3)))
11118 (use (match_dup 1))
11119 (clobber (reg:CC FLAGS_REG))])]
11120 {
11121 operands[4] = INTVAL (operands[2]) < 0 ? constm1_rtx : const0_rtx;
11122 }
11123 [(set_attr "type" "multi")
11124 (set_attr "mode" "SI")])
11125
;; Expander for signed QImode divide/modulo, available under
;; TARGET_QIMODE_MATH.  Operand 0 receives the quotient of operand 1 by
;; operand 2, and operand 3 receives the remainder.
;; The preparation code does not emit the template; it ends with DONE
;; after emitting this sequence itself:
;;   - sign-extend operand 1 into a fresh HImode pseudo (gen_extendqihi2);
;;   - emit divmodhiqi3 into a second HImode pseudo;
;;   - move bits 8..15 of that result (a ZERO_EXTRACT taken as a QImode
;;     lowpart subreg) into operand 3 and attach a REG_EQUAL note holding
;;     the MOD expression;
;;   - move the QImode low part of the result into operand 0 and attach
;;     a REG_EQUAL note holding the DIV expression.
11126 (define_expand "divmodqi4"
11127 [(parallel [(set (match_operand:QI 0 "register_operand")
11128 (div:QI
11129 (match_operand:QI 1 "register_operand")
11130 (match_operand:QI 2 "nonimmediate_operand")))
11131 (set (match_operand:QI 3 "register_operand")
11132 (mod:QI (match_dup 1) (match_dup 2)))
11133 (clobber (reg:CC FLAGS_REG))])]
11134 "TARGET_QIMODE_MATH"
11135 {
11136 rtx div, mod;
11137 rtx tmp0, tmp1;
11138
11139 tmp0 = gen_reg_rtx (HImode);
11140 tmp1 = gen_reg_rtx (HImode);
11141
11142 /* Extend operands[1] to HImode. Generate 8bit divide. Result is in AX. */
11143 emit_insn (gen_extendqihi2 (tmp1, operands[1]));
11144 emit_insn (gen_divmodhiqi3 (tmp0, tmp1, operands[2]));
11145
11146 /* Extract remainder from AH. */
11147 tmp1 = gen_rtx_ZERO_EXTRACT (HImode, tmp0, GEN_INT (8), GEN_INT (8));
11148 tmp1 = lowpart_subreg (QImode, tmp1, HImode);
11149 rtx_insn *insn = emit_move_insn (operands[3], tmp1);
11150
11151 mod = gen_rtx_MOD (QImode, operands[1], operands[2]);
11152 set_unique_reg_note (insn, REG_EQUAL, mod);
11153
11154 /* Extract quotient from AL. */
11155 insn = emit_move_insn (operands[0], gen_lowpart (QImode, tmp0));
11156
11157 div = gen_rtx_DIV (QImode, operands[1], operands[2]);
11158 set_unique_reg_note (insn, REG_EQUAL, div);
11159
11160 DONE;
11161 })
11162
;; Expander for unsigned QImode divide/modulo, available under
;; TARGET_QIMODE_MATH.  Operand 0 receives the quotient of operand 1 by
;; operand 2, and operand 3 receives the remainder.
;; The emitted sequence mirrors the signed divmodqi4 expander, except
;; that operand 1 is zero-extended (gen_zero_extendqihi2), the divide is
;; udivmodhiqi3, and the REG_EQUAL notes hold UMOD and UDIV expressions.
;; The preparation code ends with DONE, so the template is not emitted.
11163 (define_expand "udivmodqi4"
11164 [(parallel [(set (match_operand:QI 0 "register_operand")
11165 (udiv:QI
11166 (match_operand:QI 1 "register_operand")
11167 (match_operand:QI 2 "nonimmediate_operand")))
11168 (set (match_operand:QI 3 "register_operand")
11169 (umod:QI (match_dup 1) (match_dup 2)))
11170 (clobber (reg:CC FLAGS_REG))])]
11171 "TARGET_QIMODE_MATH"
11172 {
11173 rtx div, mod;
11174 rtx tmp0, tmp1;
11175
11176 tmp0 = gen_reg_rtx (HImode);
11177 tmp1 = gen_reg_rtx (HImode);
11178
11179 /* Extend operands[1] to HImode. Generate 8bit divide. Result is in AX. */
11180 emit_insn (gen_zero_extendqihi2 (tmp1, operands[1]));
11181 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, operands[2]));
11182
11183 /* Extract remainder from AH. */
11184 tmp1 = gen_rtx_ZERO_EXTRACT (HImode, tmp0, GEN_INT (8), GEN_INT (8));
11185 tmp1 = lowpart_subreg (QImode, tmp1, HImode);
11186 rtx_insn *insn = emit_move_insn (operands[3], tmp1);
11187
11188 mod = gen_rtx_UMOD (QImode, operands[1], operands[2]);
11189 set_unique_reg_note (insn, REG_EQUAL, mod);
11190
11191 /* Extract quotient from AL. */
11192 insn = emit_move_insn (operands[0], gen_lowpart (QImode, tmp0));
11193
11194 div = gen_rtx_UDIV (QImode, operands[1], operands[2]);
11195 set_unique_reg_note (insn, REG_EQUAL, div);
11196
11197 DONE;
11198 })
11199
11200 ;; Divide AX by r/m8, with result stored in
11201 ;; AL <- Quotient
11202 ;; AH <- Remainder
11203 ;; Change div/mod to HImode and extend the second argument to HImode
11204 ;; so that mode of div/mod matches with mode of arguments. Otherwise
11205 ;; combine may fail.
;; The HImode result (operand 0, "a" constraint) is described as an IOR
;; of two pieces: the QImode-truncated remainder zero-extended and
;; shifted left by 8, and the QImode-truncated quotient zero-extended.
;; Operand 1 is the HImode dividend, tied to operand 0; operand 2 is the
;; QImode divisor, extended to HImode by any_extend.
;; NOTE(review): both variants of the any_extend iterator use the mod/div
;; rtx codes here; only the extension of operand 2 and the <sgnprefix> of
;; the mnemonic vary -- confirm this is intended for the unsigned variant.
11206 (define_insn "<u>divmodhiqi3"
11207 [(set (match_operand:HI 0 "register_operand" "=a")
11208 (ior:HI
11209 (ashift:HI
11210 (zero_extend:HI
11211 (truncate:QI
11212 (mod:HI (match_operand:HI 1 "register_operand" "0")
11213 (any_extend:HI
11214 (match_operand:QI 2 "nonimmediate_operand" "qm")))))
11215 (const_int 8))
11216 (zero_extend:HI
11217 (truncate:QI
11218 (div:HI (match_dup 1) (any_extend:HI (match_dup 2)))))))
11219 (clobber (reg:CC FLAGS_REG))]
11220 "TARGET_QIMODE_MATH"
11221 "<sgnprefix>div{b}\t%2"
11222 [(set_attr "type" "idiv")
11223 (set_attr "mode" "QI")])
11224
11225 ;; We cannot use div/idiv for double division, because it causes
11226 ;; "division by zero" on the overflow and that's not what we expect
11227 ;; from truncate. Because true (non truncating) double division is
11228 ;; never generated, we can't create this insn anyway.
11229 ;
11230 ;(define_insn ""
11231 ; [(set (match_operand:SI 0 "register_operand" "=a")
11232 ; (truncate:SI
11233 ; (udiv:DI (match_operand:DI 1 "register_operand" "A")
11234 ; (zero_extend:DI
11235 ; (match_operand:SI 2 "nonimmediate_operand" "rm")))))
11236 ; (set (match_operand:SI 3 "register_operand" "=d")
11237 ; (truncate:SI
11238 ; (umod:DI (match_dup 1) (zero_extend:DI (match_dup 2)))))
11239 ; (clobber (reg:CC FLAGS_REG))]
11240 ; ""
11241 ; "div{l}\t{%2, %0|%0, %2}"
11242 ; [(set_attr "type" "idiv")])
11243 \f
11244 ;;- Logical AND instructions
11245
11246 ;; On Pentium, "test imm, reg" is pairable only with eax, ax, and al.
11247 ;; Note that this excludes ah.
11248
;; Expander that sets the flags register, in CCNOmode, from comparing
;; (operand 0 AND operand 1) against zero, for the SWI48 modes.  It has
;; neither a condition nor preparation statements, so the template is
;; emitted as written.
11249 (define_expand "@test<mode>_ccno_1"
11250 [(set (reg:CCNO FLAGS_REG)
11251 (compare:CCNO
11252 (and:SWI48
11253 (match_operand:SWI48 0 "nonimmediate_operand")
11254 (match_operand:SWI48 1 "<nonmemory_szext_operand>"))
11255 (const_int 0)))])
11256
;; Expander that sets the flags register, in CCZmode, from comparing the
;; QImode value (operand 0 AND operand 1) against zero.  It has neither a
;; condition nor preparation statements.
11257 (define_expand "testqi_ccz_1"
11258 [(set (reg:CCZ FLAGS_REG)
11259 (compare:CCZ
11260 (and:QI
11261 (match_operand:QI 0 "nonimmediate_operand")
11262 (match_operand:QI 1 "nonmemory_operand"))
11263 (const_int 0)))])
11264
;; DImode test (compare of operand 0 AND operand 1 against zero), only
;; for TARGET_64BIT.  Alternative 0 takes a "Z"-constraint operand 1 and
;; is emitted as test{l} on the %k forms of both operands (mode attribute
;; SI); alternative 1 is emitted as test{q} (mode attribute DI).  The
;; condition selects the CC mode handed to ix86_match_ccmode, as its
;; embedded comment explains.
11265 (define_insn "*testdi_1"
11266 [(set (reg FLAGS_REG)
11267 (compare
11268 (and:DI
11269 (match_operand:DI 0 "nonimmediate_operand" "%r,rm")
11270 (match_operand:DI 1 "x86_64_szext_nonmemory_operand" "Z,re"))
11271 (const_int 0)))]
11272 "TARGET_64BIT
11273 && ix86_match_ccmode
11274 (insn,
11275 /* If we are going to emit testl instead of testq, and the operands[1]
11276 constant might have the SImode sign bit set, make sure the sign
11277 flag isn't tested, because the instruction will set the sign flag
11278 based on bit 31 rather than bit 63. If it isn't CONST_INT,
11279 conservatively assume it might have bit 31 set. */
11280 (satisfies_constraint_Z (operands[1])
11281 && (!CONST_INT_P (operands[1])
11282 || val_signbit_known_set_p (SImode, INTVAL (operands[1]))))
11283 ? CCZmode : CCNOmode)"
11284 "@
11285 test{l}\t{%k1, %k0|%k0, %k1}
11286 test{q}\t{%1, %0|%0, %1}"
11287 [(set_attr "type" "test")
11288 (set_attr "mode" "SI,DI")])
11289
;; QImode test that may be emitted as an SImode instruction.
;; The CC mode passed to ix86_match_ccmode is CCNOmode when operand 1 is
;; a non-negative CONST_INT and CCZmode otherwise.
;; The "mode" attribute is SI for alternative 2, and also when the insn
;; is optimized for size, operand 0 matches ext_QIreg_operand and
;; operand 1 matches const_0_to_127_operand; otherwise it is QI.
;; For an SI-mode insn the output code masks a negative constant with
;; 0xff and emits test{l} on %k0; otherwise it emits test{b}.
11290 (define_insn "*testqi_1_maybe_si"
11291 [(set (reg FLAGS_REG)
11292 (compare
11293 (and:QI
11294 (match_operand:QI 0 "nonimmediate_operand" "%qm,qm,r")
11295 (match_operand:QI 1 "nonmemory_operand" "q,n,n"))
11296 (const_int 0)))]
11297 "ix86_match_ccmode (insn,
11298 CONST_INT_P (operands[1])
11299 && INTVAL (operands[1]) >= 0 ? CCNOmode : CCZmode)"
11300 {
11301 if (get_attr_mode (insn) == MODE_SI)
11302 {
11303 if (CONST_INT_P (operands[1]) && INTVAL (operands[1]) < 0)
11304 operands[1] = GEN_INT (INTVAL (operands[1]) & 0xff);
11305 return "test{l}\t{%1, %k0|%k0, %1}";
11306 }
11307 return "test{b}\t{%1, %0|%0, %1}";
11308 }
11309 [(set_attr "type" "test")
11310 (set (attr "mode")
11311 (cond [(eq_attr "alternative" "2")
11312 (const_string "SI")
11313 (and (match_test "optimize_insn_for_size_p ()")
11314 (and (match_operand 0 "ext_QIreg_operand")
11315 (match_operand 1 "const_0_to_127_operand")))
11316 (const_string "SI")
11317 ]
11318 (const_string "QI")))
11319 (set_attr "pent_pair" "uv,np,np")])
11320
;; Plain test instruction for the SWI124 modes: compares
;; (operand 0 AND operand 1) against zero, and is matched only when
;; ix86_match_ccmode accepts CCNOmode for the insn.  All three
;; alternatives share one output template; they differ in constraints
;; and in the "pent_pair" attribute.
11321 (define_insn "*test<mode>_1"
11322 [(set (reg FLAGS_REG)
11323 (compare
11324 (and:SWI124
11325 (match_operand:SWI124 0 "nonimmediate_operand" "%<r>m,*a,<r>m")
11326 (match_operand:SWI124 1 "<nonmemory_szext_operand>" "<r>,<i>,<i>"))
11327 (const_int 0)))]
11328 "ix86_match_ccmode (insn, CCNOmode)"
11329 "test{<imodesuffix>}\t{%1, %0|%0, %1}"
11330 [(set_attr "type" "test")
11331 (set_attr "mode" "<MODE>")
11332 (set_attr "pent_pair" "uv,uv,np")])
11333
;; Expander that sets the flags register, in CCNOmode, from a QImode AND
;; compared against zero.  The first AND input is the 8-bit field at bit
;; position 8 of HImode register operand 0 (a zero_extract taken as a
;; QImode subreg); the second is the const_int operand 1.
11334 (define_expand "testqi_ext_1_ccno"
11335 [(set (reg:CCNO FLAGS_REG)
11336 (compare:CCNO
11337 (and:QI
11338 (subreg:QI
11339 (zero_extract:HI
11340 (match_operand:HI 0 "register_operand")
11341 (const_int 8)
11342 (const_int 8)) 0)
11343 (match_operand:QI 1 "const_int_operand"))
11344 (const_int 0)))])
11345
;; QImode test of the 8-bit field at bit position 8 of operand 0 (any
;; extract_operator in a SWI248 mode, on a "Q"-constraint register)
;; against the general operand 1.  Matched only when ix86_match_ccmode
;; accepts CCNOmode; emitted as test{b} with operand 0 printed through
;; the %h modifier.
11346 (define_insn "*testqi_ext<mode>_1"
11347 [(set (reg FLAGS_REG)
11348 (compare
11349 (and:QI
11350 (subreg:QI
11351 (match_operator:SWI248 2 "extract_operator"
11352 [(match_operand 0 "int248_register_operand" "Q")
11353 (const_int 8)
11354 (const_int 8)]) 0)
11355 (match_operand:QI 1 "general_operand" "QnBn"))
11356 (const_int 0)))]
11357 "ix86_match_ccmode (insn, CCNOmode)"
11358 "test{b}\t{%1, %h0|%h0, %1}"
11359 [(set_attr "addr" "gpr8")
11360 (set_attr "type" "test")
11361 (set_attr "mode" "QI")])
11362
;; QImode test where both AND inputs are 8-bit fields at bit position 8,
;; taken from "Q"-constraint registers operand 0 and operand 1 through
;; extract_operator.  Matched only when ix86_match_ccmode accepts
;; CCNOmode; emitted as test{b} with both operands printed through the
;; %h modifier.
11363 (define_insn "*testqi_ext<mode>_2"
11364 [(set (reg FLAGS_REG)
11365 (compare
11366 (and:QI
11367 (subreg:QI
11368 (match_operator:SWI248 2 "extract_operator"
11369 [(match_operand 0 "int248_register_operand" "Q")
11370 (const_int 8)
11371 (const_int 8)]) 0)
11372 (subreg:QI
11373 (match_operator:SWI248 3 "extract_operator"
11374 [(match_operand 1 "int248_register_operand" "Q")
11375 (const_int 8)
11376 (const_int 8)]) 0))
11377 (const_int 0)))]
11378 "ix86_match_ccmode (insn, CCNOmode)"
11379 "test{b}\t{%h1, %h0|%h0, %h1}"
11380 [(set_attr "type" "test")
11381 (set_attr "mode" "QI")])
11382
11383 ;; Provide a *testti instruction that STV can implement using ptest.
11384 ;; This pattern splits into *andti3_doubleword and *cmpti_doubleword.
;; Matched only for TARGET_64BIT while ix86_pre_reload_split () holds.
;; The split condition is "&& 1", i.e. it adds nothing to the insn
;; condition.  The preparation code allocates a fresh TImode pseudo for
;; operand 2 and forces operand 1 into a register when it does not
;; satisfy x86_64_hilo_general_operand.  The replacement is a TImode AND
;; into operand 2 (clobbering the flags) followed by a CCZ compare of
;; operand 2 against zero.
11385 (define_insn_and_split "*testti_doubleword"
11386 [(set (reg:CCZ FLAGS_REG)
11387 (compare:CCZ
11388 (and:TI (match_operand:TI 0 "register_operand")
11389 (match_operand:TI 1 "general_operand"))
11390 (const_int 0)))]
11391 "TARGET_64BIT
11392 && ix86_pre_reload_split ()"
11393 "#"
11394 "&& 1"
11395 [(parallel [(set (match_dup 2) (and:TI (match_dup 0) (match_dup 1)))
11396 (clobber (reg:CC FLAGS_REG))])
11397 (set (reg:CCZ FLAGS_REG) (compare:CCZ (match_dup 2) (const_int 0)))]
11398 {
11399 operands[2] = gen_reg_rtx (TImode);
11400 if (!x86_64_hilo_general_operand (operands[1], TImode))
11401 operands[1] = force_reg (TImode, operands[1]);
11402 })
11403
11404 ;; Combine likes to form bit extractions for some tests. Humor it.
;; Matches a comparison against zero of a zero_extract from operand 2.
;; The split code reads operand 3 as the field length and operand 4 as
;; the bit position.  The insn is always split ("&& 1"): the
;; zero_extract is replaced by an AND of the (possibly re-moded) value
;; with a constant built by wi::shifted_mask (pos, len, ...).
;; The split code may change the mode of the tested value in three ways:
;;   - a wider-than-inner integer subreg is replaced by its SUBREG_REG
;;     under the conditions checked in the code;
;;   - an HImode register test with pos + len <= 8 becomes a QImode test;
;;   - when the mask would reach the top bit of the chosen mode and the
;;     flags destination is not CCZmode, the test is widened to SImode.
11405 (define_insn_and_split "*testqi_ext_3"
11406 [(set (match_operand 0 "flags_reg_operand")
11407 (match_operator 1 "compare_operator"
11408 [(zero_extract:SWI248
11409 (match_operand 2 "int_nonimmediate_operand" "rm")
11410 (match_operand:QI 3 "const_int_operand")
11411 (match_operand:QI 4 "const_int_operand"))
11412 (const_int 0)]))]
11413 "/* Ensure that resulting mask is zero or sign extended operand. */
11414 INTVAL (operands[4]) >= 0
11415 && ((INTVAL (operands[3]) > 0
11416 && INTVAL (operands[3]) + INTVAL (operands[4]) <= 32)
11417 || (<MODE>mode == DImode
11418 && INTVAL (operands[3]) > 32
11419 && INTVAL (operands[3]) + INTVAL (operands[4]) == 64))
11420 && ix86_match_ccmode (insn,
11421 /* If zero_extract mode precision is the same
11422 as len, the SF of the zero_extract
11423 comparison will be the most significant
11424 extracted bit, but this could be matched
11425 after splitting only for pos 0 len all bits
11426 trivial extractions. Require CCZmode. */
11427 (GET_MODE_PRECISION (<MODE>mode)
11428 == INTVAL (operands[3]))
11429 /* Otherwise, require CCZmode if we'd use a mask
11430 with the most significant bit set and can't
11431 widen it to wider mode. *testdi_1 also
11432 requires CCZmode if the mask has bit
11433 31 set and all bits above it clear. */
11434 || (INTVAL (operands[3]) + INTVAL (operands[4])
11435 >= 32)
11436 /* We can't widen also if val is not a REG. */
11437 || (INTVAL (operands[3]) + INTVAL (operands[4])
11438 == GET_MODE_PRECISION (GET_MODE (operands[2]))
11439 && !register_operand (operands[2],
11440 GET_MODE (operands[2])))
11441 /* And we shouldn't widen if
11442 TARGET_PARTIAL_REG_STALL. */
11443 || (TARGET_PARTIAL_REG_STALL
11444 && (INTVAL (operands[3]) + INTVAL (operands[4])
11445 >= (paradoxical_subreg_p (operands[2])
11446 && (GET_MODE_CLASS
11447 (GET_MODE (SUBREG_REG (operands[2])))
11448 == MODE_INT)
11449 ? GET_MODE_PRECISION
11450 (GET_MODE (SUBREG_REG (operands[2])))
11451 : GET_MODE_PRECISION
11452 (GET_MODE (operands[2])))))
11453 ? CCZmode : CCNOmode)"
11454 "#"
11455 "&& 1"
11456 [(set (match_dup 0) (match_op_dup 1 [(match_dup 2) (const_int 0)]))]
11457 {
11458 rtx val = operands[2];
11459 HOST_WIDE_INT len = INTVAL (operands[3]);
11460 HOST_WIDE_INT pos = INTVAL (operands[4]);
11461 machine_mode mode = GET_MODE (val);
11462
11463 if (SUBREG_P (val))
11464 {
11465 machine_mode submode = GET_MODE (SUBREG_REG (val));
11466
11467 /* Narrow paradoxical subregs to prevent partial register stalls. */
11468 if (GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (submode)
11469 && GET_MODE_CLASS (submode) == MODE_INT
11470 && (GET_MODE (operands[0]) == CCZmode
11471 || pos + len < GET_MODE_PRECISION (submode)
11472 || REG_P (SUBREG_REG (val))))
11473 {
11474 val = SUBREG_REG (val);
11475 mode = submode;
11476 }
11477 }
11478
11479 /* Small HImode tests can be converted to QImode. */
11480 if (pos + len <= 8
11481 && register_operand (val, HImode))
11482 {
11483 rtx nval = gen_lowpart (QImode, val);
11484 if (!MEM_P (nval)
11485 || GET_MODE (operands[0]) == CCZmode
11486 || pos + len < 8)
11487 {
11488 val = nval;
11489 mode = QImode;
11490 }
11491 }
11492
11493 gcc_assert (pos + len <= GET_MODE_PRECISION (mode));
11494
11495 /* If the mask is going to have the sign bit set in the mode
11496 we want to do the comparison in and user isn't interested just
11497 in the zero flag, then we must widen the target mode. */
11498 if (pos + len == GET_MODE_PRECISION (mode)
11499 && GET_MODE (operands[0]) != CCZmode)
11500 {
11501 gcc_assert (pos + len < 32 && !MEM_P (val));
11502 mode = SImode;
11503 val = gen_lowpart (mode, val);
11504 }
11505
11506 wide_int mask
11507 = wi::shifted_mask (pos, len, false, GET_MODE_PRECISION (mode));
11508
11509 operands[2] = gen_rtx_AND (mode, val, immed_wide_int_const (mask, mode));
11510 })
11511
11512 ;; Split and;cmp (as optimized by combine) into not;test
11513 ;; Except when TARGET_BMI provides andn (*andn_<mode>_ccno).
;; Matched only while ix86_pre_reload_split () holds and, when
;; TARGET_BMI is set, only if operand 1 is not a REG.  The split
;; ("&& 1") allocates a fresh pseudo for operand 2, stores NOT operand 0
;; into it, and then sets the CCZ flags from comparing
;; (operand 2 AND operand 1) against zero.
11514 (define_insn_and_split "*test<mode>_not"
11515 [(set (reg:CCZ FLAGS_REG)
11516 (compare:CCZ
11517 (and:SWI
11518 (not:SWI (match_operand:SWI 0 "register_operand"))
11519 (match_operand:SWI 1 "<nonmemory_szext_operand>"))
11520 (const_int 0)))]
11521 "ix86_pre_reload_split ()
11522 && (!TARGET_BMI || !REG_P (operands[1]))"
11523 "#"
11524 "&& 1"
11525 [(set (match_dup 2) (not:SWI (match_dup 0)))
11526 (set (reg:CCZ FLAGS_REG)
11527 (compare:CCZ (and:SWI (match_dup 2) (match_dup 1))
11528 (const_int 0)))]
11529 "operands[2] = gen_reg_rtx (<MODE>mode);")
11530
11531 ;; Split and;cmp (as optimized by combine) into andn;cmp $0
;; Double-word (DWI modes) variant, matched only while
;; ix86_pre_reload_split () holds.  The split ("&& 1") forces operand 0
;; into a register, allocates a fresh pseudo for operand 2, computes
;; (NOT operand 0) AND operand 1 into it with a flags clobber, and then
;; sets the CCZ flags from comparing operand 2 against zero.
11532 (define_insn_and_split "*test<mode>_not_doubleword"
11533 [(set (reg:CCZ FLAGS_REG)
11534 (compare:CCZ
11535 (and:DWI
11536 (not:DWI (match_operand:DWI 0 "nonimmediate_operand"))
11537 (match_operand:DWI 1 "nonimmediate_operand"))
11538 (const_int 0)))]
11539 "ix86_pre_reload_split ()"
11540 "#"
11541 "&& 1"
11542 [(parallel
11543 [(set (match_dup 2) (and:DWI (not:DWI (match_dup 0)) (match_dup 1)))
11544 (clobber (reg:CC FLAGS_REG))])
11545 (set (reg:CCZ FLAGS_REG) (compare:CCZ (match_dup 2) (const_int 0)))]
11546 {
11547 operands[0] = force_reg (<MODE>mode, operands[0]);
11548 operands[2] = gen_reg_rtx (<MODE>mode);
11549 })
11550
11551 ;; Convert HImode/SImode test instructions with immediate to QImode ones.
11552 ;; i386 cannot encode test with an 8-bit sign-extended immediate, so
11553 ;; this is a relatively important trick.
11554 ;; Do the conversion only post-reload to avoid limiting the register class
11555 ;; to QI regs.
;; High-byte case.  Applies after reload to a non-QImode QIreg_operand
;; tested against a constant that has no bits set outside 255 << 8 (when
;; the insn matches CCZmode) or outside 127 << 8 (when it matches
;; CCNOmode).  The test is rewritten as a QImode AND of the 8-bit field
;; at bit position 8 of the HImode low part of operand 2 with the
;; constant shifted right by 8.
11556 (define_split
11557 [(set (match_operand 0 "flags_reg_operand")
11558 (match_operator 1 "compare_operator"
11559 [(and (match_operand 2 "QIreg_operand")
11560 (match_operand 3 "const_int_operand"))
11561 (const_int 0)]))]
11562 "reload_completed
11563 && GET_MODE (operands[2]) != QImode
11564 && ((ix86_match_ccmode (insn, CCZmode)
11565 && !(INTVAL (operands[3]) & ~(255 << 8)))
11566 || (ix86_match_ccmode (insn, CCNOmode)
11567 && !(INTVAL (operands[3]) & ~(127 << 8))))"
11568 [(set (match_dup 0)
11569 (match_op_dup 1
11570 [(and:QI
11571 (subreg:QI
11572 (zero_extract:HI (match_dup 2)
11573 (const_int 8)
11574 (const_int 8)) 0)
11575 (match_dup 3))
11576 (const_int 0)]))]
11577 {
11578 operands[2] = gen_lowpart (HImode, operands[2]);
11579 operands[3] = gen_int_mode (INTVAL (operands[3]) >> 8, QImode);
11580 })
11581
;; Low-byte case.  Applies after reload to a non-QImode
;; nonimmediate_operand that is either not a REG or satisfies
;; ANY_QI_REG_P, tested against a constant with no bits set outside 255
;; (when the insn matches CCZmode) or outside 127 (when it matches
;; CCNOmode).  The test is rewritten as a QImode AND of the low part of
;; operand 2 with the constant converted to QImode.
11582 (define_split
11583 [(set (match_operand 0 "flags_reg_operand")
11584 (match_operator 1 "compare_operator"
11585 [(and (match_operand 2 "nonimmediate_operand")
11586 (match_operand 3 "const_int_operand"))
11587 (const_int 0)]))]
11588 "reload_completed
11589 && GET_MODE (operands[2]) != QImode
11590 && (!REG_P (operands[2]) || ANY_QI_REG_P (operands[2]))
11591 && ((ix86_match_ccmode (insn, CCZmode)
11592 && !(INTVAL (operands[3]) & ~255))
11593 || (ix86_match_ccmode (insn, CCNOmode)
11594 && !(INTVAL (operands[3]) & ~127)))"
11595 [(set (match_dup 0)
11596 (match_op_dup 1 [(and:QI (match_dup 2) (match_dup 3))
11597 (const_int 0)]))]
11598 {
11599 operands[2] = gen_lowpart (QImode, operands[2]);
11600 operands[3] = gen_int_mode (INTVAL (operands[3]), QImode);
11601 })
11602
11603 ;; Narrow test instructions with immediate operands that test
11604 ;; memory locations for zero. E.g. testl $0x00aa0000, mem can be
11605 ;; converted to testb $0xaa, mem+2. Reject volatile locations and
11606 ;; targets where reading (possibly unaligned) part of memory
11607 ;; location after a large write to the same address causes
11608 ;; store-to-load forwarding stall.
;; Implementation outline of the preparation code:
;;   - FAIL if the constant is zero;
;;   - mask the constant to the width of <MODE>mode;
;;   - find the byte holding the lowest set bit (first_nonzero_byte) and
;;     shift the constant right by that many bytes;
;;   - pick the smallest of QImode/HImode/SImode/DImode whose bit size
;;     covers the remaining significant bits;
;;   - FAIL unless that mode is strictly smaller than <MODE>mode;
;;   - build the new test as an AND of the memory operand re-addressed at
;;     offset first_nonzero_byte in the new mode with the shifted
;;     constant converted to the new mode.
11609 (define_peephole2
11610 [(set (reg:CCZ FLAGS_REG)
11611 (compare:CCZ
11612 (and:SWI248 (match_operand:SWI248 0 "memory_operand")
11613 (match_operand 1 "const_int_operand"))
11614 (const_int 0)))]
11615 "!TARGET_PARTIAL_MEMORY_READ_STALL && !MEM_VOLATILE_P (operands[0])"
11616 [(set (reg:CCZ FLAGS_REG)
11617 (compare:CCZ (match_dup 2) (const_int 0)))]
11618 {
11619 unsigned HOST_WIDE_INT ival = UINTVAL (operands[1]);
11620 int first_nonzero_byte, bitsize;
11621 rtx new_addr, new_const;
11622 machine_mode new_mode;
11623
11624 if (ival == 0)
11625 FAIL;
11626
11627 /* Clear bits outside mode width. */
11628 ival &= GET_MODE_MASK (<MODE>mode);
11629
11630 first_nonzero_byte = ctz_hwi (ival) / BITS_PER_UNIT;
11631
11632 ival >>= first_nonzero_byte * BITS_PER_UNIT;
11633
11634 bitsize = sizeof (ival) * BITS_PER_UNIT - clz_hwi (ival);
11635
11636 if (bitsize <= GET_MODE_BITSIZE (QImode))
11637 new_mode = QImode;
11638 else if (bitsize <= GET_MODE_BITSIZE (HImode))
11639 new_mode = HImode;
11640 else if (bitsize <= GET_MODE_BITSIZE (SImode))
11641 new_mode = SImode;
11642 else
11643 new_mode = DImode;
11644
11645 if (GET_MODE_SIZE (new_mode) >= GET_MODE_SIZE (<MODE>mode))
11646 FAIL;
11647
11648 new_addr = adjust_address (operands[0], new_mode, first_nonzero_byte);
11649 new_const = gen_int_mode (ival, new_mode);
11650
11651 operands[2] = gen_rtx_AND (new_mode, new_addr, new_const);
11652 })
11653
11654 ;; %%% This used to optimize known byte-wide and operations to memory,
11655 ;; and sometimes to QImode registers. If this is considered useful,
11656 ;; it should be done with splitters.
11657
;; Expander for AND in the SDWIM modes.  The preparation code always
;; ends with DONE and does the following:
;;   - for modes wider than UNITS_PER_WORD, forces operand 2 into a
;;     register unless it satisfies x86_64_hilo_general_operand;
;;   - for modes no wider than a word, when operand 2 is a const_int
;;     equal to the SImode, HImode or QImode mode mask and operand 0 is
;;     a register, emits a zero-extension of the low part of operand 1
;;     (gen_extend_insn with unsignedp 1) instead of an AND.  This is
;;     skipped when TARGET_ZERO_EXTEND_WITH_AND is set and the function
;;     is optimized for speed;
;;   - otherwise calls ix86_expand_binary_operator with TARGET_APX_NDD.
11658 (define_expand "and<mode>3"
11659 [(set (match_operand:SDWIM 0 "nonimmediate_operand")
11660 (and:SDWIM (match_operand:SDWIM 1 "nonimmediate_operand")
11661 (match_operand:SDWIM 2 "<general_szext_operand>")))]
11662 ""
11663 {
11664 machine_mode mode = <MODE>mode;
11665
11666 if (GET_MODE_SIZE (<MODE>mode) > UNITS_PER_WORD
11667 && !x86_64_hilo_general_operand (operands[2], <MODE>mode))
11668 operands[2] = force_reg (<MODE>mode, operands[2]);
11669
11670 if (GET_MODE_SIZE (<MODE>mode) <= UNITS_PER_WORD
11671 && const_int_operand (operands[2], <MODE>mode)
11672 && register_operand (operands[0], <MODE>mode)
11673 && !(TARGET_ZERO_EXTEND_WITH_AND
11674 && optimize_function_for_speed_p (cfun)))
11675 {
11676 unsigned HOST_WIDE_INT ival = UINTVAL (operands[2]);
11677
11678 if (ival == GET_MODE_MASK (SImode))
11679 mode = SImode;
11680 else if (ival == GET_MODE_MASK (HImode))
11681 mode = HImode;
11682 else if (ival == GET_MODE_MASK (QImode))
11683 mode = QImode;
11684 }
11685
11686 if (mode != <MODE>mode)
11687 emit_insn (gen_extend_insn
11688 (operands[0], gen_lowpart (mode, operands[1]),
11689 <MODE>mode, mode, 1));
11690 else
11691 ix86_expand_binary_operator (AND, <MODE>mode, operands, TARGET_APX_NDD);
11692
11693 DONE;
11694 })
11695
;; Double-word AND, split after reload into operations on the two
;; halves.  split_double_mode leaves the first halves of the three
;; operands in operands[0..2] and the second halves in operands[3..5].
;; Each half is handled the same way:
;;   - constant 0 as the second input: move zero into the destination;
;;   - constant -1 as the second input: move the first input into the
;;     destination unless they are already rtx_equal_p;
;;   - otherwise: emit a single-word AND via ix86_expand_binary_operator.
;; NOTE_INSN_DELETED is emitted only when both halves turned out to
;; need no instruction at all.  Alternatives 2 and 3 carry the apx_ndd
;; isa attribute.
11696 (define_insn_and_split "*and<dwi>3_doubleword"
11697 [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,r,&r,&r")
11698 (and:<DWI>
11699 (match_operand:<DWI> 1 "nonimmediate_operand" "%0,0,ro,r")
11700 (match_operand:<DWI> 2 "x86_64_hilo_general_operand" "r<di>,o,r<di>,o")))
11701 (clobber (reg:CC FLAGS_REG))]
11702 "ix86_binary_operator_ok (AND, <DWI>mode, operands, TARGET_APX_NDD)"
11703 "#"
11704 "&& reload_completed"
11705 [(const_int:DWIH 0)]
11706 {
11707 bool emit_insn_deleted_note_p = false;
11708
11709 split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]);
11710
11711 if (operands[2] == const0_rtx)
11712 emit_move_insn (operands[0], const0_rtx);
11713 else if (operands[2] == constm1_rtx)
11714 {
11715 if (!rtx_equal_p (operands[0], operands[1]))
11716 emit_move_insn (operands[0], operands[1]);
11717 else
11718 emit_insn_deleted_note_p = true;
11719 }
11720 else
11721 ix86_expand_binary_operator (AND, <MODE>mode, &operands[0], TARGET_APX_NDD);
11722
11723 if (operands[5] == const0_rtx)
11724 emit_move_insn (operands[3], const0_rtx);
11725 else if (operands[5] == constm1_rtx)
11726 {
11727 if (!rtx_equal_p (operands[3], operands[4]))
11728 emit_move_insn (operands[3], operands[4]);
11729 else if (emit_insn_deleted_note_p)
11730 emit_note (NOTE_INSN_DELETED);
11731 }
11732 else
11733 ix86_expand_binary_operator (AND, <MODE>mode, &operands[3], TARGET_APX_NDD);
11734
11735 DONE;
11736 }
11737 [(set_attr "isa" "*,*,apx_ndd,apx_ndd")])
11738
;; DImode AND for TARGET_64BIT, eight alternatives:
;;   0, 1  operand 2 has the "Z" constraint; emitted as and{l} on the %k
;;         forms (alternative 1 is the three-operand apx_ndd form);
;;   2, 3  two-operand and{q};
;;   4, 5  three-operand and{q} (apx_ndd);
;;   6     "L"-constraint constant, type imovx, output "#";
;;   7     "k" registers, type msklog, isa avx512bw, output "#".
;; The prefix_rex attribute is set to 1 for an imovx alternative whose
;; constant is 0xff and whose operand 1 matches ext_QIreg_operand.
11739 (define_insn "*anddi_1"
11740 [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r,rm,r,r,r,r,?k")
11741 (and:DI
11742 (match_operand:DI 1 "nonimmediate_operand" "%0,r,0,0,rm,r,qm,k")
11743 (match_operand:DI 2 "x86_64_szext_general_operand" "Z,Z,re,m,re,m,L,k")))
11744 (clobber (reg:CC FLAGS_REG))]
11745 "TARGET_64BIT
11746 && ix86_binary_operator_ok (AND, DImode, operands, TARGET_APX_NDD)"
11747 "@
11748 and{l}\t{%k2, %k0|%k0, %k2}
11749 and{l}\t{%k2, %k1, %k0|%k0, %k1, %k2}
11750 and{q}\t{%2, %0|%0, %2}
11751 and{q}\t{%2, %0|%0, %2}
11752 and{q}\t{%2, %1, %0|%0, %1, %2}
11753 and{q}\t{%2, %1, %0|%0, %1, %2}
11754 #
11755 #"
11756 [(set_attr "isa" "x64,apx_ndd,x64,x64,apx_ndd,apx_ndd,x64,avx512bw")
11757 (set_attr "type" "alu,alu,alu,alu,alu,alu,imovx,msklog")
11758 (set_attr "length_immediate" "*,*,*,*,*,*,0,*")
11759 (set (attr "prefix_rex")
11760 (if_then_else
11761 (and (eq_attr "type" "imovx")
11762 (and (match_test "INTVAL (operands[2]) == 0xff")
11763 (match_operand 1 "ext_QIreg_operand")))
11764 (const_string "1")
11765 (const_string "*")))
11766 (set_attr "mode" "SI,SI,DI,DI,DI,DI,SI,DI")])
11767
;; DImode AND with a constant whose bitwise complement is a single bit
;; at a position in the range 31..63 (exact_log2 of ~operand 2).
;; Requires TARGET_64BIT and TARGET_USE_BT.  After reload it splits into
;; a store of zero to the one-bit zero_extract of operand 0 at that
;; position, with a flags clobber.
;; NOTE(review): the _btr name suggests the split form is matched by a
;; bit-reset pattern defined elsewhere in the file -- not visible here.
11768 (define_insn_and_split "*anddi_1_btr"
11769 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
11770 (and:DI
11771 (match_operand:DI 1 "nonimmediate_operand" "%0")
11772 (match_operand:DI 2 "const_int_operand" "n")))
11773 (clobber (reg:CC FLAGS_REG))]
11774 "TARGET_64BIT && TARGET_USE_BT
11775 && ix86_binary_operator_ok (AND, DImode, operands)
11776 && IN_RANGE (exact_log2 (~INTVAL (operands[2])), 31, 63)"
11777 "#"
11778 "&& reload_completed"
11779 [(parallel [(set (zero_extract:DI (match_dup 0)
11780 (const_int 1)
11781 (match_dup 3))
11782 (const_int 0))
11783 (clobber (reg:CC FLAGS_REG))])]
11784 "operands[3] = GEN_INT (exact_log2 (~INTVAL (operands[2])));"
11785 [(set_attr "type" "alu1")
11786 (set_attr "prefix_0f" "1")
11787 (set_attr "znver1_decode" "double")
11788 (set_attr "mode" "DI")])
11789
11790 ;; Turn *anddi_1 into *andsi_1_zext if possible.
;; Applies for TARGET_64BIT to a DImode AND of a DImode subreg of an
;; SImode register with an x86_64_zext_immediate_operand.  The result is
;; a zero-extended SImode AND.  Operand 2 is converted to SImode by the
;; preparation code:
;;   - SYMBOL_REF / LABEL_REF: shallow copy with its mode set to SImode;
;;   - CONST: deep copy with SImode set on the CONST, on its operand,
;;     and on that operand's first operand;
;;   - anything else: gen_lowpart (SImode, ...).
11791 (define_split
11792 [(set (match_operand:DI 0 "register_operand")
11793 (and:DI (subreg:DI (match_operand:SI 1 "register_operand") 0)
11794 (match_operand:DI 2 "x86_64_zext_immediate_operand")))
11795 (clobber (reg:CC FLAGS_REG))]
11796 "TARGET_64BIT"
11797 [(parallel [(set (match_dup 0)
11798 (zero_extend:DI (and:SI (match_dup 1) (match_dup 2))))
11799 (clobber (reg:CC FLAGS_REG))])]
11800 {
11801 if (GET_CODE (operands[2]) == SYMBOL_REF
11802 || GET_CODE (operands[2]) == LABEL_REF)
11803 {
11804 operands[2] = shallow_copy_rtx (operands[2]);
11805 PUT_MODE (operands[2], SImode);
11806 }
11807 else if (GET_CODE (operands[2]) == CONST)
11808 {
11809 /* (const:DI (plus:DI (symbol_ref:DI ("...")) (const_int N))) */
11810 operands[2] = copy_rtx (operands[2]);
11811 PUT_MODE (operands[2], SImode);
11812 PUT_MODE (XEXP (operands[2], 0), SImode);
11813 PUT_MODE (XEXP (XEXP (operands[2], 0), 0), SImode);
11814 }
11815 else
11816 operands[2] = gen_lowpart (SImode, operands[2]);
11817 })
11818
11819 ;; See comment for addsi_1_zext why we do use nonimmediate_operand
;; SImode AND whose result is zero-extended into a DImode register,
;; for TARGET_64BIT.  All alternatives emit and{l} with the destination
;; printed as %k0; alternative 0 is the two-operand form and
;; alternatives 1 and 2 are three-operand forms restricted to apx_ndd.
11820 (define_insn "*andsi_1_zext"
11821 [(set (match_operand:DI 0 "register_operand" "=r,r,r")
11822 (zero_extend:DI
11823 (and:SI (match_operand:SI 1 "nonimmediate_operand" "%0,rm,r")
11824 (match_operand:SI 2 "x86_64_general_operand" "rBMe,re,BM"))))
11825 (clobber (reg:CC FLAGS_REG))]
11826 "TARGET_64BIT
11827 && ix86_binary_operator_ok (AND, SImode, operands, TARGET_APX_NDD)"
11828 "@
11829 and{l}\t{%2, %k0|%k0, %2}
11830 and{l}\t{%2, %1, %k0|%k0, %1, %2}
11831 and{l}\t{%2, %1, %k0|%k0, %1, %2}"
11832 [(set_attr "type" "alu")
11833 (set_attr "isa" "*,apx_ndd,apx_ndd")
11834 (set_attr "mode" "SI")])
11835
;; AND for the SWI24 modes, six alternatives:
;;   0, 1  two-operand and;
;;   2, 3  three-operand and (isa apx_ndd);
;;   4     "L"-constraint constant into a "Ya" register, type imovx,
;;         mode attribute SI, output "#";
;;   5     "k" registers, type msklog, output "#"; its isa is avx512bw
;;         when the mode attribute is SI and avx512f otherwise.
;; The prefix_rex attribute is set to 1 for an imovx alternative whose
;; constant is 0xff and whose operand 1 matches ext_QIreg_operand.
11836 (define_insn "*and<mode>_1"
11837 [(set (match_operand:SWI24 0 "nonimmediate_operand" "=rm,r,r,r,Ya,?k")
11838 (and:SWI24 (match_operand:SWI24 1 "nonimmediate_operand" "%0,0,rm,r,qm,k")
11839 (match_operand:SWI24 2 "<general_operand>" "r<i>,<m>,r<i>,<m>,L,k")))
11840 (clobber (reg:CC FLAGS_REG))]
11841 "ix86_binary_operator_ok (AND, <MODE>mode, operands, TARGET_APX_NDD)"
11842 "@
11843 and{<imodesuffix>}\t{%2, %0|%0, %2}
11844 and{<imodesuffix>}\t{%2, %0|%0, %2}
11845 and{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
11846 and{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
11847 #
11848 #"
11849 [(set (attr "isa")
11850 (cond [(eq_attr "alternative" "2,3")
11851 (const_string "apx_ndd")
11852 (eq_attr "alternative" "5")
11853 (if_then_else (eq_attr "mode" "SI")
11854 (const_string "avx512bw")
11855 (const_string "avx512f"))
11856 ]
11857 (const_string "*")))
11858 (set_attr "type" "alu,alu,alu,alu,imovx,msklog")
11859 (set_attr "length_immediate" "*,*,*,*,0,*")
11860 (set (attr "prefix_rex")
11861 (if_then_else
11862 (and (eq_attr "type" "imovx")
11863 (and (match_test "INTVAL (operands[2]) == 0xff")
11864 (match_operand 1 "ext_QIreg_operand")))
11865 (const_string "1")
11866 (const_string "*")))
11867 (set_attr "mode" "<MODE>,<MODE>,<MODE>,<MODE>,SI,<MODE>")])
11868
;; QImode AND, six alternatives:
;;   0, 1  two-operand and{b};
;;   2     emitted as and{l} on the %k forms of the operands (mode
;;         attribute SI); preferred_for_speed only when
;;         !TARGET_PARTIAL_REG_STALL;
;;   3, 4  three-operand and{b} (isa apx_ndd);
;;   5     "k" registers, type msklog, output "#"; its mode attribute is
;;         HI when !TARGET_AVX512DQ and QI otherwise.
11869 (define_insn "*andqi_1"
11870 [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r,r,r,?k")
11871 (and:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0,rm,r,k")
11872 (match_operand:QI 2 "general_operand" "qn,m,rn,rn,m,k")))
11873 (clobber (reg:CC FLAGS_REG))]
11874 "ix86_binary_operator_ok (AND, QImode, operands, TARGET_APX_NDD)"
11875 "@
11876 and{b}\t{%2, %0|%0, %2}
11877 and{b}\t{%2, %0|%0, %2}
11878 and{l}\t{%k2, %k0|%k0, %k2}
11879 and{b}\t{%2, %1, %0|%0, %1, %2}
11880 and{b}\t{%2, %1, %0|%0, %1, %2}
11881 #"
11882 [(set_attr "type" "alu,alu,alu,alu,alu,msklog")
11883 (set_attr "isa" "*,*,*,apx_ndd,apx_ndd,*")
11884 (set (attr "mode")
11885 (cond [(eq_attr "alternative" "2")
11886 (const_string "SI")
11887 (and (eq_attr "alternative" "5")
11888 (match_test "!TARGET_AVX512DQ"))
11889 (const_string "HI")
11890 ]
11891 (const_string "QI")))
11892 ;; Potential partial reg stall on alternative 2.
11893 (set (attr "preferred_for_speed")
11894 (cond [(eq_attr "alternative" "2")
11895 (symbol_ref "!TARGET_PARTIAL_REG_STALL")]
11896 (symbol_ref "true")))])
11897
11898 ;; Alternative 1 is needed to work around LRA limitation, see PR82524.
;; Logic operation (any_logic) that writes only the strict_low_part of
;; operand 0, for the SWI12 modes.  Enabled when
;; !TARGET_PARTIAL_REG_STALL or when the function is optimized for size.
;; Alternative 0 (operand 1 tied to operand 0) is emitted directly.
;; Alternative 1 outputs "#" and is split after reload, provided
;; operand 0 is rtx_equal_p to neither input: first operand 1 is copied
;; into the low part of operand 0, then the operation is done in place
;; with operand 2.
11899 (define_insn_and_split "*<code><mode>_1_slp"
11900 [(set (strict_low_part (match_operand:SWI12 0 "register_operand" "+<r>,&<r>"))
11901 (any_logic:SWI12 (match_operand:SWI12 1 "nonimmediate_operand" "%0,!<r>")
11902 (match_operand:SWI12 2 "general_operand" "<r>mn,<r>mn")))
11903 (clobber (reg:CC FLAGS_REG))]
11904 "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
11905 "@
11906 <logic>{<imodesuffix>}\t{%2, %0|%0, %2}
11907 #"
11908 "&& reload_completed
11909 && !(rtx_equal_p (operands[0], operands[1])
11910 || rtx_equal_p (operands[0], operands[2]))"
11911 [(set (strict_low_part (match_dup 0)) (match_dup 1))
11912 (parallel
11913 [(set (strict_low_part (match_dup 0))
11914 (any_logic:SWI12 (match_dup 0) (match_dup 2)))
11915 (clobber (reg:CC FLAGS_REG))])]
11916 ""
11917 [(set_attr "type" "alu")
11918 (set_attr "mode" "<MODE>")])
11919
11920 ;; Alternative 1 is needed to work around LRA limitation, see PR82524.
;; QImode logic operation writing the strict_low_part of operand 0.  One
;; input is the 8-bit field at bit position 8 of operand 2 (through
;; extract_operator); the other is operand 1.  Enabled when
;; !TARGET_PARTIAL_REG_STALL or when the function is optimized for size.
;; Alternative 0 (operand 1 tied to operand 0) is emitted directly with
;; operand 2 printed through %h.  Alternative 1 outputs "#" and is split
;; after reload when operand 0 differs from operand 1: operand 1 is
;; first copied into the low part of operand 0, then the operation is
;; done in place.
11921 (define_insn_and_split "*<code>qi_ext<mode>_1_slp"
11922 [(set (strict_low_part (match_operand:QI 0 "register_operand" "+Q,&Q"))
11923 (any_logic:QI
11924 (subreg:QI
11925 (match_operator:SWI248 3 "extract_operator"
11926 [(match_operand 2 "int248_register_operand" "Q,Q")
11927 (const_int 8)
11928 (const_int 8)]) 0)
11929 (match_operand:QI 1 "nonimmediate_operand" "0,!qm")))
11930 (clobber (reg:CC FLAGS_REG))]
11931 "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
11932 "@
11933 <logic>{b}\t{%h2, %0|%0, %h2}
11934 #"
11935 "&& reload_completed
11936 && !rtx_equal_p (operands[0], operands[1])"
11937 [(set (strict_low_part (match_dup 0)) (match_dup 1))
11938 (parallel
11939 [(set (strict_low_part (match_dup 0))
11940 (any_logic:QI
11941 (subreg:QI
11942 (match_op_dup 3
11943 [(match_dup 2) (const_int 8) (const_int 8)]) 0)
11944 (match_dup 0)))
11945 (clobber (reg:CC FLAGS_REG))])]
11946 ""
11947 [(set_attr "type" "alu")
11948 (set_attr "mode" "QI")])
11949
;; QImode logic operation writing the strict_low_part of operand 0 where
;; both inputs are 8-bit fields at bit position 8, taken from operand 1
;; and operand 2 through extract_operator.  Operand 0 is earlyclobber.
;; Enabled when !TARGET_PARTIAL_REG_STALL or when the function is
;; optimized for size.  The only output is "#"; after reload it splits
;; into a copy of operand 2's field into the low part of operand 0,
;; followed by the operation between operand 1's field and operand 0.
11950 (define_insn_and_split "*<code>qi_ext<mode>_2_slp"
11951 [(set (strict_low_part (match_operand:QI 0 "register_operand" "+&Q"))
11952 (any_logic:QI
11953 (subreg:QI
11954 (match_operator:SWI248 3 "extract_operator"
11955 [(match_operand 1 "int248_register_operand" "Q")
11956 (const_int 8)
11957 (const_int 8)]) 0)
11958 (subreg:QI
11959 (match_operator:SWI248 4 "extract_operator"
11960 [(match_operand 2 "int248_register_operand" "Q")
11961 (const_int 8)
11962 (const_int 8)]) 0)))
11963 (clobber (reg:CC FLAGS_REG))]
11964 "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
11965 "#"
11966 "&& reload_completed"
11967 [(set (strict_low_part (match_dup 0))
11968 (subreg:QI
11969 (match_op_dup 4
11970 [(match_dup 2) (const_int 8) (const_int 8)]) 0))
11971 (parallel
11972 [(set (strict_low_part (match_dup 0))
11973 (any_logic:QI
11974 (subreg:QI
11975 (match_op_dup 3
11976 [(match_dup 1) (const_int 8) (const_int 8)]) 0)
11977 (match_dup 0)))
11978 (clobber (reg:CC FLAGS_REG))])]
11979 ""
11980 [(set_attr "type" "alu")
11981 (set_attr "mode" "QI")])
11982
;; After reload, turn an AND of operand 1 with the SImode, HImode or
;; QImode mode mask into a zero-extension.  Applies when operand 1 is
;; not a REG, or is a different register from operand 0.  The source is
;; the low part of operand 1 in the mode named by the mask.  When
;; <MODE_SIZE> is smaller than SImode, the destination is first widened
;; to its SImode low part.  The extension is emitted with
;; gen_extend_insn (unsignedp 1).
11983 (define_split
11984 [(set (match_operand:SWI248 0 "register_operand")
11985 (and:SWI248 (match_operand:SWI248 1 "nonimmediate_operand")
11986 (match_operand:SWI248 2 "const_int_operand")))
11987 (clobber (reg:CC FLAGS_REG))]
11988 "reload_completed
11989 && (!REG_P (operands[1])
11990 || REGNO (operands[0]) != REGNO (operands[1]))
11991 && (UINTVAL (operands[2]) == GET_MODE_MASK (SImode)
11992 || UINTVAL (operands[2]) == GET_MODE_MASK (HImode)
11993 || UINTVAL (operands[2]) == GET_MODE_MASK (QImode))"
11994 [(const_int 0)]
11995 {
11996 unsigned HOST_WIDE_INT ival = UINTVAL (operands[2]);
11997 machine_mode mode;
11998
11999 if (ival == GET_MODE_MASK (SImode))
12000 mode = SImode;
12001 else if (ival == GET_MODE_MASK (HImode))
12002 mode = HImode;
12003 else if (ival == GET_MODE_MASK (QImode))
12004 mode = QImode;
12005 else
12006 gcc_unreachable ();
12007
12008 /* Zero extend to SImode to avoid partial register stalls. */
12009 if (<MODE_SIZE> < GET_MODE_SIZE (SImode))
12010 operands[0] = gen_lowpart (SImode, operands[0]);
12011
12012 emit_insn (gen_extend_insn
12013 (operands[0], gen_lowpart (mode, operands[1]),
12014 GET_MODE (operands[0]), mode, 1));
12015 DONE;
12016 })
12017
;; In-place AND of a register with -65536 (clears bits 0..15): rewrite it as
;; a store of zero into the HImode low part of the register.
12018 (define_split
12019 [(set (match_operand:SWI48 0 "register_operand")
12020 (and:SWI48 (match_dup 0)
12021 (const_int -65536)))
12022 (clobber (reg:CC FLAGS_REG))]
12023 "(TARGET_FAST_PREFIX && !TARGET_PARTIAL_REG_STALL)
12024 || optimize_function_for_size_p (cfun)"
12025 [(set (strict_low_part (match_dup 1)) (const_int 0))]
12026 "operands[1] = gen_lowpart (HImode, operands[0]);")
12027
;; After reload, in-place AND of a register with -256 (clears bits 0..7):
;; rewrite it as a store of zero into the QImode low part of the register.
12028 (define_split
12029 [(set (match_operand:SWI248 0 "any_QIreg_operand")
12030 (and:SWI248 (match_dup 0)
12031 (const_int -256)))
12032 (clobber (reg:CC FLAGS_REG))]
12033 "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
12034 && reload_completed"
12035 [(set (strict_low_part (match_dup 1)) (const_int 0))]
12036 "operands[1] = gen_lowpart (QImode, operands[0]);")
12037
;; After reload, in-place AND of a register with -65281 (clears bits 8..15):
;; rewrite it as an XOR of the 8-bit field at bit 8 with itself, on the
;; HImode low part of the register.
12038 (define_split
12039 [(set (match_operand:SWI248 0 "QIreg_operand")
12040 (and:SWI248 (match_dup 0)
12041 (const_int -65281)))
12042 (clobber (reg:CC FLAGS_REG))]
12043 "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
12044 && reload_completed"
12045 [(parallel
12046 [(set (zero_extract:HI (match_dup 0)
12047 (const_int 8)
12048 (const_int 8))
12049 (subreg:HI
12050 (xor:QI
12051 (subreg:QI
12052 (zero_extract:HI (match_dup 0)
12053 (const_int 8)
12054 (const_int 8)) 0)
12055 (subreg:QI
12056 (zero_extract:HI (match_dup 0)
12057 (const_int 8)
12058 (const_int 8)) 0)) 0))
12059 (clobber (reg:CC FLAGS_REG))])]
12060 "operands[0] = gen_lowpart (HImode, operands[0]);")
12061
;; DImode AND that also sets the flags register from comparing the result
;; with zero.  Alternatives 0 and 3 (operand 2 constraint "Z") emit the
;; 32-bit and{l} form on %k operands; alternatives 3-5 are three-operand
;; forms tagged isa apx_ndd.
12062 (define_insn "*anddi_2"
12063 [(set (reg FLAGS_REG)
12064 (compare
12065 (and:DI
12066 (match_operand:DI 1 "nonimmediate_operand" "%0,0,0,r,rm,r")
12067 (match_operand:DI 2 "x86_64_szext_general_operand" "Z,re,m,Z,re,m"))
12068 (const_int 0)))
12069 (set (match_operand:DI 0 "nonimmediate_operand" "=r,rm,r,r,r,r")
12070 (and:DI (match_dup 1) (match_dup 2)))]
12071 "TARGET_64BIT
12072 && ix86_match_ccmode
12073 (insn,
12074 /* If we are going to emit andl instead of andq, and the operands[2]
12075 constant might have the SImode sign bit set, make sure the sign
12076 flag isn't tested, because the instruction will set the sign flag
12077 based on bit 31 rather than bit 63. If it isn't CONST_INT,
12078 conservatively assume it might have bit 31 set. */
12079 (satisfies_constraint_Z (operands[2])
12080 && (!CONST_INT_P (operands[2])
12081 || val_signbit_known_set_p (SImode, INTVAL (operands[2]))))
12082 ? CCZmode : CCNOmode)
12083 && ix86_binary_operator_ok (AND, DImode, operands, TARGET_APX_NDD)"
12084 "@
12085 and{l}\t{%k2, %k0|%k0, %k2}
12086 and{q}\t{%2, %0|%0, %2}
12087 and{q}\t{%2, %0|%0, %2}
12088 and{l}\t{%k2, %k1, %k0|%k0, %k1, %k2}
12089 and{q}\t{%2, %1, %0|%0, %1, %2}
12090 and{q}\t{%2, %1, %0|%0, %1, %2}"
12091 [(set_attr "type" "alu")
12092 (set_attr "isa" "*,*,*,apx_ndd,apx_ndd,apx_ndd")
12093 (set_attr "mode" "SI,DI,DI,SI,DI,DI")])
12094
12095 ;; See the comment for addsi_1_zext for why we use nonimmediate_operand.
;; SImode AND that sets the flags register (CCNOmode) and writes the
;; zero-extended result to a DImode register.  Alternatives 1 and 2 are
;; three-operand forms tagged isa apx_ndd.
12096 (define_insn "*andsi_2_zext"
12097 [(set (reg FLAGS_REG)
12098 (compare (and:SI
12099 (match_operand:SI 1 "nonimmediate_operand" "%0,rm,r")
12100 (match_operand:SI 2 "x86_64_general_operand" "rBMe,re,BM"))
12101 (const_int 0)))
12102 (set (match_operand:DI 0 "register_operand" "=r,r,r")
12103 (zero_extend:DI (and:SI (match_dup 1) (match_dup 2))))]
12104 "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)
12105 && ix86_binary_operator_ok (AND, SImode, operands, TARGET_APX_NDD)"
12106 "@
12107 and{l}\t{%2, %k0|%k0, %2}
12108 and{l}\t{%2, %1, %k0|%k0, %1, %2}
12109 and{l}\t{%2, %1, %k0|%k0, %1, %2}"
12110 [(set_attr "type" "alu")
12111 (set_attr "isa" "*,apx_ndd,apx_ndd")
12112 (set_attr "mode" "SI")])
12113
;; QImode AND that also sets the flags register.  When the "mode" attribute
;; is SI (alternative 2, or when optimizing for size with an
;; ext_QIreg_operand destination and a 0..127 constant) the output code
;; emits and{l} on the %k view of the destination, masking a negative
;; constant to 0xff first.  Alternatives 3 and 4 emit the three-operand
;; and{b} form (isa apx_ndd).
12114 (define_insn "*andqi_2_maybe_si"
12115 [(set (reg FLAGS_REG)
12116 (compare (and:QI
12117 (match_operand:QI 1 "nonimmediate_operand" "%0,0,0,rm,r")
12118 (match_operand:QI 2 "general_operand" "qn,m,n,rn,m"))
12119 (const_int 0)))
12120 (set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r,r,r")
12121 (and:QI (match_dup 1) (match_dup 2)))]
12122 "ix86_binary_operator_ok (AND, QImode, operands, TARGET_APX_NDD)
12123 && ix86_match_ccmode (insn,
12124 CONST_INT_P (operands[2])
12125 && INTVAL (operands[2]) >= 0 ? CCNOmode : CCZmode)"
12126 {
12127 if (get_attr_mode (insn) == MODE_SI)
12128 {
12129 if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) < 0)
12130 operands[2] = GEN_INT (INTVAL (operands[2]) & 0xff);
12131 return "and{l}\t{%2, %k0|%k0, %2}";
12132 }
12133 if (which_alternative > 2)
12134 return "and{b}\t{%2, %1, %0|%0, %1, %2}";
12135 return "and{b}\t{%2, %0|%0, %2}";
12136 }
12137 [(set_attr "type" "alu")
12138 (set_attr "isa" "*,*,*,apx_ndd,apx_ndd")
12139 (set (attr "mode")
12140 (cond [(eq_attr "alternative" "3,4")
12141 (const_string "QI")
12142 (eq_attr "alternative" "2")
12143 (const_string "SI")
12144 (and (match_test "optimize_insn_for_size_p ()")
12145 (and (match_operand 0 "ext_QIreg_operand")
12146 (match_operand 2 "const_0_to_127_operand")))
12147 (const_string "SI")
12148 ]
12149 (const_string "QI")))
12150 ;; Potential partial reg stall on alternative 2.
12151 (set (attr "preferred_for_speed")
12152 (cond [(eq_attr "alternative" "2")
12153 (symbol_ref "!TARGET_PARTIAL_REG_STALL")]
12154 (symbol_ref "true")))])
12155
;; AND in the SWI124 modes that also sets the flags register from a
;; CCNOmode compare of the result against zero.  Alternatives 2 and 3 are
;; three-operand forms tagged isa apx_ndd.
12156 (define_insn "*and<mode>_2"
12157 [(set (reg FLAGS_REG)
12158 (compare (and:SWI124
12159 (match_operand:SWI124 1 "nonimmediate_operand" "%0,0,rm,r")
12160 (match_operand:SWI124 2 "<general_operand>" "<r><i>,<m>,r<i>,<m>"))
12161 (const_int 0)))
12162 (set (match_operand:SWI124 0 "nonimmediate_operand" "=<r>m,<r>,r,r")
12163 (and:SWI124 (match_dup 1) (match_dup 2)))]
12164 "ix86_match_ccmode (insn, CCNOmode)
12165 && ix86_binary_operator_ok (AND, <MODE>mode, operands, TARGET_APX_NDD)"
12166 "@
12167 and{<imodesuffix>}\t{%2, %0|%0, %2}
12168 and{<imodesuffix>}\t{%2, %0|%0, %2}
12169 and{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
12170 and{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
12171 [(set_attr "type" "alu")
12172 (set_attr "isa" "*,*,apx_ndd,apx_ndd")
12173 (set_attr "mode" "<MODE>")])
12174
;; QImode logic operation between operand 1 (tied to the destination by the
;; "0" constraint) and the 8-bit field at bit 8 of operand 2, which is
;; printed with the %h2 operand modifier.
12175 (define_insn "*<code>qi_ext<mode>_0"
12176 [(set (match_operand:QI 0 "nonimmediate_operand" "=QBn")
12177 (any_logic:QI
12178 (subreg:QI
12179 (match_operator:SWI248 3 "extract_operator"
12180 [(match_operand 2 "int248_register_operand" "Q")
12181 (const_int 8)
12182 (const_int 8)]) 0)
12183 (match_operand:QI 1 "nonimmediate_operand" "0")))
12184 (clobber (reg:CC FLAGS_REG))]
12185 ""
12186 "<logic>{b}\t{%h2, %0|%0, %h2}"
12187 [(set_attr "addr" "gpr8")
12188 (set_attr "type" "alu")
12189 (set_attr "mode" "QI")])
12190
;; QImode logic operation on the 8-bit fields at bit 8 of operands 1 and 2,
;; with the result in QImode register operand 0.  Output is always "#";
;; after reload the insn is split into a move of operand 2's field into
;; operand 0 followed by the logic operation of operand 1's field with
;; operand 0.
12191 (define_insn_and_split "*<code>qi_ext2<mode>_0"
12192 [(set (match_operand:QI 0 "register_operand" "=&Q")
12193 (any_logic:QI
12194 (subreg:QI
12195 (match_operator:SWI248 3 "extract_operator"
12196 [(match_operand 1 "int248_register_operand" "Q")
12197 (const_int 8)
12198 (const_int 8)]) 0)
12199 (subreg:QI
12200 (match_operator:SWI248 4 "extract_operator"
12201 [(match_operand 2 "int248_register_operand" "Q")
12202 (const_int 8)
12203 (const_int 8)]) 0)))
12204 (clobber (reg:CC FLAGS_REG))]
12205 ""
12206 "#"
12207 "&& reload_completed"
12208 [(set (match_dup 0)
12209 (subreg:QI
12210 (match_op_dup 4
12211 [(match_dup 2) (const_int 8) (const_int 8)]) 0))
12212 (parallel
12213 [(set (match_dup 0)
12214 (any_logic:QI
12215 (subreg:QI
12216 (match_op_dup 3
12217 [(match_dup 1) (const_int 8) (const_int 8)]) 0)
12218 (match_dup 0)))
12219 (clobber (reg:CC FLAGS_REG))])]
12220 ""
12221 [(set_attr "type" "alu")
12222 (set_attr "mode" "QI")])
12223
;; Expander for an AND of the 8-bit field at bit 8 of HImode operand 1 with
;; QImode constant operand 2, stored into the same field of HImode
;; operand 0.  The flags register is clobbered.
12224 (define_expand "andqi_ext_1"
12225 [(parallel
12226 [(set (zero_extract:HI (match_operand:HI 0 "register_operand")
12227 (const_int 8)
12228 (const_int 8))
12229 (subreg:HI
12230 (and:QI
12231 (subreg:QI
12232 (zero_extract:HI (match_operand:HI 1 "register_operand")
12233 (const_int 8)
12234 (const_int 8)) 0)
12235 (match_operand:QI 2 "const_int_operand")) 0))
12236 (clobber (reg:CC FLAGS_REG))])])
12237
12238 ;; Alternative 1 is needed to work around LRA limitation, see PR82524.
;; Logic operation between the 8-bit field at bit 8 of operand 1 and QImode
;; operand 2, written to the same field of operand 0.  Alternative 0 ties
;; operand 1 to operand 0 and emits the instruction on %h0.  Alternative 1
;; outputs "#": when operands 0 and 1 differ after reload, the split first
;; copies operand 1's field into operand 0 and then performs the operation
;; in place on operand 0.
12239 (define_insn_and_split "*<code>qi_ext<mode>_1"
12240 [(set (zero_extract:SWI248
12241 (match_operand 0 "int248_register_operand" "+Q,&Q")
12242 (const_int 8)
12243 (const_int 8))
12244 (subreg:SWI248
12245 (any_logic:QI
12246 (subreg:QI
12247 (match_operator:SWI248 3 "extract_operator"
12248 [(match_operand 1 "int248_register_operand" "0,!Q")
12249 (const_int 8)
12250 (const_int 8)]) 0)
12251 (match_operand:QI 2 "general_operand" "QnBn,QnBn")) 0))
12252 (clobber (reg:CC FLAGS_REG))]
12253 ""
12254 "@
12255 <logic>{b}\t{%2, %h0|%h0, %2}
12256 #"
12257 "reload_completed
12258 && !(rtx_equal_p (operands[0], operands[1]))"
12259 [(set (zero_extract:SWI248
12260 (match_dup 0) (const_int 8) (const_int 8))
12261 (zero_extract:SWI248
12262 (match_dup 1) (const_int 8) (const_int 8)))
12263 (parallel
12264 [(set (zero_extract:SWI248
12265 (match_dup 0) (const_int 8) (const_int 8))
12266 (subreg:SWI248
12267 (any_logic:QI
12268 (subreg:QI
12269 (match_op_dup 3
12270 [(match_dup 0) (const_int 8) (const_int 8)]) 0)
12271 (match_dup 2)) 0))
12272 (clobber (reg:CC FLAGS_REG))])]
12273 ""
12274 [(set_attr "addr" "gpr8")
12275 (set_attr "type" "alu")
12276 (set_attr "mode" "QI")])
12277
12278 ;; Alternative 1 is needed to work around LRA limitation, see PR82524.
;; Flag-setting logic operation between the 8-bit field at bit 8 of
;; operand 1 and QImode operand 2: the flags register (operand 4) receives
;; the comparison (operator 5) of the result with zero and the result is
;; written to the same field of operand 0.  Both sets read operand 1, so the
;; flags and the stored value always describe the same computation.
;; Alternative 0 ties operand 1 to operand 0; alternative 1 outputs "#" and,
;; when operands 0 and 1 differ after reload, is split into a copy of
;; operand 1's field into operand 0 followed by the in-place operation.
;; The split parallel reads operand 0 in both sets so that it is recognized
;; again by this pattern (via alternative 0).
12279 (define_insn_and_split "*<code>qi_ext<mode>_1_cc"
12280 [(set (match_operand 4 "flags_reg_operand")
12281 (match_operator 5 "compare_operator"
12282 [(any_logic:QI
12283 (subreg:QI
12284 (match_operator:SWI248 3 "extract_operator"
12285 [(match_operand 1 "int248_register_operand" "0,!Q")
12286 (const_int 8)
12287 (const_int 8)]) 0)
12288 (match_operand:QI 2 "general_operand" "QnBn,QnBn"))
12289 (const_int 0)]))
12290 (set (zero_extract:SWI248
12291 (match_operand 0 "int248_register_operand" "+Q,&Q")
12292 (const_int 8)
12293 (const_int 8))
12294 (subreg:SWI248
12295 (any_logic:QI
12296 (subreg:QI
12297 (match_op_dup 3
12298 [(match_dup 1) (const_int 8) (const_int 8)]) 0)
12299 (match_dup 2)) 0))]
12300 "ix86_match_ccmode (insn, CCNOmode)"
12301 "@
12302 <logic>{b}\t{%2, %h0|%h0, %2}
12303 #"
12304 "&& reload_completed
12305 && !(rtx_equal_p (operands[0], operands[1]))"
12306 [(set (zero_extract:SWI248
12307 (match_dup 0) (const_int 8) (const_int 8))
12308 (zero_extract:SWI248
12309 (match_dup 1) (const_int 8) (const_int 8)))
12310 (parallel
12311 [(set (match_dup 4)
12312 (match_op_dup 5
12313 [(any_logic:QI
12314 (subreg:QI
12315 (match_op_dup 3
12316 [(match_dup 0) (const_int 8) (const_int 8)]) 0)
12317 (match_dup 2))
12318 (const_int 0)]))
12319 (set (zero_extract:SWI248
12320 (match_dup 0) (const_int 8) (const_int 8))
12321 (subreg:SWI248
12322 (any_logic:QI
12323 (subreg:QI
12324 (match_op_dup 3
12325 [(match_dup 0) (const_int 8) (const_int 8)]) 0)
12326 (match_dup 2)) 0))])]
12327 ""
12328 [(set_attr "addr" "gpr8")
12329 (set_attr "type" "alu")
12330 (set_attr "mode" "QI")])
12331
12332 ;; Alternative 1 is needed to work around LRA limitation, see PR82524.
;; Logic operation between the 8-bit fields at bit 8 of operands 1 and 2,
;; written to the same field of operand 0.  Alternative 0 ties operand 1 to
;; operand 0 (operands 1 and 2 are marked commutative with "%").
;; Alternative 1 outputs "#": when operand 0 equals neither input after
;; reload, the split copies operand 1's field into operand 0 and then
;; combines operand 0's field with operand 2's field in place.
12333 (define_insn_and_split "*<code>qi_ext<mode>_2"
12334 [(set (zero_extract:SWI248
12335 (match_operand 0 "int248_register_operand" "+Q,&Q")
12336 (const_int 8)
12337 (const_int 8))
12338 (subreg:SWI248
12339 (any_logic:QI
12340 (subreg:QI
12341 (match_operator:SWI248 3 "extract_operator"
12342 [(match_operand 1 "int248_register_operand" "%0,!Q")
12343 (const_int 8)
12344 (const_int 8)]) 0)
12345 (subreg:QI
12346 (match_operator:SWI248 4 "extract_operator"
12347 [(match_operand 2 "int248_register_operand" "Q,Q")
12348 (const_int 8)
12349 (const_int 8)]) 0)) 0))
12350 (clobber (reg:CC FLAGS_REG))]
12351 ""
12352 "@
12353 <logic>{b}\t{%h2, %h0|%h0, %h2}
12354 #"
12355 "reload_completed
12356 && !(rtx_equal_p (operands[0], operands[1])
12357 || rtx_equal_p (operands[0], operands[2]))"
12358 [(set (zero_extract:SWI248
12359 (match_dup 0) (const_int 8) (const_int 8))
12360 (zero_extract:SWI248
12361 (match_dup 1) (const_int 8) (const_int 8)))
12362 (parallel
12363 [(set (zero_extract:SWI248
12364 (match_dup 0) (const_int 8) (const_int 8))
12365 (subreg:SWI248
12366 (any_logic:QI
12367 (subreg:QI
12368 (match_op_dup 3
12369 [(match_dup 0) (const_int 8) (const_int 8)]) 0)
12370 (subreg:QI
12371 (match_op_dup 4
12372 [(match_dup 2) (const_int 8) (const_int 8)]) 0)) 0))
12373 (clobber (reg:CC FLAGS_REG))])]
12374 ""
12375 [(set_attr "type" "alu")
12376 (set_attr "mode" "QI")])
12377
12378 ;; Alternative 1 is needed to work around LRA limitation, see PR82524.
;; Store into the 8-bit field at bit 8 of operand 0 the same field extracted
;; from the logic operation of full registers operand 1 and operand 2, which
;; must have equal modes.  Alternative 1 outputs "#": when operand 0 equals
;; neither input after reload, the split copies operand 1's field into
;; operand 0 and then repeats the operation with operand 0 (viewed in
;; operand 1's mode as operand 4) in place of operand 1.
12379 (define_insn_and_split "*<code>qi_ext<mode>_3"
12380 [(set (zero_extract:SWI248
12381 (match_operand 0 "int248_register_operand" "+Q,&Q")
12382 (const_int 8)
12383 (const_int 8))
12384 (match_operator:SWI248 3 "extract_operator"
12385 [(any_logic
12386 (match_operand 1 "int248_register_operand" "%0,!Q")
12387 (match_operand 2 "int248_register_operand" "Q,Q"))
12388 (const_int 8)
12389 (const_int 8)]))
12390 (clobber (reg:CC FLAGS_REG))]
12391 "GET_MODE (operands[1]) == GET_MODE (operands[2])"
12392 "@
12393 <logic>{b}\t{%h2, %h0|%h0, %h2}
12394 #"
12395 "&& reload_completed
12396 && !(rtx_equal_p (operands[0], operands[1])
12397 || rtx_equal_p (operands[0], operands[2]))"
12398 [(set (zero_extract:SWI248
12399 (match_dup 0) (const_int 8) (const_int 8))
12400 (zero_extract:SWI248
12401 (match_dup 1) (const_int 8) (const_int 8)))
12402 (parallel
12403 [(set (zero_extract:SWI248
12404 (match_dup 0) (const_int 8) (const_int 8))
12405 (match_op_dup 3
12406 [(any_logic (match_dup 4) (match_dup 2))
12407 (const_int 8) (const_int 8)]))
12408 (clobber (reg:CC FLAGS_REG))])]
12409 "operands[4] = gen_lowpart (GET_MODE (operands[1]), operands[0]);"
12410 [(set_attr "type" "alu")
12411 (set_attr "mode" "QI")])
12412
12413 ;; Convert wide AND instructions with immediate operand to shorter QImode
12414 ;; equivalents when possible.
12415 ;; Don't do the splitting with memory operands, since it introduces risk
12416 ;; of memory mismatch stalls. We may want to do the splitting for optimizing
12417 ;; for size, but that can (should?) be handled by generic code instead.
12418 ;; Don't do the splitting for APX NDD as NDD does not support *h registers.
;; This split applies only when every bit of the constant outside bits 8..15
;; is set, so the AND can only change the second byte.  The constant is
;; shifted right by 8 and applied as a QImode AND to the 8-bit field at
;; bit 8 of the HImode low parts of operands 0 and 1.
12419 (define_split
12420 [(set (match_operand:SWI248 0 "QIreg_operand")
12421 (and:SWI248 (match_operand:SWI248 1 "register_operand")
12422 (match_operand:SWI248 2 "const_int_operand")))
12423 (clobber (reg:CC FLAGS_REG))]
12424 "reload_completed
12425 && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
12426 && !(~INTVAL (operands[2]) & ~(255 << 8))
12427 && !(TARGET_APX_NDD && REGNO (operands[0]) != REGNO (operands[1]))"
12428 [(parallel
12429 [(set (zero_extract:HI (match_dup 0)
12430 (const_int 8)
12431 (const_int 8))
12432 (subreg:HI
12433 (and:QI
12434 (subreg:QI
12435 (zero_extract:HI (match_dup 1)
12436 (const_int 8)
12437 (const_int 8)) 0)
12438 (match_dup 2)) 0))
12439 (clobber (reg:CC FLAGS_REG))])]
12440 {
12441 operands[0] = gen_lowpart (HImode, operands[0]);
12442 operands[1] = gen_lowpart (HImode, operands[1]);
12443 operands[2] = gen_int_mode (INTVAL (operands[2]) >> 8, QImode);
12444 })
12445
12446 ;; Since AND can be encoded with sign extended immediate, this is only
12447 ;; profitable when 7th bit is not set.
;; This split applies after reload only when every bit of the constant above
;; bit 7 is set and bit 7 itself is clear; the AND is then performed on the
;; QImode low parts of the operands through strict_low_part.  With
;; TARGET_APX_NDD it is restricted to the case where operands 0 and 1 are
;; equal.
12448 (define_split
12449 [(set (match_operand:SWI248 0 "any_QIreg_operand")
12450 (and:SWI248 (match_operand:SWI248 1 "general_operand")
12451 (match_operand:SWI248 2 "const_int_operand")))
12452 (clobber (reg:CC FLAGS_REG))]
12453 "reload_completed
12454 && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
12455 && !(~INTVAL (operands[2]) & ~255)
12456 && !(INTVAL (operands[2]) & 128)
12457 && !(TARGET_APX_NDD
12458 && !rtx_equal_p (operands[0], operands[1]))"
12459 [(parallel [(set (strict_low_part (match_dup 0))
12460 (and:QI (match_dup 1)
12461 (match_dup 2)))
12462 (clobber (reg:CC FLAGS_REG))])]
12463 {
12464 operands[0] = gen_lowpart (QImode, operands[0]);
12465 operands[1] = gen_lowpart (QImode, operands[1]);
12466 operands[2] = gen_int_mode (INTVAL (operands[2]), QImode);
12467 })
12468
;; Double-word AND-NOT (~operand 1 & operand 2) for TARGET_BMI.  Output is
;; "#"; after reload split_double_mode fills operands 0-2 and 3-5 and the
;; insn is split into two DWIH-mode and-not operations, one on each set of
;; operands.
12469 (define_insn_and_split "*andn<dwi>3_doubleword_bmi"
12470 [(set (match_operand:<DWI> 0 "register_operand" "=&r,r,r")
12471 (and:<DWI>
12472 (not:<DWI> (match_operand:<DWI> 1 "register_operand" "r,0,r"))
12473 (match_operand:<DWI> 2 "nonimmediate_operand" "ro,ro,0")))
12474 (clobber (reg:CC FLAGS_REG))]
12475 "TARGET_BMI"
12476 "#"
12477 "&& reload_completed"
12478 [(parallel [(set (match_dup 0)
12479 (and:DWIH (not:DWIH (match_dup 1)) (match_dup 2)))
12480 (clobber (reg:CC FLAGS_REG))])
12481 (parallel [(set (match_dup 3)
12482 (and:DWIH (not:DWIH (match_dup 4)) (match_dup 5)))
12483 (clobber (reg:CC FLAGS_REG))])]
12484 "split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]);")
12485
;; Double-word AND-NOT when BMI is not available.  Matches only while
;; ix86_pre_reload_split () holds and is split unconditionally into a NOT of
;; operand 1 into a new pseudo (operand 3) followed by an AND of that pseudo
;; with operand 2.
12486 (define_insn_and_split "*andn<mode>3_doubleword"
12487 [(set (match_operand:DWI 0 "register_operand")
12488 (and:DWI
12489 (not:DWI (match_operand:DWI 1 "register_operand"))
12490 (match_operand:DWI 2 "nonimmediate_operand")))
12491 (clobber (reg:CC FLAGS_REG))]
12492 "!TARGET_BMI
12493 && ix86_pre_reload_split ()"
12494 "#"
12495 "&& 1"
12496 [(set (match_dup 3) (not:DWI (match_dup 1)))
12497 (parallel [(set (match_dup 0)
12498 (and:DWI (match_dup 3) (match_dup 2)))
12499 (clobber (reg:CC FLAGS_REG))])]
12500 "operands[3] = gen_reg_rtx (<MODE>mode);")
12501
;; AND-NOT (~operand 1 & operand 2) in the SWI48 modes.  Alternatives 0 and
;; 1 emit the andn instruction (isa bmi) with a register or memory second
;; source; alternative 2 uses "k" constraint registers (type msklog, isa
;; avx512bw) and outputs "#".
12502 (define_insn "*andn<mode>_1"
12503 [(set (match_operand:SWI48 0 "register_operand" "=r,r,?k")
12504 (and:SWI48
12505 (not:SWI48 (match_operand:SWI48 1 "register_operand" "r,r,k"))
12506 (match_operand:SWI48 2 "nonimmediate_operand" "r,m,k")))
12507 (clobber (reg:CC FLAGS_REG))]
12508 "TARGET_BMI || TARGET_AVX512BW"
12509 "@
12510 andn\t{%2, %1, %0|%0, %1, %2}
12511 andn\t{%2, %1, %0|%0, %1, %2}
12512 #"
12513 [(set_attr "isa" "bmi,bmi,avx512bw")
12514 (set_attr "type" "bitmanip,bitmanip,msklog")
12515 (set_attr "btver2_decode" "direct, double,*")
12516 (set_attr "mode" "<MODE>")])
12517
;; AND-NOT (~operand 1 & operand 2) in the SWI12 modes, registers only.
;; Alternative 0 emits andn on the %k views of the operands (mode attribute
;; SI); alternative 1 uses "k" constraint registers, outputs "#", and gets
;; mode HI when TARGET_AVX512DQ is not enabled.
12518 (define_insn "*andn<mode>_1"
12519 [(set (match_operand:SWI12 0 "register_operand" "=r,?k")
12520 (and:SWI12
12521 (not:SWI12 (match_operand:SWI12 1 "register_operand" "r,k"))
12522 (match_operand:SWI12 2 "register_operand" "r,k")))
12523 (clobber (reg:CC FLAGS_REG))]
12524 "TARGET_BMI || TARGET_AVX512BW"
12525 "@
12526 andn\t{%k2, %k1, %k0|%k0, %k1, %k2}
12527 #"
12528 [(set_attr "isa" "bmi,avx512f")
12529 (set_attr "type" "bitmanip,msklog")
12530 (set_attr "btver2_decode" "direct,*")
12531 (set (attr "mode")
12532 (cond [(eq_attr "alternative" "0")
12533 (const_string "SI")
12534 (and (eq_attr "alternative" "1")
12535 (match_test "!TARGET_AVX512DQ"))
12536 (const_string "HI")
12537 ]
12538 (const_string "<MODE>")))])
12539
;; AND-NOT (~operand 1 & operand 2) used only for its effect on the flags
;; register (CCNOmode compare against zero); the andn result itself goes to
;; a scratch register (operand 0).
12540 (define_insn "*andn_<mode>_ccno"
12541 [(set (reg FLAGS_REG)
12542 (compare
12543 (and:SWI48
12544 (not:SWI48 (match_operand:SWI48 1 "register_operand" "r,r"))
12545 (match_operand:SWI48 2 "nonimmediate_operand" "r,m"))
12546 (const_int 0)))
12547 (clobber (match_scratch:SWI48 0 "=r,r"))]
12548 "TARGET_BMI && ix86_match_ccmode (insn, CCNOmode)"
12549 "andn\t{%2, %1, %0|%0, %1, %2}"
12550 [(set_attr "type" "bitmanip")
12551 (set_attr "btver2_decode" "direct, double")
12552 (set_attr "mode" "<MODE>")])
12553
12554 ;; Split *andnsi_1 after reload with -Oz when not;and is shorter.
;; Requires operand 0 and operand 1 to be the same legacy integer register,
;; operand 2 not to be a REX integer register, and operand 2 not to overlap
;; operand 0.  The result is an in-place NOT followed by an AND with
;; operand 2.
12555 (define_split
12556 [(set (match_operand:SI 0 "register_operand")
12557 (and:SI (not:SI (match_operand:SI 1 "register_operand"))
12558 (match_operand:SI 2 "nonimmediate_operand")))
12559 (clobber (reg:CC FLAGS_REG))]
12560 "reload_completed
12561 && optimize_insn_for_size_p () && optimize_size > 1
12562 && REGNO (operands[0]) == REGNO (operands[1])
12563 && LEGACY_INT_REG_P (operands[0])
12564 && !REX_INT_REG_P (operands[2])
12565 && !reg_overlap_mentioned_p (operands[0], operands[2])"
12566 [(set (match_dup 0) (not:SI (match_dup 1)))
12567 (parallel [(set (match_dup 0) (and:SI (match_dup 0) (match_dup 2)))
12568 (clobber (reg:CC FLAGS_REG))])])
12569
12570 ;; Split *andn_si_ccno with -Oz when not;test is shorter.
;; The matched insn clobbers operand 2 itself, so the split may invert
;; operand 2 in place and then set the flags from the AND of operand 3 with
;; the inverted operand 2.  Operand 2 must be a legacy integer register that
;; operand 3 does not overlap, and operand 3 must not be a REX integer
;; register.
12571 (define_split
12572 [(set (match_operand 0 "flags_reg_operand")
12573 (match_operator 1 "compare_operator"
12574 [(and:SI (not:SI (match_operand:SI 2 "general_reg_operand"))
12575 (match_operand:SI 3 "nonimmediate_operand"))
12576 (const_int 0)]))
12577 (clobber (match_dup 2))]
12578 "reload_completed
12579 && optimize_insn_for_size_p () && optimize_size > 1
12580 && LEGACY_INT_REG_P (operands[2])
12581 && !REX_INT_REG_P (operands[3])
12582 && !reg_overlap_mentioned_p (operands[2], operands[3])"
12583 [(set (match_dup 2) (not:SI (match_dup 2)))
12584 (set (match_dup 0) (match_op_dup 1
12585 [(and:SI (match_dup 3) (match_dup 2))
12586 (const_int 0)]))])
12587
12588 ;; Variant 1 of 4: Split ((A | B) ^ A) ^ C as (B & ~A) ^ C.
;; Here A is operand 1, B is operand 2 and C is operand 3.  For TARGET_BMI
;; only; the and-not result is placed in a new pseudo (operand 4), which is
;; then XORed with operand 3 into operand 0.
12589 (define_split
12590 [(set (match_operand:SWI48 0 "register_operand")
12591 (xor:SWI48
12592 (xor:SWI48
12593 (ior:SWI48 (match_operand:SWI48 1 "register_operand")
12594 (match_operand:SWI48 2 "nonimmediate_operand"))
12595 (match_dup 1))
12596 (match_operand:SWI48 3 "nonimmediate_operand")))
12597 (clobber (reg:CC FLAGS_REG))]
12598 "TARGET_BMI"
12599 [(parallel
12600 [(set (match_dup 4) (and:SWI48 (not:SWI48 (match_dup 1)) (match_dup 2)))
12601 (clobber (reg:CC FLAGS_REG))])
12602 (parallel
12603 [(set (match_dup 0) (xor:SWI48 (match_dup 4) (match_dup 3)))
12604 (clobber (reg:CC FLAGS_REG))])]
12605 "operands[4] = gen_reg_rtx (<MODE>mode);")
12606
12607 ;; Variant 2 of 4: Split ((A | B) ^ B) ^ C as (A & ~B) ^ C.
;; Here A is operand 1, B is operand 2 (a register in this variant) and C is
;; operand 3.  For TARGET_BMI only; the and-not result is placed in a new
;; pseudo (operand 4), which is then XORed with operand 3 into operand 0.
12608 (define_split
12609 [(set (match_operand:SWI48 0 "register_operand")
12610 (xor:SWI48
12611 (xor:SWI48
12612 (ior:SWI48 (match_operand:SWI48 1 "register_operand")
12613 (match_operand:SWI48 2 "register_operand"))
12614 (match_dup 2))
12615 (match_operand:SWI48 3 "nonimmediate_operand")))
12616 (clobber (reg:CC FLAGS_REG))]
12617 "TARGET_BMI"
12618 [(parallel
12619 [(set (match_dup 4) (and:SWI48 (not:SWI48 (match_dup 2)) (match_dup 1)))
12620 (clobber (reg:CC FLAGS_REG))])
12621 (parallel
12622 [(set (match_dup 0) (xor:SWI48 (match_dup 4) (match_dup 3)))
12623 (clobber (reg:CC FLAGS_REG))])]
12624 "operands[4] = gen_reg_rtx (<MODE>mode);")
12625
12626 ;; Variant 3 of 4: Split ((A | B) ^ C) ^ A as (B & ~A) ^ C.
;; Here A is operand 1, B is operand 2 and C is operand 3.  For TARGET_BMI
;; only; the and-not result is placed in a new pseudo (operand 4), which is
;; then XORed with operand 3 into operand 0.
12627 (define_split
12628 [(set (match_operand:SWI48 0 "register_operand")
12629 (xor:SWI48
12630 (xor:SWI48
12631 (ior:SWI48 (match_operand:SWI48 1 "register_operand")
12632 (match_operand:SWI48 2 "nonimmediate_operand"))
12633 (match_operand:SWI48 3 "nonimmediate_operand"))
12634 (match_dup 1)))
12635 (clobber (reg:CC FLAGS_REG))]
12636 "TARGET_BMI"
12637 [(parallel
12638 [(set (match_dup 4) (and:SWI48 (not:SWI48 (match_dup 1)) (match_dup 2)))
12639 (clobber (reg:CC FLAGS_REG))])
12640 (parallel
12641 [(set (match_dup 0) (xor:SWI48 (match_dup 4) (match_dup 3)))
12642 (clobber (reg:CC FLAGS_REG))])]
12643 "operands[4] = gen_reg_rtx (<MODE>mode);")
12644
12645 ;; Variant 4 of 4: Split ((A | B) ^ C) ^ B as (A & ~B) ^ C.
;; Here A is operand 1, B is operand 2 (a register in this variant) and C is
;; operand 3.  For TARGET_BMI only; the and-not result is placed in a new
;; pseudo (operand 4), which is then XORed with operand 3 into operand 0.
12646 (define_split
12647 [(set (match_operand:SWI48 0 "register_operand")
12648 (xor:SWI48
12649 (xor:SWI48
12650 (ior:SWI48 (match_operand:SWI48 1 "register_operand")
12651 (match_operand:SWI48 2 "register_operand"))
12652 (match_operand:SWI48 3 "nonimmediate_operand"))
12653 (match_dup 2)))
12654 (clobber (reg:CC FLAGS_REG))]
12655 "TARGET_BMI"
12656 [(parallel
12657 [(set (match_dup 4) (and:SWI48 (not:SWI48 (match_dup 2)) (match_dup 1)))
12658 (clobber (reg:CC FLAGS_REG))])
12659 (parallel
12660 [(set (match_dup 0) (xor:SWI48 (match_dup 4) (match_dup 3)))
12661 (clobber (reg:CC FLAGS_REG))])]
12662 "operands[4] = gen_reg_rtx (<MODE>mode);")
12663 \f
12664 ;; Logical inclusive and exclusive OR instructions
12665
12666 ;; %%% This used to optimize known byte-wide and operations to memory.
12667 ;; If this is considered useful, it should be done with splitters.
12668
;; Named expander for the any_or operations in the SDWIM modes.  When the
;; mode is wider than a word and operand 2 is not an
;; x86_64_hilo_general_operand, operand 2 is first forced into a register;
;; the actual expansion is delegated to ix86_expand_binary_operator.
12669 (define_expand "<code><mode>3"
12670 [(set (match_operand:SDWIM 0 "nonimmediate_operand")
12671 (any_or:SDWIM (match_operand:SDWIM 1 "nonimmediate_operand")
12672 (match_operand:SDWIM 2 "<general_operand>")))]
12673 ""
12674 {
12675 if (GET_MODE_SIZE (<MODE>mode) > UNITS_PER_WORD
12676 && !x86_64_hilo_general_operand (operands[2], <MODE>mode))
12677 operands[2] = force_reg (<MODE>mode, operands[2]);
12678
12679 ix86_expand_binary_operator (<CODE>, <MODE>mode, operands, TARGET_APX_NDD);
12680 DONE;
12681 })
12682
;; Double-word any_or operation.  Output is "#"; after reload
;; split_double_mode fills operands 0-2 and 3-5 and each group is handled
;; separately by the C code below: a constant 0 second source becomes a
;; plain move (or nothing when source and destination are equal), a constant
;; -1 becomes a move of -1 for IOR and a NOT otherwise, and anything else is
;; expanded as a regular word-sized binary operation.
12683 (define_insn_and_split "*<code><dwi>3_doubleword"
12684 [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,r,&r,&r")
12685 (any_or:<DWI>
12686 (match_operand:<DWI> 1 "nonimmediate_operand" "%0,0,ro,r")
12687 (match_operand:<DWI> 2 "x86_64_hilo_general_operand" "r<di>,o,r<di>,o")))
12688 (clobber (reg:CC FLAGS_REG))]
12689 "ix86_binary_operator_ok (<CODE>, <DWI>mode, operands, TARGET_APX_NDD)"
12690 "#"
12691 "&& reload_completed"
12692 [(const_int:DWIH 0)]
12693 {
12694 /* This insn may disappear completely when operands[2] == const0_rtx
12695 and operands[0] == operands[1], which requires a NOTE_INSN_DELETED. */
12696 bool emit_insn_deleted_note_p = false;
12697
12698 split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]);
12699
12700 if (operands[2] == const0_rtx)
12701 {
12702 if (!rtx_equal_p (operands[0], operands[1]))
12703 emit_move_insn (operands[0], operands[1]);
12704 else
12705 emit_insn_deleted_note_p = true;
12706 }
12707 else if (operands[2] == constm1_rtx)
12708 {
12709 if (<CODE> == IOR)
12710 emit_move_insn (operands[0], constm1_rtx);
12711 else
12712 ix86_expand_unary_operator (NOT, <MODE>mode, &operands[0],
12713 TARGET_APX_NDD);
12714 }
12715 else
12716 ix86_expand_binary_operator (<CODE>, <MODE>mode, &operands[0],
12717 TARGET_APX_NDD);
12718
12719 if (operands[5] == const0_rtx)
12720 {
12721 if (!rtx_equal_p (operands[3], operands[4]))
12722 emit_move_insn (operands[3], operands[4]);
12723 else if (emit_insn_deleted_note_p)
12724 emit_note (NOTE_INSN_DELETED);
12725 }
12726 else if (operands[5] == constm1_rtx)
12727 {
12728 if (<CODE> == IOR)
12729 emit_move_insn (operands[3], constm1_rtx);
12730 else
12731 ix86_expand_unary_operator (NOT, <MODE>mode, &operands[3],
12732 TARGET_APX_NDD);
12733 }
12734 else
12735 ix86_expand_binary_operator (<CODE>, <MODE>mode, &operands[3],
12736 TARGET_APX_NDD);
12737
12738 DONE;
12739 }
12740 [(set_attr "isa" "*,*,apx_ndd,apx_ndd")])
12741
;; any_or operation in the SWI248 modes, clobbering the flags register.
;; Alternatives 0-1 are two-operand forms, alternatives 2-3 are
;; three-operand forms tagged isa apx_ndd, and alternative 4 uses "k"
;; constraint registers (type msklog) and outputs "#".
12742 (define_insn "*<code><mode>_1"
12743 [(set (match_operand:SWI248 0 "nonimmediate_operand" "=rm,r,r,r,?k")
12744 (any_or:SWI248
12745 (match_operand:SWI248 1 "nonimmediate_operand" "%0,0,rm,r,k")
12746 (match_operand:SWI248 2 "<general_operand>" "r<i>,<m>,r<i>,<m>,k")))
12747 (clobber (reg:CC FLAGS_REG))]
12748 "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands, TARGET_APX_NDD)"
12749 "@
12750 <logic>{<imodesuffix>}\t{%2, %0|%0, %2}
12751 <logic>{<imodesuffix>}\t{%2, %0|%0, %2}
12752 <logic>{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
12753 <logic>{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
12754 #"
12755 [(set_attr "isa" "*,*,apx_ndd,apx_ndd,<kmov_isa>")
12756 (set_attr "type" "alu, alu, alu, alu, msklog")
12757 (set_attr "mode" "<MODE>")])
12758
;; NOT of an XOR in the SWI248 modes.  Output is "#"; after reload the insn
;; is split into an XOR into operand 0 followed by a NOT of operand 0,
;; except when operand 0 is a mask register, in which case the C code emits
;; gen_kxnor<mode> instead.
12759 (define_insn_and_split "*notxor<mode>_1"
12760 [(set (match_operand:SWI248 0 "nonimmediate_operand" "=rm,r,r,r,?k")
12761 (not:SWI248
12762 (xor:SWI248
12763 (match_operand:SWI248 1 "nonimmediate_operand" "%0,0,rm,r,k")
12764 (match_operand:SWI248 2 "<general_operand>" "r<i>,<m>,r<i>,<m>,k"))))
12765 (clobber (reg:CC FLAGS_REG))]
12766 "ix86_binary_operator_ok (XOR, <MODE>mode, operands, TARGET_APX_NDD)"
12767 "#"
12768 "&& reload_completed"
12769 [(parallel
12770 [(set (match_dup 0)
12771 (xor:SWI248 (match_dup 1) (match_dup 2)))
12772 (clobber (reg:CC FLAGS_REG))])
12773 (set (match_dup 0)
12774 (not:SWI248 (match_dup 0)))]
12775 {
12776 if (MASK_REG_P (operands[0]))
12777 {
12778 emit_insn (gen_kxnor<mode> (operands[0], operands[1], operands[2]));
12779 DONE;
12780 }
12781 }
12782 [(set_attr "isa" "*,*,apx_ndd,apx_ndd,<kmov_isa>")
12783 (set_attr "type" "alu, alu, alu, alu, msklog")
12784 (set_attr "mode" "<MODE>")])
12785
;; In-place DImode IOR with a constant whose exact_log2 is in 31..63, i.e.
;; a single set bit at position 31..63.  Output is "#"; after reload the
;; insn is split into a store of 1 into the 1-bit zero_extract at that bit
;; position (operand 3).  Only for TARGET_64BIT && TARGET_USE_BT.
12786 (define_insn_and_split "*iordi_1_bts"
12787 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
12788 (ior:DI
12789 (match_operand:DI 1 "nonimmediate_operand" "%0")
12790 (match_operand:DI 2 "const_int_operand" "n")))
12791 (clobber (reg:CC FLAGS_REG))]
12792 "TARGET_64BIT && TARGET_USE_BT
12793 && ix86_binary_operator_ok (IOR, DImode, operands)
12794 && IN_RANGE (exact_log2 (INTVAL (operands[2])), 31, 63)"
12795 "#"
12796 "&& reload_completed"
12797 [(parallel [(set (zero_extract:DI (match_dup 0)
12798 (const_int 1)
12799 (match_dup 3))
12800 (const_int 1))
12801 (clobber (reg:CC FLAGS_REG))])]
12802 "operands[3] = GEN_INT (exact_log2 (INTVAL (operands[2])));"
12803 [(set_attr "type" "alu1")
12804 (set_attr "prefix_0f" "1")
12805 (set_attr "znver1_decode" "double")
12806 (set_attr "mode" "DI")])
12807
;; In-place DImode XOR with a constant whose exact_log2 is in 31..63, i.e.
;; a single set bit at position 31..63.  Output is "#"; after reload the
;; insn is split into a store of the NOT of the 1-bit zero_extract at that
;; bit position (operand 3) back into the same bit.  Only for
;; TARGET_64BIT && TARGET_USE_BT.
12808 (define_insn_and_split "*xordi_1_btc"
12809 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
12810 (xor:DI
12811 (match_operand:DI 1 "nonimmediate_operand" "%0")
12812 (match_operand:DI 2 "const_int_operand" "n")))
12813 (clobber (reg:CC FLAGS_REG))]
12814 "TARGET_64BIT && TARGET_USE_BT
12815 && ix86_binary_operator_ok (XOR, DImode, operands)
12816 && IN_RANGE (exact_log2 (INTVAL (operands[2])), 31, 63)"
12817 "#"
12818 "&& reload_completed"
12819 [(parallel [(set (zero_extract:DI (match_dup 0)
12820 (const_int 1)
12821 (match_dup 3))
12822 (not:DI (zero_extract:DI (match_dup 0)
12823 (const_int 1)
12824 (match_dup 3))))
12825 (clobber (reg:CC FLAGS_REG))])]
12826 "operands[3] = GEN_INT (exact_log2 (INTVAL (operands[2])));"
12827 [(set_attr "type" "alu1")
12828 (set_attr "prefix_0f" "1")
12829 (set_attr "znver1_decode" "double")
12830 (set_attr "mode" "DI")])
12831
12832 ;; Optimize a ^ ((a ^ b) & mask) to (~mask & a) | (b & mask)
;; Here a is operand 1, b is operand 2 and mask is operand 3.  Matches only
;; for TARGET_BMI while ix86_pre_reload_split () holds and is split
;; unconditionally.  Operands 1 and 3 are forced into registers, and the two
;; intermediate results use new pseudos (operands 4 and 5).
12833 (define_insn_and_split "*xor2andn"
12834 [(set (match_operand:SWI248 0 "register_operand")
12835 (xor:SWI248
12836 (and:SWI248
12837 (xor:SWI248
12838 (match_operand:SWI248 1 "nonimmediate_operand")
12839 (match_operand:SWI248 2 "nonimmediate_operand"))
12840 (match_operand:SWI248 3 "nonimmediate_operand"))
12841 (match_dup 1)))
12842 (clobber (reg:CC FLAGS_REG))]
12843 "TARGET_BMI && ix86_pre_reload_split ()"
12844 "#"
12845 "&& 1"
12846 [(parallel [(set (match_dup 4)
12847 (and:SWI248
12848 (not:SWI248
12849 (match_dup 3))
12850 (match_dup 1)))
12851 (clobber (reg:CC FLAGS_REG))])
12852 (parallel [(set (match_dup 5)
12853 (and:SWI248
12854 (match_dup 3)
12855 (match_dup 2)))
12856 (clobber (reg:CC FLAGS_REG))])
12857 (parallel [(set (match_dup 0)
12858 (ior:SWI248
12859 (match_dup 4)
12860 (match_dup 5)))
12861 (clobber (reg:CC FLAGS_REG))])]
12862 {
12863 operands[1] = force_reg (<MODE>mode, operands[1]);
12864 operands[3] = force_reg (<MODE>mode, operands[3]);
12865 operands[4] = gen_reg_rtx (<MODE>mode);
12866 operands[5] = gen_reg_rtx (<MODE>mode);
12867 })
12868
12869 ;; See the comment for addsi_1_zext for why we use nonimmediate_operand.
;; SImode any_or operation whose result is zero-extended into a DImode
;; register, for TARGET_64BIT.  Alternatives 1 and 2 are three-operand forms
;; tagged isa apx_ndd.
12870 (define_insn "*<code>si_1_zext"
12871 [(set (match_operand:DI 0 "register_operand" "=r,r,r")
12872 (zero_extend:DI
12873 (any_or:SI (match_operand:SI 1 "nonimmediate_operand" "%0,rm,r")
12874 (match_operand:SI 2 "x86_64_general_operand" "rBMe,re,BM"))))
12875 (clobber (reg:CC FLAGS_REG))]
12876 "TARGET_64BIT
12877 && ix86_binary_operator_ok (<CODE>, SImode, operands, TARGET_APX_NDD)"
12878 "@
12879 <logic>{l}\t{%2, %k0|%k0, %2}
12880 <logic>{l}\t{%2, %1, %k0|%k0, %1, %2}
12881 <logic>{l}\t{%2, %1, %k0|%k0, %1, %2}"
12882 [(set_attr "type" "alu")
12883 (set_attr "isa" "*,apx_ndd,apx_ndd")
12884 (set_attr "mode" "SI")])
12885
;; Variant of the zero-extending SImode any_or in which the operation is
;; written in DImode: operand 1 is a zero-extended SImode value and operand 2
;; is an x86_64_zext_immediate_operand (constraint "Z").  It is still emitted
;; as a 32-bit <logic>{l} instruction; alternative 1 is the three-operand
;; "apx_ndd" form.
12886 (define_insn "*<code>si_1_zext_imm"
12887 [(set (match_operand:DI 0 "register_operand" "=r,r")
12888 (any_or:DI
12889 (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" "%0,rm"))
12890 (match_operand:DI 2 "x86_64_zext_immediate_operand" "Z,Z")))
12891 (clobber (reg:CC FLAGS_REG))]
12892 "TARGET_64BIT
12893 && ix86_binary_operator_ok (<CODE>, SImode, operands, TARGET_APX_NDD)"
12894 "@
12895 <logic>{l}\t{%2, %k0|%k0, %2}
12896 <logic>{l}\t{%2, %1, %k0|%k0, %1, %2}"
12897 [(set_attr "type" "alu")
12898 (set_attr "isa" "*,apx_ndd")
12899 (set_attr "mode" "SI")])
12900
;; QImode any_or with six alternatives:
;;   0, 1  two-operand byte instruction;
;;   2     the same operation emitted as a 32-bit instruction on the %k forms
;;         of the operands (mode attribute "SI");
;;   3, 4  three-operand byte instruction, marked "apx_ndd";
;;   5     "k"-constraint operands, output template "#", type "msklog",
;;         marked "avx512f"; its mode attribute is "HI" unless
;;         TARGET_AVX512DQ is set.
12901 (define_insn "*<code>qi_1"
12902 [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r,r,r,?k")
12903 (any_or:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0,rm,r,k")
12904 (match_operand:QI 2 "general_operand" "qn,m,rn,rn,m,k")))
12905 (clobber (reg:CC FLAGS_REG))]
12906 "ix86_binary_operator_ok (<CODE>, QImode, operands, TARGET_APX_NDD)"
12907 "@
12908 <logic>{b}\t{%2, %0|%0, %2}
12909 <logic>{b}\t{%2, %0|%0, %2}
12910 <logic>{l}\t{%k2, %k0|%k0, %k2}
12911 <logic>{b}\t{%2, %1, %0|%0, %1, %2}
12912 <logic>{b}\t{%2, %1, %0|%0, %1, %2}
12913 #"
12914 [(set_attr "isa" "*,*,*,apx_ndd,apx_ndd,avx512f")
12915 (set_attr "type" "alu,alu,alu,alu,alu,msklog")
12916 (set (attr "mode")
12917 (cond [(eq_attr "alternative" "2")
12918 (const_string "SI")
12919 (and (eq_attr "alternative" "5")
12920 (match_test "!TARGET_AVX512DQ"))
12921 (const_string "HI")
12922 ]
12923 (const_string "QI")))
12924 ;; Potential partial reg stall on alternative 2.
12925 (set (attr "preferred_for_speed")
12926 (cond [(eq_attr "alternative" "2")
12927 (symbol_ref "!TARGET_PARTIAL_REG_STALL")]
12928 (symbol_ref "true")))])
12929
;; QImode NOT of an XOR.  The operand constraints and attributes mirror the
;; six alternatives of the QImode any_or insn.  After reload the insn is
;; split into a flag-clobbering XOR into operand 0 followed by a NOT of
;; operand 0, unless operand 0 satisfies mask_reg_operand, in which case a
;; single insn generated by gen_kxnorqi is emitted instead.
12930 (define_insn_and_split "*notxorqi_1"
12931 [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r,r,r,?k")
12932 (not:QI
12933 (xor:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0,rm,r,k")
12934 (match_operand:QI 2 "general_operand" "qn,m,rn,rn,m,k"))))
12935 (clobber (reg:CC FLAGS_REG))]
12936 "ix86_binary_operator_ok (XOR, QImode, operands, TARGET_APX_NDD)"
12937 "#"
12938 "&& reload_completed"
12939 [(parallel
12940 [(set (match_dup 0)
12941 (xor:QI (match_dup 1) (match_dup 2)))
12942 (clobber (reg:CC FLAGS_REG))])
12943 (set (match_dup 0)
12944 (not:QI (match_dup 0)))]
12945 {
/* When the destination satisfies mask_reg_operand, emit the kxnorqi
   insn and skip the XOR + NOT template above.  */
12946 if (mask_reg_operand (operands[0], QImode))
12947 {
12948 emit_insn (gen_kxnorqi (operands[0], operands[1], operands[2]));
12949 DONE;
12950 }
12951 }
12952 [(set_attr "isa" "*,*,*,apx_ndd,apx_ndd,avx512f")
12953 (set_attr "type" "alu,alu,alu,alu,alu,msklog")
12954 (set (attr "mode")
12955 (cond [(eq_attr "alternative" "2")
12956 (const_string "SI")
12957 (and (eq_attr "alternative" "5")
12958 (match_test "!TARGET_AVX512DQ"))
12959 (const_string "HI")
12960 ]
12961 (const_string "QI")))
12962 ;; Potential partial reg stall on alternative 2.
12963 (set (attr "preferred_for_speed")
12964 (cond [(eq_attr "alternative" "2")
12965 (symbol_ref "!TARGET_PARTIAL_REG_STALL")]
12966 (symbol_ref "true")))])
12967
12968 ;; Convert (sign_extend:WIDE (any_logic:NARROW (memory, immediate)))
12969 ;; to (any_logic:WIDE (sign_extend (memory)), (sign_extend (immediate))).
12970 ;; This eliminates the sign extension after the logic operation.
12971
;; QImode case: a QImode any_logic of a memory operand and a constant,
;; sign-extended to a mode of the SWI248 iterator.  Operand 3 is a new
;; register that receives the sign-extended memory operand; the logic
;; operation is then done in the wide mode.
12972 (define_split
12973 [(set (match_operand:SWI248 0 "register_operand")
12974 (sign_extend:SWI248
12975 (any_logic:QI (match_operand:QI 1 "memory_operand")
12976 (match_operand:QI 2 "const_int_operand"))))]
12977 ""
12978 [(set (match_dup 3) (sign_extend:SWI248 (match_dup 1)))
12979 (set (match_dup 0) (any_logic:SWI248 (match_dup 3) (match_dup 2)))]
12980 "operands[3] = gen_reg_rtx (<MODE>mode);")
12981
;; HImode case: an HImode any_logic of a memory operand and a constant,
;; sign-extended to a mode of the SWI48 iterator.  Operand 3 is a new
;; register that receives the sign-extended memory operand.
12982 (define_split
12983 [(set (match_operand:SWI48 0 "register_operand")
12984 (sign_extend:SWI48
12985 (any_logic:HI (match_operand:HI 1 "memory_operand")
12986 (match_operand:HI 2 "const_int_operand"))))]
12987 ""
12988 [(set (match_dup 3) (sign_extend:SWI48 (match_dup 1)))
12989 (set (match_dup 0) (any_logic:SWI48 (match_dup 3) (match_dup 2)))]
12990 "operands[3] = gen_reg_rtx (<MODE>mode);")
12991
;; SImode case: an SImode any_logic of a memory operand and a constant,
;; sign-extended to DImode.  Only enabled for TARGET_64BIT.  Operand 3 is a
;; new DImode register that receives the sign-extended memory operand.
12992 (define_split
12993 [(set (match_operand:DI 0 "register_operand")
12994 (sign_extend:DI
12995 (any_logic:SI (match_operand:SI 1 "memory_operand")
12996 (match_operand:SI 2 "const_int_operand"))))]
12997 "TARGET_64BIT"
12998 [(set (match_dup 3) (sign_extend:DI (match_dup 1)))
12999 (set (match_dup 0) (any_logic:DI (match_dup 3) (match_dup 2)))]
13000 "operands[3] = gen_reg_rtx (DImode);")
13001
;; any_or that stores its result in operand 0 and also sets FLAGS_REG from a
;; comparison of that result with zero.  It is accepted only when
;; ix86_match_ccmode (insn, CCNOmode) holds.  Alternatives 0 and 1 use the
;; two-operand assembler form; alternatives 2 and 3 use the three-operand
;; form and are marked "apx_ndd".
13002 (define_insn "*<code><mode>_2"
13003 [(set (reg FLAGS_REG)
13004 (compare (any_or:SWI
13005 (match_operand:SWI 1 "nonimmediate_operand" "%0,0,rm,r")
13006 (match_operand:SWI 2 "<general_operand>" "<r><i>,<m>,r<i>,<m>"))
13007 (const_int 0)))
13008 (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>,r,r")
13009 (any_or:SWI (match_dup 1) (match_dup 2)))]
13010 "ix86_match_ccmode (insn, CCNOmode)
13011 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands, TARGET_APX_NDD)"
13012 "@
13013 <logic>{<imodesuffix>}\t{%2, %0|%0, %2}
13014 <logic>{<imodesuffix>}\t{%2, %0|%0, %2}
13015 <logic>{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
13016 <logic>{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
13017 [(set_attr "type" "alu")
13018 (set_attr "isa" "*,*,apx_ndd,apx_ndd")
13019 (set_attr "mode" "<MODE>")])
13020
13021 ;; See comment for addsi_1_zext why we do use nonimmediate_operand
13022 ;; ??? Special case for immediate operand is missing - it is tricky.
;; SImode any_or that sets FLAGS_REG from a comparison of the SImode result
;; with zero and stores the zero-extended result in a DImode register.
;; Enabled for TARGET_64BIT when ix86_match_ccmode (insn, CCNOmode) holds.
;; Alternatives 1 and 2 are the three-operand "apx_ndd" forms.
13023 (define_insn "*<code>si_2_zext"
13024 [(set (reg FLAGS_REG)
13025 (compare (any_or:SI (match_operand:SI 1 "nonimmediate_operand" "%0,rm,r")
13026 (match_operand:SI 2 "x86_64_general_operand" "rBMe,re,BM"))
13027 (const_int 0)))
13028 (set (match_operand:DI 0 "register_operand" "=r,r,r")
13029 (zero_extend:DI (any_or:SI (match_dup 1) (match_dup 2))))]
13030 "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)
13031 && ix86_binary_operator_ok (<CODE>, SImode, operands, TARGET_APX_NDD)"
13032 "@
13033 <logic>{l}\t{%2, %k0|%k0, %2}
13034 <logic>{l}\t{%2, %1, %k0|%k0, %1, %2}
13035 <logic>{l}\t{%2, %1, %k0|%k0, %1, %2}"
13036 [(set_attr "type" "alu")
13037 (set_attr "isa" "*,apx_ndd,apx_ndd")
13038 (set_attr "mode" "SI")])
13039
;; Flag-setting any_or with an x86_64_zext_immediate_operand (constraint
;; "Z").  The comparison is on the SImode operation, while the stored value
;; is written as a DImode any_or of the zero-extended operand 1 and the
;; immediate.  Enabled for TARGET_64BIT when ix86_match_ccmode (insn,
;; CCNOmode) holds; alternative 1 is the three-operand "apx_ndd" form.
13040 (define_insn "*<code>si_2_zext_imm"
13041 [(set (reg FLAGS_REG)
13042 (compare (any_or:SI
13043 (match_operand:SI 1 "nonimmediate_operand" "%0,rm")
13044 (match_operand:SI 2 "x86_64_zext_immediate_operand" "Z,Z"))
13045 (const_int 0)))
13046 (set (match_operand:DI 0 "register_operand" "=r,r")
13047 (any_or:DI (zero_extend:DI (match_dup 1)) (match_dup 2)))]
13048 "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)
13049 && ix86_binary_operator_ok (<CODE>, SImode, operands, TARGET_APX_NDD)"
13050 "@
13051 <logic>{l}\t{%2, %k0|%k0, %2}
13052 <logic>{l}\t{%2, %1, %k0|%k0, %1, %2}"
13053 [(set_attr "type" "alu")
13054 (set_attr "isa" "*,apx_ndd")
13055 (set_attr "mode" "SI")])
13056
;; any_or performed only for its effect on FLAGS_REG: operand 0 is a
;; match_scratch that is clobbered rather than set.  Accepted when
;; ix86_match_ccmode (insn, CCNOmode) holds and operands 1 and 2 are not
;; both memory.
13057 (define_insn "*<code><mode>_3"
13058 [(set (reg FLAGS_REG)
13059 (compare (any_or:SWI
13060 (match_operand:SWI 1 "nonimmediate_operand" "%0")
13061 (match_operand:SWI 2 "<general_operand>" "<g>"))
13062 (const_int 0)))
13063 (clobber (match_scratch:SWI 0 "=<r>"))]
13064 "ix86_match_ccmode (insn, CCNOmode)
13065 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
13066 "<logic>{<imodesuffix>}\t{%2, %0|%0, %2}"
13067 [(set_attr "type" "alu")
13068 (set_attr "mode" "<MODE>")])
13069
13070 ;; Convert wide OR instructions with immediate operand to shorter QImode
13071 ;; equivalents when possible.
13072 ;; Don't do the splitting with memory operands, since it introduces risk
13073 ;; of memory mismatch stalls. We may want to do the splitting for optimizing
13074 ;; for size, but that can (should?) be handled by generic code instead.
13075 ;; Don't do the splitting for APX NDD as NDD does not support *h registers.
;; This split handles an immediate whose set bits all lie in bits 8..15
;; (INTVAL & ~(255 << 8) is zero).  It applies after reload, and the
;; operation is rewritten on the 8-bit field at bit offset 8 of the HImode
;; lowpart of the registers, with the immediate shifted right by 8.
13076 (define_split
13077 [(set (match_operand:SWI248 0 "QIreg_operand")
13078 (any_or:SWI248 (match_operand:SWI248 1 "register_operand")
13079 (match_operand:SWI248 2 "const_int_operand")))
13080 (clobber (reg:CC FLAGS_REG))]
13081 "reload_completed
13082 && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
13083 && !(INTVAL (operands[2]) & ~(255 << 8))
13084 && !(TARGET_APX_NDD && REGNO (operands[0]) != REGNO (operands[1]))"
13085 [(parallel
13086 [(set (zero_extract:HI (match_dup 0)
13087 (const_int 8)
13088 (const_int 8))
13089 (subreg:HI
13090 (any_or:QI
13091 (subreg:QI
13092 (zero_extract:HI (match_dup 1)
13093 (const_int 8)
13094 (const_int 8)) 0)
13095 (match_dup 2)) 0))
13096 (clobber (reg:CC FLAGS_REG))])]
13097 {
13098 /* Handle the case where INTVAL (operands[2]) == 0. */
13099 if (operands[2] == const0_rtx)
13100 {
/* With a zero immediate, emit a plain move when source and destination
   differ, or a deleted-insn note when they are the same.  */
13101 if (!rtx_equal_p (operands[0], operands[1]))
13102 emit_move_insn (operands[0], operands[1]);
13103 else
13104 emit_note (NOTE_INSN_DELETED);
13105 DONE;
13106 }
/* Rewrite the operands for the template: HImode lowparts of both
   registers and the immediate moved down into a QImode constant.  */
13107 operands[0] = gen_lowpart (HImode, operands[0]);
13108 operands[1] = gen_lowpart (HImode, operands[1]);
13109 operands[2] = gen_int_mode (INTVAL (operands[2]) >> 8, QImode);
13110 })
13111
13112 ;; Since OR can be encoded with sign extended immediate, this is only
13113 ;; profitable when 7th bit is set.
;; The split applies after reload when the immediate has no bits set above
;; bit 7 (INTVAL & ~255 is zero) and has bit 7 (value 128) set.  With
;; TARGET_APX_NDD it additionally requires operands 0 and 1 to be equal.
;; The operation is rewritten as a QImode any_or on the strict_low_part of
;; operand 0.
13114 (define_split
13115 [(set (match_operand:SWI248 0 "any_QIreg_operand")
13116 (any_or:SWI248 (match_operand:SWI248 1 "general_operand")
13117 (match_operand:SWI248 2 "const_int_operand")))
13118 (clobber (reg:CC FLAGS_REG))]
13119 "reload_completed
13120 && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
13121 && !(INTVAL (operands[2]) & ~255)
13122 && (INTVAL (operands[2]) & 128)
13123 && !(TARGET_APX_NDD
13124 && !rtx_equal_p (operands[0], operands[1]))"
13125 [(parallel [(set (strict_low_part (match_dup 0))
13126 (any_or:QI (match_dup 1)
13127 (match_dup 2)))
13128 (clobber (reg:CC FLAGS_REG))])]
13129 {
/* Narrow all three operands to QImode for the template above.  */
13130 operands[0] = gen_lowpart (QImode, operands[0]);
13131 operands[1] = gen_lowpart (QImode, operands[1]);
13132 operands[2] = gen_int_mode (INTVAL (operands[2]), QImode);
13133 })
13134
;; Expander for a parallel that XORs the 8-bit field at bit offset 8 of
;; HImode operand 1 with the constant operand 2, stores the result into the
;; same field of HImode operand 0, and sets FLAGS_REG (in CCNOmode) from a
;; comparison of the XOR result with zero.
13135 (define_expand "xorqi_ext_1_cc"
13136 [(parallel
13137 [(set (reg:CCNO FLAGS_REG)
13138 (compare:CCNO
13139 (xor:QI
13140 (subreg:QI
13141 (zero_extract:HI (match_operand:HI 1 "register_operand")
13142 (const_int 8)
13143 (const_int 8)) 0)
13144 (match_operand:QI 2 "const_int_operand"))
13145 (const_int 0)))
13146 (set (zero_extract:HI (match_operand:HI 0 "register_operand")
13147 (const_int 8)
13148 (const_int 8))
13149 (subreg:HI
13150 (xor:QI
13151 (subreg:QI
13152 (zero_extract:HI (match_dup 1)
13153 (const_int 8)
13154 (const_int 8)) 0)
13155 (match_dup 2)) 0))])])
13156
13157 ;; Peephole2 rega = 0; rega op= regb into rega = regb.
;; Matches a flag-clobbering set of operand 0 to zero followed by a
;; flag-clobbering any_or_plus of operand 0 with operand 1.  Both insns are
;; replaced by a single move of operand 1 into operand 0, provided operand 1
;; does not mention operand 0.
13158 (define_peephole2
13159 [(parallel [(set (match_operand:SWI 0 "general_reg_operand")
13160 (const_int 0))
13161 (clobber (reg:CC FLAGS_REG))])
13162 (parallel [(set (match_dup 0)
13163 (any_or_plus:SWI (match_dup 0)
13164 (match_operand:SWI 1 "<general_operand>")))
13165 (clobber (reg:CC FLAGS_REG))])]
13166 "!reg_mentioned_p (operands[0], operands[1])"
13167 [(set (match_dup 0) (match_dup 1))])
13168
13169 ;; Peephole2 dead instruction in rega = 0; rega op= rega.
;; The second insn applies any_or_plus to the just-cleared register and
;; itself, so only the clearing insn (with its flags clobber) is kept.
13170 (define_peephole2
13171 [(parallel [(set (match_operand:SWI 0 "general_reg_operand")
13172 (const_int 0))
13173 (clobber (reg:CC FLAGS_REG))])
13174 (parallel [(set (match_dup 0)
13175 (any_or_plus:SWI (match_dup 0) (match_dup 0)))
13176 (clobber (reg:CC FLAGS_REG))])]
13177 ""
13178 [(parallel [(set (match_dup 0) (const_int 0))
13179 (clobber (reg:CC FLAGS_REG))])])
13180
13181 ;; Split DST = (HI<<32)|LO early to minimize register usage.
;; Form 1: a <DWI>-mode register (operand 1) shifted left by operand 2,
;; combined via any_or_plus with a zero-extended DWIH operand 3.  The shift
;; count must equal the bit size of <MODE>.  After reload the insn is
;; replaced by whatever split_double_concat emits; it is passed operand 3
;; and the <MODE> lowpart of operand 1.
13182 (define_insn_and_split "*concat<mode><dwi>3_1"
13183 [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,r")
13184 (any_or_plus:<DWI>
13185 (ashift:<DWI> (match_operand:<DWI> 1 "register_operand" "r,r")
13186 (match_operand:QI 2 "const_int_operand"))
13187 (zero_extend:<DWI>
13188 (match_operand:DWIH 3 "nonimmediate_operand" "r,m"))))]
13189 "INTVAL (operands[2]) == <MODE_SIZE> * BITS_PER_UNIT"
13190 "#"
13191 "&& reload_completed"
13192 [(const_int 0)]
13193 {
13194 split_double_concat (<DWI>mode, operands[0], operands[3],
13195 gen_lowpart (<MODE>mode, operands[1]));
13196 DONE;
13197 })
13198
;; Form 2: same as form 1 with the any_or_plus operands in the other order:
;; the zero-extended DWIH value is operand 1 and the shifted <DWI>-mode
;; register is operand 2, with the shift count in operand 3.
13199 (define_insn_and_split "*concat<mode><dwi>3_2"
13200 [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,r")
13201 (any_or_plus:<DWI>
13202 (zero_extend:<DWI>
13203 (match_operand:DWIH 1 "nonimmediate_operand" "r,m"))
13204 (ashift:<DWI> (match_operand:<DWI> 2 "register_operand" "r,r")
13205 (match_operand:QI 3 "const_int_operand"))))]
13206 "INTVAL (operands[3]) == <MODE_SIZE> * BITS_PER_UNIT"
13207 "#"
13208 "&& reload_completed"
13209 [(const_int 0)]
13210 {
13211 split_double_concat (<DWI>mode, operands[0], operands[1],
13212 gen_lowpart (<MODE>mode, operands[2]));
13213 DONE;
13214 })
13215
;; Form 3: both inputs are zero-extended DWIH values; operand 1 is the one
;; shifted left by the bit size of <MODE> and operand 3 is the unshifted one.
;; Alternatives 3 and 4 are marked "x64".  After reload, when operand 0
;; satisfies SSE_REG_P the insn generated by gen_vec_concatv2di is emitted
;; on a V2DImode view of the destination register; otherwise
;; split_double_concat is used.
13216 (define_insn_and_split "*concat<mode><dwi>3_3"
13217 [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,r,r,&r,x")
13218 (any_or_plus:<DWI>
13219 (ashift:<DWI>
13220 (zero_extend:<DWI>
13221 (match_operand:DWIH 1 "nonimmediate_operand" "r,m,r,m,x"))
13222 (match_operand:QI 2 "const_int_operand"))
13223 (zero_extend:<DWI>
13224 (match_operand:DWIH 3 "nonimmediate_operand" "r,r,m,m,0"))))]
13225 "INTVAL (operands[2]) == <MODE_SIZE> * BITS_PER_UNIT"
13226 "#"
13227 "&& reload_completed"
13228 [(const_int 0)]
13229 {
13230 if (SSE_REG_P (operands[0]))
13231 {
13232 rtx tmp = gen_rtx_REG (V2DImode, REGNO (operands[0]));
13233 emit_insn (gen_vec_concatv2di (tmp, operands[3], operands[1]));
13234 }
13235 else
13236 split_double_concat (<DWI>mode, operands[0], operands[3], operands[1]);
13237 DONE;
13238 }
13239 [(set_attr "isa" "*,*,*,x64,x64")])
13240
;; Form 4: same as form 3 with the any_or_plus operands in the other order:
;; operand 1 is the unshifted zero-extended value and operand 2 the shifted
;; one, with the shift count in operand 3.  There is no "x" alternative here;
;; alternative 3 is marked "x64".
13241 (define_insn_and_split "*concat<mode><dwi>3_4"
13242 [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,r,r,&r")
13243 (any_or_plus:<DWI>
13244 (zero_extend:<DWI>
13245 (match_operand:DWIH 1 "nonimmediate_operand" "r,m,r,m"))
13246 (ashift:<DWI>
13247 (zero_extend:<DWI>
13248 (match_operand:DWIH 2 "nonimmediate_operand" "r,r,m,m"))
13249 (match_operand:QI 3 "const_int_operand"))))]
13250 "INTVAL (operands[3]) == <MODE_SIZE> * BITS_PER_UNIT"
13251 "#"
13252 "&& reload_completed"
13253 [(const_int 0)]
13254 {
13255 split_double_concat (<DWI>mode, operands[0], operands[1], operands[2]);
13256 DONE;
13257 }
13258 [(set_attr "isa" "*,*,*,x64")])
13259
;; Form 5: a DWI-mode register shifted left by half the bit size of <MODE>,
;; combined with a constant operand 3.  The condition requires:
;;   - for DImode, a CONST_INT with no bits set above the SImode mask;
;;   - otherwise, either a non-negative CONST_INT or a two-element
;;     CONST_WIDE_INT whose element 1 is zero;
;;   - that the constant (or element 0 of the CONST_WIDE_INT) does not
;;     satisfy ix86_endbr_immediate_operand.
;; After reload the constant is reduced to <HALF>mode with simplify_subreg
;; and passed to split_double_concat with the <HALF>mode lowpart of
;; operand 1.
13260 (define_insn_and_split "*concat<half><mode>3_5"
13261 [(set (match_operand:DWI 0 "nonimmediate_operand" "=r,o,o")
13262 (any_or_plus:DWI
13263 (ashift:DWI (match_operand:DWI 1 "register_operand" "r,r,r")
13264 (match_operand:QI 2 "const_int_operand"))
13265 (match_operand:DWI 3 "const_scalar_int_operand" "n,n,Wd")))]
13266 "INTVAL (operands[2]) == <MODE_SIZE> * BITS_PER_UNIT / 2
13267 && (<MODE>mode == DImode
13268 ? CONST_INT_P (operands[3])
13269 && (UINTVAL (operands[3]) & ~GET_MODE_MASK (SImode)) == 0
13270 : CONST_INT_P (operands[3])
13271 ? INTVAL (operands[3]) >= 0
13272 : CONST_WIDE_INT_NUNITS (operands[3]) == 2
13273 && CONST_WIDE_INT_ELT (operands[3], 1) == 0)
13274 && !(CONST_INT_P (operands[3])
13275 ? ix86_endbr_immediate_operand (operands[3], VOIDmode)
13276 : ix86_endbr_immediate_operand (GEN_INT (CONST_WIDE_INT_ELT (operands[3],
13277 0)),
13278 VOIDmode))"
13279 "#"
13280 "&& reload_completed"
13281 [(const_int 0)]
13282 {
13283 rtx op3 = simplify_subreg (<HALF>mode, operands[3], <MODE>mode, 0);
13284 split_double_concat (<MODE>mode, operands[0], op3,
13285 gen_lowpart (<HALF>mode, operands[1]));
13286 DONE;
13287 }
13288 [(set_attr "isa" "*,nox64,x64")])
13289
;; Form 6: a zero-extended DWIH operand 1 shifted left by the bit size of
;; <MODE>, combined with a <DWI>-mode constant operand 3.  The constant must
;; meet the same requirements as in form 5: no bits above the SImode mask
;; for DImode, otherwise a non-negative CONST_INT or a two-element
;; CONST_WIDE_INT with element 1 zero, and not an
;; ix86_endbr_immediate_operand.  After reload the constant is reduced to
;; <MODE>mode with simplify_subreg and passed to split_double_concat with
;; operand 1.
13290 (define_insn_and_split "*concat<mode><dwi>3_6"
13291 [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=r,o,o,r")
13292 (any_or_plus:<DWI>
13293 (ashift:<DWI>
13294 (zero_extend:<DWI>
13295 (match_operand:DWIH 1 "nonimmediate_operand" "r,r,r,m"))
13296 (match_operand:QI 2 "const_int_operand"))
13297 (match_operand:<DWI> 3 "const_scalar_int_operand" "n,n,Wd,n")))]
13298 "INTVAL (operands[2]) == <MODE_SIZE> * BITS_PER_UNIT
13299 && (<DWI>mode == DImode
13300 ? CONST_INT_P (operands[3])
13301 && (UINTVAL (operands[3]) & ~GET_MODE_MASK (SImode)) == 0
13302 : CONST_INT_P (operands[3])
13303 ? INTVAL (operands[3]) >= 0
13304 : CONST_WIDE_INT_NUNITS (operands[3]) == 2
13305 && CONST_WIDE_INT_ELT (operands[3], 1) == 0)
13306 && !(CONST_INT_P (operands[3])
13307 ? ix86_endbr_immediate_operand (operands[3], VOIDmode)
13308 : ix86_endbr_immediate_operand (GEN_INT (CONST_WIDE_INT_ELT (operands[3],
13309 0)),
13310 VOIDmode))"
13311 "#"
13312 "&& reload_completed"
13313 [(const_int 0)]
13314 {
13315 rtx op3 = simplify_subreg (<MODE>mode, operands[3], <DWI>mode, 0);
13316 split_double_concat (<DWI>mode, operands[0], op3, operands[1]);
13317 DONE;
13318 }
13319 [(set_attr "isa" "*,nox64,x64,*")])
13320
;; Form 7: a zero-extended DWIH operand 1 combined with a <DWI>-mode
;; constant operand 2; there is no shift in the pattern.  The condition
;; requires:
;;   - for DImode, a CONST_INT whose bits under the SImode mask are all zero;
;;   - otherwise, a two-element CONST_WIDE_INT whose element 0 is zero;
;;   - that the constant (or element 1 of the CONST_WIDE_INT) does not
;;     satisfy ix86_endbr_immediate_operand.
;; After reload the remaining part of the constant is extracted and passed
;; to split_double_concat together with operand 1.
13321 (define_insn_and_split "*concat<mode><dwi>3_7"
13322 [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=r,o,o,r")
13323 (any_or_plus:<DWI>
13324 (zero_extend:<DWI>
13325 (match_operand:DWIH 1 "nonimmediate_operand" "r,r,r,m"))
13326 (match_operand:<DWI> 2 "const_scalar_int_operand" "n,n,Wd,n")))]
13327 "<DWI>mode == DImode
13328 ? CONST_INT_P (operands[2])
13329 && (UINTVAL (operands[2]) & GET_MODE_MASK (SImode)) == 0
13330 && !ix86_endbr_immediate_operand (operands[2], VOIDmode)
13331 : CONST_WIDE_INT_P (operands[2])
13332 && CONST_WIDE_INT_NUNITS (operands[2]) == 2
13333 && CONST_WIDE_INT_ELT (operands[2], 0) == 0
13334 && !ix86_endbr_immediate_operand (GEN_INT (CONST_WIDE_INT_ELT (operands[2],
13335 1)),
13336 VOIDmode)"
13337 "#"
13338 "&& reload_completed"
13339 [(const_int 0)]
13340 {
/* op2 is the part of the constant above the zero portion: the value
   shifted right by 32 for DImode, element 1 of the CONST_WIDE_INT
   otherwise.  */
13341 rtx op2;
13342 if (<DWI>mode == DImode)
13343 op2 = gen_int_mode (INTVAL (operands[2]) >> 32, <MODE>mode);
13344 else
13345 op2 = gen_int_mode (CONST_WIDE_INT_ELT (operands[2], 1), <MODE>mode);
13346 split_double_concat (<DWI>mode, operands[0], operands[1], op2);
13347 DONE;
13348 }
13349 [(set_attr "isa" "*,nox64,x64,*")])
13350 \f
13351 ;; Negation instructions
13352
;; Named expander for integer negation over the SDWIM modes.  All work is
;; delegated to ix86_expand_unary_operator, which is passed TARGET_APX_NDD
;; as its last argument.
13353 (define_expand "neg<mode>2"
13354 [(set (match_operand:SDWIM 0 "nonimmediate_operand")
13355 (neg:SDWIM (match_operand:SDWIM 1 "nonimmediate_operand")))]
13356 ""
13357 {
13358 ix86_expand_unary_operator (NEG, <MODE>mode, operands, TARGET_APX_NDD);
13359 DONE;
13360 })
13361
;; Double-word negation.  After reload, split_double_mode splits operands 0
;; and 1 into DWIH pieces: the first pieces stay in operands 0 and 1, the
;; second pieces go to operands 2 and 3.  Three insns are then emitted:
;;   1. negate operand 1 into operand 0, setting FLAGS_REG in CCC mode;
;;   2. operand 2 = (ltu of the flags) + operand 3 + 0;
;;   3. negate operand 2 in place.
;; Alternative 1 (separate input and output) is marked "apx_ndd".
13362 (define_insn_and_split "*neg<dwi>2_doubleword"
13363 [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,&r")
13364 (neg:<DWI> (match_operand:<DWI> 1 "nonimmediate_operand" "0,ro")))
13365 (clobber (reg:CC FLAGS_REG))]
13366 "ix86_unary_operator_ok (NEG, <DWI>mode, operands, TARGET_APX_NDD)"
13367 "#"
13368 "&& reload_completed"
13369 [(parallel
13370 [(set (reg:CCC FLAGS_REG)
13371 (unspec:CCC [(match_dup 1) (const_int 0)] UNSPEC_CC_NE))
13372 (set (match_dup 0) (neg:DWIH (match_dup 1)))])
13373 (parallel
13374 [(set (match_dup 2)
13375 (plus:DWIH (plus:DWIH (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))
13376 (match_dup 3))
13377 (const_int 0)))
13378 (clobber (reg:CC FLAGS_REG))])
13379 (parallel
13380 [(set (match_dup 2)
13381 (neg:DWIH (match_dup 2)))
13382 (clobber (reg:CC FLAGS_REG))])]
13383 "split_double_mode (<DWI>mode, &operands[0], 2, &operands[0], &operands[2]);"
13384 [(set_attr "isa" "*,apx_ndd")])
13385
13386 ;; Convert:
13387 ;; mov %esi, %edx
13388 ;; negl %eax
13389 ;; adcl $0, %edx
13390 ;; negl %edx
13391 ;; to:
13392 ;; xorl %edx, %edx
13393 ;; negl %eax
13394 ;; sbbl %esi, %edx
13395
;; Matches four insns: a move of operand 1 into register operand 0, a
;; CCC-setting negation of register operand 2, an add of the carry (and
;; zero) into operand 0, and a negation of operand 0.  The condition
;; requires operands 0 and 2 to be different registers and operand 1 to
;; mention neither of them.  The preparation statement calls
;; ix86_expand_clear on operand 0; the replacement keeps the CCC-setting
;; negation and then sets operand 0 to operand 0 minus the carry minus
;; operand 1.
13396 (define_peephole2
13397 [(set (match_operand:SWI48 0 "general_reg_operand")
13398 (match_operand:SWI48 1 "nonimmediate_gr_operand"))
13399 (parallel
13400 [(set (reg:CCC FLAGS_REG)
13401 (unspec:CCC [(match_operand:SWI48 2 "general_reg_operand")
13402 (const_int 0)] UNSPEC_CC_NE))
13403 (set (match_dup 2) (neg:SWI48 (match_dup 2)))])
13404 (parallel
13405 [(set (match_dup 0)
13406 (plus:SWI48 (plus:SWI48
13407 (ltu:SWI48 (reg:CC FLAGS_REG) (const_int 0))
13408 (match_dup 0))
13409 (const_int 0)))
13410 (clobber (reg:CC FLAGS_REG))])
13411 (parallel
13412 [(set (match_dup 0)
13413 (neg:SWI48 (match_dup 0)))
13414 (clobber (reg:CC FLAGS_REG))])]
13415 "REGNO (operands[0]) != REGNO (operands[2])
13416 && !reg_mentioned_p (operands[0], operands[1])
13417 && !reg_mentioned_p (operands[2], operands[1])"
13418 [(parallel
13419 [(set (reg:CCC FLAGS_REG)
13420 (unspec:CCC [(match_dup 2) (const_int 0)] UNSPEC_CC_NE))
13421 (set (match_dup 2) (neg:SWI48 (match_dup 2)))])
13422 (parallel
13423 [(set (match_dup 0)
13424 (minus:SWI48 (minus:SWI48
13425 (match_dup 0)
13426 (ltu:SWI48 (reg:CC FLAGS_REG) (const_int 0)))
13427 (match_dup 1)))
13428 (clobber (reg:CC FLAGS_REG))])]
13429 "ix86_expand_clear (operands[0]);")
13430
13431 ;; Convert:
13432 ;; xorl %edx, %edx
13433 ;; negl %eax
13434 ;; adcl $0, %edx
13435 ;; negl %edx
13436 ;; to:
13437 ;; negl %eax
13438 ;; sbbl %edx, %edx // *x86_mov<mode>cc_0_m1
13439
;; Matches four insns: a flag-clobbering clear of register operand 0, a
;; CCC-setting negation of register operand 1, an add of the carry (and
;; zero) into operand 0, and a negation of operand 0.  Operands 0 and 1 must
;; be different registers.  The replacement keeps the CCC-setting negation
;; and then sets operand 0 to -1 or 0 with an if_then_else on the ltu test
;; of the flags.
13440 (define_peephole2
13441 [(parallel
13442 [(set (match_operand:SWI48 0 "general_reg_operand") (const_int 0))
13443 (clobber (reg:CC FLAGS_REG))])
13444 (parallel
13445 [(set (reg:CCC FLAGS_REG)
13446 (unspec:CCC [(match_operand:SWI48 1 "general_reg_operand")
13447 (const_int 0)] UNSPEC_CC_NE))
13448 (set (match_dup 1) (neg:SWI48 (match_dup 1)))])
13449 (parallel
13450 [(set (match_dup 0)
13451 (plus:SWI48 (plus:SWI48
13452 (ltu:SWI48 (reg:CC FLAGS_REG) (const_int 0))
13453 (match_dup 0))
13454 (const_int 0)))
13455 (clobber (reg:CC FLAGS_REG))])
13456 (parallel
13457 [(set (match_dup 0)
13458 (neg:SWI48 (match_dup 0)))
13459 (clobber (reg:CC FLAGS_REG))])]
13460 "REGNO (operands[0]) != REGNO (operands[1])"
13461 [(parallel
13462 [(set (reg:CCC FLAGS_REG)
13463 (unspec:CCC [(match_dup 1) (const_int 0)] UNSPEC_CC_NE))
13464 (set (match_dup 1) (neg:SWI48 (match_dup 1)))])
13465 (parallel
13466 [(set (match_dup 0)
13467 (if_then_else:SWI48 (ltu:SWI48 (reg:CC FLAGS_REG) (const_int 0))
13468 (const_int -1)
13469 (const_int 0)))
13470 (clobber (reg:CC FLAGS_REG))])])
13471
;; Plain negation with a FLAGS_REG clobber.  Alternative 0 negates in place
;; (input tied to the output); alternative 1 has a separate source operand
;; and is marked "apx_ndd".
13472 (define_insn "*neg<mode>_1"
13473 [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,r")
13474 (neg:SWI (match_operand:SWI 1 "nonimmediate_operand" "0,rm")))
13475 (clobber (reg:CC FLAGS_REG))]
13476 "ix86_unary_operator_ok (NEG, <MODE>mode, operands, TARGET_APX_NDD)"
13477 "@
13478 neg{<imodesuffix>}\t%0
13479 neg{<imodesuffix>}\t{%1, %0|%0, %1}"
13480 [(set_attr "type" "negnot")
13481 (set_attr "isa" "*,apx_ndd")
13482 (set_attr "mode" "<MODE>")])
13483
;; SImode negation whose result is zero-extended into a DImode register;
;; enabled only for TARGET_64BIT.  Alternative 1 has a separate source
;; operand and is marked "apx_ndd".
13484 (define_insn "*negsi_1_zext"
13485 [(set (match_operand:DI 0 "register_operand" "=r,r")
13486 (zero_extend:DI
13487 (neg:SI (match_operand:SI 1 "nonimmediate_operand" "0,rm"))))
13488 (clobber (reg:CC FLAGS_REG))]
13489 "TARGET_64BIT
13490 && ix86_unary_operator_ok (NEG, SImode, operands, TARGET_APX_NDD)"
13491 "@
13492 neg{l}\t%k0
13493 neg{l}\t{%k1, %k0|%k0, %k1}"
13494 [(set_attr "type" "negnot")
13495 (set_attr "isa" "*,apx_ndd")
13496 (set_attr "mode" "SI")])
13497
13498 ;; Alternative 1 is needed to work around LRA limitation, see PR82524.
;; Negation into the strict_low_part of a register, for the SWI12 modes.
;; Enabled unless TARGET_PARTIAL_REG_STALL is set, or when optimizing the
;; function for size.  Alternative 0 negates in place.  Alternative 1
;; (output template "#") is split after reload, when operands 0 and 1
;; differ, into a move of operand 1 into the low part of operand 0 followed
;; by an in-place negation of that low part.
13499 (define_insn_and_split "*neg<mode>_1_slp"
13500 [(set (strict_low_part (match_operand:SWI12 0 "register_operand" "+<r>,&<r>"))
13501 (neg:SWI12 (match_operand:SWI12 1 "register_operand" "0,!<r>")))
13502 (clobber (reg:CC FLAGS_REG))]
13503 "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
13504 "@
13505 neg{<imodesuffix>}\t%0
13506 #"
13507 "&& reload_completed
13508 && !(rtx_equal_p (operands[0], operands[1]))"
13509 [(set (strict_low_part (match_dup 0)) (match_dup 1))
13510 (parallel
13511 [(set (strict_low_part (match_dup 0))
13512 (neg:SWI12 (match_dup 0)))
13513 (clobber (reg:CC FLAGS_REG))])]
13514 ""
13515 [(set_attr "type" "negnot")
13516 (set_attr "mode" "<MODE>")])
13517
;; Negation that stores its result in operand 0 and also sets FLAGS_REG from
;; a comparison of the negated value with zero.  Accepted only when
;; ix86_match_ccmode (insn, CCGOCmode) holds.  Alternative 1 has a separate
;; source operand and is marked "apx_ndd".
13518 (define_insn "*neg<mode>_2"
13519 [(set (reg FLAGS_REG)
13520 (compare
13521 (neg:SWI (match_operand:SWI 1 "nonimmediate_operand" "0,rm"))
13522 (const_int 0)))
13523 (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,r")
13524 (neg:SWI (match_dup 1)))]
13525 "ix86_match_ccmode (insn, CCGOCmode)
13526 && ix86_unary_operator_ok (NEG, <MODE>mode, operands, TARGET_APX_NDD)"
13527 "@
13528 neg{<imodesuffix>}\t%0
13529 neg{<imodesuffix>}\t{%1, %0|%0, %1}"
13530 [(set_attr "type" "negnot")
13531 (set_attr "isa" "*,apx_ndd")
13532 (set_attr "mode" "<MODE>")])
13533
;; SImode negation that sets FLAGS_REG from a comparison of the negated
;; value with zero and stores the zero-extended result in a DImode register.
;; Enabled for TARGET_64BIT when ix86_match_ccmode (insn, CCGOCmode) holds.
;; Alternative 1 has a separate source operand and is marked "apx_ndd".
13534 (define_insn "*negsi_2_zext"
13535 [(set (reg FLAGS_REG)
13536 (compare
13537 (neg:SI (match_operand:SI 1 "nonimmediate_operand" "0,rm"))
13538 (const_int 0)))
13539 (set (match_operand:DI 0 "register_operand" "=r,r")
13540 (zero_extend:DI
13541 (neg:SI (match_dup 1))))]
13542 "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode)
13543 && ix86_unary_operator_ok (NEG, SImode, operands, TARGET_APX_NDD)"
13544 "@
13545 neg{l}\t%k0
13546 neg{l}\t{%1, %k0|%k0, %1}"
13547 [(set_attr "type" "negnot")
13548 (set_attr "isa" "*,apx_ndd")
13549 (set_attr "mode" "SI")])
13550
;; Negation that stores its result in operand 0 and sets FLAGS_REG in CCC
;; mode via the UNSPEC_CC_NE unspec of operand 1 and zero.  The insn has no
;; enabling condition of its own; alternative 1 has a separate source
;; operand and is marked "apx_ndd".
13551 (define_insn "*neg<mode>_ccc_1"
13552 [(set (reg:CCC FLAGS_REG)
13553 (unspec:CCC
13554 [(match_operand:SWI 1 "nonimmediate_operand" "0,rm")
13555 (const_int 0)] UNSPEC_CC_NE))
13556 (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,r")
13557 (neg:SWI (match_dup 1)))]
13558 ""
13559 "@
13560 neg{<imodesuffix>}\t%0
13561 neg{<imodesuffix>}\t{%1, %0|%0, %1}"
13562 [(set_attr "type" "negnot")
13563 (set_attr "isa" "*,apx_ndd")
13564 (set_attr "mode" "<MODE>")])
13565
;; Same CCC-mode flag computation as the preceding insn, but the negated
;; value is not kept: operand 0 is a match_scratch that is only clobbered.
13566 (define_insn "*neg<mode>_ccc_2"
13567 [(set (reg:CCC FLAGS_REG)
13568 (unspec:CCC
13569 [(match_operand:SWI 1 "nonimmediate_operand" "0,rm")
13570 (const_int 0)] UNSPEC_CC_NE))
13571 (clobber (match_scratch:SWI 0 "=<r>,r"))]
13572 ""
13573 "@
13574 neg{<imodesuffix>}\t%0
13575 neg{<imodesuffix>}\t{%1, %0|%0, %1}"
13576 [(set_attr "type" "negnot")
13577 (set_attr "isa" "*,apx_ndd")
13578 (set_attr "mode" "<MODE>")])
13579
;; Named expander, for the SWI48 modes and register operands only, that
;; generates the parallel of a CCC-mode flag set (UNSPEC_CC_NE of operand 1
;; and zero) and the negation of operand 1 into operand 0.
13580 (define_expand "x86_neg<mode>_ccc"
13581 [(parallel
13582 [(set (reg:CCC FLAGS_REG)
13583 (unspec:CCC [(match_operand:SWI48 1 "register_operand")
13584 (const_int 0)] UNSPEC_CC_NE))
13585 (set (match_operand:SWI48 0 "register_operand")
13586 (neg:SWI48 (match_dup 1)))])])
13587
13588 ;; Alternative 1 is needed to work around LRA limitation, see PR82524.
;; Negates the 8-bit field at bit offset 8 of operand 1 (matched through an
;; extract_operator) and stores it in the same field of operand 0.
;; Alternative 0 operates in place and is emitted as neg{b} on the %h form
;; of operand 0.  Alternative 1 (output template "#") is split after reload,
;; when operands 0 and 1 differ, into a copy of the field from operand 1 to
;; operand 0 followed by an in-place negation of that field.
13589 (define_insn_and_split "*negqi_ext<mode>_1"
13590 [(set (zero_extract:SWI248
13591 (match_operand 0 "int248_register_operand" "+Q,&Q")
13592 (const_int 8)
13593 (const_int 8))
13594 (subreg:SWI248
13595 (neg:QI
13596 (subreg:QI
13597 (match_operator:SWI248 2 "extract_operator"
13598 [(match_operand 1 "int248_register_operand" "0,!Q")
13599 (const_int 8)
13600 (const_int 8)]) 0)) 0))
13601 (clobber (reg:CC FLAGS_REG))]
13602 ""
13603 "@
13604 neg{b}\t%h0
13605 #"
13606 "reload_completed
13607 && !(rtx_equal_p (operands[0], operands[1]))"
13608 [(set (zero_extract:SWI248
13609 (match_dup 0) (const_int 8) (const_int 8))
13610 (zero_extract:SWI248
13611 (match_dup 1) (const_int 8) (const_int 8)))
13612 (parallel
13613 [(set (zero_extract:SWI248
13614 (match_dup 0) (const_int 8) (const_int 8))
13615 (subreg:SWI248
13616 (neg:QI
13617 (subreg:QI
13618 (match_op_dup 2
13619 [(match_dup 0) (const_int 8) (const_int 8)]) 0)) 0))
13620 (clobber (reg:CC FLAGS_REG))])]
13621 ""
13622 [(set_attr "type" "negnot")
13623 (set_attr "mode" "QI")])
13624
13625 ;; Negate with jump on overflow.
;; Operand 0 receives the negation of register operand 1 and operand 2 is
;; the branch target label.  Operand 3 is not supplied by the caller: the
;; preparation code sets it to the constant with only the top bit of <MODE>
;; set, and it is used as the second element of the CCO-mode UNSPEC_CC_NE
;; unspec.  The branch is taken according to the
;; (eq (reg:CCO FLAGS_REG) (const_int 0)) test.
13626 (define_expand "negv<mode>3"
13627 [(parallel [(set (reg:CCO FLAGS_REG)
13628 (unspec:CCO
13629 [(match_operand:SWI 1 "register_operand")
13630 (match_dup 3)] UNSPEC_CC_NE))
13631 (set (match_operand:SWI 0 "register_operand")
13632 (neg:SWI (match_dup 1)))])
13633 (set (pc) (if_then_else
13634 (eq (reg:CCO FLAGS_REG) (const_int 0))
13635 (label_ref (match_operand 2))
13636 (pc)))]
13637 ""
13638 {
13639 operands[3]
13640 = gen_int_mode (HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (<MODE>mode) - 1),
13641 <MODE>mode);
13642 })
13643
;; Insn form of the CCO-setting negation: an in-place neg (operand 1 tied to
;; operand 0) that sets FLAGS_REG in CCO mode.  Operand 2 must be a constant
;; for which mode_signbit_p (<MODE>mode, ...) holds.
13644 (define_insn "*negv<mode>3"
13645 [(set (reg:CCO FLAGS_REG)
13646 (unspec:CCO [(match_operand:SWI 1 "nonimmediate_operand" "0")
13647 (match_operand:SWI 2 "const_int_operand")]
13648 UNSPEC_CC_NE))
13649 (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
13650 (neg:SWI (match_dup 1)))]
13651 "ix86_unary_operator_ok (NEG, <MODE>mode, operands)
13652 && mode_signbit_p (<MODE>mode, operands[2])"
13653 "neg{<imodesuffix>}\t%0"
13654 [(set_attr "type" "negnot")
13655 (set_attr "mode" "<MODE>")])
13656
13657 ;; Optimize *negsi_1 followed by *cmpsi_ccno_1 (PR target/91384)
;; Matches a copy of register operand 1 into register operand 0, a
;; flag-clobbering negation of operand 0, and a CCZ-mode comparison of
;; operand 1 with zero.  The replacement keeps the copy and uses a single
;; parallel that negates operand 0 and sets the CCZ flags from the negated
;; value, so the separate comparison is dropped.
13658 (define_peephole2
13659 [(set (match_operand:SWI 0 "general_reg_operand")
13660 (match_operand:SWI 1 "general_reg_operand"))
13661 (parallel [(set (match_dup 0) (neg:SWI (match_dup 0)))
13662 (clobber (reg:CC FLAGS_REG))])
13663 (set (reg:CCZ FLAGS_REG) (compare:CCZ (match_dup 1) (const_int 0)))]
13664 ""
13665 [(set (match_dup 0) (match_dup 1))
13666 (parallel [(set (reg:CCZ FLAGS_REG)
13667 (compare:CCZ (neg:SWI (match_dup 0)) (const_int 0)))
13668 (set (match_dup 0) (neg:SWI (match_dup 0)))])])
13669
13670 ;; Special expand pattern to handle integer mode abs
13671
;; Named expander for integer abs over the SDWIM modes.  Enabled for
;; TARGET_CMOVE, and for QImode only when TARGET_PARTIAL_REG_STALL is not
;; set.  When TARGET_EXPAND_ABS is set, the C code below emits an explicit
;; shift/xor/subtract sequence and finishes with DONE; otherwise the
;; expander falls through and generates the (abs ...) parallel with its
;; FLAGS_REG clobber as written in the template.
13672 (define_expand "abs<mode>2"
13673 [(parallel
13674 [(set (match_operand:SDWIM 0 "register_operand")
13675 (abs:SDWIM
13676 (match_operand:SDWIM 1 "general_operand")))
13677 (clobber (reg:CC FLAGS_REG))])]
13678 "TARGET_CMOVE
13679 && (<MODE>mode != QImode || !TARGET_PARTIAL_REG_STALL)"
13680 {
13681 if (TARGET_EXPAND_ABS)
13682 {
13683 machine_mode mode = <MODE>mode;
13684 operands[1] = force_reg (mode, operands[1]);
13685
13686 /* Generate rtx abs using:
13687 abs (x) = (((signed) x >> (W-1)) ^ x) - ((signed) x >> (W-1)) */
13688
13689 rtx shift_amount = gen_int_mode (GET_MODE_PRECISION (mode) - 1, QImode);
13690 rtx shift_dst = expand_simple_binop (mode, ASHIFTRT, operands[1],
13691 shift_amount, NULL_RTX,
13692 0, OPTAB_DIRECT);
13693 rtx xor_dst = expand_simple_binop (mode, XOR, shift_dst, operands[1],
13694 operands[0], 0, OPTAB_DIRECT);
13695 rtx minus_dst = expand_simple_binop (mode, MINUS, xor_dst, shift_dst,
13696 operands[0], 0, OPTAB_DIRECT);
/* The subtraction was asked to target operands[0]; copy the result
   there if it was produced elsewhere.  */
13697 if (!rtx_equal_p (minus_dst, operands[0]))
13698 emit_move_insn (operands[0], minus_dst);
13699 DONE;
13700 }
13701 })
13702
;; Double-word abs.  Enabled for TARGET_CMOVE while ix86_pre_reload_split ()
;; holds, and always split ("&& 1").  The preparation code forces operand 1
;; into a register, creates a new <DWI>-mode register as operand 2, and then
;; uses split_double_mode to split operands 0, 1 and 2 into DWIH pieces: the
;; first pieces stay in operands 0-2, the second pieces go to operands 3-5.
;; The emitted sequence negates the input into operands 2/5 (neg with CCC
;; flags, add carry, neg with CCGOC flags) and then sets operands 0 and 3
;; with if_then_else on the ge test of the flags: the negated pieces are
;; selected when the test holds, the original pieces otherwise.
13703 (define_insn_and_split "*abs<dwi>2_doubleword"
13704 [(set (match_operand:<DWI> 0 "register_operand")
13705 (abs:<DWI>
13706 (match_operand:<DWI> 1 "general_operand")))
13707 (clobber (reg:CC FLAGS_REG))]
13708 "TARGET_CMOVE
13709 && ix86_pre_reload_split ()"
13710 "#"
13711 "&& 1"
13712 [(parallel
13713 [(set (reg:CCC FLAGS_REG)
13714 (unspec:CCC [(match_dup 1) (const_int 0)] UNSPEC_CC_NE))
13715 (set (match_dup 2) (neg:DWIH (match_dup 1)))])
13716 (parallel
13717 [(set (match_dup 5)
13718 (plus:DWIH (plus:DWIH (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))
13719 (match_dup 4))
13720 (const_int 0)))
13721 (clobber (reg:CC FLAGS_REG))])
13722 (parallel
13723 [(set (reg:CCGOC FLAGS_REG)
13724 (compare:CCGOC
13725 (neg:DWIH (match_dup 5))
13726 (const_int 0)))
13727 (set (match_dup 5)
13728 (neg:DWIH (match_dup 5)))])
13729 (set (match_dup 0)
13730 (if_then_else:DWIH
13731 (ge (reg:CCGOC FLAGS_REG) (const_int 0))
13732 (match_dup 2)
13733 (match_dup 1)))
13734 (set (match_dup 3)
13735 (if_then_else:DWIH
13736 (ge (reg:CCGOC FLAGS_REG) (const_int 0))
13737 (match_dup 5)
13738 (match_dup 4)))]
13739 {
13740 operands[1] = force_reg (<DWI>mode, operands[1]);
13741 operands[2] = gen_reg_rtx (<DWI>mode);
13742
13743 split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]);
13744 })
13745
;; Double-word negated abs, (neg (abs x)).  The structure is identical to
;; the double-word abs pattern: same enabling condition, same preparation
;; code and same three-insn negation of the input into operands 2/5.  The
;; only difference is that the two final if_then_else sets use the lt test
;; of the CCGOC flags instead of ge to choose between the negated and the
;; original pieces.
13746 (define_insn_and_split "*nabs<dwi>2_doubleword"
13747 [(set (match_operand:<DWI> 0 "register_operand")
13748 (neg:<DWI>
13749 (abs:<DWI>
13750 (match_operand:<DWI> 1 "general_operand"))))
13751 (clobber (reg:CC FLAGS_REG))]
13752 "TARGET_CMOVE
13753 && ix86_pre_reload_split ()"
13754 "#"
13755 "&& 1"
13756 [(parallel
13757 [(set (reg:CCC FLAGS_REG)
13758 (unspec:CCC [(match_dup 1) (const_int 0)] UNSPEC_CC_NE))
13759 (set (match_dup 2) (neg:DWIH (match_dup 1)))])
13760 (parallel
13761 [(set (match_dup 5)
13762 (plus:DWIH (plus:DWIH (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))
13763 (match_dup 4))
13764 (const_int 0)))
13765 (clobber (reg:CC FLAGS_REG))])
13766 (parallel
13767 [(set (reg:CCGOC FLAGS_REG)
13768 (compare:CCGOC
13769 (neg:DWIH (match_dup 5))
13770 (const_int 0)))
13771 (set (match_dup 5)
13772 (neg:DWIH (match_dup 5)))])
13773 (set (match_dup 0)
13774 (if_then_else:DWIH
13775 (lt (reg:CCGOC FLAGS_REG) (const_int 0))
13776 (match_dup 2)
13777 (match_dup 1)))
13778 (set (match_dup 3)
13779 (if_then_else:DWIH
13780 (lt (reg:CCGOC FLAGS_REG) (const_int 0))
13781 (match_dup 5)
13782 (match_dup 4)))]
13783 {
13784 operands[1] = force_reg (<DWI>mode, operands[1]);
13785 operands[2] = gen_reg_rtx (<DWI>mode);
13786
13787 split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]);
13788 })
13789
;; Single-word integer absolute value (SWI modes) for TARGET_CMOVE.
;; Not available for QImode when TARGET_PARTIAL_REG_STALL is set.
;; Matches only while ix86_pre_reload_split () holds and is always split
;; into a negate that also sets CCGOC flags (result in a fresh pseudo,
;; operand 2) followed by an if_then_else on GE that picks the negated
;; value or the original operand 1.
13790 (define_insn_and_split "*abs<mode>2_1"
13791 [(set (match_operand:SWI 0 "register_operand")
13792 (abs:SWI
13793 (match_operand:SWI 1 "general_operand")))
13794 (clobber (reg:CC FLAGS_REG))]
13795 "TARGET_CMOVE
13796 && (<MODE>mode != QImode || !TARGET_PARTIAL_REG_STALL)
13797 && ix86_pre_reload_split ()"
13798 "#"
13799 "&& 1"
13800 [(parallel
13801 [(set (reg:CCGOC FLAGS_REG)
13802 (compare:CCGOC
13803 (neg:SWI (match_dup 1))
13804 (const_int 0)))
13805 (set (match_dup 2)
13806 (neg:SWI (match_dup 1)))])
13807 (set (match_dup 0)
13808 (if_then_else:SWI
13809 (ge (reg:CCGOC FLAGS_REG) (const_int 0))
13810 (match_dup 2)
13811 (match_dup 1)))]
13812 {
13813 operands[1] = force_reg (<MODE>mode, operands[1]);
13814 operands[2] = gen_reg_rtx (<MODE>mode);
13815 })
13816
;; Single-word negated absolute value, (neg (abs x)), for TARGET_CMOVE.
;; Same conditions and same negate-with-flags step as "*abs<mode>2_1";
;; the final if_then_else tests LT instead of GE, so it selects the
;; opposite value of the (negated, original) pair.
13817 (define_insn_and_split "*nabs<mode>2_1"
13818 [(set (match_operand:SWI 0 "register_operand")
13819 (neg:SWI
13820 (abs:SWI
13821 (match_operand:SWI 1 "general_operand"))))
13822 (clobber (reg:CC FLAGS_REG))]
13823 "TARGET_CMOVE
13824 && (<MODE>mode != QImode || !TARGET_PARTIAL_REG_STALL)
13825 && ix86_pre_reload_split ()"
13826 "#"
13827 "&& 1"
13828 [(parallel
13829 [(set (reg:CCGOC FLAGS_REG)
13830 (compare:CCGOC
13831 (neg:SWI (match_dup 1))
13832 (const_int 0)))
13833 (set (match_dup 2)
13834 (neg:SWI (match_dup 1)))])
13835 (set (match_dup 0)
13836 (if_then_else:SWI
13837 (lt (reg:CCGOC FLAGS_REG) (const_int 0))
13838 (match_dup 2)
13839 (match_dup 1)))]
13840 {
13841 operands[1] = force_reg (<MODE>mode, operands[1]);
13842 operands[2] = gen_reg_rtx (<MODE>mode);
13843 })
13844
;; TFmode expander for the absneg code iterator (presumably abs and neg),
;; available with TARGET_SSE.  All work is delegated to
;; ix86_expand_fp_absneg_operator; the RTL template is not emitted (DONE).
13845 (define_expand "<code>tf2"
13846 [(set (match_operand:TF 0 "register_operand")
13847 (absneg:TF (match_operand:TF 1 "register_operand")))]
13848 "TARGET_SSE"
13849 "ix86_expand_fp_absneg_operator (<CODE>, TFmode, operands); DONE;")
13850
;; TFmode abs/neg carrying a (use) of vector operand 2 (presumably the
;; sign-bit mask built by the expander -- confirm against
;; ix86_expand_fp_absneg_operator).  After reload it is split into a
;; single <absneg_op> of operands 1 and 2.
;; Before the split template is instantiated operands 1 and 2 are
;; exchanged when: with AVX, operand 1 is a MEM; without AVX, operand 0
;; matches operand 2 (so the destination ends up as the first source).
;; Alternatives 0-1 are the non-AVX forms, 2-3 the AVX forms ("isa").
13851 (define_insn_and_split "*<code>tf2_1"
13852 [(set (match_operand:TF 0 "register_operand" "=x,x,Yv,Yv")
13853 (absneg:TF
13854 (match_operand:TF 1 "vector_operand" "0,xBm,Yv,m")))
13855 (use (match_operand:TF 2 "vector_operand" "xBm,0,Yvm,Yv"))]
13856 "TARGET_SSE"
13857 "#"
13858 "&& reload_completed"
13859 [(set (match_dup 0)
13860 (<absneg_op>:TF (match_dup 1) (match_dup 2)))]
13861 {
13862 if (TARGET_AVX)
13863 {
13864 if (MEM_P (operands[1]))
13865 std::swap (operands[1], operands[2]);
13866 }
13867 else
13868 {
13869 if (operands_match_p (operands[0], operands[2]))
13870 std::swap (operands[1], operands[2]);
13871 }
13872 }
13873 [(set_attr "isa" "noavx,noavx,avx,avx")])
13874
;; TFmode (neg (abs x)) carrying a (use) of vector operand 2.  After
;; reload it is split into (ior op1 op2), using the same operand-swapping
;; rules as "*<code>tf2_1": with AVX swap when operand 1 is a MEM, without
;; AVX swap when operand 0 matches operand 2.
13875 (define_insn_and_split "*nabstf2_1"
13876 [(set (match_operand:TF 0 "register_operand" "=x,x,Yv,Yv")
13877 (neg:TF
13878 (abs:TF
13879 (match_operand:TF 1 "vector_operand" "0,xBm,Yv,m"))))
13880 (use (match_operand:TF 2 "vector_operand" "xBm,0,Yvm,Yv"))]
13881 "TARGET_SSE"
13882 "#"
13883 "&& reload_completed"
13884 [(set (match_dup 0)
13885 (ior:TF (match_dup 1) (match_dup 2)))]
13886 {
13887 if (TARGET_AVX)
13888 {
13889 if (MEM_P (operands[1]))
13890 std::swap (operands[1], operands[2]);
13891 }
13892 else
13893 {
13894 if (operands_match_p (operands[0], operands[2]))
13895 std::swap (operands[1], operands[2]);
13896 }
13897 }
13898 [(set_attr "isa" "noavx,noavx,avx,avx")])
13899
;; HFmode abs/neg expander, available with TARGET_AVX512FP16.  Delegates
;; to ix86_expand_fp_absneg_operator and emits nothing itself (DONE).
13900 (define_expand "<code>hf2"
13901 [(set (match_operand:HF 0 "register_operand")
13902 (absneg:HF (match_operand:HF 1 "register_operand")))]
13903 "TARGET_AVX512FP16"
13904 "ix86_expand_fp_absneg_operator (<CODE>, HFmode, operands); DONE;")
13905
;; abs/neg expander for the X87MODEF modes.  Enabled when the x87 is
;; available or when the mode is an SSE float mode and SSE math is in use.
;; Delegates to ix86_expand_fp_absneg_operator (DONE).
13906 (define_expand "<code><mode>2"
13907 [(set (match_operand:X87MODEF 0 "register_operand")
13908 (absneg:X87MODEF (match_operand:X87MODEF 1 "register_operand")))]
13909 "TARGET_80387 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
13910 "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
13911
13912 ;; Changing of sign for FP values is doable using integer unit too.
;; This insn therefore offers two alternatives, both tied to operand 0:
;; an "f" register and a discouraged ("!") general register.  It clobbers
;; the flags and always outputs "#", so it must be split before final.
;; Enabled when the x87 is available and SSE math is not used for the mode.
13913 (define_insn "*<code><mode>2_i387_1"
13914 [(set (match_operand:X87MODEF 0 "register_operand" "=f,!r")
13915 (absneg:X87MODEF
13916 (match_operand:X87MODEF 1 "register_operand" "0,0")))
13917 (clobber (reg:CC FLAGS_REG))]
13918 "TARGET_80387 && !(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
13919 "#")
13920
;; After reload, when both operands satisfy fp_register_operand, rewrite
;; the abs/neg-with-flags-clobber form into the plain abs/neg set (the
;; clobber is dropped).
13921 (define_split
13922 [(set (match_operand:X87MODEF 0 "fp_register_operand")
13923 (absneg:X87MODEF (match_operand:X87MODEF 1 "fp_register_operand")))
13924 (clobber (reg:CC FLAGS_REG))]
13925 "TARGET_80387 && reload_completed"
13926 [(set (match_dup 0) (absneg:X87MODEF (match_dup 1)))])
13927
;; After reload, when both operands are general registers, hand the
;; abs/neg over to ix86_split_fp_absneg_operator, which emits the
;; replacement insns itself (the (const_int 0) template is a placeholder).
13928 (define_split
13929 [(set (match_operand:X87MODEF 0 "general_reg_operand")
13930 (absneg:X87MODEF (match_operand:X87MODEF 1 "general_reg_operand")))
13931 (clobber (reg:CC FLAGS_REG))]
13932 "TARGET_80387 && reload_completed"
13933 [(const_int 0)]
13934 "ix86_split_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
13935
;; HFmode abs/neg for TARGET_AVX512FP16, carrying a (use) of a V8HF
;; vector operand 2 and a flags clobber.  After reload it is split into a
;; V8HF <absneg_op> of operands 1 and 2; operands 0 and 1 are first
;; rewritten as V8HF lowpart subregs of the original HF registers.
13936 (define_insn_and_split "*<code>hf2_1"
13937 [(set (match_operand:HF 0 "register_operand" "=Yv")
13938 (absneg:HF
13939 (match_operand:HF 1 "register_operand" "Yv")))
13940 (use (match_operand:V8HF 2 "vector_operand" "Yvm"))
13941 (clobber (reg:CC FLAGS_REG))]
13942 "TARGET_AVX512FP16"
13943 "#"
13944 "&& reload_completed"
13945 [(set (match_dup 0)
13946 (<absneg_op>:V8HF (match_dup 1) (match_dup 2)))]
13947 {
13948 operands[0] = lowpart_subreg (V8HFmode, operands[0], HFmode);
13949 operands[1] = lowpart_subreg (V8HFmode, operands[1], HFmode);
13950 })
13951
;; abs/neg for MODEF with a (use) of vector operand 2 and a flags clobber.
;; Always outputs "#"; the define_splits that follow handle the register
;; classes after reload.  Alternatives 0-2 use "x"/"Yv" registers (non-AVX,
;; non-AVX, AVX per "isa"), alternative 3 an "f" register and alternative
;; 4 a discouraged general register.
;; The "enabled" attribute works as follows: when SSE math is used for the
;; mode, alternatives 3 and 4 are enabled only with TARGET_MIX_SSE_I387 and
;; the others keep the default ("*"); otherwise only alternatives 3 and 4
;; are enabled.
13952 (define_insn "*<code><mode>2_1"
13953 [(set (match_operand:MODEF 0 "register_operand" "=x,x,Yv,f,!r")
13954 (absneg:MODEF
13955 (match_operand:MODEF 1 "register_operand" "0,x,Yv,0,0")))
13956 (use (match_operand:<ssevecmode> 2 "vector_operand" "xBm,0,Yvm,X,X"))
13957 (clobber (reg:CC FLAGS_REG))]
13958 "TARGET_80387 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
13959 "#"
13960 [(set_attr "isa" "noavx,noavx,avx,*,*")
13961 (set (attr "enabled")
13962 (if_then_else
13963 (match_test ("SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"))
13964 (if_then_else
13965 (eq_attr "alternative" "3,4")
13966 (symbol_ref "TARGET_MIX_SSE_I387")
13967 (const_string "*"))
13968 (if_then_else
13969 (eq_attr "alternative" "3,4")
13970 (symbol_ref "true")
13971 (symbol_ref "false"))))])
13972
;; After reload, MODEF abs/neg on SSE registers becomes a vector
;; <absneg_op> of operand 1 and the vector operand 2.  Operands 0 and 1
;; are rewritten as lowpart subregs in the vector mode.  Without AVX, if
;; the (rewritten) operand 0 matches operand 2, operands 1 and 2 are
;; exchanged so that the destination is the first source.
13973 (define_split
13974 [(set (match_operand:MODEF 0 "sse_reg_operand")
13975 (absneg:MODEF
13976 (match_operand:MODEF 1 "sse_reg_operand")))
13977 (use (match_operand:<ssevecmodef> 2 "vector_operand"))
13978 (clobber (reg:CC FLAGS_REG))]
13979 "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
13980 && reload_completed"
13981 [(set (match_dup 0)
13982 (<absneg_op>:<ssevecmodef> (match_dup 1) (match_dup 2)))]
13983 {
13984 machine_mode mode = <MODE>mode;
13985 machine_mode vmode = <ssevecmodef>mode;
13986
13987 operands[0] = lowpart_subreg (vmode, operands[0], mode);
13988 operands[1] = lowpart_subreg (vmode, operands[1], mode);
13989
13990 if (!TARGET_AVX && operands_match_p (operands[0], operands[2]))
13991 std::swap (operands[1], operands[2]);
13992 })
13993
;; After reload, MODEF abs/neg on fp_register_operand registers: drop
;; both the (use) of operand 2 and the flags clobber, leaving the plain
;; abs/neg set.
13994 (define_split
13995 [(set (match_operand:MODEF 0 "fp_register_operand")
13996 (absneg:MODEF (match_operand:MODEF 1 "fp_register_operand")))
13997 (use (match_operand 2))
13998 (clobber (reg:CC FLAGS_REG))]
13999 "TARGET_80387 && reload_completed"
14000 [(set (match_dup 0) (absneg:MODEF (match_dup 1)))])
14001
;; After reload, MODEF abs/neg on general registers: the replacement
;; insns are emitted by ix86_split_fp_absneg_operator (the (const_int 0)
;; template is a placeholder, never emitted because of DONE).
14002 (define_split
14003 [(set (match_operand:MODEF 0 "general_reg_operand")
14004 (absneg:MODEF (match_operand:MODEF 1 "general_reg_operand")))
14005 (use (match_operand 2))
14006 (clobber (reg:CC FLAGS_REG))]
14007 "TARGET_80387 && reload_completed"
14008 [(const_int 0)]
14009 "ix86_split_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
14010
;; MODEF (neg (abs x)) under SSE math, carrying a (use) of vector
;; operand 2.  After reload it is split into a vector-mode (ior op1 op2).
;; Operands 0 and 1 are rewritten as vector-mode lowpart subregs; without
;; AVX, operands 1 and 2 are exchanged when operand 0 matches operand 2.
14011 (define_insn_and_split "*nabs<mode>2_1"
14012 [(set (match_operand:MODEF 0 "register_operand" "=x,x,Yv")
14013 (neg:MODEF
14014 (abs:MODEF
14015 (match_operand:MODEF 1 "register_operand" "0,x,Yv"))))
14016 (use (match_operand:<ssevecmode> 2 "vector_operand" "xBm,0,Yvm"))]
14017 "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
14018 "#"
14019 "&& reload_completed"
14020 [(set (match_dup 0)
14021 (ior:<ssevecmodef> (match_dup 1) (match_dup 2)))]
14022 {
14023 machine_mode mode = <MODE>mode;
14024 machine_mode vmode = <ssevecmodef>mode;
14025
14026 operands[0] = lowpart_subreg (vmode, operands[0], mode);
14027 operands[1] = lowpart_subreg (vmode, operands[1], mode);
14028
14029 if (!TARGET_AVX && operands_match_p (operands[0], operands[2]))
14030 std::swap (operands[1], operands[2]);
14031 }
14032 [(set_attr "isa" "noavx,noavx,avx")])
14033
14034 ;; Conditionalize these after reload. If they match before reload, we
14035 ;; lose the clobber and ability to use integer instructions.
14036
;; Plain x87 abs/neg without a flags clobber: in-place on an "f" register,
;; output via <absneg_mnemonic>, type "fsgn".  The condition requires
;; reload_completed, so the insn cannot match before reload.
14037 (define_insn "*<code><mode>2_i387"
14038 [(set (match_operand:X87MODEF 0 "register_operand" "=f")
14039 (absneg:X87MODEF (match_operand:X87MODEF 1 "register_operand" "0")))]
14040 "TARGET_80387 && reload_completed"
14041 "<absneg_mnemonic>"
14042 [(set_attr "type" "fsgn")
14043 (set_attr "mode" "<MODE>")])
14044
14045 ;; Copysign instructions
14046
;; copysign expander for the SSEMODEF modes.  Operand 1 may be any
;; nonmemory operand; operands 0 and 2 must be registers.  Enabled for
;; SSE float modes under SSE math, for TFmode with TARGET_SSE, and for
;; HFmode with TARGET_AVX512FP16.  Delegates to ix86_expand_copysign.
14047 (define_expand "copysign<mode>3"
14048 [(match_operand:SSEMODEF 0 "register_operand")
14049 (match_operand:SSEMODEF 1 "nonmemory_operand")
14050 (match_operand:SSEMODEF 2 "register_operand")]
14051 "(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
14052 || (TARGET_SSE && (<MODE>mode == TFmode))
14053 || (TARGET_AVX512FP16 && (<MODE>mode ==HFmode))"
14054 "ix86_expand_copysign (operands); DONE;")
14055
;; xorsign expander for the MODEFH modes.  When operands 1 and 2 are the
;; same rtx the result is emitted as abs of operand 1 (gen_abs<mode>2);
;; otherwise ix86_expand_xorsign does the work.
14056 (define_expand "xorsign<mode>3"
14057 [(match_operand:MODEFH 0 "register_operand")
14058 (match_operand:MODEFH 1 "register_operand")
14059 (match_operand:MODEFH 2 "register_operand")]
14060 "(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
14061 || <MODE>mode == HFmode"
14062 {
14063 if (rtx_equal_p (operands[1], operands[2]))
14064 emit_insn (gen_abs<mode>2 (operands[0], operands[1]));
14065 else
14066 ix86_expand_xorsign (operands);
14067 DONE;
14068 })
14069 \f
14070 ;; One's complement instructions
14071
;; Bitwise NOT expander for the SDWIM modes.  Delegates to
;; ix86_expand_unary_operator, passing TARGET_APX_NDD as its last argument.
14072 (define_expand "one_cmpl<mode>2"
14073 [(set (match_operand:SDWIM 0 "nonimmediate_operand")
14074 (not:SDWIM (match_operand:SDWIM 1 "nonimmediate_operand")))]
14075 ""
14076 {
14077 ix86_expand_unary_operator (NOT, <MODE>mode, operands, TARGET_APX_NDD);
14078 DONE;
14079 })
14080
;; Double-word bitwise NOT.  After reload it is split into two word-sized
;; (DWIH) NOTs; split_double_mode splits operands 0 and 1 in place and
;; stores their other halves in operands 2 and 3.
;; Alternative 0 operates in place; alternative 1 (isa "apx_ndd") has an
;; early-clobber register destination distinct from the source.
14081 (define_insn_and_split "*one_cmpl<dwi>2_doubleword"
14082 [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,&r")
14083 (not:<DWI> (match_operand:<DWI> 1 "nonimmediate_operand" "0,ro")))]
14084 "ix86_unary_operator_ok (NOT, <DWI>mode, operands, TARGET_APX_NDD)"
14085 "#"
14086 "&& reload_completed"
14087 [(set (match_dup 0)
14088 (not:DWIH (match_dup 1)))
14089 (set (match_dup 2)
14090 (not:DWIH (match_dup 3)))]
14091 "split_double_mode (<DWI>mode, &operands[0], 2, &operands[0], &operands[2]);"
14092 [(set_attr "isa" "*,apx_ndd")])
14093
;; Bitwise NOT for the SWI248 modes.  Alternative 0 is the in-place form,
;; alternative 1 (isa "apx_ndd") the two-operand form with a separate
;; source, and alternative 2 uses "k" registers (type "msklog") and
;; outputs "#", i.e. it must be split elsewhere.
14094 (define_insn "*one_cmpl<mode>2_1"
14095 [(set (match_operand:SWI248 0 "nonimmediate_operand" "=rm,r,?k")
14096 (not:SWI248 (match_operand:SWI248 1 "nonimmediate_operand" "0,rm,k")))]
14097 "ix86_unary_operator_ok (NOT, <MODE>mode, operands, TARGET_APX_NDD)"
14098 "@
14099 not{<imodesuffix>}\t%0
14100 not{<imodesuffix>}\t{%1, %0|%0, %1}
14101 #"
14102 [(set_attr "isa" "*,apx_ndd,<kmov_isa>")
14103 (set_attr "type" "negnot,negnot,msklog")
14104 (set_attr "mode" "<MODE>")])
14105
;; SImode bitwise NOT whose result is zero-extended to DImode (64-bit
;; targets only).  The register alternatives emit not{l} on the SImode
;; view of the destination (%k0); alternative 1 is the APX NDD form and
;; alternative 2 the "k"-register form (isa "avx512bw"), which outputs "#".
14106 (define_insn "*one_cmplsi2_1_zext"
14107 [(set (match_operand:DI 0 "register_operand" "=r,r,?k")
14108 (zero_extend:DI
14109 (not:SI (match_operand:SI 1 "nonimmediate_operand" "0,rm,k"))))]
14110 "TARGET_64BIT
14111 && ix86_unary_operator_ok (NOT, SImode, operands, TARGET_APX_NDD)"
14112 "@
14113 not{l}\t%k0
14114 not{l}\t{%1, %k0|%k0, %1}
14115 #"
14116 [(set_attr "isa" "x64,apx_ndd,avx512bw")
14117 (set_attr "type" "negnot,negnot,msklog")
14118 (set_attr "mode" "SI,SI,SI")])
14119
;; QImode bitwise NOT.  Alternative 0 is not{b} in place; alternative 1
;; emits not{l} on the SImode view of the register (%k0); alternative 2 is
;; the APX NDD form; alternative 3 uses "k" registers and outputs "#".
;; The "mode" attribute is SI for alternative 1, HI for alternative 3 when
;; TARGET_AVX512DQ is not available, and QI otherwise.
14120 (define_insn "*one_cmplqi2_1"
14121 [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r,r,?k")
14122 (not:QI (match_operand:QI 1 "nonimmediate_operand" "0,0,rm,k")))]
14123 "ix86_unary_operator_ok (NOT, QImode, operands, TARGET_APX_NDD)"
14124 "@
14125 not{b}\t%0
14126 not{l}\t%k0
14127 not{b}\t{%1, %0|%0, %1}
14128 #"
14129 [(set_attr "isa" "*,*,apx_ndd,avx512f")
14130 (set_attr "type" "negnot,negnot,negnot,msklog")
14131 (set (attr "mode")
14132 (cond [(eq_attr "alternative" "1")
14133 (const_string "SI")
14134 (and (eq_attr "alternative" "3")
14135 (match_test "!TARGET_AVX512DQ"))
14136 (const_string "HI")
14137 ]
14138 (const_string "QI")))
14139 ;; Potential partial reg stall on alternative 1.
14140 (set (attr "preferred_for_speed")
14141 (cond [(eq_attr "alternative" "1")
14142 (symbol_ref "!TARGET_PARTIAL_REG_STALL")]
14143 (symbol_ref "true")))])
14144
14145 ;; Alternative 1 is needed to work around LRA limitation, see PR82524.
;; Bitwise NOT writing only the low part (strict_low_part) of a register,
;; for the SWI12 modes.  Enabled unless TARGET_PARTIAL_REG_STALL is set,
;; or always when optimizing the function for size.
;; Alternative 0 is the in-place NOT.  Alternative 1 (source in a
;; different register) is split after reload, when the operands differ,
;; into a strict_low_part move of operand 1 into operand 0 followed by an
;; in-place NOT of operand 0.
14146 (define_insn_and_split "*one_cmpl<mode>_1_slp"
14147 [(set (strict_low_part (match_operand:SWI12 0 "register_operand" "+<r>,&<r>"))
14148 (not:SWI12 (match_operand:SWI12 1 "register_operand" "0,!<r>")))]
14149 "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
14150 "@
14151 not{<imodesuffix>}\t%0
14152 #"
14153 "&& reload_completed
14154 && !(rtx_equal_p (operands[0], operands[1]))"
14155 [(set (strict_low_part (match_dup 0)) (match_dup 1))
14156 (set (strict_low_part (match_dup 0))
14157 (not:SWI12 (match_dup 0)))]
14158 ""
14159 [(set_attr "type" "negnot")
14160 (set_attr "mode" "<MODE>")])
14161
;; Bitwise NOT that also sets the flags from comparing the result with
;; zero (flags mode must satisfy ix86_match_ccmode for CCNOmode).  The
;; insn always outputs "#"; it has to be rewritten by a splitter before
;; final output.  Alternative 1 is the APX NDD form.
14162 (define_insn "*one_cmpl<mode>2_2"
14163 [(set (reg FLAGS_REG)
14164 (compare (not:SWI (match_operand:SWI 1 "nonimmediate_operand" "0,rm"))
14165 (const_int 0)))
14166 (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,r")
14167 (not:SWI (match_dup 1)))]
14168 "ix86_match_ccmode (insn, CCNOmode)
14169 && ix86_unary_operator_ok (NOT, <MODE>mode, operands, TARGET_APX_NDD)"
14170 "#"
14171 [(set_attr "type" "alu1")
14172 (set_attr "isa" "*,apx_ndd")
14173 (set_attr "mode" "<MODE>")])
14174
;; Rewrite a flag-setting bitwise NOT as an XOR with -1: both the compare
;; (rebuilt with the original comparison operator 2) and the value set use
;; (xor x -1) in place of (not x), kept together in one parallel.
14175 (define_split
14176 [(set (match_operand 0 "flags_reg_operand")
14177 (match_operator 2 "compare_operator"
14178 [(not:SWI (match_operand:SWI 3 "nonimmediate_operand"))
14179 (const_int 0)]))
14180 (set (match_operand:SWI 1 "nonimmediate_operand")
14181 (not:SWI (match_dup 3)))]
14182 "ix86_match_ccmode (insn, CCNOmode)"
14183 [(parallel [(set (match_dup 0)
14184 (match_op_dup 2 [(xor:SWI (match_dup 3) (const_int -1))
14185 (const_int 0)]))
14186 (set (match_dup 1)
14187 (xor:SWI (match_dup 3) (const_int -1)))])])
14188
;; Flag-setting SImode bitwise NOT whose value result is zero-extended to
;; DImode (64-bit targets only).  Always outputs "#", so it has to be
;; rewritten by a splitter.  Alternative 1 is the APX NDD form.
14189 (define_insn "*one_cmplsi2_2_zext"
14190 [(set (reg FLAGS_REG)
14191 (compare (not:SI (match_operand:SI 1 "nonimmediate_operand" "0,rm"))
14192 (const_int 0)))
14193 (set (match_operand:DI 0 "register_operand" "=r,r")
14194 (zero_extend:DI (not:SI (match_dup 1))))]
14195 "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)
14196 && ix86_unary_operator_ok (NOT, SImode, operands, TARGET_APX_NDD)"
14197 "#"
14198 [(set_attr "type" "alu1")
14199 (set_attr "isa" "*,apx_ndd")
14200 (set_attr "mode" "SI")])
14201
;; Zero-extending counterpart of the NOT-to-XOR rewrite: the flag-setting
;; SImode NOT with a DImode zero-extended result becomes (xor x -1) in
;; both the compare and the zero_extend, kept together in one parallel.
14202 (define_split
14203 [(set (match_operand 0 "flags_reg_operand")
14204 (match_operator 2 "compare_operator"
14205 [(not:SI (match_operand:SI 3 "nonimmediate_operand"))
14206 (const_int 0)]))
14207 (set (match_operand:DI 1 "register_operand")
14208 (zero_extend:DI (not:SI (match_dup 3))))]
14209 "ix86_match_ccmode (insn, CCNOmode)"
14210 [(parallel [(set (match_dup 0)
14211 (match_op_dup 2 [(xor:SI (match_dup 3) (const_int -1))
14212 (const_int 0)]))
14213 (set (match_dup 1)
14214 (zero_extend:DI (xor:SI (match_dup 3) (const_int -1))))])])
14215
14216 ;; Alternative 1 is needed to work around LRA limitation, see PR82524.
;; Bitwise NOT of the 8-bit field at bit offset 8 of a "Q" register
;; (zero_extract ... (const_int 8) (const_int 8)), output as not{b} on %h0.
;; Alternative 0 operates in place.  Alternative 1 (source in a different
;; register) is split after reload, when the operands differ, into a copy
;; of the field from operand 1 into operand 0 followed by the in-place NOT.
14217 (define_insn_and_split "*one_cmplqi_ext<mode>_1"
14218 [(set (zero_extract:SWI248
14219 (match_operand 0 "int248_register_operand" "+Q,&Q")
14220 (const_int 8)
14221 (const_int 8))
14222 (subreg:SWI248
14223 (not:QI
14224 (subreg:QI
14225 (match_operator:SWI248 2 "extract_operator"
14226 [(match_operand 1 "int248_register_operand" "0,!Q")
14227 (const_int 8)
14228 (const_int 8)]) 0)) 0))]
14229 ""
14230 "@
14231 not{b}\t%h0
14232 #"
14233 "reload_completed
14234 && !(rtx_equal_p (operands[0], operands[1]))"
14235 [(set (zero_extract:SWI248
14236 (match_dup 0) (const_int 8) (const_int 8))
14237 (zero_extract:SWI248
14238 (match_dup 1) (const_int 8) (const_int 8)))
14239 (set (zero_extract:SWI248
14240 (match_dup 0) (const_int 8) (const_int 8))
14241 (subreg:SWI248
14242 (not:QI
14243 (subreg:QI
14244 (match_op_dup 2
14245 [(match_dup 0) (const_int 8) (const_int 8)]) 0)) 0))]
14246 ""
14247 [(set_attr "type" "negnot")
14248 (set_attr "mode" "QI")])
14249 \f
14250 ;; Shift instructions
14251
14252 ;; DImode shifts are implemented using the i386 "shift double" opcode,
14253 ;; which is written as "sh[lr]d[lw] imm,reg,reg/mem". If the shift count
14254 ;; is variable, then the count is in %cl and the "imm" operand is dropped
14255 ;; from the assembler input.
14256 ;;
14257 ;; This instruction shifts the target reg/mem as usual, but instead of
14258 ;; shifting in zeros, bits are shifted in from reg operand. If the insn
14259 ;; is a left shift double, bits are taken from the high order bits of
14260 ;; reg, else if the insn is a shift right double, bits are taken from the
14261 ;; low order bits of reg. So if %eax is "1234" and %edx is "5678",
14262 ;; "shldl $8,%edx,%eax" leaves %edx unchanged and sets %eax to "2345".
14263 ;;
14264 ;; Since sh[lr]d does not change the `reg' operand, that is done
14265 ;; separately, making all shifts emit pairs of shift double and normal
14266 ;; shift. Since sh[lr]d does not shift more than 31 bits, and we wish to
14267 ;; support a 63 bit shift, each shift where the count is in a reg expands
14268 ;; to a pair of shifts, a branch, a shift by 32 and a label.
14269 ;;
14270 ;; If the shift count is a constant, we need never emit more than one
14271 ;; shift pair, instead using moves and sign extension for counts greater
14272 ;; than 31.
14273
;; Left-shift expander for the SDWIM modes.  The shift count (operand 2)
;; is a QImode nonmemory operand.  Delegates to
;; ix86_expand_binary_operator, passing TARGET_APX_NDD as its last argument.
14274 (define_expand "ashl<mode>3"
14275 [(set (match_operand:SDWIM 0 "<shift_operand>")
14276 (ashift:SDWIM (match_operand:SDWIM 1 "<ashl_input_operand>")
14277 (match_operand:QI 2 "nonmemory_operand")))]
14278 ""
14279 {
14280 ix86_expand_binary_operator (ASHIFT, <MODE>mode, operands, TARGET_APX_NDD);
14281 DONE;
14282 })
14283
;; Double-word left shift whose count is the QImode subreg of
;; (and reg mask).  Matched only before reload, and only when the mask
;; either has the word-size bit clear or has all bits below twice the
;; word size set.  Always split ("&& 1").
;; Preparation code:
;;  - If the mask has the word-size bit set (the second case above), the
;;    AND is dropped and the generic ashl<dwi>3_doubleword insn is emitted
;;    with the QImode low part of the count.
;;  - Otherwise operands 0 and 1 are split into halves (operands 4/5 and
;;    6/7), operands 8 and 9 become word-size-1 and word-size, and the
;;    template emits a shift-double style insn for operand 6 followed by a
;;    plain word shift into operand 4.  If the mask does not keep all bits
;;    below the word size, an explicit AND of the count is emitted first.
;;    Operand 7 is copied into operand 6 beforehand when they differ.
14284 (define_insn_and_split "*ashl<dwi>3_doubleword_mask"
14285 [(set (match_operand:<DWI> 0 "register_operand")
14286 (ashift:<DWI>
14287 (match_operand:<DWI> 1 "register_operand")
14288 (subreg:QI
14289 (and
14290 (match_operand 2 "int248_register_operand" "c")
14291 (match_operand 3 "const_int_operand")) 0)))
14292 (clobber (reg:CC FLAGS_REG))]
14293 "((INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT)) == 0
14294 || ((INTVAL (operands[3]) & (2 * <MODE_SIZE> * BITS_PER_UNIT - 1))
14295 == (2 * <MODE_SIZE> * BITS_PER_UNIT - 1)))
14296 && ix86_pre_reload_split ()"
14297 "#"
14298 "&& 1"
14299 [(parallel
14300 [(set (match_dup 6)
14301 (ior:DWIH (ashift:DWIH (match_dup 6)
14302 (and:QI (match_dup 2) (match_dup 8)))
14303 (subreg:DWIH
14304 (lshiftrt:<DWI> (zero_extend:<DWI> (match_dup 5))
14305 (minus:QI (match_dup 9)
14306 (and:QI (match_dup 2) (match_dup 8)))) 0)))
14307 (clobber (reg:CC FLAGS_REG))])
14308 (parallel
14309 [(set (match_dup 4)
14310 (ashift:DWIH (match_dup 5) (match_dup 2)))
14311 (clobber (reg:CC FLAGS_REG))])]
14312 {
14313 if ((INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT)) != 0)
14314 {
14315 operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
14316 operands[2] = gen_lowpart (QImode, operands[2]);
14317 emit_insn (gen_ashl<dwi>3_doubleword (operands[0], operands[1],
14318 operands[2]));
14319 DONE;
14320 }
14321
14322 split_double_mode (<DWI>mode, &operands[0], 2, &operands[4], &operands[6]);
14323
14324 operands[8] = GEN_INT (<MODE_SIZE> * BITS_PER_UNIT - 1);
14325 operands[9] = GEN_INT (<MODE_SIZE> * BITS_PER_UNIT);
14326
14327 if ((INTVAL (operands[3]) & ((<MODE_SIZE> * BITS_PER_UNIT) - 1))
14328 != ((<MODE_SIZE> * BITS_PER_UNIT) - 1))
14329 {
14330 rtx xops[3];
14331 xops[0] = gen_reg_rtx (GET_MODE (operands[2]));
14332 xops[1] = operands[2];
14333 xops[2] = GEN_INT (INTVAL (operands[3])
14334 & ((<MODE_SIZE> * BITS_PER_UNIT) - 1));
14335 ix86_expand_binary_operator (AND, GET_MODE (operands[2]), xops);
14336 operands[2] = xops[0];
14337 }
14338
14339 operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
14340 operands[2] = gen_lowpart (QImode, operands[2]);
14341
14342 if (!rtx_equal_p (operands[6], operands[7]))
14343 emit_move_insn (operands[6], operands[7]);
14344 })
14345
;; Variant of "*ashl<dwi>3_doubleword_mask" in which the count is already
;; a QImode (and reg mask), with no subreg.  Same matching condition and
;; same split template.
;; Preparation code: when the mask has the word-size bit set, emit the
;; generic ashl<dwi>3_doubleword with the unmasked count.  Otherwise split
;; operands 0 and 1 into halves (operands 4/5 and 6/7), set operands 8 and
;; 9 to word-size-1 and word-size, emit an explicit andqi3 of the count
;; with operand 3 when the mask does not keep all bits below the word
;; size, and copy operand 7 into operand 6 when they differ.
14346 (define_insn_and_split "*ashl<dwi>3_doubleword_mask_1"
14347 [(set (match_operand:<DWI> 0 "register_operand")
14348 (ashift:<DWI>
14349 (match_operand:<DWI> 1 "register_operand")
14350 (and:QI
14351 (match_operand:QI 2 "register_operand" "c")
14352 (match_operand:QI 3 "const_int_operand"))))
14353 (clobber (reg:CC FLAGS_REG))]
14354 "((INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT)) == 0
14355 || ((INTVAL (operands[3]) & (2 * <MODE_SIZE> * BITS_PER_UNIT - 1))
14356 == (2 * <MODE_SIZE> * BITS_PER_UNIT - 1)))
14357 && ix86_pre_reload_split ()"
14358 "#"
14359 "&& 1"
14360 [(parallel
14361 [(set (match_dup 6)
14362 (ior:DWIH (ashift:DWIH (match_dup 6)
14363 (and:QI (match_dup 2) (match_dup 8)))
14364 (subreg:DWIH
14365 (lshiftrt:<DWI> (zero_extend:<DWI> (match_dup 5))
14366 (minus:QI (match_dup 9)
14367 (and:QI (match_dup 2) (match_dup 8)))) 0)))
14368 (clobber (reg:CC FLAGS_REG))])
14369 (parallel
14370 [(set (match_dup 4)
14371 (ashift:DWIH (match_dup 5) (match_dup 2)))
14372 (clobber (reg:CC FLAGS_REG))])]
14373 {
14374 if ((INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT)) != 0)
14375 {
14376 emit_insn (gen_ashl<dwi>3_doubleword (operands[0], operands[1],
14377 operands[2]));
14378 DONE;
14379 }
14380
14381 split_double_mode (<DWI>mode, &operands[0], 2, &operands[4], &operands[6]);
14382
14383 operands[8] = GEN_INT (<MODE_SIZE> * BITS_PER_UNIT - 1);
14384 operands[9] = GEN_INT (<MODE_SIZE> * BITS_PER_UNIT);
14385
14386 if ((INTVAL (operands[3]) & ((<MODE_SIZE> * BITS_PER_UNIT) - 1))
14387 != ((<MODE_SIZE> * BITS_PER_UNIT) - 1))
14388 {
14389 rtx tem = gen_reg_rtx (QImode);
14390 emit_insn (gen_andqi3 (tem, operands[2], operands[3]));
14391 operands[2] = tem;
14392 }
14393
14394 if (!rtx_equal_p (operands[6], operands[7]))
14395 emit_move_insn (operands[6], operands[7]);
14396 })
14397
;; Generic double-word left shift (DWI modes) with a flags clobber.  The
;; destination is an early-clobber register.  Always outputs "#" (type
;; "multi"), so it relies on a splitter.  Alternative 0 ties the source to
;; the destination (or accepts an "n" constant allowed by
;; reg_or_pm1_operand); alternative 1 (isa "apx_ndd") takes the source in
;; a separate register.
14398 (define_insn "ashl<mode>3_doubleword"
14399 [(set (match_operand:DWI 0 "register_operand" "=&r,&r")
14400 (ashift:DWI (match_operand:DWI 1 "reg_or_pm1_operand" "0n,r")
14401 (match_operand:QI 2 "nonmemory_operand" "<S>c,<S>c")))
14402 (clobber (reg:CC FLAGS_REG))]
14403 ""
14404 "#"
14405 [(set_attr "type" "multi")
14406 (set_attr "isa" "*,apx_ndd")])
14407
;; Split a double-word left shift once epilogue_completed is set.  With
;; TARGET_APX_NDD, a register source that differs from the destination
;; goes through ix86_split_ashl_ndd; every other case goes through
;; ix86_split_ashl.  No scratch register is supplied (NULL_RTX).
14408 (define_split
14409 [(set (match_operand:DWI 0 "register_operand")
14410 (ashift:DWI (match_operand:DWI 1 "nonmemory_operand")
14411 (match_operand:QI 2 "nonmemory_operand")))
14412 (clobber (reg:CC FLAGS_REG))]
14413 "epilogue_completed"
14414 [(const_int 0)]
14415 {
14416 if (TARGET_APX_NDD
14417 && !rtx_equal_p (operands[0], operands[1])
14418 && REG_P (operands[1]))
14419 ix86_split_ashl_ndd (operands, NULL_RTX);
14420 else
14421 ix86_split_ashl (operands, NULL_RTX, <MODE>mode);
14422 DONE;
14423 })
14424
14425 ;; By default we don't ask for a scratch register, because when DWImode
14426 ;; values are manipulated, registers are already at a premium. But if
14427 ;; we have one handy, we won't turn it away.
;; This peephole (TARGET_CMOVE only) matches a double-word left shift when
;; a word-sized scratch register (operand 3) is available and passes that
;; scratch to the same two split helpers used by the scratch-less
;; define_split: ix86_split_ashl_ndd for the APX NDD case with a register
;; source distinct from the destination, ix86_split_ashl otherwise.
14428
14429 (define_peephole2
14430 [(match_scratch:DWIH 3 "r")
14431 (parallel [(set (match_operand:<DWI> 0 "register_operand")
14432 (ashift:<DWI>
14433 (match_operand:<DWI> 1 "nonmemory_operand")
14434 (match_operand:QI 2 "nonmemory_operand")))
14435 (clobber (reg:CC FLAGS_REG))])
14436 (match_dup 3)]
14437 "TARGET_CMOVE"
14438 [(const_int 0)]
14439 {
14440 if (TARGET_APX_NDD
14441 && !rtx_equal_p (operands[0], operands[1])
14442 && (REG_P (operands[1])))
14443 ix86_split_ashl_ndd (operands, operands[3]);
14444 else
14445 ix86_split_ashl (operands, operands[3], <DWI>mode);
14446 DONE;
14447 })
14448
;; Double-word left shift of a sign- or zero-extended word by a constant
;; count in the range [word bits, 2 * word bits).  Split after reload:
;; operand 0 is split into two halves (operands[0] and operands[3]); the
;; half in operands[3] receives operand 1 shifted left by
;; (count - word bits) and the half in operands[0] is cleared with
;; ix86_expand_clear.  (operands[3] is presumably the high word.)
;; When the remaining shift is zero only a move is needed.  Otherwise,
;; without TARGET_APX_NDD operand 1 is first copied into operands[3] and
;; shifted in place; with TARGET_APX_NDD the shift reads operand 1 directly.
14449 (define_insn_and_split "*ashl<dwi>3_doubleword_highpart"
14450 [(set (match_operand:<DWI> 0 "register_operand" "=r")
14451 (ashift:<DWI>
14452 (any_extend:<DWI> (match_operand:DWIH 1 "nonimmediate_operand" "rm"))
14453 (match_operand:QI 2 "const_int_operand")))
14454 (clobber (reg:CC FLAGS_REG))]
14455 "INTVAL (operands[2]) >= <MODE_SIZE> * BITS_PER_UNIT
14456 && INTVAL (operands[2]) < <MODE_SIZE> * BITS_PER_UNIT * 2"
14457 "#"
14458 "&& reload_completed"
14459 [(const_int 0)]
14460 {
14461 split_double_mode (<DWI>mode, &operands[0], 1, &operands[0], &operands[3]);
14462 int bits = INTVAL (operands[2]) - (<MODE_SIZE> * BITS_PER_UNIT);
14463 bool op_equal_p = rtx_equal_p (operands[3], operands[1]);
14464 if (bits == 0)
14465 {
14466 if (!op_equal_p)
14467 emit_move_insn (operands[3], operands[1]);
14468 }
14469 else
14470 {
14471 if (!op_equal_p && !TARGET_APX_NDD)
14472 emit_move_insn (operands[3], operands[1]);
14473 rtx op_tmp = TARGET_APX_NDD ? operands[1] : operands[3];
14474 emit_insn (gen_ashl<mode>3 (operands[3], op_tmp, GEN_INT (bits)));
14475 }
14476 ix86_expand_clear (operands[0]);
14477 DONE;
14478 })
14479
;; 64-bit shld with a count that is either a "J" constant or in "c".
;; Operand 0 is both source and destination ("+"): it is shifted left by
;; (count & 63) and ORed with the bits of operand 1 obtained by shifting
;; its zero-extended TImode value right by 64 - (count & 63) and taking
;; the DImode subreg at offset 0.  Clobbers the flags.
14480 (define_insn "x86_64_shld"
14481 [(set (match_operand:DI 0 "nonimmediate_operand" "+r*m")
14482 (ior:DI (ashift:DI (match_dup 0)
14483 (and:QI (match_operand:QI 2 "nonmemory_operand" "Jc")
14484 (const_int 63)))
14485 (subreg:DI
14486 (lshiftrt:TI
14487 (zero_extend:TI
14488 (match_operand:DI 1 "register_operand" "r"))
14489 (minus:QI (const_int 64)
14490 (and:QI (match_dup 2) (const_int 63)))) 0)))
14491 (clobber (reg:CC FLAGS_REG))]
14492 "TARGET_64BIT"
14493 "shld{q}\t{%s2%1, %0|%0, %1, %2}"
14494 [(set_attr "type" "ishift")
14495 (set_attr "prefix_0f" "1")
14496 (set_attr "mode" "DI")
14497 (set_attr "athlon_decode" "vector")
14498 (set_attr "amdfam10_decode" "vector")
14499 (set_attr "bdver1_decode" "vector")])
14500
;; APX NDD form of the 64-bit shld: the destination (operand 0) is a
;; register distinct from the shifted source (operand 1, register or
;; memory).  Operand 2 supplies the shifted-in bits and operand 3 is the
;; count ("J" constant or "c"), masked with 63 as in x86_64_shld.
14501 (define_insn "x86_64_shld_ndd"
14502 [(set (match_operand:DI 0 "register_operand" "=r")
14503 (ior:DI (ashift:DI (match_operand:DI 1 "nonimmediate_operand" "rm")
14504 (and:QI (match_operand:QI 3 "nonmemory_operand" "Jc")
14505 (const_int 63)))
14506 (subreg:DI
14507 (lshiftrt:TI
14508 (zero_extend:TI
14509 (match_operand:DI 2 "register_operand" "r"))
14510 (minus:QI (const_int 64)
14511 (and:QI (match_dup 3) (const_int 63)))) 0)))
14512 (clobber (reg:CC FLAGS_REG))]
14513 "TARGET_APX_NDD"
14514 "shld{q}\t{%s3%2, %1, %0|%0, %1, %2, %3}"
14515 [(set_attr "type" "ishift")
14516 (set_attr "mode" "DI")])
14517
;; 64-bit shld with a constant count.  Operand 2 (0..63) is the left shift
;; count and operand 3 the matching right shift count; the insn condition
;; requires operand 3 == 64 - operand 2.  Operand 0 is both source and
;; destination.  Clobbers the flags.
14518 (define_insn "x86_64_shld_1"
14519 [(set (match_operand:DI 0 "nonimmediate_operand" "+r*m")
14520 (ior:DI (ashift:DI (match_dup 0)
14521 (match_operand:QI 2 "const_0_to_63_operand"))
14522 (subreg:DI
14523 (lshiftrt:TI
14524 (zero_extend:TI
14525 (match_operand:DI 1 "register_operand" "r"))
14526 (match_operand:QI 3 "const_0_to_255_operand")) 0)))
14527 (clobber (reg:CC FLAGS_REG))]
14528 "TARGET_64BIT
14529 && INTVAL (operands[3]) == 64 - INTVAL (operands[2])"
14530 "shld{q}\t{%2, %1, %0|%0, %1, %2}"
14531 [(set_attr "type" "ishift")
14532 (set_attr "prefix_0f" "1")
14533 (set_attr "mode" "DI")
14534 (set_attr "length_immediate" "1")
14535 (set_attr "athlon_decode" "vector")
14536 (set_attr "amdfam10_decode" "vector")
14537 (set_attr "bdver1_decode" "vector")])
14538
;; APX NDD form of the constant-count 64-bit shld: register destination
;; (operand 0), shifted source in operand 1 (register or memory), fill
;; bits from operand 2.  Operand 3 (0..63) is the left shift count and
;; operand 4 must equal 64 - operand 3.
14539 (define_insn "x86_64_shld_ndd_1"
14540 [(set (match_operand:DI 0 "register_operand" "=r")
14541 (ior:DI (ashift:DI (match_operand:DI 1 "nonimmediate_operand" "rm")
14542 (match_operand:QI 3 "const_0_to_63_operand"))
14543 (subreg:DI
14544 (lshiftrt:TI
14545 (zero_extend:TI
14546 (match_operand:DI 2 "register_operand" "r"))
14547 (match_operand:QI 4 "const_0_to_255_operand")) 0)))
14548 (clobber (reg:CC FLAGS_REG))]
14549 "TARGET_APX_NDD
14550 && INTVAL (operands[4]) == 64 - INTVAL (operands[3])"
14551 "shld{q}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
14552 [(set_attr "type" "ishift")
14553 (set_attr "mode" "DI")
14554 (set_attr "length_immediate" "1")])
14555
14556
;; (op4 << op2) | (op1 >> op3) in DImode with constant counts satisfying
;; op3 == 64 - op2 and no zero_extend around the right-shifted operand.
;; Matched only before reload and always split; the preparation code
;; emits everything itself (DONE) and chooses among four cases:
;;  1. operand 4 is operand 0: x86_64_shld_1 in place (operand 1 forced
;;     into a register).
;;  2. operand 1 is operand 0: x86_64_shrd_1 in place (operand 4 forced
;;     into a register), with the two counts exchanged.
;;  3. TARGET_APX_NDD: compute into a fresh pseudo with the NDD insns --
;;     shld_ndd_1 when operand 4 is a MEM (operand 1 forced into a
;;     register), shrd_ndd_1 when operand 1 is a MEM, shld_ndd_1
;;     otherwise -- then move the pseudo into operand 0.
;;  4. Otherwise: copy operand 4 into a fresh pseudo, apply x86_64_shld_1
;;     to it, and move the pseudo into operand 0.
14557 (define_insn_and_split "*x86_64_shld_shrd_1_nozext"
14558 [(set (match_operand:DI 0 "nonimmediate_operand")
14559 (ior:DI (ashift:DI (match_operand:DI 4 "nonimmediate_operand")
14560 (match_operand:QI 2 "const_0_to_63_operand"))
14561 (lshiftrt:DI
14562 (match_operand:DI 1 "nonimmediate_operand")
14563 (match_operand:QI 3 "const_0_to_63_operand"))))
14564 (clobber (reg:CC FLAGS_REG))]
14565 "TARGET_64BIT
14566 && INTVAL (operands[3]) == 64 - INTVAL (operands[2])
14567 && ix86_pre_reload_split ()"
14568 "#"
14569 "&& 1"
14570 [(const_int 0)]
14571 {
14572 if (rtx_equal_p (operands[4], operands[0]))
14573 {
14574 operands[1] = force_reg (DImode, operands[1]);
14575 emit_insn (gen_x86_64_shld_1 (operands[0], operands[1], operands[2], operands[3]));
14576 }
14577 else if (rtx_equal_p (operands[1], operands[0]))
14578 {
14579 operands[4] = force_reg (DImode, operands[4]);
14580 emit_insn (gen_x86_64_shrd_1 (operands[0], operands[4], operands[3], operands[2]));
14581 }
14582 else if (TARGET_APX_NDD)
14583 {
14584 rtx tmp = gen_reg_rtx (DImode);
14585 if (MEM_P (operands[4]))
14586 {
14587 operands[1] = force_reg (DImode, operands[1]);
14588 emit_insn (gen_x86_64_shld_ndd_1 (tmp, operands[4], operands[1],
14589 operands[2], operands[3]));
14590 }
14591 else if (MEM_P (operands[1]))
14592 emit_insn (gen_x86_64_shrd_ndd_1 (tmp, operands[1], operands[4],
14593 operands[3], operands[2]));
14594 else
14595 emit_insn (gen_x86_64_shld_ndd_1 (tmp, operands[4], operands[1],
14596 operands[2], operands[3]));
14597 emit_move_insn (operands[0], tmp);
14598 }
14599 else
14600 {
14601 operands[1] = force_reg (DImode, operands[1]);
14602 rtx tmp = gen_reg_rtx (DImode);
14603 emit_move_insn (tmp, operands[4]);
14604 emit_insn (gen_x86_64_shld_1 (tmp, operands[1], operands[2], operands[3]));
14605 emit_move_insn (operands[0], tmp);
14606 }
14607 DONE;
14608 })
14609
;; Pre-reload form of the in-place 64-bit shld with a variable count: the
;; right shift count is written as (64 - count) and the count is not
;; masked.  Always split into the shape used by the "x86_64_shld" insn,
;; i.e. with the count masked by 63 and the right shift applied to the
;; zero-extended TImode value of operand 1.
14610 (define_insn_and_split "*x86_64_shld_2"
14611 [(set (match_operand:DI 0 "nonimmediate_operand")
14612 (ior:DI (ashift:DI (match_dup 0)
14613 (match_operand:QI 2 "nonmemory_operand"))
14614 (lshiftrt:DI (match_operand:DI 1 "register_operand")
14615 (minus:QI (const_int 64) (match_dup 2)))))
14616 (clobber (reg:CC FLAGS_REG))]
14617 "TARGET_64BIT && ix86_pre_reload_split ()"
14618 "#"
14619 "&& 1"
14620 [(parallel [(set (match_dup 0)
14621 (ior:DI (ashift:DI (match_dup 0)
14622 (and:QI (match_dup 2) (const_int 63)))
14623 (subreg:DI
14624 (lshiftrt:TI
14625 (zero_extend:TI (match_dup 1))
14626 (minus:QI (const_int 64)
14627 (and:QI (match_dup 2)
14628 (const_int 63)))) 0)))
14629 (clobber (reg:CC FLAGS_REG))])])
14630
;; Pre-reload form of the APX NDD 64-bit shld with a variable count: the
;; right shift count is written as (64 - count) and the count is not
;; masked.  Always split into two insns:
;;  1. the masked shape used by the "x86_64_shld_ndd" insn, computing into
;;     a fresh DImode pseudo (operand 4);
;;  2. a plain move of that pseudo into operand 0 (which may be memory,
;;     whereas the NDD insn needs a register destination).
;; The move has to be a separate insn.  Within a single parallel every
;; source is read before any destination is written, so a
;; (set op0 op4) placed in the same parallel as the shld would copy the
;; value operand 4 had before the shld, and the resulting three-element
;; parallel would not have the [set, clobber] shape of x86_64_shld_ndd.
;; Operand 4 needs no initialization: the shld overwrites it completely.
14631 (define_insn_and_split "*x86_64_shld_ndd_2"
14632 [(set (match_operand:DI 0 "nonimmediate_operand")
14633 (ior:DI (ashift:DI (match_operand:DI 1 "nonimmediate_operand")
14634 (match_operand:QI 3 "nonmemory_operand"))
14635 (lshiftrt:DI (match_operand:DI 2 "register_operand")
14636 (minus:QI (const_int 64) (match_dup 3)))))
14637 (clobber (reg:CC FLAGS_REG))]
14638 "TARGET_APX_NDD
14639 && ix86_pre_reload_split ()"
14640 "#"
14641 "&& 1"
14642 [(parallel [(set (match_dup 4)
14643 (ior:DI (ashift:DI (match_dup 1)
14644 (and:QI (match_dup 3) (const_int 63)))
14645 (subreg:DI
14646 (lshiftrt:TI
14647 (zero_extend:TI (match_dup 2))
14648 (minus:QI (const_int 64)
14649 (and:QI (match_dup 3)
14650 (const_int 63)))) 0)))
14651 (clobber (reg:CC FLAGS_REG))])
14652 (set (match_dup 0) (match_dup 4))]
14653 {
14654 operands[4] = gen_reg_rtx (DImode);
14656 })
14657
;; SImode SHLD with the count in operand 2; the RTL masks the count with 31.
;; Operand 0 is both read and written ("+r*m").  Operand 1 must be a
;; register and supplies the bits shifted in from the right: the low SImode
;; subreg of (zero_extend:DI op1) >> (32 - count).  The flags register is
;; clobbered.  The insn condition is empty, so the pattern is always
;; available.
14658 (define_insn "x86_shld"
14659 [(set (match_operand:SI 0 "nonimmediate_operand" "+r*m")
14660 (ior:SI (ashift:SI (match_dup 0)
14661 (and:QI (match_operand:QI 2 "nonmemory_operand" "Ic")
14662 (const_int 31)))
14663 (subreg:SI
14664 (lshiftrt:DI
14665 (zero_extend:DI
14666 (match_operand:SI 1 "register_operand" "r"))
14667 (minus:QI (const_int 32)
14668 (and:QI (match_dup 2) (const_int 31)))) 0)))
14669 (clobber (reg:CC FLAGS_REG))]
14670 ""
14671 "shld{l}\t{%s2%1, %0|%0, %1, %2}"
;; Scheduling/encoding attributes; the *_decode values are per-CPU hints.
14672 [(set_attr "type" "ishift")
14673 (set_attr "prefix_0f" "1")
14674 (set_attr "mode" "SI")
14675 (set_attr "pent_pair" "np")
14676 (set_attr "athlon_decode" "vector")
14677 (set_attr "amdfam10_decode" "vector")
14678 (set_attr "bdver1_decode" "vector")])
14679
;; APX NDD (separate destination) variant of x86_shld, enabled by
;; TARGET_APX_NDD: op0 = (op1 << (op3 & 31)) | bits shifted in from op2.
;; Operand 1 may be a register or memory ("rm"), operand 2 must be a
;; register, and the flags register is clobbered.
;; NOTE(review): operand 0 uses the nonimmediate_operand predicate with an
;; "=r" constraint, whereas x86_shld_ndd_1 uses register_operand "=r" --
;; confirm whether accepting a memory destination here is intended.
14680 (define_insn "x86_shld_ndd"
14681 [(set (match_operand:SI 0 "nonimmediate_operand" "=r")
14682 (ior:SI (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "rm")
14683 (and:QI (match_operand:QI 3 "nonmemory_operand" "Ic")
14684 (const_int 31)))
14685 (subreg:SI
14686 (lshiftrt:DI
14687 (zero_extend:DI
14688 (match_operand:SI 2 "register_operand" "r"))
14689 (minus:QI (const_int 32)
14690 (and:QI (match_dup 3) (const_int 31)))) 0)))
14691 (clobber (reg:CC FLAGS_REG))]
14692 "TARGET_APX_NDD"
14693 "shld{l}\t{%s3%2, %1, %0|%0, %1, %2, %3}"
14694 [(set_attr "type" "ishift")
14695 (set_attr "mode" "SI")])
14696
14697
;; SImode SHLD with a constant count.  Operand 2 is the left-shift count
;; (0..31) and operand 3 the complementary right-shift count; the insn
;; condition requires op3 == 32 - op2.  Operand 0 is read and written
;; ("+r*m"); operand 1 must be a register.  Only operand 2 is printed in
;; the assembly; length_immediate is fixed at 1.
14698 (define_insn "x86_shld_1"
14699 [(set (match_operand:SI 0 "nonimmediate_operand" "+r*m")
14700 (ior:SI (ashift:SI (match_dup 0)
14701 (match_operand:QI 2 "const_0_to_31_operand"))
14702 (subreg:SI
14703 (lshiftrt:DI
14704 (zero_extend:DI
14705 (match_operand:SI 1 "register_operand" "r"))
14706 (match_operand:QI 3 "const_0_to_63_operand")) 0)))
14707 (clobber (reg:CC FLAGS_REG))]
14708 "INTVAL (operands[3]) == 32 - INTVAL (operands[2])"
14709 "shld{l}\t{%2, %1, %0|%0, %1, %2}"
14710 [(set_attr "type" "ishift")
14711 (set_attr "prefix_0f" "1")
14712 (set_attr "length_immediate" "1")
14713 (set_attr "mode" "SI")
14714 (set_attr "pent_pair" "np")
14715 (set_attr "athlon_decode" "vector")
14716 (set_attr "amdfam10_decode" "vector")
14717 (set_attr "bdver1_decode" "vector")])
14718
;; APX NDD variant of x86_shld_1: constant-count SImode SHLD writing a
;; separate register destination.  Operand 3 is the left-shift count
;; (0..31) and operand 4 the complementary right-shift count; the insn
;; condition requires TARGET_APX_NDD and op4 == 32 - op3.  Operand 1 may be
;; a register or memory, operand 2 must be a register.
14719 (define_insn "x86_shld_ndd_1"
14720 [(set (match_operand:SI 0 "register_operand" "=r")
14721 (ior:SI (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "rm")
14722 (match_operand:QI 3 "const_0_to_31_operand"))
14723 (subreg:SI
14724 (lshiftrt:DI
14725 (zero_extend:DI
14726 (match_operand:SI 2 "register_operand" "r"))
14727 (match_operand:QI 4 "const_0_to_63_operand")) 0)))
14728 (clobber (reg:CC FLAGS_REG))]
14729 "TARGET_APX_NDD
14730 && INTVAL (operands[4]) == 32 - INTVAL (operands[3])"
14731 "shld{l}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
14732 [(set_attr "type" "ishift")
14733 (set_attr "length_immediate" "1")
14734 (set_attr "mode" "SI")])
14735
14736
;; Pre-reload pattern for (op4 << op2) | (op1 >> op3) in SImode with constant
;; counts satisfying op3 == 32 - op2 and no zero_extend/subreg wrapping.
;; It is never output directly ("#") and is always split ("&& 1") by the C
;; code below, which emits one of the shld/shrd insns depending on which
;; input (if any) is the same rtx as the destination, and ends with DONE so
;; the "(const_int 0)" replacement template is not used.
14737 (define_insn_and_split "*x86_shld_shrd_1_nozext"
14738 [(set (match_operand:SI 0 "nonimmediate_operand")
14739 (ior:SI (ashift:SI (match_operand:SI 4 "nonimmediate_operand")
14740 (match_operand:QI 2 "const_0_to_31_operand"))
14741 (lshiftrt:SI
14742 (match_operand:SI 1 "nonimmediate_operand")
14743 (match_operand:QI 3 "const_0_to_31_operand"))))
14744 (clobber (reg:CC FLAGS_REG))]
14745 "INTVAL (operands[3]) == 32 - INTVAL (operands[2])
14746 && ix86_pre_reload_split ()"
14747 "#"
14748 "&& 1"
14749 [(const_int 0)]
14750 {
/* Destination is the left-shifted input: shift it in place with SHLD,
   after forcing the other input into a register.  */
14751 if (rtx_equal_p (operands[4], operands[0]))
14752 {
14753 operands[1] = force_reg (SImode, operands[1]);
14754 emit_insn (gen_x86_shld_1 (operands[0], operands[1], operands[2], operands[3]));
14755 }
/* Destination is the right-shifted input: use SHRD on it instead, passing
   the two counts in swapped order.  */
14756 else if (rtx_equal_p (operands[1], operands[0]))
14757 {
14758 operands[4] = force_reg (SImode, operands[4]);
14759 emit_insn (gen_x86_shrd_1 (operands[0], operands[4], operands[3], operands[2]));
14760 }
/* Neither input is the destination and NDD forms exist: compute into a
   fresh pseudo with an NDD insn, choosing SHLD or SHRD so that a memory
   input lands in the operand slot that is passed first after TMP, then
   copy the pseudo to the destination.  */
14761 else if (TARGET_APX_NDD)
14762 {
14763 rtx tmp = gen_reg_rtx (SImode);
14764 if (MEM_P (operands[4]))
14765 {
14766 operands[1] = force_reg (SImode, operands[1]);
14767 emit_insn (gen_x86_shld_ndd_1 (tmp, operands[4], operands[1],
14768 operands[2], operands[3]));
14769 }
14770 else if (MEM_P (operands[1]))
14771 emit_insn (gen_x86_shrd_ndd_1 (tmp, operands[1], operands[4],
14772 operands[3], operands[2]));
14773 else
14774 emit_insn (gen_x86_shld_ndd_1 (tmp, operands[4], operands[1],
14775 operands[2], operands[3]));
14776 emit_move_insn (operands[0], tmp);
14777 }
/* No NDD: copy the left-shifted input into a fresh pseudo, SHLD it in
   place, and copy the result to the destination.  */
14778 else
14779 {
14780 operands[1] = force_reg (SImode, operands[1]);
14781 rtx tmp = gen_reg_rtx (SImode);
14782 emit_move_insn (tmp, operands[4]);
14783 emit_insn (gen_x86_shld_1 (tmp, operands[1], operands[2], operands[3]));
14784 emit_move_insn (operands[0], tmp);
14785 }
14786 DONE;
14787 })
14788
;; Pre-reload form of the SImode double-shift-left idiom written with an
;; unmasked count: op0 = (op0 << op2) | (op1 >> (32 - op2)).
;; It is never output directly ("#") and is always split ("&& 1") into the
;; x86_shld shape, with the count masked by 31 and the right-shifted half
;; taken as the low SImode subreg of a DImode shift of the zero-extended
;; operand 1.  The insn condition requires TARGET_64BIT although the
;; operation itself is SImode.
14789 (define_insn_and_split "*x86_shld_2"
14790 [(set (match_operand:SI 0 "nonimmediate_operand")
14791 (ior:SI (ashift:SI (match_dup 0)
14792 (match_operand:QI 2 "nonmemory_operand"))
14793 (lshiftrt:SI (match_operand:SI 1 "register_operand")
14794 (minus:QI (const_int 32) (match_dup 2)))))
14795 (clobber (reg:CC FLAGS_REG))]
14796 "TARGET_64BIT && ix86_pre_reload_split ()"
14797 "#"
14798 "&& 1"
;; Replacement: same destination, count masked to 0..31.
14799 [(parallel [(set (match_dup 0)
14800 (ior:SI (ashift:SI (match_dup 0)
14801 (and:QI (match_dup 2) (const_int 31)))
14802 (subreg:SI
14803 (lshiftrt:DI
14804 (zero_extend:DI (match_dup 1))
14805 (minus:QI (const_int 32)
14806 (and:QI (match_dup 2)
14807 (const_int 31)))) 0)))
14808 (clobber (reg:CC FLAGS_REG))])])
14809
;; Pre-reload form of the SImode double-shift-left idiom with a separate
;; (APX NDD) destination and an unmasked count:
;;   op0 = (op1 << op3) | (op2 >> (32 - op3)).
;; The insn is only valid while ix86_pre_reload_split () holds, is never
;; output directly ("#") and is always split ("&& 1") into two insns:
;;   1. the x86_shld_ndd shape (a SET plus the flags clobber, count masked
;;      with 31) computing into a fresh pseudo, operand 4;
;;   2. a plain move of that pseudo into operand 0, which may be memory.
;; The final move must be its own insn: placing it inside the PARALLEL
;; yields a three-element pattern that x86_shld_ndd does not match.  The
;; pseudo is fully written by the first insn, so it is not pre-loaded from
;; operand 0.
(define_insn_and_split "*x86_shld_ndd_2"
  [(set (match_operand:SI 0 "nonimmediate_operand")
        (ior:SI (ashift:SI (match_operand:SI 1 "nonimmediate_operand")
                           (match_operand:QI 3 "nonmemory_operand"))
                (lshiftrt:SI (match_operand:SI 2 "register_operand")
                             (minus:QI (const_int 32) (match_dup 3)))))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_APX_NDD
   && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(parallel [(set (match_dup 4)
                   (ior:SI (ashift:SI (match_dup 1)
                                      (and:QI (match_dup 3) (const_int 31)))
                           (subreg:SI
                             (lshiftrt:DI
                               (zero_extend:DI (match_dup 2))
                               (minus:QI (const_int 32)
                                         (and:QI (match_dup 3)
                                                 (const_int 31)))) 0)))
              (clobber (reg:CC FLAGS_REG))])
   (set (match_dup 0) (match_dup 4))]
{
  /* Fresh pseudo that receives the shld result before it is stored.  */
  operands[4] = gen_reg_rtx (SImode);
})
14836
;; Expander (SImode/DImode via SWI48, requires TARGET_CMOVE) that emits
;; three sets:
;;   1. flags = compare ((op2 & op4), 0) in CCZmode, where op4 is set by
;;      the preparation statement to the bit size of <MODE>mode;
;;   2. op0 = (flags != 0) ? op1 : op0;
;;   3. op1 = (flags != 0) ? op3 : op1.
;; That is, when the tested bit of the count register op2 is set, op1 is
;; moved into op0 and op3 into op1; otherwise both are left unchanged.
14837 (define_expand "@x86_shift<mode>_adj_1"
14838 [(set (reg:CCZ FLAGS_REG)
14839 (compare:CCZ (and:QI (match_operand:QI 2 "register_operand")
14840 (match_dup 4))
14841 (const_int 0)))
14842 (set (match_operand:SWI48 0 "register_operand")
14843 (if_then_else:SWI48 (ne (reg:CCZ FLAGS_REG) (const_int 0))
14844 (match_operand:SWI48 1 "register_operand")
14845 (match_dup 0)))
14846 (set (match_dup 1)
14847 (if_then_else:SWI48 (ne (reg:CCZ FLAGS_REG) (const_int 0))
14848 (match_operand:SWI48 3 "register_operand")
14849 (match_dup 1)))]
14850 "TARGET_CMOVE"
14851 "operands[4] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode));")
14852
;; Branching counterpart of the _adj_1 expander, available unconditionally
;; (empty condition).  The "use" template only declares the three operands;
;; all code is emitted by the C body, which ends with DONE:
;;   test op2 against the bit size of <MODE>mode;
;;   jump to a new label when the CCZ flags compare equal to zero;
;;   otherwise move op1 into op0 and call ix86_expand_clear on op1
;;   (presumably zeroing it -- the helper is defined elsewhere).
14853 (define_expand "@x86_shift<mode>_adj_2"
14854 [(use (match_operand:SWI48 0 "register_operand"))
14855 (use (match_operand:SWI48 1 "register_operand"))
14856 (use (match_operand:QI 2 "register_operand"))]
14857 ""
14858 {
14859 rtx_code_label *label = gen_label_rtx ();
14860 rtx tmp;
14861
14862 emit_insn (gen_testqi_ccz_1 (operands[2],
14863 GEN_INT (GET_MODE_BITSIZE (<MODE>mode))));
14864
/* Build and emit "if (flags == 0) goto label".  */
14865 tmp = gen_rtx_REG (CCZmode, FLAGS_REG);
14866 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
14867 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
14868 gen_rtx_LABEL_REF (VOIDmode, label),
14869 pc_rtx);
14870 tmp = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
14871 JUMP_LABEL (tmp) = label;
14872
/* Fall-through path, taken when the tested bit is set.  */
14873 emit_move_insn (operands[0], operands[1]);
14874 ix86_expand_clear (operands[1]);
14875
14876 emit_label (label);
/* The label is referenced only by the jump emitted above.  */
14877 LABEL_NUSES (label) = 1;
14878
14879 DONE;
14880 })
14881
14882 ;; Avoid useless masking of count operand.
;; Matches a SImode/DImode left shift whose count is the QImode lowpart
;; (subreg ... 0) of (and op2 op3), where op2 is a wider integer register
;; and the constant op3 has every bit of (bitsize - 1) set.  Valid only
;; while ix86_pre_reload_split () holds; always split ("&& 1") into a plain
;; shift whose count is the QImode lowpart of op2, i.e. the AND is dropped.
;; The two alternatives ("c" / "r" count, isa "*" / "bmi2") differ only in
;; the count register constraint.
14883 (define_insn_and_split "*ashl<mode>3_mask"
14884 [(set (match_operand:SWI48 0 "nonimmediate_operand")
14885 (ashift:SWI48
14886 (match_operand:SWI48 1 "nonimmediate_operand")
14887 (subreg:QI
14888 (and
14889 (match_operand 2 "int248_register_operand" "c,r")
14890 (match_operand 3 "const_int_operand")) 0)))
14891 (clobber (reg:CC FLAGS_REG))]
14892 "ix86_binary_operator_ok (ASHIFT, <MODE>mode, operands)
14893 && (INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
14894 == GET_MODE_BITSIZE (<MODE>mode)-1
14895 && ix86_pre_reload_split ()"
14896 "#"
14897 "&& 1"
14898 [(parallel
14899 [(set (match_dup 0)
14900 (ashift:SWI48 (match_dup 1)
14901 (match_dup 2)))
14902 (clobber (reg:CC FLAGS_REG))])]
14903 {
/* Make sure the count is in a register, then view it as QImode.  */
14904 operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
14905 operands[2] = gen_lowpart (QImode, operands[2]);
14906 }
14907 [(set_attr "isa" "*,bmi2")])
14908
;; Variant of *ashl<mode>3_mask in which the count is already QImode:
;; the shift count is (and:QI op2 op3) with the constant op3 covering every
;; bit of (bitsize - 1).  Valid only while ix86_pre_reload_split () holds
;; and always split ("&& 1") into a plain shift by op2.  No preparation
;; code is needed because op2 is already a QImode register.
14909 (define_insn_and_split "*ashl<mode>3_mask_1"
14910 [(set (match_operand:SWI48 0 "nonimmediate_operand")
14911 (ashift:SWI48
14912 (match_operand:SWI48 1 "nonimmediate_operand")
14913 (and:QI
14914 (match_operand:QI 2 "register_operand" "c,r")
14915 (match_operand:QI 3 "const_int_operand"))))
14916 (clobber (reg:CC FLAGS_REG))]
14917 "ix86_binary_operator_ok (ASHIFT, <MODE>mode, operands)
14918 && (INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
14919 == GET_MODE_BITSIZE (<MODE>mode)-1
14920 && ix86_pre_reload_split ()"
14921 "#"
14922 "&& 1"
14923 [(parallel
14924 [(set (match_dup 0)
14925 (ashift:SWI48 (match_dup 1)
14926 (match_dup 2)))
14927 (clobber (reg:CC FLAGS_REG))])]
14928 ""
14929 [(set_attr "isa" "*,bmi2")])
14930
;; BMI2 SHLX for SImode/DImode: register destination, register-or-memory
;; source, and a register count that has the same mode as the data.  The
;; pattern has no flags clobber.
14931 (define_insn "*bmi2_ashl<mode>3_1"
14932 [(set (match_operand:SWI48 0 "register_operand" "=r")
14933 (ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
14934 (match_operand:SWI48 2 "register_operand" "r")))]
14935 "TARGET_BMI2"
14936 "shlx\t{%2, %1, %0|%0, %1, %2}"
14937 [(set_attr "type" "ishiftx")
14938 (set_attr "mode" "<MODE>")])
14939
;; General SImode/DImode left shift with a flags clobber.  Alternatives:
;;   0: in-place SAL (operand 1 tied to operand 0), or "add %0, %0" when
;;      the type attribute selects "alu" for a shift by one;
;;   1: "l" source with "M" count, type "lea", output as "#";
;;   2: BMI2 (isa "bmi2"), type "ishiftx", output as "#";
;;   3: mask registers ("k", isa "avx512bw"), type "msklog", output as "#";
;;   4: APX NDD three-operand SAL (isa "apx_ndd"), always type "ishift".
;; In the type attribute the "msklog" test is placed ahead of the
;; TARGET_DOUBLE_WITH_ADD test, as in *ashlhi3_1 and *ashlqi3_1, so that a
;; mask-register shift by one is never classified as "alu"; the "alu"
;; output code asserts that operands 0 and 1 are identical and prints an
;; integer ADD, neither of which applies to alternative 3.
(define_insn "*ashl<mode>3_1"
  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r,?k,r")
        (ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "0,l,rm,k,rm")
                      (match_operand:QI 2 "nonmemory_operand" "c<S>,M,r,<KS>,c<S>")))
   (clobber (reg:CC FLAGS_REG))]
  "ix86_binary_operator_ok (ASHIFT, <MODE>mode, operands, TARGET_APX_NDD)"
{
  bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
  switch (get_attr_type (insn))
    {
    case TYPE_LEA:
    case TYPE_ISHIFTX:
    case TYPE_MSKLOG:
      return "#";

    case TYPE_ALU:
      gcc_assert (operands[2] == const1_rtx);
      gcc_assert (rtx_equal_p (operands[0], operands[1]));
      return "add{<imodesuffix>}\t%0, %0";

    default:
      if (operands[2] == const1_rtx
          && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
          /* For NDD form instructions related to TARGET_SHIFT1, the $1
             immediate do not need to be omitted as assembler will map it
             to use shorter encoding. */
          && !use_ndd)
        return "sal{<imodesuffix>}\t%0";
      else
        return use_ndd ? "sal{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
                       : "sal{<imodesuffix>}\t{%2, %0|%0, %2}";
    }
}
  [(set_attr "isa" "*,*,bmi2,avx512bw,apx_ndd")
   (set (attr "type")
     (cond [(eq_attr "alternative" "1")
              (const_string "lea")
            (eq_attr "alternative" "2")
              (const_string "ishiftx")
            (eq_attr "alternative" "3")
              (const_string "msklog")
            (eq_attr "alternative" "4")
              (const_string "ishift")
            (and (and (match_test "TARGET_DOUBLE_WITH_ADD")
                      (match_operand 0 "register_operand"))
                 (match_operand 2 "const1_operand"))
              (const_string "alu")
           ]
           (const_string "ishift")))
   (set (attr "length_immediate")
     (if_then_else
       (ior (eq_attr "type" "alu")
            (and (eq_attr "type" "ishift")
                 (and (match_operand 2 "const1_operand")
                      (ior (match_test "TARGET_SHIFT1")
                           (match_test "optimize_function_for_size_p (cfun)")))))
       (const_string "0")
       (const_string "*")))
   (set_attr "mode" "<MODE>")])
14999
15000 ;; Convert shift to the shiftx pattern to avoid flags dependency.
;; After reload, with TARGET_BMI2, rewrite a register-destination shift by
;; a QImode register count (with flags clobber) into the same shift without
;; the clobber.  The preparation statement re-expresses the count register
;; in <MODE>mode via gen_lowpart, matching the count mode used by
;; *bmi2_ashl<mode>3_1.
15001 (define_split
15002 [(set (match_operand:SWI48 0 "register_operand")
15003 (ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand")
15004 (match_operand:QI 2 "register_operand")))
15005 (clobber (reg:CC FLAGS_REG))]
15006 "TARGET_BMI2 && reload_completed"
15007 [(set (match_dup 0)
15008 (ashift:SWI48 (match_dup 1) (match_dup 2)))]
15009 "operands[2] = gen_lowpart (<MODE>mode, operands[2]);")
15010
;; BMI2 SHLX computing an SImode shift whose result is zero-extended into a
;; DImode register (64-bit targets only).  The destination is printed with
;; the %k modifier; the count is an SImode register; there is no flags
;; clobber.
15011 (define_insn "*bmi2_ashlsi3_1_zext"
15012 [(set (match_operand:DI 0 "register_operand" "=r")
15013 (zero_extend:DI
15014 (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "rm")
15015 (match_operand:SI 2 "register_operand" "r"))))]
15016 "TARGET_64BIT && TARGET_BMI2"
15017 "shlx\t{%2, %1, %k0|%k0, %1, %2}"
15018 [(set_attr "type" "ishiftx")
15019 (set_attr "mode" "SI")])
15020
;; SImode left shift zero-extended into a DImode register, with flags
;; clobber (64-bit targets only).  Alternatives:
;;   0: in-place SAL on %k0, or "add{l} %k0, %k0" when the type is "alu";
;;   1: type "lea", output as "#";
;;   2: BMI2 (isa "bmi2"), type "ishiftx", output as "#";
;;   3: APX NDD three-operand SAL (isa "apx_ndd"), always type "ishift".
;; The one-operand "sal %k0" short form is used for a shift by one when
;; TARGET_SHIFT1 or optimizing for size, but never for the NDD alternative.
15021 (define_insn "*ashlsi3_1_zext"
15022 [(set (match_operand:DI 0 "register_operand" "=r,r,r,r")
15023 (zero_extend:DI
15024 (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "0,l,rm,rm")
15025 (match_operand:QI 2 "nonmemory_operand" "cI,M,r,cI"))))
15026 (clobber (reg:CC FLAGS_REG))]
15027 "TARGET_64BIT
15028 && ix86_binary_operator_ok (ASHIFT, SImode, operands, TARGET_APX_NDD)"
15029 {
15030 bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
15031 switch (get_attr_type (insn))
15032 {
15033 case TYPE_LEA:
15034 case TYPE_ISHIFTX:
15035 return "#";
15036
15037 case TYPE_ALU:
15038 gcc_assert (operands[2] == const1_rtx);
15039 return "add{l}\t%k0, %k0";
15040
15041 default:
15042 if (operands[2] == const1_rtx
15043 && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
15044 && !use_ndd)
15045 return "sal{l}\t%k0";
15046 else
15047 return use_ndd ? "sal{l}\t{%2, %1, %k0|%k0, %1, %2}"
15048 : "sal{l}\t{%2, %k0|%k0, %2}";
15049 }
15050 }
15051 [(set_attr "isa" "*,*,bmi2,apx_ndd")
;; Alternatives 1-3 have fixed types; only alternative 0 can become "alu".
15052 (set (attr "type")
15053 (cond [(eq_attr "alternative" "1")
15054 (const_string "lea")
15055 (eq_attr "alternative" "2")
15056 (const_string "ishiftx")
15057 (eq_attr "alternative" "3")
15058 (const_string "ishift")
15059 (and (match_test "TARGET_DOUBLE_WITH_ADD")
15060 (match_operand 2 "const1_operand"))
15061 (const_string "alu")
15062 ]
15063 (const_string "ishift")))
;; No immediate byte is counted for "alu", nor for an "ishift" by one when
;; TARGET_SHIFT1 or optimizing for size.
15064 (set (attr "length_immediate")
15065 (if_then_else
15066 (ior (eq_attr "type" "alu")
15067 (and (eq_attr "type" "ishift")
15068 (and (match_operand 2 "const1_operand")
15069 (ior (match_test "TARGET_SHIFT1")
15070 (match_test "optimize_function_for_size_p (cfun)")))))
15071 (const_string "0")
15072 (const_string "*")))
15073 (set_attr "mode" "SI")])
15074
15075 ;; Convert shift to the shiftx pattern to avoid flags dependency.
;; Zero-extending counterpart of the SWI48 shiftx split: after reload, on a
;; 64-bit BMI2 target, drop the flags clobber from a zero-extended SImode
;; shift by a QImode register and re-express the count register in SImode,
;; the count mode used by *bmi2_ashlsi3_1_zext.
15076 (define_split
15077 [(set (match_operand:DI 0 "register_operand")
15078 (zero_extend:DI
15079 (ashift:SI (match_operand:SI 1 "nonimmediate_operand")
15080 (match_operand:QI 2 "register_operand"))))
15081 (clobber (reg:CC FLAGS_REG))]
15082 "TARGET_64BIT && TARGET_BMI2 && reload_completed"
15083 [(set (match_dup 0)
15084 (zero_extend:DI (ashift:SI (match_dup 1) (match_dup 2))))]
15085 "operands[2] = gen_lowpart (SImode, operands[2]);")
15086
;; HImode left shift with flags clobber.  Alternatives:
;;   0: in-place SAL, or "add{w} %0, %0" when the type is "alu";
;;   1: type "lea" (mode attribute SI), output as "#";
;;   2: mask registers ("k", isa "avx512f"), type "msklog", output as "#";
;;   3: APX NDD three-operand SAL (isa "apx_ndd"), always type "ishift".
;; The one-operand "sal %0" short form is used for a shift by one when
;; TARGET_SHIFT1 or optimizing for size, but never for the NDD alternative.
15087 (define_insn "*ashlhi3_1"
15088 [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,Yp,?k,r")
15089 (ashift:HI (match_operand:HI 1 "nonimmediate_operand" "0,l,k,rm")
15090 (match_operand:QI 2 "nonmemory_operand" "cI,M,Ww,cI")))
15091 (clobber (reg:CC FLAGS_REG))]
15092 "ix86_binary_operator_ok (ASHIFT, HImode, operands, TARGET_APX_NDD)"
15093 {
15094 bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
15095 switch (get_attr_type (insn))
15096 {
15097 case TYPE_LEA:
15098 case TYPE_MSKLOG:
15099 return "#";
15100
15101 case TYPE_ALU:
15102 gcc_assert (operands[2] == const1_rtx);
15103 return "add{w}\t%0, %0";
15104
15105 default:
15106 if (operands[2] == const1_rtx
15107 && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
15108 && !use_ndd)
15109 return "sal{w}\t%0";
15110 else
15111 return use_ndd ? "sal{w}\t{%2, %1, %0|%0, %1, %2}"
15112 : "sal{w}\t{%2, %0|%0, %2}";
15113 }
15114 }
15115 [(set_attr "isa" "*,*,avx512f,apx_ndd")
;; Alternatives 1-3 have fixed types, tested before the "alu" case, so
;; only alternative 0 can become "alu".
15116 (set (attr "type")
15117 (cond [(eq_attr "alternative" "1")
15118 (const_string "lea")
15119 (eq_attr "alternative" "2")
15120 (const_string "msklog")
15121 (eq_attr "alternative" "3")
15122 (const_string "ishift")
15123 (and (and (match_test "TARGET_DOUBLE_WITH_ADD")
15124 (match_operand 0 "register_operand"))
15125 (match_operand 2 "const1_operand"))
15126 (const_string "alu")
15127 ]
15128 (const_string "ishift")))
15129 (set (attr "length_immediate")
15130 (if_then_else
15131 (ior (eq_attr "type" "alu")
15132 (and (eq_attr "type" "ishift")
15133 (and (match_operand 2 "const1_operand")
15134 (ior (match_test "TARGET_SHIFT1")
15135 (match_test "optimize_function_for_size_p (cfun)")))))
15136 (const_string "0")
15137 (const_string "*")))
15138 (set_attr "mode" "HI,SI,HI,HI")])
15139
;; QImode left shift with flags clobber.  Alternatives (mode attribute in
;; parentheses):
;;   0 (QI): in-place byte SAL on a "q" register or memory;
;;   1 (SI): in-place shift on any "r" register, emitted as a 32-bit SAL;
;;   2 (SI): type "lea", output as "#";
;;   3 (QI): mask registers ("k", isa "avx512dq"), type "msklog", "#";
;;   4 (QI): APX NDD three-operand byte SAL (isa "apx_ndd").
;; For type "alu" (shift by one as an ADD) a 32-bit add is used when
;; operand 1 is a register that is not a valid QImode register number,
;; otherwise a byte add.
15140 (define_insn "*ashlqi3_1"
15141 [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r,Yp,?k,r")
15142 (ashift:QI (match_operand:QI 1 "nonimmediate_operand" "0,0,l,k,rm")
15143 (match_operand:QI 2 "nonmemory_operand" "cI,cI,M,Wb,cI")))
15144 (clobber (reg:CC FLAGS_REG))]
15145 "ix86_binary_operator_ok (ASHIFT, QImode, operands, TARGET_APX_NDD)"
15146 {
15147 bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
15148 switch (get_attr_type (insn))
15149 {
15150 case TYPE_LEA:
15151 case TYPE_MSKLOG:
15152 return "#";
15153
15154 case TYPE_ALU:
15155 gcc_assert (operands[2] == const1_rtx);
15156 if (REG_P (operands[1]) && !ANY_QI_REGNO_P (REGNO (operands[1])))
15157 return "add{l}\t%k0, %k0";
15158 else
15159 return "add{b}\t%0, %0";
15160
15161 default:
/* Short one-operand form for a shift by one; never used for NDD.  */
15162 if (operands[2] == const1_rtx
15163 && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
15164 && !use_ndd)
15165 {
15166 if (get_attr_mode (insn) == MODE_SI)
15167 return "sal{l}\t%k0";
15168 else
15169 return "sal{b}\t%0";
15170 }
15171 else
15172 {
15173 if (get_attr_mode (insn) == MODE_SI)
15174 return "sal{l}\t{%2, %k0|%k0, %2}";
15175 else
15176 return use_ndd ? "sal{b}\t{%2, %1, %0|%0, %1, %2}"
15177 : "sal{b}\t{%2, %0|%0, %2}";
15178 }
15179 }
15180 }
15181 [(set_attr "isa" "*,*,*,avx512dq,apx_ndd")
15182 (set (attr "type")
15183 (cond [(eq_attr "alternative" "2")
15184 (const_string "lea")
15185 (eq_attr "alternative" "3")
15186 (const_string "msklog")
15187 (eq_attr "alternative" "4")
15188 (const_string "ishift")
15189 (and (and (match_test "TARGET_DOUBLE_WITH_ADD")
15190 (match_operand 0 "register_operand"))
15191 (match_operand 2 "const1_operand"))
15192 (const_string "alu")
15193 ]
15194 (const_string "ishift")))
15195 (set (attr "length_immediate")
15196 (if_then_else
15197 (ior (eq_attr "type" "alu")
15198 (and (eq_attr "type" "ishift")
15199 (and (match_operand 2 "const1_operand")
15200 (ior (match_test "TARGET_SHIFT1")
15201 (match_test "optimize_function_for_size_p (cfun)")))))
15202 (const_string "0")
15203 (const_string "*")))
15204 (set_attr "mode" "QI,SI,SI,QI,QI")
15205 ;; Potential partial reg stall on alternative 1.
;; The attribute below applies the same !TARGET_PARTIAL_REG_STALL
;; restriction to alternative 4 (the NDD form) as well.
15206 (set (attr "preferred_for_speed")
15207 (cond [(eq_attr "alternative" "1,4")
15208 (symbol_ref "!TARGET_PARTIAL_REG_STALL")]
15209 (symbol_ref "true")))])
15210
15211 ;; Alternative 1 is needed to work around LRA limitation, see PR82524.
;; QImode/HImode (SWI12) left shift that writes only the low part of
;; operand 0 (strict_low_part).  Enabled when !TARGET_PARTIAL_REG_STALL or
;; when optimizing for size.  Alternative 0 shifts in place (operand 1 tied
;; to operand 0) using SAL, or ADD for type "alu".  Alternative 1 (separate
;; early-clobbered destination) is output as "#" and split after reload,
;; when operands 0 and 1 differ, into a strict_low_part copy of operand 1
;; into operand 0 followed by the in-place shift.
15212 (define_insn_and_split "*ashl<mode>3_1_slp"
15213 [(set (strict_low_part (match_operand:SWI12 0 "register_operand" "+<r>,&<r>"))
15214 (ashift:SWI12 (match_operand:SWI12 1 "register_operand" "0,!<r>")
15215 (match_operand:QI 2 "nonmemory_operand" "cI,cI")))
15216 (clobber (reg:CC FLAGS_REG))]
15217 "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
15218 {
/* Any alternative other than 0 must be split.  */
15219 if (which_alternative)
15220 return "#";
15221
15222 switch (get_attr_type (insn))
15223 {
15224 case TYPE_ALU:
15225 gcc_assert (operands[2] == const1_rtx);
15226 return "add{<imodesuffix>}\t%0, %0";
15227
15228 default:
15229 if (operands[2] == const1_rtx
15230 && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
15231 return "sal{<imodesuffix>}\t%0";
15232 else
15233 return "sal{<imodesuffix>}\t{%2, %0|%0, %2}";
15234 }
15235 }
15236 "&& reload_completed
15237 && !(rtx_equal_p (operands[0], operands[1]))"
;; Split result: copy the source into the low part, then shift in place.
15238 [(set (strict_low_part (match_dup 0)) (match_dup 1))
15239 (parallel
15240 [(set (strict_low_part (match_dup 0))
15241 (ashift:SWI12 (match_dup 0) (match_dup 2)))
15242 (clobber (reg:CC FLAGS_REG))])]
15243 ""
15244 [(set (attr "type")
15245 (cond [(and (match_test "TARGET_DOUBLE_WITH_ADD")
15246 (match_operand 2 "const1_operand"))
15247 (const_string "alu")
15248 ]
15249 (const_string "ishift")))
15250 (set (attr "length_immediate")
15251 (if_then_else
15252 (ior (eq_attr "type" "alu")
15253 (and (eq_attr "type" "ishift")
15254 (and (match_operand 2 "const1_operand")
15255 (ior (match_test "TARGET_SHIFT1")
15256 (match_test "optimize_function_for_size_p (cfun)")))))
15257 (const_string "0")
15258 (const_string "*")))
15259 (set_attr "mode" "<MODE>")])
15260
15261 ;; Convert ashift to the lea pattern to avoid flags dependency.
;; After reload, when source and destination are different hard registers,
;; rewrite a left shift by a constant 0..3 (with flags clobber) as a
;; multiplication by 1 << count in <LEAMODE>mode without the clobber.
;; When <MODE>mode differs from <LEAMODE>mode both registers are
;; re-expressed in <LEAMODE>mode via gen_lowpart.
15262 (define_split
15263 [(set (match_operand:SWI 0 "general_reg_operand")
15264 (ashift:SWI (match_operand:SWI 1 "index_reg_operand")
15265 (match_operand 2 "const_0_to_3_operand")))
15266 (clobber (reg:CC FLAGS_REG))]
15267 "reload_completed
15268 && REGNO (operands[0]) != REGNO (operands[1])"
15269 [(set (match_dup 0)
15270 (mult:<LEAMODE> (match_dup 1) (match_dup 2)))]
15271 {
15272 if (<MODE>mode != <LEAMODE>mode)
15273 {
15274 operands[0] = gen_lowpart (<LEAMODE>mode, operands[0]);
15275 operands[1] = gen_lowpart (<LEAMODE>mode, operands[1]);
15276 }
/* Shift count N becomes the multiplier 1 << N (1, 2, 4 or 8).  */
15277 operands[2] = GEN_INT (1 << INTVAL (operands[2]));
15278 })
15279
15280 ;; Convert ashift to the lea pattern to avoid flags dependency.
;; Zero-extending counterpart of the ashift-to-lea split (64-bit targets
;; only): after reload, when source and destination are different hard
;; registers, a zero-extended SImode shift by a constant 0..3 becomes a
;; zero-extended SImode multiplication by 1 << count, without the flags
;; clobber.
15281 (define_split
15282 [(set (match_operand:DI 0 "general_reg_operand")
15283 (zero_extend:DI
15284 (ashift:SI (match_operand:SI 1 "index_reg_operand")
15285 (match_operand 2 "const_0_to_3_operand"))))
15286 (clobber (reg:CC FLAGS_REG))]
15287 "TARGET_64BIT && reload_completed
15288 && REGNO (operands[0]) != REGNO (operands[1])"
15289 [(set (match_dup 0)
15290 (zero_extend:DI (mult:SI (match_dup 1) (match_dup 2))))]
15291 {
15292 operands[1] = gen_lowpart (SImode, operands[1]);
/* Shift count N becomes the multiplier 1 << N (1, 2, 4 or 8).  */
15293 operands[2] = GEN_INT (1 << INTVAL (operands[2]));
15294 })
15295
15296 ;; This pattern can't accept a variable shift count, since shifts by
15297 ;; zero don't affect the flags. We assume that shifts by constant
15298 ;; zero are optimized away.
;; Left shift by an immediate count that also sets the flags register from
;; a comparison of the shifted value with zero; the insn condition requires
;; ix86_match_ccmode (insn, CCGOCmode).  Alternative 0 shifts in place
;; (operand 1 tied to operand 0); alternative 1 is the APX NDD form with a
;; register destination.
;; The pattern is enabled when optimizing for size, when
;; !TARGET_PARTIAL_FLAG_REG_STALL, or for a shift by one when TARGET_SHIFT1
;; holds or the shift can be done as an ADD (TARGET_DOUBLE_WITH_ADD with a
;; register destination).
15299 (define_insn "*ashl<mode>3_cmp"
15300 [(set (reg FLAGS_REG)
15301 (compare
15302 (ashift:SWI (match_operand:SWI 1 "nonimmediate_operand" "0,rm")
15303 (match_operand:QI 2 "<shift_immediate_operand>" "<S>,<S>"))
15304 (const_int 0)))
15305 (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,r")
15306 (ashift:SWI (match_dup 1) (match_dup 2)))]
15307 "(optimize_function_for_size_p (cfun)
15308 || !TARGET_PARTIAL_FLAG_REG_STALL
15309 || (operands[2] == const1_rtx
15310 && (TARGET_SHIFT1
15311 || (TARGET_DOUBLE_WITH_ADD && REG_P (operands[0])))))
15312 && ix86_match_ccmode (insn, CCGOCmode)
15313 && ix86_binary_operator_ok (ASHIFT, <MODE>mode, operands, TARGET_APX_NDD)"
15314 {
15315 bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
15316 switch (get_attr_type (insn))
15317 {
15318 case TYPE_ALU:
15319 gcc_assert (operands[2] == const1_rtx);
15320 return "add{<imodesuffix>}\t%0, %0";
15321
15322 default:
15323 if (operands[2] == const1_rtx
15324 && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
15325 && !use_ndd)
15326 return "sal{<imodesuffix>}\t%0";
15327 else
15328 return use_ndd ? "sal{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
15329 : "sal{<imodesuffix>}\t{%2, %0|%0, %2}";
15330 }
15331 }
15332 [(set_attr "isa" "*,apx_ndd")
;; The NDD alternative is always "ishift"; only alternative 0 can be "alu".
15333 (set (attr "type")
15334 (cond [(eq_attr "alternative" "1")
15335 (const_string "ishift")
15336 (and (and (match_test "TARGET_DOUBLE_WITH_ADD")
15337 (match_operand 0 "register_operand"))
15338 (match_operand 2 "const1_operand"))
15339 (const_string "alu")
15340 ]
15341 (const_string "ishift")))
15342 (set (attr "length_immediate")
15343 (if_then_else
15344 (ior (eq_attr "type" "alu")
15345 (and (eq_attr "type" "ishift")
15346 (and (match_operand 2 "const1_operand")
15347 (ior (match_test "TARGET_SHIFT1")
15348 (match_test "optimize_function_for_size_p (cfun)")))))
15349 (const_string "0")
15350 (const_string "*")))
15351 (set_attr "mode" "<MODE>")])
15352
;; SImode left shift by a constant 1..31 that sets the flags register from
;; a comparison of the shifted value with zero and stores the result
;; zero-extended into a DImode register (64-bit targets only).
;; Alternative 0 shifts in place; alternative 1 is the APX NDD form.  The
;; enabling condition mirrors *ashl<mode>3_cmp, except that the ADD case
;; does not test REG_P because operand 0 is always a register here.
15353 (define_insn "*ashlsi3_cmp_zext"
15354 [(set (reg FLAGS_REG)
15355 (compare
15356 (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "0,rm")
15357 (match_operand:QI 2 "const_1_to_31_operand"))
15358 (const_int 0)))
15359 (set (match_operand:DI 0 "register_operand" "=r,r")
15360 (zero_extend:DI (ashift:SI (match_dup 1) (match_dup 2))))]
15361 "TARGET_64BIT
15362 && (optimize_function_for_size_p (cfun)
15363 || !TARGET_PARTIAL_FLAG_REG_STALL
15364 || (operands[2] == const1_rtx
15365 && (TARGET_SHIFT1
15366 || TARGET_DOUBLE_WITH_ADD)))
15367 && ix86_match_ccmode (insn, CCGOCmode)
15368 && ix86_binary_operator_ok (ASHIFT, SImode, operands, TARGET_APX_NDD)"
15369 {
15370 bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
15371 switch (get_attr_type (insn))
15372 {
15373 case TYPE_ALU:
15374 gcc_assert (operands[2] == const1_rtx);
15375 return "add{l}\t%k0, %k0";
15376
15377 default:
15378 if (operands[2] == const1_rtx
15379 && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
15380 && !use_ndd)
15381 return "sal{l}\t%k0";
15382 else
15383 return use_ndd ? "sal{l}\t{%2, %1, %k0|%k0, %1, %2}"
15384 : "sal{l}\t{%2, %k0|%k0, %2}";
15385 }
15386 }
15387 [(set_attr "isa" "*,apx_ndd")
15388 (set (attr "type")
15389 (cond [(eq_attr "alternative" "1")
15390 (const_string "ishift")
15391 (and (match_test "TARGET_DOUBLE_WITH_ADD")
15392 (match_operand 2 "const1_operand"))
15393 (const_string "alu")
15394 ]
15395 (const_string "ishift")))
15396 (set (attr "length_immediate")
15397 (if_then_else
15398 (ior (eq_attr "type" "alu")
15399 (and (eq_attr "type" "ishift")
15400 (and (match_operand 2 "const1_operand")
15401 (ior (match_test "TARGET_SHIFT1")
15402 (match_test "optimize_function_for_size_p (cfun)")))))
15403 (const_string "0")
15404 (const_string "*")))
15405 (set_attr "mode" "SI")])
15406
;; Flags-only variant of *ashl<mode>3_cmp: the flags register is set from
;; a comparison of the shifted value with zero, and the shifted value
;; itself is written to a scratch register (operand 0) that is only
;; clobbered.  Alternative 0 ties operand 1 to the scratch; alternative 1
;; is the APX NDD form.  Unlike *ashl<mode>3_cmp, the insn condition does
;; not call ix86_binary_operator_ok.
15407 (define_insn "*ashl<mode>3_cconly"
15408 [(set (reg FLAGS_REG)
15409 (compare
15410 (ashift:SWI (match_operand:SWI 1 "nonimmediate_operand" "0,rm")
15411 (match_operand:QI 2 "<shift_immediate_operand>" "<S>,<S>"))
15412 (const_int 0)))
15413 (clobber (match_scratch:SWI 0 "=<r>,r"))]
15414 "(optimize_function_for_size_p (cfun)
15415 || !TARGET_PARTIAL_FLAG_REG_STALL
15416 || (operands[2] == const1_rtx
15417 && (TARGET_SHIFT1
15418 || TARGET_DOUBLE_WITH_ADD)))
15419 && ix86_match_ccmode (insn, CCGOCmode)"
15420 {
15421 bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
15422 switch (get_attr_type (insn))
15423 {
15424 case TYPE_ALU:
15425 gcc_assert (operands[2] == const1_rtx);
15426 return "add{<imodesuffix>}\t%0, %0";
15427
15428 default:
15429 if (operands[2] == const1_rtx
15430 && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
15431 && !use_ndd)
15432 return "sal{<imodesuffix>}\t%0";
15433 else
15434 return use_ndd ? "sal{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
15435 : "sal{<imodesuffix>}\t{%2, %0|%0, %2}";
15436 }
15437 }
15438 [(set_attr "isa" "*,apx_ndd")
15439 (set (attr "type")
15440 (cond [(eq_attr "alternative" "1")
15441 (const_string "ishift")
15442 (and (and (match_test "TARGET_DOUBLE_WITH_ADD")
15443 (match_operand 0 "register_operand"))
15444 (match_operand 2 "const1_operand"))
15445 (const_string "alu")
15446 ]
15447 (const_string "ishift")))
15448 (set (attr "length_immediate")
15449 (if_then_else
15450 (ior (eq_attr "type" "alu")
15451 (and (eq_attr "type" "ishift")
15452 (and (match_operand 2 "const1_operand")
15453 (ior (match_test "TARGET_SHIFT1")
15454 (match_test "optimize_function_for_size_p (cfun)")))))
15455 (const_string "0")
15456 (const_string "*")))
15457 (set_attr "mode" "<MODE>")])
15458
15459 ;; Alternative 1 is needed to work around LRA limitation, see PR82524.
;; Left shift of the 8-bit field at bit offset 8 (the zero_extract with
;; size 8, position 8) of a "Q" register, printed with the %h modifier.
;; The source field is extracted from operand 1 through operator 3, shifted
;; as QImode by operand 2, and stored into the same field of operand 0.
;; Alternative 0 works in place (operand 1 tied to operand 0).  Alternative
;; 1 is output as "#" and split after reload, when operands 0 and 1 differ,
;; into a field copy from operand 1 to operand 0 followed by the in-place
;; shift.  The insn condition is empty, so the split condition is written
;; without a leading "&&".
15460 (define_insn_and_split "*ashlqi_ext<mode>_1"
15461 [(set (zero_extract:SWI248
15462 (match_operand 0 "int248_register_operand" "+Q,&Q")
15463 (const_int 8)
15464 (const_int 8))
15465 (subreg:SWI248
15466 (ashift:QI
15467 (subreg:QI
15468 (match_operator:SWI248 3 "extract_operator"
15469 [(match_operand 1 "int248_register_operand" "0,!Q")
15470 (const_int 8)
15471 (const_int 8)]) 0)
15472 (match_operand:QI 2 "nonmemory_operand" "cI,cI")) 0))
15473 (clobber (reg:CC FLAGS_REG))]
15474 ""
15475 {
/* Any alternative other than 0 must be split.  */
15476 if (which_alternative)
15477 return "#";
15478
15479 switch (get_attr_type (insn))
15480 {
15481 case TYPE_ALU:
15482 gcc_assert (operands[2] == const1_rtx);
15483 return "add{b}\t%h0, %h0";
15484
15485 default:
15486 if (operands[2] == const1_rtx
15487 && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
15488 return "sal{b}\t%h0";
15489 else
15490 return "sal{b}\t{%2, %h0|%h0, %2}";
15491 }
15492 }
15493 "reload_completed
15494 && !(rtx_equal_p (operands[0], operands[1]))"
;; Split result: copy the 8-bit field, then shift it in place.
15495 [(set (zero_extract:SWI248
15496 (match_dup 0) (const_int 8) (const_int 8))
15497 (zero_extract:SWI248
15498 (match_dup 1) (const_int 8) (const_int 8)))
15499 (parallel
15500 [(set (zero_extract:SWI248
15501 (match_dup 0) (const_int 8) (const_int 8))
15502 (subreg:SWI248
15503 (ashift:QI
15504 (subreg:QI
15505 (match_op_dup 3
15506 [(match_dup 0) (const_int 8) (const_int 8)]) 0)
15507 (match_dup 2)) 0))
15508 (clobber (reg:CC FLAGS_REG))])]
15509 ""
15510 [(set (attr "type")
15511 (cond [(and (match_test "TARGET_DOUBLE_WITH_ADD")
15512 (match_operand 2 "const1_operand"))
15513 (const_string "alu")
15514 ]
15515 (const_string "ishift")))
15516 (set (attr "length_immediate")
15517 (if_then_else
15518 (ior (eq_attr "type" "alu")
15519 (and (eq_attr "type" "ishift")
15520 (and (match_operand 2 "const1_operand")
15521 (ior (match_test "TARGET_SHIFT1")
15522 (match_test "optimize_function_for_size_p (cfun)")))))
15523 (const_string "0")
15524 (const_string "*")))
15525 (set_attr "mode" "QI")])
15526
15527 ;; See comment above `ashl<mode>3' about how this works.
15528
;; Named expander for the shift codes in the any_shiftrt iterator
;; (presumably the logical and arithmetic right shifts; the iterator is
;; defined elsewhere) over all SDWIM modes, with a QImode count.  It has no
;; enabling condition and delegates entirely to
;; ix86_expand_binary_operator, passing TARGET_APX_NDD as the last
;; argument, then finishes with DONE so the RTL template itself is not
;; emitted.
15529 (define_expand "<insn><mode>3"
15530 [(set (match_operand:SDWIM 0 "<shift_operand>")
15531 (any_shiftrt:SDWIM (match_operand:SDWIM 1 "<shift_operand>")
15532 (match_operand:QI 2 "nonmemory_operand")))]
15533 ""
15534 {
15535 ix86_expand_binary_operator (<CODE>, <MODE>mode, operands, TARGET_APX_NDD);
15536 DONE;
15537 })
15538
15539 ;; Avoid useless masking of count operand.
;; any_shiftrt counterpart of *ashl<mode>3_mask.  Matches a SImode/DImode
;; shift whose count is the QImode lowpart of (and op2 op3), where op2 is a
;; wider integer register and the constant op3 has every bit of
;; (bitsize - 1) set.  Valid only while ix86_pre_reload_split () holds;
;; always split ("&& 1") into a plain shift whose count is the QImode
;; lowpart of op2, i.e. the AND is dropped.
15540 (define_insn_and_split "*<insn><mode>3_mask"
15541 [(set (match_operand:SWI48 0 "nonimmediate_operand")
15542 (any_shiftrt:SWI48
15543 (match_operand:SWI48 1 "nonimmediate_operand")
15544 (subreg:QI
15545 (and
15546 (match_operand 2 "int248_register_operand" "c,r")
15547 (match_operand 3 "const_int_operand")) 0)))
15548 (clobber (reg:CC FLAGS_REG))]
15549 "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
15550 && (INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
15551 == GET_MODE_BITSIZE (<MODE>mode)-1
15552 && ix86_pre_reload_split ()"
15553 "#"
15554 "&& 1"
15555 [(parallel
15556 [(set (match_dup 0)
15557 (any_shiftrt:SWI48 (match_dup 1)
15558 (match_dup 2)))
15559 (clobber (reg:CC FLAGS_REG))])]
15560 {
/* Make sure the count is in a register, then view it as QImode.  */
15561 operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
15562 operands[2] = gen_lowpart (QImode, operands[2]);
15563 }
15564 [(set_attr "isa" "*,bmi2")])
15565
;; Variant of *<insn><mode>3_mask in which the count is already QImode:
;; the shift count is (and:QI op2 op3) with the constant op3 covering every
;; bit of (bitsize - 1).  Valid only while ix86_pre_reload_split () holds
;; and always split ("&& 1") into a plain shift by op2; no preparation code
;; is needed because op2 is already a QImode register.
15566 (define_insn_and_split "*<insn><mode>3_mask_1"
15567 [(set (match_operand:SWI48 0 "nonimmediate_operand")
15568 (any_shiftrt:SWI48
15569 (match_operand:SWI48 1 "nonimmediate_operand")
15570 (and:QI
15571 (match_operand:QI 2 "register_operand" "c,r")
15572 (match_operand:QI 3 "const_int_operand"))))
15573 (clobber (reg:CC FLAGS_REG))]
15574 "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
15575 && (INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
15576 == GET_MODE_BITSIZE (<MODE>mode)-1
15577 && ix86_pre_reload_split ()"
15578 "#"
15579 "&& 1"
15580 [(parallel
15581 [(set (match_dup 0)
15582 (any_shiftrt:SWI48 (match_dup 1)
15583 (match_dup 2)))
15584 (clobber (reg:CC FLAGS_REG))])]
15585 ""
15586 [(set_attr "isa" "*,bmi2")])
15587
;; Double-word right shift whose count is the QImode low part of
;; (operand 2 AND constant operand 3).  The pattern is accepted, before
;; reload only, when either
;;   - the mask has the bit equal to the half-word bit size
;;     (<MODE_SIZE> * BITS_PER_UNIT) clear, or
;;   - the mask has all of the low 2 * <MODE_SIZE> * BITS_PER_UNIT - 1
;;     bits set.
;; In the first case the split emits a shrd-style combination on one
;; half followed by a plain half-word shift on the other half; in the
;; second case the AND is simply dropped and the generic double-word
;; shift is emitted instead.
15588 (define_insn_and_split "*<insn><dwi>3_doubleword_mask"
15589 [(set (match_operand:<DWI> 0 "register_operand")
15590 (any_shiftrt:<DWI>
15591 (match_operand:<DWI> 1 "register_operand")
15592 (subreg:QI
15593 (and
15594 (match_operand 2 "int248_register_operand" "c")
15595 (match_operand 3 "const_int_operand")) 0)))
15596 (clobber (reg:CC FLAGS_REG))]
15597 "((INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT)) == 0
15598 || ((INTVAL (operands[3]) & (2 * <MODE_SIZE> * BITS_PER_UNIT - 1))
15599 == (2 * <MODE_SIZE> * BITS_PER_UNIT - 1)))
15600 && ix86_pre_reload_split ()"
15601 "#"
15602 "&& 1"
15603 [(parallel
15604 [(set (match_dup 4)
15605 (ior:DWIH (lshiftrt:DWIH (match_dup 4)
15606 (and:QI (match_dup 2) (match_dup 8)))
15607 (subreg:DWIH
15608 (ashift:<DWI> (zero_extend:<DWI> (match_dup 7))
15609 (minus:QI (match_dup 9)
15610 (and:QI (match_dup 2) (match_dup 8)))) 0)))
15611 (clobber (reg:CC FLAGS_REG))])
15612 (parallel
15613 [(set (match_dup 6)
15614 (any_shiftrt:DWIH (match_dup 7) (match_dup 2)))
15615 (clobber (reg:CC FLAGS_REG))])]
15616 {
/* The half-word-size bit is kept by the mask; given the insn condition
   this means the mask keeps every relevant count bit, so the AND is
   redundant and the ordinary double-word shift can be used.  */
15617 if ((INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT)) != 0)
15618 {
15619 operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
15620 operands[2] = gen_lowpart (QImode, operands[2]);
15621 emit_insn (gen_<insn><dwi>3_doubleword (operands[0], operands[1],
15622 operands[2]));
15623 DONE;
15624 }
15625
/* Fill operands[4..7] with the halves of operands[0] and operands[1].
   NOTE(review): presumably [4]/[5] are the low halves and [6]/[7] the
   high halves -- confirm against split_double_mode.  */
15626 split_double_mode (<DWI>mode, &operands[0], 2, &operands[4], &operands[6]);
15627
/* operands[8] is the half-word count mask, operands[9] the half-word
   bit size; both are referenced by the split template above.  */
15628 operands[8] = GEN_INT (<MODE_SIZE> * BITS_PER_UNIT - 1);
15629 operands[9] = GEN_INT (<MODE_SIZE> * BITS_PER_UNIT);
15630
/* If the user mask clears some of the low count bits, the AND still
   matters: compute it explicitly (restricted to the half-word count
   range) into a fresh register and use that as the count.  */
15631 if ((INTVAL (operands[3]) & ((<MODE_SIZE> * BITS_PER_UNIT) - 1))
15632 != ((<MODE_SIZE> * BITS_PER_UNIT) - 1))
15633 {
15634 rtx xops[3];
15635 xops[0] = gen_reg_rtx (GET_MODE (operands[2]));
15636 xops[1] = operands[2];
15637 xops[2] = GEN_INT (INTVAL (operands[3])
15638 & ((<MODE_SIZE> * BITS_PER_UNIT) - 1));
15639 ix86_expand_binary_operator (AND, GET_MODE (operands[2]), xops);
15640 operands[2] = xops[0];
15641 }
15642
15643 operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
15644 operands[2] = gen_lowpart (QImode, operands[2]);
15645
/* The first emitted insn updates operands[4] in place, so seed it
   from the matching half of the source when they differ.  */
15646 if (!rtx_equal_p (operands[4], operands[5]))
15647 emit_move_insn (operands[4], operands[5]);
15648 })
15649
;; Variant of the doubleword_mask pattern in which the count is a QImode
;; AND of QImode register operand 2 with constant operand 3 (no subreg).
;; The acceptance condition and the split template are the same as in
;; that pattern; only the preparation code differs, because the count is
;; already in QImode.
15650 (define_insn_and_split "*<insn><dwi>3_doubleword_mask_1"
15651 [(set (match_operand:<DWI> 0 "register_operand")
15652 (any_shiftrt:<DWI>
15653 (match_operand:<DWI> 1 "register_operand")
15654 (and:QI
15655 (match_operand:QI 2 "register_operand" "c")
15656 (match_operand:QI 3 "const_int_operand"))))
15657 (clobber (reg:CC FLAGS_REG))]
15658 "((INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT)) == 0
15659 || ((INTVAL (operands[3]) & (2 * <MODE_SIZE> * BITS_PER_UNIT - 1))
15660 == (2 * <MODE_SIZE> * BITS_PER_UNIT - 1)))
15661 && ix86_pre_reload_split ()"
15662 "#"
15663 "&& 1"
15664 [(parallel
15665 [(set (match_dup 4)
15666 (ior:DWIH (lshiftrt:DWIH (match_dup 4)
15667 (and:QI (match_dup 2) (match_dup 8)))
15668 (subreg:DWIH
15669 (ashift:<DWI> (zero_extend:<DWI> (match_dup 7))
15670 (minus:QI (match_dup 9)
15671 (and:QI (match_dup 2) (match_dup 8)))) 0)))
15672 (clobber (reg:CC FLAGS_REG))])
15673 (parallel
15674 [(set (match_dup 6)
15675 (any_shiftrt:DWIH (match_dup 7) (match_dup 2)))
15676 (clobber (reg:CC FLAGS_REG))])]
15677 {
/* Mask keeps the half-word-size bit: per the insn condition the AND is
   then redundant, so emit the generic double-word shift.  */
15678 if ((INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT)) != 0)
15679 {
15680 emit_insn (gen_<insn><dwi>3_doubleword (operands[0], operands[1],
15681 operands[2]));
15682 DONE;
15683 }
15684
/* Fill operands[4..7] with the halves of operands[0] and operands[1].  */
15685 split_double_mode (<DWI>mode, &operands[0], 2, &operands[4], &operands[6]);
15686
15687 operands[8] = GEN_INT (<MODE_SIZE> * BITS_PER_UNIT - 1);
15688 operands[9] = GEN_INT (<MODE_SIZE> * BITS_PER_UNIT);
15689
/* The mask clears some low count bits, so the AND must really be
   performed; do it in a new QImode register and shift by that.  */
15690 if ((INTVAL (operands[3]) & ((<MODE_SIZE> * BITS_PER_UNIT) - 1))
15691 != ((<MODE_SIZE> * BITS_PER_UNIT) - 1))
15692 {
15693 rtx tem = gen_reg_rtx (QImode);
15694 emit_insn (gen_andqi3 (tem, operands[2], operands[3]));
15695 operands[2] = tem;
15696 }
15697
15698 if (!rtx_equal_p (operands[4], operands[5]))
15699 emit_move_insn (operands[4], operands[5]);
15700 })
15701
;; Generic double-word right shift.  It has no assembler template of its
;; own ("#") and is split once epilogue_completed is set.  The second
;; alternative, enabled only for the apx_ndd isa, lets the source live
;; in a register different from the destination; in that case the split
;; is done by ix86_split_rshift_ndd, otherwise by ix86_split_<insn>.
;; No scratch register is passed (NULL_RTX) in either case.
15702 (define_insn_and_split "<insn><mode>3_doubleword"
15703 [(set (match_operand:DWI 0 "register_operand" "=&r,&r")
15704 (any_shiftrt:DWI (match_operand:DWI 1 "register_operand" "0,r")
15705 (match_operand:QI 2 "nonmemory_operand" "<S>c,<S>c")))
15706 (clobber (reg:CC FLAGS_REG))]
15707 ""
15708 "#"
15709 "epilogue_completed"
15710 [(const_int 0)]
15711 {
15712 if (TARGET_APX_NDD
15713 && !rtx_equal_p (operands[0], operands[1]))
15714 ix86_split_rshift_ndd (<CODE>, operands, NULL_RTX);
15715 else
15716 ix86_split_<insn> (operands, NULL_RTX, <MODE>mode);
15717 DONE;
15718 }
15719 [(set_attr "type" "multi")
15720 (set_attr "isa" "*,apx_ndd")])
15721
15722 ;; By default we don't ask for a scratch register, because when DWImode
15723 ;; values are manipulated, registers are already at a premium. But if
15724 ;; we have one handy, we won't turn it away.
15725
;; Peephole for a double-word right shift when a half-word scratch
;; register (operand 3) is available and TARGET_CMOVE holds.  It performs
;; the same dispatch as the <insn><mode>3_doubleword split, but hands
;; the scratch register to the splitter instead of NULL_RTX.
15726 (define_peephole2
15727 [(match_scratch:DWIH 3 "r")
15728 (parallel [(set (match_operand:<DWI> 0 "register_operand")
15729 (any_shiftrt:<DWI>
15730 (match_operand:<DWI> 1 "register_operand")
15731 (match_operand:QI 2 "nonmemory_operand")))
15732 (clobber (reg:CC FLAGS_REG))])
15733 (match_dup 3)]
15734 "TARGET_CMOVE"
15735 [(const_int 0)]
15736 {
15737 if (TARGET_APX_NDD
15738 && !rtx_equal_p (operands[0], operands[1]))
15739 ix86_split_rshift_ndd (<CODE>, operands, operands[3]);
15740 else
15741 ix86_split_<insn> (operands, operands[3], <DWI>mode);
15742 DONE;
15743 })
15744
15745 ;; Split truncations of double word right shifts into x86_shrd_1.
;; Low half (subreg at offset 0) of a double-word right shift by a
;; constant smaller than the half-word bit size.  After reload this is
;; rewritten as the shrd-shaped RTL below: the destination is shifted
;; right by operand 2 and ORed with operand 3 shifted left by
;; (half-word bit size - operand 2), the latter stored in operand 4.
15746 (define_insn_and_split "<insn><dwi>3_doubleword_lowpart"
15747 [(set (match_operand:DWIH 0 "register_operand" "=&r")
15748 (subreg:DWIH
15749 (any_shiftrt:<DWI> (match_operand:<DWI> 1 "register_operand" "r")
15750 (match_operand:QI 2 "const_int_operand")) 0))
15751 (clobber (reg:CC FLAGS_REG))]
15752 "UINTVAL (operands[2]) < <MODE_SIZE> * BITS_PER_UNIT"
15753 "#"
15754 "&& reload_completed"
15755 [(parallel
15756 [(set (match_dup 0)
15757 (ior:DWIH (lshiftrt:DWIH (match_dup 0) (match_dup 2))
15758 (subreg:DWIH
15759 (ashift:<DWI> (zero_extend:<DWI> (match_dup 3))
15760 (match_dup 4)) 0)))
15761 (clobber (reg:CC FLAGS_REG))])]
15762 {
/* Replace operands[1] by one half of the source and put the other half
   in operands[3].  NOTE(review): presumably low and high half
   respectively -- confirm against split_double_mode.  */
15763 split_double_mode (<DWI>mode, &operands[1], 1, &operands[1], &operands[3]);
15764 operands[4] = GEN_INT ((<MODE_SIZE> * BITS_PER_UNIT) - INTVAL (operands[2]));
/* The emitted insn works in place on operands[0], so copy the half of
   the source into it first when they are different registers.  */
15765 if (!rtx_equal_p (operands[0], operands[1]))
15766 emit_move_insn (operands[0], operands[1]);
15767 })
15768
;; 64-bit shrd with a variable or immediate count (operand 2).  Operand 0
;; is both source and destination: it is shifted right by (count & 63)
;; and ORed with the low DImode part of register operand 1, zero-extended
;; to TImode and shifted left by 64 - (count & 63).  Only available for
;; TARGET_64BIT.
15769 (define_insn "x86_64_shrd"
15770 [(set (match_operand:DI 0 "nonimmediate_operand" "+r*m")
15771 (ior:DI (lshiftrt:DI (match_dup 0)
15772 (and:QI (match_operand:QI 2 "nonmemory_operand" "Jc")
15773 (const_int 63)))
15774 (subreg:DI
15775 (ashift:TI
15776 (zero_extend:TI
15777 (match_operand:DI 1 "register_operand" "r"))
15778 (minus:QI (const_int 64)
15779 (and:QI (match_dup 2) (const_int 63)))) 0)))
15780 (clobber (reg:CC FLAGS_REG))]
15781 "TARGET_64BIT"
15782 "shrd{q}\t{%s2%1, %0|%0, %1, %2}"
15783 [(set_attr "type" "ishift")
15784 (set_attr "prefix_0f" "1")
15785 (set_attr "mode" "DI")
15786 (set_attr "athlon_decode" "vector")
15787 (set_attr "amdfam10_decode" "vector")
15788 (set_attr "bdver1_decode" "vector")])
15789
;; APX NDD form of x86_64_shrd: the result goes to a separate register
;; operand 0, the value shifted right is operand 1 (register or memory),
;; the bits shifted in come from register operand 2 and the count is
;; operand 3.  Requires TARGET_APX_NDD.
15790 (define_insn "x86_64_shrd_ndd"
15791 [(set (match_operand:DI 0 "register_operand" "=r")
15792 (ior:DI (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "rm")
15793 (and:QI (match_operand:QI 3 "nonmemory_operand" "Jc")
15794 (const_int 63)))
15795 (subreg:DI
15796 (ashift:TI
15797 (zero_extend:TI
15798 (match_operand:DI 2 "register_operand" "r"))
15799 (minus:QI (const_int 64)
15800 (and:QI (match_dup 3) (const_int 63)))) 0)))
15801 (clobber (reg:CC FLAGS_REG))]
15802 "TARGET_APX_NDD"
15803 "shrd{q}\t{%s3%2, %1, %0|%0, %1, %2, %3}"
15804 [(set_attr "type" "ishift")
15805 (set_attr "mode" "DI")])
15806
15807
;; 64-bit shrd with a constant count.  Operand 2 is the right-shift
;; count (0..63) and operand 3 the complementary left-shift count; the
;; insn condition requires operand 3 == 64 - operand 2.  Operand 0 is
;; updated in place.
15808 (define_insn "x86_64_shrd_1"
15809 [(set (match_operand:DI 0 "nonimmediate_operand" "+r*m")
15810 (ior:DI (lshiftrt:DI (match_dup 0)
15811 (match_operand:QI 2 "const_0_to_63_operand"))
15812 (subreg:DI
15813 (ashift:TI
15814 (zero_extend:TI
15815 (match_operand:DI 1 "register_operand" "r"))
15816 (match_operand:QI 3 "const_0_to_255_operand")) 0)))
15817 (clobber (reg:CC FLAGS_REG))]
15818 "TARGET_64BIT
15819 && INTVAL (operands[3]) == 64 - INTVAL (operands[2])"
15820 "shrd{q}\t{%2, %1, %0|%0, %1, %2}"
15821 [(set_attr "type" "ishift")
15822 (set_attr "prefix_0f" "1")
15823 (set_attr "length_immediate" "1")
15824 (set_attr "mode" "DI")
15825 (set_attr "athlon_decode" "vector")
15826 (set_attr "amdfam10_decode" "vector")
15827 (set_attr "bdver1_decode" "vector")])
15828
;; APX NDD form of x86_64_shrd_1: constant right-shift count in operand 3
;; and complementary left-shift count in operand 4 (operand 4 must equal
;; 64 - operand 3).  The result is written to register operand 0; the
;; shifted value is operand 1 and the shifted-in bits come from
;; operand 2.
15829 (define_insn "x86_64_shrd_ndd_1"
15830 [(set (match_operand:DI 0 "register_operand" "=r")
15831 (ior:DI (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "rm")
15832 (match_operand:QI 3 "const_0_to_63_operand"))
15833 (subreg:DI
15834 (ashift:TI
15835 (zero_extend:TI
15836 (match_operand:DI 2 "register_operand" "r"))
15837 (match_operand:QI 4 "const_0_to_255_operand")) 0)))
15838 (clobber (reg:CC FLAGS_REG))]
15839 "TARGET_APX_NDD
15840 && INTVAL (operands[4]) == 64 - INTVAL (operands[3])"
15841 "shrd{q}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15842 [(set_attr "type" "ishift")
15843 (set_attr "length_immediate" "1")
15844 (set_attr "mode" "DI")])
15845
15846
;; Matches (operand 4 >> operand 2) | (operand 1 << operand 3) in DImode
;; with constant counts satisfying operand 3 == 64 - operand 2, i.e. a
;; double shift written without the zero_extend/subreg wrapping used by
;; the shrd/shld insns.  Before reload it is split into one of the
;; constant-count shrd/shld patterns, chosen by which input (if any)
;; coincides with the destination.
15847 (define_insn_and_split "*x86_64_shrd_shld_1_nozext"
15848 [(set (match_operand:DI 0 "nonimmediate_operand")
15849 (ior:DI (lshiftrt:DI (match_operand:DI 4 "nonimmediate_operand")
15850 (match_operand:QI 2 "const_0_to_63_operand"))
15851 (ashift:DI
15852 (match_operand:DI 1 "nonimmediate_operand")
15853 (match_operand:QI 3 "const_0_to_63_operand"))))
15854 (clobber (reg:CC FLAGS_REG))]
15855 "TARGET_64BIT
15856 && INTVAL (operands[3]) == 64 - INTVAL (operands[2])
15857 && ix86_pre_reload_split ()"
15858 "#"
15859 "&& 1"
15860 [(const_int 0)]
15861 {
/* Right-shifted input is the destination: shrd in place.  */
15862 if (rtx_equal_p (operands[4], operands[0]))
15863 {
15864 operands[1] = force_reg (DImode, operands[1]);
15865 emit_insn (gen_x86_64_shrd_1 (operands[0], operands[1], operands[2], operands[3]));
15866 }
/* Left-shifted input is the destination: shld in place, with the two
   counts swapped.  */
15867 else if (rtx_equal_p (operands[1], operands[0]))
15868 {
15869 operands[4] = force_reg (DImode, operands[4]);
15870 emit_insn (gen_x86_64_shld_1 (operands[0], operands[4], operands[3], operands[2]));
15871 }
/* Neither input is the destination and NDD forms are available:
   compute into a fresh register, picking shrd or shld so that a memory
   input lands in the operand position that accepts memory.  */
15872 else if (TARGET_APX_NDD)
15873 {
15874 rtx tmp = gen_reg_rtx (DImode);
15875 if (MEM_P (operands[4]))
15876 {
15877 operands[1] = force_reg (DImode, operands[1]);
15878 emit_insn (gen_x86_64_shrd_ndd_1 (tmp, operands[4], operands[1],
15879 operands[2], operands[3]));
15880 }
15881 else if (MEM_P (operands[1]))
15882 emit_insn (gen_x86_64_shld_ndd_1 (tmp, operands[1], operands[4],
15883 operands[3], operands[2]));
15884 else
15885 emit_insn (gen_x86_64_shrd_ndd_1 (tmp, operands[4], operands[1],
15886 operands[2], operands[3]));
15887 emit_move_insn (operands[0], tmp);
15888 }
/* Fallback: copy the right-shifted input to a temporary, shrd the
   temporary in place and move the result to the destination.  */
15889 else
15890 {
15891 operands[1] = force_reg (DImode, operands[1]);
15892 rtx tmp = gen_reg_rtx (DImode);
15893 emit_move_insn (tmp, operands[4]);
15894 emit_insn (gen_x86_64_shrd_1 (tmp, operands[1], operands[2], operands[3]));
15895 emit_move_insn (operands[0], tmp);
15896 }
15897 DONE;
15898 })
15899
;; Matches (operand 0 >> operand 2) | (operand 1 << (64 - operand 2)) in
;; DImode with a non-constant-restricted count and no explicit masking,
;; and rewrites it before reload into the RTL shape of x86_64_shrd, where
;; the count is masked with 63 and the shifted-in operand is
;; zero-extended to TImode.
15900 (define_insn_and_split "*x86_64_shrd_2"
15901 [(set (match_operand:DI 0 "nonimmediate_operand")
15902 (ior:DI (lshiftrt:DI (match_dup 0)
15903 (match_operand:QI 2 "nonmemory_operand"))
15904 (ashift:DI (match_operand:DI 1 "register_operand")
15905 (minus:QI (const_int 64) (match_dup 2)))))
15906 (clobber (reg:CC FLAGS_REG))]
15907 "TARGET_64BIT && ix86_pre_reload_split ()"
15908 "#"
15909 "&& 1"
15910 [(parallel [(set (match_dup 0)
15911 (ior:DI (lshiftrt:DI (match_dup 0)
15912 (and:QI (match_dup 2) (const_int 63)))
15913 (subreg:DI
15914 (ashift:TI
15915 (zero_extend:TI (match_dup 1))
15916 (minus:QI (const_int 64)
15917 (and:QI (match_dup 2)
15918 (const_int 63)))) 0)))
15919 (clobber (reg:CC FLAGS_REG))])])
15920
;; APX NDD counterpart of *x86_64_shrd_2: matches
;; (operand 1 >> operand 3) | (operand 2 << (64 - operand 3)) in DImode,
;; where operand 3 is the QImode shift count, and rewrites it before
;; reload into the RTL shape of x86_64_shrd_ndd computed into a fresh
;; register operand 4.
;;
;; The complementary count in the MINUS must refer to the shift count
;; (operand 3), exactly as in *x86_shrd_ndd_2; referring to operand 2
;; would duplicate the DImode data register inside a QImode MINUS.
;;
;; NOTE(review): the split template places (set (match_dup 0)
;; (match_dup 4)) inside the same parallel as the shrd, and the
;; preparation code initialises operand 4 from operand 0.  This mirrors
;; *x86_shrd_ndd_2 and is left unchanged here -- confirm that this is
;; the intended shape.
15921 (define_insn_and_split "*x86_64_shrd_ndd_2"
15922 [(set (match_operand:DI 0 "nonimmediate_operand")
15923 (ior:DI (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand")
15924 (match_operand:QI 3 "nonmemory_operand"))
15925 (ashift:DI (match_operand:DI 2 "register_operand")
15926 (minus:QI (const_int 64) (match_dup 3)))))
15927 (clobber (reg:CC FLAGS_REG))]
15928 "TARGET_APX_NDD
15929 && ix86_pre_reload_split ()"
15930 "#"
15931 "&& 1"
15932 [(parallel [(set (match_dup 4)
15933 (ior:DI (lshiftrt:DI (match_dup 1)
15934 (and:QI (match_dup 3) (const_int 63)))
15935 (subreg:DI
15936 (ashift:TI
15937 (zero_extend:TI (match_dup 2))
15938 (minus:QI (const_int 64)
15939 (and:QI (match_dup 3)
15940 (const_int 63)))) 0)))
15941 (clobber (reg:CC FLAGS_REG))
15942 (set (match_dup 0) (match_dup 4))])]
15943 {
15944 operands[4] = gen_reg_rtx (DImode);
15945 emit_move_insn (operands[4], operands[0]);
15946 })
15947
;; 32-bit shrd with a variable or immediate count (operand 2).  Operand 0
;; is updated in place: it is shifted right by (count & 31) and ORed
;; with the low SImode part of register operand 1, zero-extended to
;; DImode and shifted left by 32 - (count & 31).  The insn condition is
;; empty, so the pattern is always available.
15948 (define_insn "x86_shrd"
15949 [(set (match_operand:SI 0 "nonimmediate_operand" "+r*m")
15950 (ior:SI (lshiftrt:SI (match_dup 0)
15951 (and:QI (match_operand:QI 2 "nonmemory_operand" "Ic")
15952 (const_int 31)))
15953 (subreg:SI
15954 (ashift:DI
15955 (zero_extend:DI
15956 (match_operand:SI 1 "register_operand" "r"))
15957 (minus:QI (const_int 32)
15958 (and:QI (match_dup 2) (const_int 31)))) 0)))
15959 (clobber (reg:CC FLAGS_REG))]
15960 ""
15961 "shrd{l}\t{%s2%1, %0|%0, %1, %2}"
15962 [(set_attr "type" "ishift")
15963 (set_attr "prefix_0f" "1")
15964 (set_attr "mode" "SI")
15965 (set_attr "pent_pair" "np")
15966 (set_attr "athlon_decode" "vector")
15967 (set_attr "amdfam10_decode" "vector")
15968 (set_attr "bdver1_decode" "vector")])
15969
;; APX NDD form of x86_shrd: separate destination register operand 0,
;; shifted value operand 1 (register or memory), shifted-in bits from
;; register operand 2, count in operand 3.  Requires TARGET_APX_NDD.
15970 (define_insn "x86_shrd_ndd"
15971 [(set (match_operand:SI 0 "register_operand" "=r")
15972 (ior:SI (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "rm")
15973 (and:QI (match_operand:QI 3 "nonmemory_operand" "Ic")
15974 (const_int 31)))
15975 (subreg:SI
15976 (ashift:DI
15977 (zero_extend:DI
15978 (match_operand:SI 2 "register_operand" "r"))
15979 (minus:QI (const_int 32)
15980 (and:QI (match_dup 3) (const_int 31)))) 0)))
15981 (clobber (reg:CC FLAGS_REG))]
15982 "TARGET_APX_NDD"
15983 "shrd{l}\t{%s3%2, %1, %0|%0, %1, %2, %3}"
15984 [(set_attr "type" "ishift")
15985 (set_attr "mode" "SI")])
15986
;; 32-bit shrd with a constant count.  Operand 2 is the right-shift
;; count (0..31) and operand 3 the complementary left-shift count; the
;; insn condition requires operand 3 == 32 - operand 2.  Operand 0 is
;; updated in place.
15987 (define_insn "x86_shrd_1"
15988 [(set (match_operand:SI 0 "nonimmediate_operand" "+r*m")
15989 (ior:SI (lshiftrt:SI (match_dup 0)
15990 (match_operand:QI 2 "const_0_to_31_operand"))
15991 (subreg:SI
15992 (ashift:DI
15993 (zero_extend:DI
15994 (match_operand:SI 1 "register_operand" "r"))
15995 (match_operand:QI 3 "const_0_to_63_operand")) 0)))
15996 (clobber (reg:CC FLAGS_REG))]
15997 "INTVAL (operands[3]) == 32 - INTVAL (operands[2])"
15998 "shrd{l}\t{%2, %1, %0|%0, %1, %2}"
15999 [(set_attr "type" "ishift")
16000 (set_attr "prefix_0f" "1")
16001 (set_attr "length_immediate" "1")
16002 (set_attr "mode" "SI")
16003 (set_attr "pent_pair" "np")
16004 (set_attr "athlon_decode" "vector")
16005 (set_attr "amdfam10_decode" "vector")
16006 (set_attr "bdver1_decode" "vector")])
16007
;; APX NDD form of x86_shrd_1: constant right-shift count in operand 3
;; and complementary left-shift count in operand 4 (operand 4 must equal
;; 32 - operand 3).  The result is written to register operand 0.
16008 (define_insn "x86_shrd_ndd_1"
16009 [(set (match_operand:SI 0 "register_operand" "=r")
16010 (ior:SI (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "rm")
16011 (match_operand:QI 3 "const_0_to_31_operand"))
16012 (subreg:SI
16013 (ashift:DI
16014 (zero_extend:DI
16015 (match_operand:SI 2 "register_operand" "r"))
16016 (match_operand:QI 4 "const_0_to_63_operand")) 0)))
16017 (clobber (reg:CC FLAGS_REG))]
16018 "TARGET_APX_NDD
16019 && (INTVAL (operands[4]) == 32 - INTVAL (operands[3]))"
16020 "shrd{l}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
16021 [(set_attr "type" "ishift")
16022 (set_attr "length_immediate" "1")
16023 (set_attr "mode" "SI")])
16024
16025
;; SImode counterpart of *x86_64_shrd_shld_1_nozext.  Matches
;; (operand 4 >> operand 2) | (operand 1 << operand 3) with constant
;; counts satisfying operand 3 == 32 - operand 2 and, before reload,
;; splits it into one of the constant-count shrd/shld patterns depending
;; on which input (if any) coincides with the destination.
16026 (define_insn_and_split "*x86_shrd_shld_1_nozext"
16027 [(set (match_operand:SI 0 "nonimmediate_operand")
16028 (ior:SI (lshiftrt:SI (match_operand:SI 4 "nonimmediate_operand")
16029 (match_operand:QI 2 "const_0_to_31_operand"))
16030 (ashift:SI
16031 (match_operand:SI 1 "nonimmediate_operand")
16032 (match_operand:QI 3 "const_0_to_31_operand"))))
16033 (clobber (reg:CC FLAGS_REG))]
16034 "INTVAL (operands[3]) == 32 - INTVAL (operands[2])
16035 && ix86_pre_reload_split ()"
16036 "#"
16037 "&& 1"
16038 [(const_int 0)]
16039 {
/* Right-shifted input is the destination: shrd in place.  */
16040 if (rtx_equal_p (operands[4], operands[0]))
16041 {
16042 operands[1] = force_reg (SImode, operands[1]);
16043 emit_insn (gen_x86_shrd_1 (operands[0], operands[1], operands[2], operands[3]));
16044 }
/* Left-shifted input is the destination: shld in place, with the two
   counts swapped.  */
16045 else if (rtx_equal_p (operands[1], operands[0]))
16046 {
16047 operands[4] = force_reg (SImode, operands[4]);
16048 emit_insn (gen_x86_shld_1 (operands[0], operands[4], operands[3], operands[2]));
16049 }
/* Neither input is the destination and NDD forms are available:
   compute into a fresh register, picking shrd or shld so that a memory
   input lands in the operand position that accepts memory.  */
16050 else if (TARGET_APX_NDD)
16051 {
16052 rtx tmp = gen_reg_rtx (SImode);
16053 if (MEM_P (operands[4]))
16054 {
16055 operands[1] = force_reg (SImode, operands[1]);
16056 emit_insn (gen_x86_shrd_ndd_1 (tmp, operands[4], operands[1],
16057 operands[2], operands[3]));
16058 }
16059 else if (MEM_P (operands[1]))
16060 emit_insn (gen_x86_shld_ndd_1 (tmp, operands[1], operands[4],
16061 operands[3], operands[2]));
16062 else
16063 emit_insn (gen_x86_shrd_ndd_1 (tmp, operands[4], operands[1],
16064 operands[2], operands[3]));
16065 emit_move_insn (operands[0], tmp);
16066 }
/* Fallback: copy the right-shifted input to a temporary, shrd the
   temporary in place and move the result to the destination.  */
16067 else
16068 {
16069 operands[1] = force_reg (SImode, operands[1]);
16070 rtx tmp = gen_reg_rtx (SImode);
16071 emit_move_insn (tmp, operands[4]);
16072 emit_insn (gen_x86_shrd_1 (tmp, operands[1], operands[2], operands[3]));
16073 emit_move_insn (operands[0], tmp);
16074 }
16075 DONE;
16076 })
16077
;; Matches (operand 0 >> operand 2) | (operand 1 << (32 - operand 2)) in
;; SImode with an unmasked count, and rewrites it before reload into the
;; RTL shape of x86_shrd, where the count is masked with 31 and the
;; shifted-in operand is zero-extended to DImode.
;; NOTE(review): the condition requires TARGET_64BIT although the target
;; pattern x86_shrd has an empty condition -- confirm this restriction
;; is intentional.
16078 (define_insn_and_split "*x86_shrd_2"
16079 [(set (match_operand:SI 0 "nonimmediate_operand")
16080 (ior:SI (lshiftrt:SI (match_dup 0)
16081 (match_operand:QI 2 "nonmemory_operand"))
16082 (ashift:SI (match_operand:SI 1 "register_operand")
16083 (minus:QI (const_int 32) (match_dup 2)))))
16084 (clobber (reg:CC FLAGS_REG))]
16085 "TARGET_64BIT && ix86_pre_reload_split ()"
16086 "#"
16087 "&& 1"
16088 [(parallel [(set (match_dup 0)
16089 (ior:SI (lshiftrt:SI (match_dup 0)
16090 (and:QI (match_dup 2) (const_int 31)))
16091 (subreg:SI
16092 (ashift:DI
16093 (zero_extend:DI (match_dup 1))
16094 (minus:QI (const_int 32)
16095 (and:QI (match_dup 2)
16096 (const_int 31)))) 0)))
16097 (clobber (reg:CC FLAGS_REG))])])
16098
;; APX NDD counterpart of *x86_shrd_2: matches
;; (operand 1 >> operand 3) | (operand 2 << (32 - operand 3)) in SImode,
;; where operand 3 is the QImode shift count, and rewrites it before
;; reload into the RTL shape of x86_shrd_ndd computed into a fresh
;; register operand 4.
;; NOTE(review): the split template places (set (match_dup 0)
;; (match_dup 4)) inside the same parallel as the shrd, and the
;; preparation code initialises operand 4 from operand 0 -- confirm
;; that this is the intended shape.
16099 (define_insn_and_split "*x86_shrd_ndd_2"
16100 [(set (match_operand:SI 0 "nonimmediate_operand")
16101 (ior:SI (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand")
16102 (match_operand:QI 3 "nonmemory_operand"))
16103 (ashift:SI (match_operand:SI 2 "register_operand")
16104 (minus:QI (const_int 32) (match_dup 3)))))
16105 (clobber (reg:CC FLAGS_REG))]
16106 "TARGET_APX_NDD
16107 && ix86_pre_reload_split ()"
16108 "#"
16109 "&& 1"
16110 [(parallel [(set (match_dup 4)
16111 (ior:SI (lshiftrt:SI (match_dup 1)
16112 (and:QI (match_dup 3) (const_int 31)))
16113 (subreg:SI
16114 (ashift:DI
16115 (zero_extend:DI (match_dup 2))
16116 (minus:QI (const_int 32)
16117 (and:QI (match_dup 3)
16118 (const_int 31)))) 0)))
16119 (clobber (reg:CC FLAGS_REG))
16120 (set (match_dup 0) (match_dup 4))])]
16121 {
16122 operands[4] = gen_reg_rtx (SImode);
16123 emit_move_insn (operands[4], operands[0]);
16124 })
16125
16126 ;; Base name for insn mnemonic.
;; Used as <cvt_mnemonic> by ashr<mode>3_cvt: SImode selects cltd/cdq and
;; DImode selects cqto/cqo.  Each entry is written as a {A|B} pair of
;; spellings of the same instruction.
16127 (define_mode_attr cvt_mnemonic
16128 [(SI "{cltd|cdq}") (DI "{cqto|cqo}")])
16129
;; Arithmetic right shift by exactly GET_MODE_BITSIZE (<MODE>mode)-1.
;; Enabled when TARGET_USE_CLTD is set or the function is optimized for
;; size.  Alternative 0 (source in "a", destination in "d") emits the
;; operand-less <cvt_mnemonic> instruction; alternative 1 is a two-operand
;; sar and alternative 2 the three-operand sar restricted to the apx_ndd
;; isa.
16130 (define_insn "ashr<mode>3_cvt"
16131 [(set (match_operand:SWI48 0 "nonimmediate_operand" "=*d,rm,r")
16132 (ashiftrt:SWI48
16133 (match_operand:SWI48 1 "nonimmediate_operand" "*a,0,rm")
16134 (match_operand:QI 2 "const_int_operand")))
16135 (clobber (reg:CC FLAGS_REG))]
16136 "INTVAL (operands[2]) == GET_MODE_BITSIZE (<MODE>mode)-1
16137 && (TARGET_USE_CLTD || optimize_function_for_size_p (cfun))
16138 && ix86_binary_operator_ok (ASHIFTRT, <MODE>mode, operands, TARGET_APX_NDD)"
16139 "@
16140 <cvt_mnemonic>
16141 sar{<imodesuffix>}\t{%2, %0|%0, %2}
16142 sar{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
16143 [(set_attr "isa" "*,*,apx_ndd")
16144 (set_attr "type" "imovx,ishift,ishift")
16145 (set_attr "prefix_0f" "0,*,*")
16146 (set_attr "length_immediate" "0,*,*")
16147 (set_attr "modrm" "0,1,1")
16148 (set_attr "mode" "<MODE>")])
16149
;; Zero-extended-to-DImode variant of the SImode case of ashr<mode>3_cvt:
;; an SImode arithmetic right shift by 31 whose result is zero-extended
;; into DImode register operand 0.  Only for TARGET_64BIT; the sar
;; alternatives print the destination with %k0.
16150 (define_insn "*ashrsi3_cvt_zext"
16151 [(set (match_operand:DI 0 "register_operand" "=*d,r,r")
16152 (zero_extend:DI
16153 (ashiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "*a,0,rm")
16154 (match_operand:QI 2 "const_int_operand"))))
16155 (clobber (reg:CC FLAGS_REG))]
16156 "TARGET_64BIT && INTVAL (operands[2]) == 31
16157 && (TARGET_USE_CLTD || optimize_function_for_size_p (cfun))
16158 && ix86_binary_operator_ok (ASHIFTRT, SImode, operands, TARGET_APX_NDD)"
16159 "@
16160 {cltd|cdq}
16161 sar{l}\t{%2, %k0|%k0, %2}
16162 sar{l}\t{%2, %1, %k0|%k0, %1, %2}"
16163 [(set_attr "isa" "*,*,apx_ndd")
16164 (set_attr "type" "imovx,ishift,ishift")
16165 (set_attr "prefix_0f" "0,*,*")
16166 (set_attr "length_immediate" "0,*,*")
16167 (set_attr "modrm" "0,1,1")
16168 (set_attr "mode" "SI")])
16169
;; Emits a conditional fix-up sequence on two word registers (operands 0
;; and 1) controlled by QImode count operand 2.  The count is tested
;; against GET_MODE_BITSIZE (<MODE>mode); when the test result is zero
;; the sequence is skipped.  Otherwise operand 1 is copied into
;; operand 0 and operand 1 is then arithmetically shifted right by
;; GET_MODE_BITSIZE (<MODE>mode)-1.
;; NOTE(review): presumably used as the adjustment step for double-word
;; arithmetic right shifts with counts of a full word or more --
;; confirm against the callers.
16170 (define_expand "@x86_shift<mode>_adj_3"
16171 [(use (match_operand:SWI48 0 "register_operand"))
16172 (use (match_operand:SWI48 1 "register_operand"))
16173 (use (match_operand:QI 2 "register_operand"))]
16174 ""
16175 {
16176 rtx_code_label *label = gen_label_rtx ();
16177 rtx tmp;
16178
/* Test the word-size bit of the count.  */
16179 emit_insn (gen_testqi_ccz_1 (operands[2],
16180 GEN_INT (GET_MODE_BITSIZE (<MODE>mode))));
16181
/* Build and emit "if (flags == 0) goto label".  */
16182 tmp = gen_rtx_REG (CCZmode, FLAGS_REG);
16183 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
16184 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
16185 gen_rtx_LABEL_REF (VOIDmode, label),
16186 pc_rtx);
16187 tmp = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
16188 JUMP_LABEL (tmp) = label;
16189
/* Executed only when the jump is not taken.  */
16190 emit_move_insn (operands[0], operands[1]);
16191 emit_insn (gen_ashr<mode>3_cvt (operands[1], operands[1],
16192 GEN_INT (GET_MODE_BITSIZE (<MODE>mode)-1)));
16193 emit_label (label);
16194 LABEL_NUSES (label) = 1;
16195
16196 DONE;
16197 })
16198
;; BMI2 three-operand right shift (<shift>x).  The count (operand 2) is
;; a register in the same mode as the data, and the pattern has no
;; FLAGS_REG clobber.  Requires TARGET_BMI2.
16199 (define_insn "*bmi2_<insn><mode>3_1"
16200 [(set (match_operand:SWI48 0 "register_operand" "=r")
16201 (any_shiftrt:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
16202 (match_operand:SWI48 2 "register_operand" "r")))]
16203 "TARGET_BMI2"
16204 "<shift>x\t{%2, %1, %0|%0, %1, %2}"
16205 [(set_attr "type" "ishiftx")
16206 (set_attr "mode" "<MODE>")])
16207
;; Arithmetic right shift for the SWI48 modes.  Three alternatives:
;;   0 - two-operand sar, destination tied to the source;
;;   1 - bmi2 alternative of type ishiftx, output as "#" so that it is
;;       split into the flag-less shiftx pattern;
;;   2 - apx_ndd three-operand sar.
;; A count of 1 uses the short one-operand encoding when TARGET_SHIFT1
;; or size optimization is in effect and the NDD form is not used.
16208 (define_insn "*ashr<mode>3_1"
16209 [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r")
16210 (ashiftrt:SWI48
16211 (match_operand:SWI48 1 "nonimmediate_operand" "0,rm,rm")
16212 (match_operand:QI 2 "nonmemory_operand" "c<S>,r,c<S>")))
16213 (clobber (reg:CC FLAGS_REG))]
16214 "ix86_binary_operator_ok (ASHIFTRT, <MODE>mode, operands, TARGET_APX_NDD)"
16215 {
16216 bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
16217 switch (get_attr_type (insn))
16218 {
16219 case TYPE_ISHIFTX:
16220 return "#";
16221
16222 default:
16223 if (operands[2] == const1_rtx
16224 && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
16225 && !use_ndd)
16226 return "sar{<imodesuffix>}\t%0";
16227 else
16228 return use_ndd ? "sar{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
16229 : "sar{<imodesuffix>}\t{%2, %0|%0, %2}";
16230 }
16231 }
16232 [(set_attr "isa" "*,bmi2,apx_ndd")
16233 (set_attr "type" "ishift,ishiftx,ishift")
16234 (set (attr "length_immediate")
16235 (if_then_else
16236 (and (match_operand 2 "const1_operand")
16237 (ior (match_test "TARGET_SHIFT1")
16238 (match_test "optimize_function_for_size_p (cfun)")))
16239 (const_string "0")
16240 (const_string "*")))
16241 (set_attr "mode" "<MODE>")])
16242
16243 ;; Specialization of *lshr<mode>3_1 below, extracting the SImode
16244 ;; highpart of a DImode value, but allowing it to be clobbered.
;; The destination is an SImode register viewed as DImode (subreg at
;; offset 0); the source is a DImode value logically shifted right
;; by 32.  After reload the insn is split in one of two ways:
;;   - if the destination is an SSE register, a shufps on its V4SImode
;;     view is emitted instead of a shift;
;;   - otherwise the destination is rewritten as the DImode hard
;;     register with the same number and a plain DImode lshiftrt by 32
;;     is emitted.
16245 (define_insn_and_split "*highpartdisi2"
16246 [(set (subreg:DI (match_operand:SI 0 "register_operand" "=r,x,?k,r") 0)
16247 (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0,0,k,rm")
16248 (const_int 32)))
16249 (clobber (reg:CC FLAGS_REG))]
16250 "TARGET_64BIT"
16251 "#"
16252 "&& reload_completed"
16253 [(parallel
16254 [(set (match_dup 0) (lshiftrt:DI (match_dup 1) (const_int 32)))
16255 (clobber (reg:CC FLAGS_REG))])]
16256 {
16257 if (SSE_REG_P (operands[0]))
16258 {
/* Shuffle within the vector register rather than shifting.  */
16259 rtx tmp = gen_rtx_REG (V4SImode, REGNO (operands[0]));
16260 emit_insn (gen_sse_shufps_v4si (tmp, tmp, tmp,
16261 const1_rtx, const1_rtx,
16262 GEN_INT (5), GEN_INT (5)));
16263 DONE;
16264 }
/* Use the full DImode register so the split template above matches
   the ordinary DImode logical shift.  */
16265 operands[0] = gen_rtx_REG (DImode, REGNO (operands[0]));
16266 }
16267 [(set_attr "isa" "*,*,*,apx_ndd")])
16268
16269
;; Logical right shift for the SWI48 modes.  Four alternatives:
;;   0 - two-operand shr, destination tied to the source;
;;   1 - bmi2 alternative of type ishiftx, output as "#";
;;   2 - avx512bw mask-register alternative of type msklog, output
;;       as "#";
;;   3 - apx_ndd three-operand shr.
;; The length_immediate attribute is 0 only for alternative 0 with a
;; count of 1 under TARGET_SHIFT1 or size optimization.
16270 (define_insn "*lshr<mode>3_1"
16271 [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,?k,r")
16272 (lshiftrt:SWI48
16273 (match_operand:SWI48 1 "nonimmediate_operand" "0,rm,k,rm")
16274 (match_operand:QI 2 "nonmemory_operand" "c<S>,r,<KS>,c<S>")))
16275 (clobber (reg:CC FLAGS_REG))]
16276 "ix86_binary_operator_ok (LSHIFTRT, <MODE>mode, operands, TARGET_APX_NDD)"
16277 {
16278 bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
16279 switch (get_attr_type (insn))
16280 {
16281 case TYPE_ISHIFTX:
16282 case TYPE_MSKLOG:
16283 return "#";
16284
16285 default:
16286 if (operands[2] == const1_rtx
16287 && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
16288 && !use_ndd)
16289 return "shr{<imodesuffix>}\t%0";
16290 else
16291 return use_ndd ? "shr{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
16292 : "shr{<imodesuffix>}\t{%2, %0|%0, %2}";
16293 }
16294 }
16295 [(set_attr "isa" "*,bmi2,avx512bw,apx_ndd")
16296 (set_attr "type" "ishift,ishiftx,msklog,ishift")
16297 (set (attr "length_immediate")
16298 (if_then_else
16299 (and (and (match_operand 2 "const1_operand")
16300 (eq_attr "alternative" "0"))
16301 (ior (match_test "TARGET_SHIFT1")
16302 (match_test "optimize_function_for_size_p (cfun)")))
16303 (const_string "0")
16304 (const_string "*")))
16305 (set_attr "mode" "<MODE>")])
16306
16307 ;; Convert shift to the shiftx pattern to avoid flags dependency.
;; After reload, with TARGET_BMI2, a register-destination right shift by
;; a QImode register count loses its FLAGS_REG clobber: the count is
;; re-expressed in <MODE>mode via gen_lowpart so the result has the
;; shape of *bmi2_<insn><mode>3_1.
16308 (define_split
16309 [(set (match_operand:SWI48 0 "register_operand")
16310 (any_shiftrt:SWI48 (match_operand:SWI48 1 "nonimmediate_operand")
16311 (match_operand:QI 2 "register_operand")))
16312 (clobber (reg:CC FLAGS_REG))]
16313 "TARGET_BMI2 && reload_completed"
16314 [(set (match_dup 0)
16315 (any_shiftrt:SWI48 (match_dup 1) (match_dup 2)))]
16316 "operands[2] = gen_lowpart (<MODE>mode, operands[2]);")
16317
;; BMI2 <shift>x on SImode operands whose result is zero-extended into
;; DImode register operand 0 (printed with %k0).  No FLAGS_REG clobber.
;; Requires TARGET_64BIT and TARGET_BMI2.
16318 (define_insn "*bmi2_<insn>si3_1_zext"
16319 [(set (match_operand:DI 0 "register_operand" "=r")
16320 (zero_extend:DI
16321 (any_shiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "rm")
16322 (match_operand:SI 2 "register_operand" "r"))))]
16323 "TARGET_64BIT && TARGET_BMI2"
16324 "<shift>x\t{%2, %1, %k0|%k0, %1, %2}"
16325 [(set_attr "type" "ishiftx")
16326 (set_attr "mode" "SI")])
16327
;; SImode right shift whose result is zero-extended into a DImode
;; register (TARGET_64BIT only).  Alternatives:
;;   0 - two-operand shift, destination tied to the source;
;;   1 - bmi2 alternative of type ishiftx, output as "#";
;;   2 - apx_ndd three-operand shift.
;; The destination is always printed as a 32-bit register (%k0).
16328 (define_insn "*<insn>si3_1_zext"
16329 [(set (match_operand:DI 0 "register_operand" "=r,r,r")
16330 (zero_extend:DI
16331 (any_shiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0,rm,rm")
16332 (match_operand:QI 2 "nonmemory_operand" "cI,r,cI"))))
16333 (clobber (reg:CC FLAGS_REG))]
16334 "TARGET_64BIT
16335 && ix86_binary_operator_ok (<CODE>, SImode, operands, TARGET_APX_NDD)"
16336 {
16337 bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
16338 switch (get_attr_type (insn))
16339 {
16340 case TYPE_ISHIFTX:
16341 return "#";
16342
16343 default:
16344 if (operands[2] == const1_rtx
16345 && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
16346 && !use_ndd)
16347 return "<shift>{l}\t%k0";
16348 else
16349 return use_ndd ? "<shift>{l}\t{%2, %1, %k0|%k0, %1, %2}"
16350 : "<shift>{l}\t{%2, %k0|%k0, %2}";
16351 }
16352 }
16353 [(set_attr "isa" "*,bmi2,apx_ndd")
16354 (set_attr "type" "ishift,ishiftx,ishift")
16355 (set (attr "length_immediate")
16356 (if_then_else
16357 (and (match_operand 2 "const1_operand")
16358 (ior (match_test "TARGET_SHIFT1")
16359 (match_test "optimize_function_for_size_p (cfun)")))
16360 (const_string "0")
16361 (const_string "*")))
16362 (set_attr "mode" "SI")])
16363
16364 ;; Convert shift to the shiftx pattern to avoid flags dependency.
;; Zero-extending counterpart of the shiftx split: after reload, with
;; TARGET_64BIT and TARGET_BMI2, the FLAGS_REG clobber is dropped and
;; the QImode register count is re-expressed in SImode so the result has
;; the shape of *bmi2_<insn>si3_1_zext.
16365 (define_split
16366 [(set (match_operand:DI 0 "register_operand")
16367 (zero_extend:DI
16368 (any_shiftrt:SI (match_operand:SI 1 "nonimmediate_operand")
16369 (match_operand:QI 2 "register_operand"))))
16370 (clobber (reg:CC FLAGS_REG))]
16371 "TARGET_64BIT && TARGET_BMI2 && reload_completed"
16372 [(set (match_dup 0)
16373 (zero_extend:DI (any_shiftrt:SI (match_dup 1) (match_dup 2))))]
16374 "operands[2] = gen_lowpart (SImode, operands[2]);")
16375
;; Arithmetic right shift for the SWI12 modes.  Alternative 0 is the
;; two-operand sar with the destination tied to the source; alternative
;; 1 is the apx_ndd three-operand form.  A count of 1 uses the short
;; one-operand encoding when TARGET_SHIFT1 or size optimization is in
;; effect and the NDD form is not used.
16376 (define_insn "*ashr<mode>3_1"
16377 [(set (match_operand:SWI12 0 "nonimmediate_operand" "=<r>m, r")
16378 (ashiftrt:SWI12
16379 (match_operand:SWI12 1 "nonimmediate_operand" "0, rm")
16380 (match_operand:QI 2 "nonmemory_operand" "c<S>, c<S>")))
16381 (clobber (reg:CC FLAGS_REG))]
16382 "ix86_binary_operator_ok (ASHIFTRT, <MODE>mode, operands, TARGET_APX_NDD)"
16383 {
16384 bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
16385 if (operands[2] == const1_rtx
16386 && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
16387 && !use_ndd)
16388 return "sar{<imodesuffix>}\t%0";
16389 else
16390 return use_ndd ? "sar{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
16391 : "sar{<imodesuffix>}\t{%2, %0|%0, %2}";
16392 }
16393 [(set_attr "isa" "*, apx_ndd")
16394 (set_attr "type" "ishift")
16395 (set (attr "length_immediate")
16396 (if_then_else
16397 (and (match_operand 2 "const1_operand")
16398 (ior (match_test "TARGET_SHIFT1")
16399 (match_test "optimize_function_for_size_p (cfun)")))
16400 (const_string "0")
16401 (const_string "*")))
16402 (set_attr "mode" "<MODE>")])
16403
;; QImode logical right shift.  Alternatives:
;;   0 - two-operand shr{b}, destination tied to the source;
;;   1 - avx512dq mask-register alternative of type msklog, output
;;       as "#";
;;   2 - apx_ndd three-operand shr{b}.
;; Any other insn type is unreachable.
16404 (define_insn "*lshrqi3_1"
16405 [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,?k,r")
16406 (lshiftrt:QI
16407 (match_operand:QI 1 "nonimmediate_operand" "0, k, rm")
16408 (match_operand:QI 2 "nonmemory_operand" "cI,Wb,cI")))
16409 (clobber (reg:CC FLAGS_REG))]
16410 "ix86_binary_operator_ok (LSHIFTRT, QImode, operands, TARGET_APX_NDD)"
16411 {
16412 bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
16413 switch (get_attr_type (insn))
16414 {
16415 case TYPE_ISHIFT:
16416 if (operands[2] == const1_rtx
16417 && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
16418 && !use_ndd)
16419 return "shr{b}\t%0";
16420 else
16421 return use_ndd ? "shr{b}\t{%2, %1, %0|%0, %1, %2}"
16422 : "shr{b}\t{%2, %0|%0, %2}";
16423 case TYPE_MSKLOG:
16424 return "#";
16425 default:
16426 gcc_unreachable ();
16427 }
16428 }
16429 [(set_attr "isa" "*,avx512dq,apx_ndd")
16430 (set_attr "type" "ishift,msklog,ishift")
16431 (set (attr "length_immediate")
16432 (if_then_else
16433 (and (and (match_operand 2 "const1_operand")
16434 (eq_attr "alternative" "0"))
16435 (ior (match_test "TARGET_SHIFT1")
16436 (match_test "optimize_function_for_size_p (cfun)")))
16437 (const_string "0")
16438 (const_string "*")))
16439 (set_attr "mode" "QI")])
16440
;; HImode logical right shift.  Alternatives:
;;   0 - two-operand shr{w}, destination tied to the source;
;;   1 - avx512f mask-register alternative of type msklog, output
;;       as "#";
;;   2 - apx_ndd three-operand shr{w}.
;; Any other insn type is unreachable.
16441 (define_insn "*lshrhi3_1"
16442 [(set (match_operand:HI 0 "nonimmediate_operand" "=rm, ?k, r")
16443 (lshiftrt:HI
16444 (match_operand:HI 1 "nonimmediate_operand" "0, k, rm")
16445 (match_operand:QI 2 "nonmemory_operand" "cI, Ww, cI")))
16446 (clobber (reg:CC FLAGS_REG))]
16447 "ix86_binary_operator_ok (LSHIFTRT, HImode, operands, TARGET_APX_NDD)"
16448 {
16449 bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
16450 switch (get_attr_type (insn))
16451 {
16452 case TYPE_ISHIFT:
16453 if (operands[2] == const1_rtx
16454 && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
16455 && !use_ndd)
16456 return "shr{w}\t%0";
16457 else
16458 return use_ndd ? "shr{w}\t{%2, %1, %0|%0, %1, %2}"
16459 : "shr{w}\t{%2, %0|%0, %2}";
16460 case TYPE_MSKLOG:
16461 return "#";
16462 default:
16463 gcc_unreachable ();
16464 }
16465 }
16466 [(set_attr "isa" "*, avx512f, apx_ndd")
16467 (set_attr "type" "ishift,msklog,ishift")
16468 (set (attr "length_immediate")
16469 (if_then_else
16470 (and (and (match_operand 2 "const1_operand")
16471 (eq_attr "alternative" "0"))
16472 (ior (match_test "TARGET_SHIFT1")
16473 (match_test "optimize_function_for_size_p (cfun)")))
16474 (const_string "0")
16475 (const_string "*")))
16476 (set_attr "mode" "HI")])
16477
;; Right shift (any_shiftrt) of an SWI12 register written through
;; strict_low_part.  Enabled unless TARGET_PARTIAL_REG_STALL is set, or
;; whenever the function is optimized for size.
;; Alternative 0 shifts in place.  Alternative 1 has an earlyclobber
;; destination and an untied source; it outputs "#" and is split after
;; reload, when the two operands differ, into a strict_low_part copy of
;; operand 1 followed by the in-place shift.
16478 ;; Alternative 1 is needed to work around LRA limitation, see PR82524.
16479 (define_insn_and_split "*<insn><mode>3_1_slp"
16480 [(set (strict_low_part (match_operand:SWI12 0 "register_operand" "+<r>,&<r>"))
16481 (any_shiftrt:SWI12 (match_operand:SWI12 1 "register_operand" "0,!<r>")
16482 (match_operand:QI 2 "nonmemory_operand" "cI,cI")))
16483 (clobber (reg:CC FLAGS_REG))]
16484 "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
16485 {
16486 if (which_alternative)
16487 return "#";
16488
/* Shift by 1 can drop the count operand under TARGET_SHIFT1 or -Os.  */
16489 if (operands[2] == const1_rtx
16490 && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
16491 return "<shift>{<imodesuffix>}\t%0";
16492 else
16493 return "<shift>{<imodesuffix>}\t{%2, %0|%0, %2}";
16494 }
16495 "&& reload_completed
16496 && !(rtx_equal_p (operands[0], operands[1]))"
16497 [(set (strict_low_part (match_dup 0)) (match_dup 1))
16498 (parallel
16499 [(set (strict_low_part (match_dup 0))
16500 (any_shiftrt:SWI12 (match_dup 0) (match_dup 2)))
16501 (clobber (reg:CC FLAGS_REG))])]
16502 ""
16503 [(set_attr "type" "ishift")
16504 (set (attr "length_immediate")
16505 (if_then_else
16506 (and (match_operand 2 "const1_operand")
16507 (ior (match_test "TARGET_SHIFT1")
16508 (match_test "optimize_function_for_size_p (cfun)")))
16509 (const_string "0")
16510 (const_string "*")))
16511 (set_attr "mode" "<MODE>")])
16512
;; Right shift that also sets FLAGS_REG from comparing the shifted value
;; with zero, while storing the shifted value in operand 0.
;; The condition requires CCGOCmode (ix86_match_ccmode) and, unless
;; optimizing for size, either !TARGET_PARTIAL_FLAG_REG_STALL or a shift by
;; the constant 1 with TARGET_SHIFT1.  Alternative 1 is the apx_ndd form.
;; NOTE(review): unlike *lshrqi3_1, the length_immediate test below does not
;; check the alternative, so it also yields "0" for the apx_ndd alternative
;; even though the output code keeps the count there -- confirm intended.
16513 ;; This pattern can't accept a variable shift count, since shifts by
16514 ;; zero don't affect the flags. We assume that shifts by constant
16515 ;; zero are optimized away.
16516 (define_insn "*<insn><mode>3_cmp"
16517 [(set (reg FLAGS_REG)
16518 (compare
16519 (any_shiftrt:SWI
16520 (match_operand:SWI 1 "nonimmediate_operand" "0,rm")
16521 (match_operand:QI 2 "<shift_immediate_operand>" "<S>,<S>"))
16522 (const_int 0)))
16523 (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,r")
16524 (any_shiftrt:SWI (match_dup 1) (match_dup 2)))]
16525 "(optimize_function_for_size_p (cfun)
16526 || !TARGET_PARTIAL_FLAG_REG_STALL
16527 || (operands[2] == const1_rtx
16528 && TARGET_SHIFT1))
16529 && ix86_match_ccmode (insn, CCGOCmode)
16530 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands, TARGET_APX_NDD)"
16531 {
16532 bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
/* Count-less spelling only for a shift by 1 on the non-NDD alternative.  */
16533 if (operands[2] == const1_rtx
16534 && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
16535 && !use_ndd)
16536 return "<shift>{<imodesuffix>}\t%0";
16537 else
16538 return use_ndd ? "<shift>{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
16539 : "<shift>{<imodesuffix>}\t{%2, %0|%0, %2}";
16540 }
16541 [(set_attr "isa" "*,apx_ndd")
16542 (set_attr "type" "ishift")
16543 (set (attr "length_immediate")
16544 (if_then_else
16545 (and (match_operand 2 "const1_operand")
16546 (ior (match_test "TARGET_SHIFT1")
16547 (match_test "optimize_function_for_size_p (cfun)")))
16548 (const_string "0")
16549 (const_string "*")))
16550 (set_attr "mode" "<MODE>")])
16551
;; 64-bit only: SImode right shift by a constant in 1..31 that sets
;; FLAGS_REG from comparing the SImode result with zero and stores the
;; zero-extended result in a DImode register.  The output templates print
;; operand 0 with the %k modifier.  Alternative 1 is the apx_ndd form.
16552 (define_insn "*<insn>si3_cmp_zext"
16553 [(set (reg FLAGS_REG)
16554 (compare
16555 (any_shiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0,rm")
16556 (match_operand:QI 2 "const_1_to_31_operand"))
16557 (const_int 0)))
16558 (set (match_operand:DI 0 "register_operand" "=r,r")
16559 (zero_extend:DI (any_shiftrt:SI (match_dup 1) (match_dup 2))))]
16560 "TARGET_64BIT
16561 && (optimize_function_for_size_p (cfun)
16562 || !TARGET_PARTIAL_FLAG_REG_STALL
16563 || (operands[2] == const1_rtx
16564 && TARGET_SHIFT1))
16565 && ix86_match_ccmode (insn, CCGOCmode)
16566 && ix86_binary_operator_ok (<CODE>, SImode, operands, TARGET_APX_NDD)"
16567 {
16568 bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
/* Count-less spelling only for a shift by 1 on the non-NDD alternative.  */
16569 if (operands[2] == const1_rtx
16570 && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
16571 && !use_ndd)
16572 return "<shift>{l}\t%k0";
16573 else
16574 return use_ndd ? "<shift>{l}\t{%2, %1, %k0|%k0, %1, %2}"
16575 : "<shift>{l}\t{%2, %k0|%k0, %2}";
16576 }
16577 [(set_attr "isa" "*,apx_ndd")
16578 (set_attr "type" "ishift")
16579 (set (attr "length_immediate")
16580 (if_then_else
16581 (and (match_operand 2 "const1_operand")
16582 (ior (match_test "TARGET_SHIFT1")
16583 (match_test "optimize_function_for_size_p (cfun)")))
16584 (const_string "0")
16585 (const_string "*")))
16586 (set_attr "mode" "SI")])
16587
;; Right shift executed only for its effect on FLAGS_REG: the flags are set
;; from comparing the shifted value with zero and the shifted value itself
;; goes to a clobbered scratch (operand 0).  Same flag-related condition as
;; the _cmp pattern, but without an ix86_binary_operator_ok check.
;; Alternative 1 is the apx_ndd form.
16588 (define_insn "*<insn><mode>3_cconly"
16589 [(set (reg FLAGS_REG)
16590 (compare
16591 (any_shiftrt:SWI
16592 (match_operand:SWI 1 "nonimmediate_operand" "0,rm")
16593 (match_operand:QI 2 "<shift_immediate_operand>" "<S>,<S>"))
16594 (const_int 0)))
16595 (clobber (match_scratch:SWI 0 "=<r>,r"))]
16596 "(optimize_function_for_size_p (cfun)
16597 || !TARGET_PARTIAL_FLAG_REG_STALL
16598 || (operands[2] == const1_rtx
16599 && TARGET_SHIFT1))
16600 && ix86_match_ccmode (insn, CCGOCmode)"
16601 {
16602 bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
/* Count-less spelling only for a shift by 1 on the non-NDD alternative.  */
16603 if (operands[2] == const1_rtx
16604 && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
16605 && !use_ndd)
16606 return "<shift>{<imodesuffix>}\t%0";
16607 else
16608 return use_ndd
16609 ? "<shift>{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
16610 : "<shift>{<imodesuffix>}\t{%2, %0|%0, %2}";
16611 }
16612 [(set_attr "isa" "*,apx_ndd")
16613 (set_attr "type" "ishift")
16614 (set (attr "length_immediate")
16615 (if_then_else
16616 (and (match_operand 2 "const1_operand")
16617 (ior (match_test "TARGET_SHIFT1")
16618 (match_test "optimize_function_for_size_p (cfun)")))
16619 (const_string "0")
16620 (const_string "*")))
16621 (set_attr "mode" "<MODE>")])
16622
;; QImode right shift applied to the 8-bit field at bit offset 8 of a
;; register (zero_extract ... 8 8); the output templates print the operand
;; with the %h modifier.
;; Alternative 0 works in place.  Alternative 1 (earlyclobber destination,
;; untied source) outputs "#" and is split after reload, when the operands
;; differ, into a copy of that 8-bit field from operand 1 into operand 0
;; followed by the in-place shift.
16623 ;; Alternative 1 is needed to work around LRA limitation, see PR82524.
16624 (define_insn_and_split "*<insn>qi_ext<mode>_1"
16625 [(set (zero_extract:SWI248
16626 (match_operand 0 "int248_register_operand" "+Q,&Q")
16627 (const_int 8)
16628 (const_int 8))
16629 (subreg:SWI248
16630 (any_shiftrt:QI
16631 (subreg:QI
16632 (match_operator:SWI248 3 "extract_operator"
16633 [(match_operand 1 "int248_register_operand" "0,!Q")
16634 (const_int 8)
16635 (const_int 8)]) 0)
16636 (match_operand:QI 2 "nonmemory_operand" "cI,cI")) 0))
16637 (clobber (reg:CC FLAGS_REG))]
16638 ""
16639 {
16640 if (which_alternative)
16641 return "#";
16642
/* Shift by 1 can drop the count operand under TARGET_SHIFT1 or -Os.  */
16643 if (operands[2] == const1_rtx
16644 && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
16645 return "<shift>{b}\t%h0";
16646 else
16647 return "<shift>{b}\t{%2, %h0|%h0, %2}";
16648 }
16649 "reload_completed
16650 && !(rtx_equal_p (operands[0], operands[1]))"
16651 [(set (zero_extract:SWI248
16652 (match_dup 0) (const_int 8) (const_int 8))
16653 (zero_extract:SWI248
16654 (match_dup 1) (const_int 8) (const_int 8)))
16655 (parallel
16656 [(set (zero_extract:SWI248
16657 (match_dup 0) (const_int 8) (const_int 8))
16658 (subreg:SWI248
16659 (any_shiftrt:QI
16660 (subreg:QI
16661 (match_op_dup 3
16662 [(match_dup 0) (const_int 8) (const_int 8)]) 0)
16663 (match_dup 2)) 0))
16664 (clobber (reg:CC FLAGS_REG))])]
16665 ""
16666 [(set_attr "type" "ishift")
16667 (set (attr "length_immediate")
16668 (if_then_else
16669 (and (match_operand 2 "const1_operand")
16670 (ior (match_test "TARGET_SHIFT1")
16671 (match_test "optimize_function_for_size_p (cfun)")))
16672 (const_string "0")
16673 (const_string "*")))
16674 (set_attr "mode" "QI")])
16675
;; Double-word left shift followed by an arithmetic right shift by the same
;; constant, where that constant is smaller than the bit size of a single
;; word (<MODE_SIZE> * BITS_PER_UNIT).
;; Always output as "#"; after reload it is split into a single-word left
;; shift and a single-word arithmetic right shift, both on operand 4.
;; operands[4] is filled in by split_double_mode; presumably it is the high
;; half of operand 0, as the pattern name suggests -- confirm against
;; split_double_mode.
16676 (define_insn_and_split "*extend<dwi>2_doubleword_highpart"
16677 [(set (match_operand:<DWI> 0 "register_operand" "=r")
16678 (ashiftrt:<DWI>
16679 (ashift:<DWI> (match_operand:<DWI> 1 "nonimmediate_operand" "0")
16680 (match_operand:QI 2 "const_int_operand"))
16681 (match_operand:QI 3 "const_int_operand")))
16682 (clobber (reg:CC FLAGS_REG))]
16683 "INTVAL (operands[2]) == INTVAL (operands[3])
16684 && UINTVAL (operands[2]) < <MODE_SIZE> * BITS_PER_UNIT"
16685 "#"
16686 "&& reload_completed"
16687 [(parallel [(set (match_dup 4)
16688 (ashift:DWIH (match_dup 4) (match_dup 2)))
16689 (clobber (reg:CC FLAGS_REG))])
16690 (parallel [(set (match_dup 4)
16691 (ashiftrt:DWIH (match_dup 4) (match_dup 2)))
16692 (clobber (reg:CC FLAGS_REG))])]
16693 "split_double_mode (<DWI>mode, &operands[0], 1, &operands[0], &operands[4]);")
16694
;; V2DImode counterpart of the pattern above: left shift followed by an
;; arithmetic right shift by the same constant below 32.  Only available
;; for !TARGET_64BIT with TARGET_STV and TARGET_AVX512VL.
;; Always output as "#"; after reload it is split into a V2DI left shift
;; of operand 1 into operand 0 and a V2DI arithmetic right shift of
;; operand 0 in place.
16695 (define_insn_and_split "*extendv2di2_highpart_stv"
16696 [(set (match_operand:V2DI 0 "register_operand" "=v")
16697 (ashiftrt:V2DI
16698 (ashift:V2DI (match_operand:V2DI 1 "nonimmediate_operand" "vm")
16699 (match_operand:QI 2 "const_int_operand"))
16700 (match_operand:QI 3 "const_int_operand")))]
16701 "!TARGET_64BIT && TARGET_STV && TARGET_AVX512VL
16702 && INTVAL (operands[2]) == INTVAL (operands[3])
16703 && UINTVAL (operands[2]) < 32"
16704 "#"
16705 "&& reload_completed"
16706 [(set (match_dup 0)
16707 (ashift:V2DI (match_dup 1) (match_dup 2)))
16708 (set (match_dup 0)
16709 (ashiftrt:V2DI (match_dup 0) (match_dup 2)))])
16710 \f
16711 ;; Rotate instructions
16712
;; TImode rotate expander, available only for TARGET_64BIT.
;; Dispatches on the count (operand 2):
;;  - constant in 1..63: emits ix86_<insn>ti3_doubleword;
;;  - constant 64: forces operand 1 into a register and emits
;;    <insn>64ti2_doubleword;
;;  - anything else: open-codes the rotate on the two DImode halves.
16713 (define_expand "<insn>ti3"
16714 [(set (match_operand:TI 0 "register_operand")
16715 (any_rotate:TI (match_operand:TI 1 "register_operand")
16716 (match_operand:QI 2 "nonmemory_operand")))]
16717 "TARGET_64BIT"
16718 {
16719 if (const_1_to_63_operand (operands[2], VOIDmode))
16720 emit_insn (gen_ix86_<insn>ti3_doubleword
16721 (operands[0], operands[1], operands[2]));
16722 else if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) == 64)
16723 {
16724 operands[1] = force_reg (TImode, operands[1]);
16725 emit_insn (gen_<insn>64ti2_doubleword (operands[0], operands[1]));
16726 }
16727 else
16728 {
/* Copy both source halves into fresh temporaries so each double shift
   below can still read the unmodified opposite source half.  */
16729 rtx amount = force_reg (QImode, operands[2]);
16730 rtx src_lo = gen_lowpart (DImode, operands[1]);
16731 rtx src_hi = gen_highpart (DImode, operands[1]);
16732 rtx tmp_lo = gen_reg_rtx (DImode);
16733 rtx tmp_hi = gen_reg_rtx (DImode);
16734 emit_move_insn (tmp_lo, src_lo);
16735 emit_move_insn (tmp_hi, src_hi);
/* ROTATE uses shld, the other any_rotate code uses shrd.  */
16736 rtx (*shiftd) (rtx, rtx, rtx)
16737 = (<CODE> == ROTATE) ? gen_x86_64_shld : gen_x86_64_shrd;
16738 emit_insn (shiftd (tmp_lo, src_hi, amount));
16739 emit_insn (shiftd (tmp_hi, src_lo, amount));
16740 rtx dst_lo = gen_lowpart (DImode, operands[0]);
16741 rtx dst_hi = gen_highpart (DImode, operands[0]);
16742 emit_move_insn (dst_lo, tmp_lo);
16743 emit_move_insn (dst_hi, tmp_hi);
/* Final adjustment step; presumably handles counts that reach into the
   other word -- confirm against x86_shiftdi_adj_1.  */
16744 emit_insn (gen_x86_shiftdi_adj_1 (dst_lo, dst_hi, amount, tmp_lo));
16745 }
16746 DONE;
16747 })
16748
;; DImode rotate expander.
;;  - TARGET_64BIT: handled by ix86_expand_binary_operator.
;;  - otherwise, constant count in 1..31: emits ix86_<insn>di3_doubleword;
;;  - otherwise, constant count 32: forces operand 1 into a register and
;;    emits <insn>32di2_doubleword;
;;  - any other count: the expander FAILs.
16749 (define_expand "<insn>di3"
16750 [(set (match_operand:DI 0 "shiftdi_operand")
16751 (any_rotate:DI (match_operand:DI 1 "shiftdi_operand")
16752 (match_operand:QI 2 "nonmemory_operand")))]
16753 ""
16754 {
16755 if (TARGET_64BIT)
16756 ix86_expand_binary_operator (<CODE>, DImode, operands, TARGET_APX_NDD);
16757 else if (const_1_to_31_operand (operands[2], VOIDmode))
16758 emit_insn (gen_ix86_<insn>di3_doubleword
16759 (operands[0], operands[1], operands[2]));
16760 else if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) == 32)
16761 {
16762 operands[1] = force_reg (DImode, operands[1]);
16763 emit_insn (gen_<insn>32di2_doubleword (operands[0], operands[1]));
16764 }
16765 else
16766 FAIL;
16767
16768 DONE;
16769 })
16770
;; Rotate expander for the SWIM124 modes; all work is delegated to
;; ix86_expand_binary_operator, passing TARGET_APX_NDD as its last argument.
16771 (define_expand "<insn><mode>3"
16772 [(set (match_operand:SWIM124 0 "nonimmediate_operand")
16773 (any_rotate:SWIM124 (match_operand:SWIM124 1 "nonimmediate_operand")
16774 (match_operand:QI 2 "nonmemory_operand")))]
16775 ""
16776 {
16777 ix86_expand_binary_operator (<CODE>, <MODE>mode, operands, TARGET_APX_NDD);
16778 DONE;
16779 })
16780
16781 ;; Avoid useless masking of count operand.
;; Matches a rotate whose count is the QImode subreg of (and reg const),
;; where the constant keeps every bit of GET_MODE_BITSIZE (<MODE>mode) - 1.
;; Only valid before reload (ix86_pre_reload_split); it is split
;; unconditionally ("&& 1") into a plain rotate whose count is the QImode
;; low part of operand 2, i.e. the AND is dropped.
16782 (define_insn_and_split "*<insn><mode>3_mask"
16783 [(set (match_operand:SWI 0 "nonimmediate_operand")
16784 (any_rotate:SWI
16785 (match_operand:SWI 1 "nonimmediate_operand")
16786 (subreg:QI
16787 (and
16788 (match_operand 2 "int248_register_operand" "c")
16789 (match_operand 3 "const_int_operand")) 0)))
16790 (clobber (reg:CC FLAGS_REG))]
16791 "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
16792 && (INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
16793 == GET_MODE_BITSIZE (<MODE>mode)-1
16794 && ix86_pre_reload_split ()"
16795 "#"
16796 "&& 1"
16797 [(parallel
16798 [(set (match_dup 0)
16799 (any_rotate:SWI (match_dup 1)
16800 (match_dup 2)))
16801 (clobber (reg:CC FLAGS_REG))])]
16802 {
16803 operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
16804 operands[2] = gen_lowpart (QImode, operands[2]);
16805 })
16806
;; Rotate of a constant (operand 1) by a masked count taken as a QImode
;; subreg, with the same "mask keeps all of bitsize - 1" test as above.
;; The constant is first loaded into a new pseudo (operands[4]); the rotate
;; then uses that pseudo and the unmasked count (subreg:QI operand 2).
16807 (define_split
16808 [(set (match_operand:SWI 0 "register_operand")
16809 (any_rotate:SWI
16810 (match_operand:SWI 1 "const_int_operand")
16811 (subreg:QI
16812 (and
16813 (match_operand 2 "int248_register_operand")
16814 (match_operand 3 "const_int_operand")) 0)))]
16815 "(INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode) - 1))
16816 == GET_MODE_BITSIZE (<MODE>mode) - 1"
16817 [(set (match_dup 4) (match_dup 1))
16818 (set (match_dup 0)
16819 (any_rotate:SWI (match_dup 4)
16820 (subreg:QI (match_dup 2) 0)))]
16821 "operands[4] = gen_reg_rtx (<MODE>mode);")
16822
;; Variant of the _mask pattern where the count is already a QImode
;; (and:QI reg const) rather than a subreg of a wider AND.  When the
;; constant keeps every bit of GET_MODE_BITSIZE (<MODE>mode) - 1, the
;; pattern is split before reload into a plain rotate by operand 2.
16823 (define_insn_and_split "*<insn><mode>3_mask_1"
16824 [(set (match_operand:SWI 0 "nonimmediate_operand")
16825 (any_rotate:SWI
16826 (match_operand:SWI 1 "nonimmediate_operand")
16827 (and:QI
16828 (match_operand:QI 2 "register_operand" "c")
16829 (match_operand:QI 3 "const_int_operand"))))
16830 (clobber (reg:CC FLAGS_REG))]
16831 "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
16832 && (INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
16833 == GET_MODE_BITSIZE (<MODE>mode)-1
16834 && ix86_pre_reload_split ()"
16835 "#"
16836 "&& 1"
16837 [(parallel
16838 [(set (match_dup 0)
16839 (any_rotate:SWI (match_dup 1)
16840 (match_dup 2)))
16841 (clobber (reg:CC FLAGS_REG))])])
16842
;; Rotate of a constant (operand 1) by a QImode count masked with and:QI.
;; When the mask keeps every bit of GET_MODE_BITSIZE (<MODE>mode) - 1, the
;; constant is loaded into a new pseudo (operands[4]) and the rotate uses
;; the unmasked count register directly.
16843 (define_split
16844 [(set (match_operand:SWI 0 "register_operand")
16845 (any_rotate:SWI
16846 (match_operand:SWI 1 "const_int_operand")
16847 (and:QI
16848 (match_operand:QI 2 "register_operand")
16849 (match_operand:QI 3 "const_int_operand"))))]
16850 "(INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode) - 1))
16851 == GET_MODE_BITSIZE (<MODE>mode) - 1"
16852 [(set (match_dup 4) (match_dup 1))
16853 (set (match_dup 0)
16854 (any_rotate:SWI (match_dup 4) (match_dup 2)))]
16855 "operands[4] = gen_reg_rtx (<MODE>mode);")
16856
16857 ;; Implement rotation using two double-precision
16858 ;; shift instructions and a scratch register.
;; Left-rotate form.  Always output as "#" and split after reload:
;;  1. the scratch (operand 3) receives a copy of operand 4;
;;  2. operand 4 becomes (op4 << (count & op6)) IOR-ed with the low word of
;;     (zero_extend op5) >> (op7 - (count & op6));
;;  3. operand 5 is updated the same way, taking the saved copy in operand 3
;;     as the other word.
;; The preparation code sets operands[6] to the word bit size minus 1 and
;; operands[7] to the word bit size, and obtains operands[4] and operands[5]
;; from split_double_mode applied to operand 0.
16859
16860 (define_insn_and_split "ix86_rotl<dwi>3_doubleword"
16861 [(set (match_operand:<DWI> 0 "register_operand" "=r")
16862 (rotate:<DWI> (match_operand:<DWI> 1 "register_operand" "0")
16863 (match_operand:QI 2 "<shift_immediate_operand>" "<S>")))
16864 (clobber (reg:CC FLAGS_REG))
16865 (clobber (match_scratch:DWIH 3 "=&r"))]
16866 ""
16867 "#"
16868 "reload_completed"
16869 [(set (match_dup 3) (match_dup 4))
16870 (parallel
16871 [(set (match_dup 4)
16872 (ior:DWIH (ashift:DWIH (match_dup 4)
16873 (and:QI (match_dup 2) (match_dup 6)))
16874 (subreg:DWIH
16875 (lshiftrt:<DWI> (zero_extend:<DWI> (match_dup 5))
16876 (minus:QI (match_dup 7)
16877 (and:QI (match_dup 2)
16878 (match_dup 6)))) 0)))
16879 (clobber (reg:CC FLAGS_REG))])
16880 (parallel
16881 [(set (match_dup 5)
16882 (ior:DWIH (ashift:DWIH (match_dup 5)
16883 (and:QI (match_dup 2) (match_dup 6)))
16884 (subreg:DWIH
16885 (lshiftrt:<DWI> (zero_extend:<DWI> (match_dup 3))
16886 (minus:QI (match_dup 7)
16887 (and:QI (match_dup 2)
16888 (match_dup 6)))) 0)))
16889 (clobber (reg:CC FLAGS_REG))])]
16890 {
16891 operands[6] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode) - 1);
16892 operands[7] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode));
16893
16894 split_double_mode (<DWI>mode, &operands[0], 1, &operands[4], &operands[5]);
16895 })
16896
;; Double-word right rotate by an immediate count, using a scratch register.
;; Always output as "#" and split after reload:
;;  1. the scratch (operand 3) receives a copy of operand 4;
;;  2. operand 4 becomes (op4 >> (count & op6)), logical, IOR-ed with the
;;     low word of (zero_extend op5) << (op7 - (count & op6));
;;  3. operand 5 is updated the same way, taking the saved copy in operand 3
;;     as the other word.
;; The preparation code sets operands[6] to the word bit size minus 1 and
;; operands[7] to the word bit size, and obtains operands[4] and operands[5]
;; from split_double_mode applied to operand 0.
16897 (define_insn_and_split "ix86_rotr<dwi>3_doubleword"
16898 [(set (match_operand:<DWI> 0 "register_operand" "=r")
16899 (rotatert:<DWI> (match_operand:<DWI> 1 "register_operand" "0")
16900 (match_operand:QI 2 "<shift_immediate_operand>" "<S>")))
16901 (clobber (reg:CC FLAGS_REG))
16902 (clobber (match_scratch:DWIH 3 "=&r"))]
16903 ""
16904 "#"
16905 "reload_completed"
16906 [(set (match_dup 3) (match_dup 4))
16907 (parallel
16908 [(set (match_dup 4)
16909 (ior:DWIH (lshiftrt:DWIH (match_dup 4)
16910 (and:QI (match_dup 2) (match_dup 6)))
16911 (subreg:DWIH
16912 (ashift:<DWI> (zero_extend:<DWI> (match_dup 5))
16913 (minus:QI (match_dup 7)
16914 (and:QI (match_dup 2)
16915 (match_dup 6)))) 0)))
16916 (clobber (reg:CC FLAGS_REG))])
16917 (parallel
16918 [(set (match_dup 5)
16919 (ior:DWIH (lshiftrt:DWIH (match_dup 5)
16920 (and:QI (match_dup 2) (match_dup 6)))
16921 (subreg:DWIH
16922 (ashift:<DWI> (zero_extend:<DWI> (match_dup 3))
16923 (minus:QI (match_dup 7)
16924 (and:QI (match_dup 2)
16925 (match_dup 6)))) 0)))
16926 (clobber (reg:CC FLAGS_REG))])]
16927 {
16928 operands[6] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode) - 1);
16929 operands[7] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode));
16930
16931 split_double_mode (<DWI>mode, &operands[0], 1, &operands[4], &operands[5]);
16932 })
16933
;; DImode rotate by exactly 32 on !TARGET_64BIT.  No flags clobber.
;; Always output as "#"; after reload split_double_mode splits operands 0
;; and 1 into SImode parts stored in operands[0..1] and operands[2..3]
;; (presumably low and high halves respectively -- confirm), and the split
;; emits the two cross-wise moves op0 <- op3 and op2 <- op1.
;; If the first parts of destination and source are the same rtx, a single
;; swapsi of operands[0] and operands[2] is emitted instead.
16934 (define_insn_and_split "<insn>32di2_doubleword"
16935 [(set (match_operand:DI 0 "register_operand" "=r,r")
16936 (any_rotate:DI (match_operand:DI 1 "register_operand" "0,r")
16937 (const_int 32)))]
16938 "!TARGET_64BIT"
16939 "#"
16940 "&& reload_completed"
16941 [(set (match_dup 0) (match_dup 3))
16942 (set (match_dup 2) (match_dup 1))]
16943 {
16944 split_double_mode (DImode, &operands[0], 2, &operands[0], &operands[2]);
/* In-place rotate: the two moves would overwrite each other, so swap.  */
16945 if (rtx_equal_p (operands[0], operands[1]))
16946 {
16947 emit_insn (gen_swapsi (operands[0], operands[2]));
16948 DONE;
16949 }
16950 })
16951
;; TImode rotate by exactly 64 on TARGET_64BIT.  No flags clobber.
;; Always output as "#"; after reload split_double_mode splits operands 0
;; and 1 into DImode parts stored in operands[0..1] and operands[2..3]
;; (presumably low and high halves respectively -- confirm), and the split
;; emits the two cross-wise moves op0 <- op3 and op2 <- op1.
;; If the first parts of destination and source are the same rtx, a single
;; swapdi of operands[0] and operands[2] is emitted instead.
16952 (define_insn_and_split "<insn>64ti2_doubleword"
16953 [(set (match_operand:TI 0 "register_operand" "=r,r")
16954 (any_rotate:TI (match_operand:TI 1 "register_operand" "0,r")
16955 (const_int 64)))]
16956 "TARGET_64BIT"
16957 "#"
16958 "&& reload_completed"
16959 [(set (match_dup 0) (match_dup 3))
16960 (set (match_dup 2) (match_dup 1))]
16961 {
16962 split_double_mode (TImode, &operands[0], 2, &operands[0], &operands[2]);
/* In-place rotate: the two moves would overwrite each other, so swap.  */
16963 if (rtx_equal_p (operands[0], operands[1]))
16964 {
16965 emit_insn (gen_swapdi (operands[0], operands[2]));
16966 DONE;
16967 }
16968 })
16969
;; Predicate name for the rorx count, per mode: 0..31 for SI, 0..63 for DI.
16970 (define_mode_attr rorx_immediate_operand
16971 [(SI "const_0_to_31_operand")
16972 (DI "const_0_to_63_operand")])
16973
;; BMI2 rorx: right rotate of a register or memory source by an immediate
;; count into a separate destination register.  The pattern has no
;; FLAGS_REG clobber.  Not used when optimizing for size.
16974 (define_insn "*bmi2_rorx<mode>3_1"
16975 [(set (match_operand:SWI48 0 "register_operand" "=r")
16976 (rotatert:SWI48
16977 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
16978 (match_operand:QI 2 "<rorx_immediate_operand>" "<S>")))]
16979 "TARGET_BMI2 && !optimize_function_for_size_p (cfun)"
16980 "rorx\t{%2, %1, %0|%0, %1, %2}"
16981 [(set_attr "type" "rotatex")
16982 (set_attr "mode" "<MODE>")])
16983
;; SImode/DImode rotate (either direction) that clobbers FLAGS_REG.
;; Alternative 0 rotates in place.  Alternative 1 (isa bmi2, type rotatex)
;; takes only an immediate count and outputs "#".  Alternative 2 is the
;; three-operand form enabled by the apx_ndd isa.
;; preferred_for_size is true only for alternative 0.
16984 (define_insn "*<insn><mode>3_1"
16985 [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r")
16986 (any_rotate:SWI48
16987 (match_operand:SWI48 1 "nonimmediate_operand" "0,rm,rm")
16988 (match_operand:QI 2 "nonmemory_operand" "c<S>,<S>,c<S>")))
16989 (clobber (reg:CC FLAGS_REG))]
16990 "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands, TARGET_APX_NDD)"
16991 {
16992 bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
16993 switch (get_attr_type (insn))
16994 {
16995 case TYPE_ROTATEX:
16996 return "#";
16997
16998 default:
/* Count-less spelling only for a rotate by 1 on the non-NDD alternative,
   under TARGET_SHIFT1 or when optimizing for size.  */
16999 if (operands[2] == const1_rtx
17000 && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
17001 && !use_ndd)
17002 return "<rotate>{<imodesuffix>}\t%0";
17003 else
17004 return use_ndd ? "<rotate>{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
17005 : "<rotate>{<imodesuffix>}\t{%2, %0|%0, %2}";
17006 }
17007 }
17008 [(set_attr "isa" "*,bmi2,apx_ndd")
17009 (set_attr "type" "rotate,rotatex,rotate")
17010 (set (attr "preferred_for_size")
17011 (cond [(eq_attr "alternative" "0")
17012 (symbol_ref "true")]
17013 (symbol_ref "false")))
17014 (set (attr "length_immediate")
17015 (if_then_else
17016 (and (eq_attr "type" "rotate")
17017 (and (match_operand 2 "const1_operand")
17018 (ior (match_test "TARGET_SHIFT1")
17019 (match_test "optimize_function_for_size_p (cfun)"))))
17020 (const_string "0")
17021 (const_string "*")))
17022 (set_attr "mode" "<MODE>")])
17023
17024 ;; Convert rotate to the rotatex pattern to avoid flags dependency.
;; After reload, with TARGET_BMI2 and not optimizing for size, a left rotate
;; by a constant becomes a right rotate by (bitsize - count) % bitsize, and
;; the FLAGS_REG clobber is dropped.
17025 (define_split
17026 [(set (match_operand:SWI48 0 "register_operand")
17027 (rotate:SWI48 (match_operand:SWI48 1 "nonimmediate_operand")
17028 (match_operand:QI 2 "const_int_operand")))
17029 (clobber (reg:CC FLAGS_REG))]
17030 "TARGET_BMI2 && reload_completed && !optimize_function_for_size_p (cfun)"
17031 [(set (match_dup 0)
17032 (rotatert:SWI48 (match_dup 1) (match_dup 2)))]
17033 {
17034 int bitsize = GET_MODE_BITSIZE (<MODE>mode);
17035
17036 operands[2] = GEN_INT ((bitsize - INTVAL (operands[2])) % bitsize);
17037 })
17038
;; After reload, with TARGET_BMI2 and not optimizing for size, a right
;; rotate by a constant is re-emitted unchanged except that the FLAGS_REG
;; clobber is dropped.
17039 (define_split
17040 [(set (match_operand:SWI48 0 "register_operand")
17041 (rotatert:SWI48 (match_operand:SWI48 1 "nonimmediate_operand")
17042 (match_operand:QI 2 "const_int_operand")))
17043 (clobber (reg:CC FLAGS_REG))]
17044 "TARGET_BMI2 && reload_completed && !optimize_function_for_size_p (cfun)"
17045 [(set (match_dup 0)
17046 (rotatert:SWI48 (match_dup 1) (match_dup 2)))])
17047
;; 64-bit only: BMI2 rorx of an SImode source by a constant in 0..31 with
;; the result zero-extended into a DImode register.  The template prints
;; operand 0 with the %k modifier.  No FLAGS_REG clobber; not used when
;; optimizing for size.
17048 (define_insn "*bmi2_rorxsi3_1_zext"
17049 [(set (match_operand:DI 0 "register_operand" "=r")
17050 (zero_extend:DI
17051 (rotatert:SI (match_operand:SI 1 "nonimmediate_operand" "rm")
17052 (match_operand:QI 2 "const_0_to_31_operand"))))]
17053 "TARGET_64BIT && TARGET_BMI2 && !optimize_function_for_size_p (cfun)"
17054 "rorx\t{%2, %1, %k0|%k0, %1, %2}"
17055 [(set_attr "type" "rotatex")
17056 (set_attr "mode" "SI")])
17057
;; 64-bit only: SImode rotate (either direction) with the result
;; zero-extended into a DImode register; clobbers FLAGS_REG.
;; Alternative 0 rotates in place.  Alternative 1 (isa bmi2, type rotatex)
;; takes only an immediate count and outputs "#".  Alternative 2 is the
;; three-operand form enabled by the apx_ndd isa.
;; The condition passes TARGET_APX_NDD to ix86_binary_operator_ok, as the
;; other patterns with an apx_ndd alternative do, so that operand 1 need
;; not match the destination when the NDD alternative is available.
17058 (define_insn "*<insn>si3_1_zext"
17059 [(set (match_operand:DI 0 "register_operand" "=r,r,r")
17060 (zero_extend:DI
17061 (any_rotate:SI (match_operand:SI 1 "nonimmediate_operand" "0,rm,rm")
17062 (match_operand:QI 2 "nonmemory_operand" "cI,I,cI"))))
17063 (clobber (reg:CC FLAGS_REG))]
17064 "TARGET_64BIT && ix86_binary_operator_ok (<CODE>, SImode, operands, TARGET_APX_NDD)"
17065 {
17066 bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
17067 switch (get_attr_type (insn))
17068 {
17069 case TYPE_ROTATEX:
17070 return "#";
17071
17072 default:
/* Count-less spelling only for a rotate by 1 on the non-NDD alternative,
   under TARGET_SHIFT1 or when optimizing for size.  */
17073 if (operands[2] == const1_rtx
17074 && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
17075 && !use_ndd)
17076 return "<rotate>{l}\t%k0";
17077 else
17078 return use_ndd ? "<rotate>{l}\t{%2, %1, %k0|%k0, %1, %2}"
17079 : "<rotate>{l}\t{%2, %k0|%k0, %2}";
17080 }
17081 }
17082 [(set_attr "isa" "*,bmi2,apx_ndd")
17083 (set_attr "type" "rotate,rotatex,rotate")
17084 (set (attr "preferred_for_size")
17085 (cond [(eq_attr "alternative" "0")
17086 (symbol_ref "true")]
17087 (symbol_ref "false")))
17088 (set (attr "length_immediate")
17089 (if_then_else
17090 (and (eq_attr "type" "rotate")
17091 (and (match_operand 2 "const1_operand")
17092 (ior (match_test "TARGET_SHIFT1")
17093 (match_test "optimize_function_for_size_p (cfun)"))))
17094 (const_string "0")
17095 (const_string "*")))
17096 (set_attr "mode" "SI")])
17097
17098 ;; Convert rotate to the rotatex pattern to avoid flags dependency.
;; Zero-extending variant: after reload, on TARGET_64BIT with TARGET_BMI2
;; and not optimizing for size, a left SImode rotate by a constant becomes
;; a right rotate by (32 - count) % 32 and the FLAGS_REG clobber is dropped.
17099 (define_split
17100 [(set (match_operand:DI 0 "register_operand")
17101 (zero_extend:DI
17102 (rotate:SI (match_operand:SI 1 "nonimmediate_operand")
17103 (match_operand:QI 2 "const_int_operand"))))
17104 (clobber (reg:CC FLAGS_REG))]
17105 "TARGET_64BIT && TARGET_BMI2 && reload_completed
17106 && !optimize_function_for_size_p (cfun)"
17107 [(set (match_dup 0)
17108 (zero_extend:DI (rotatert:SI (match_dup 1) (match_dup 2))))]
17109 {
17110 int bitsize = GET_MODE_BITSIZE (SImode);
17111
17112 operands[2] = GEN_INT ((bitsize - INTVAL (operands[2])) % bitsize);
17113 })
17114
;; Zero-extending variant: after reload, on TARGET_64BIT with TARGET_BMI2
;; and not optimizing for size, a right SImode rotate by a constant is
;; re-emitted unchanged except that the FLAGS_REG clobber is dropped.
17115 (define_split
17116 [(set (match_operand:DI 0 "register_operand")
17117 (zero_extend:DI
17118 (rotatert:SI (match_operand:SI 1 "nonimmediate_operand")
17119 (match_operand:QI 2 "const_int_operand"))))
17120 (clobber (reg:CC FLAGS_REG))]
17121 "TARGET_64BIT && TARGET_BMI2 && reload_completed
17122 && !optimize_function_for_size_p (cfun)"
17123 [(set (match_dup 0)
17124 (zero_extend:DI (rotatert:SI (match_dup 1) (match_dup 2))))])
17125
;; QImode/HImode rotate (either direction) that clobbers FLAGS_REG.
;; Alternative 0 rotates in place; alternative 1 is the three-operand form
;; enabled by the apx_ndd isa.
17126 (define_insn "*<insn><mode>3_1"
17127 [(set (match_operand:SWI12 0 "nonimmediate_operand" "=<r>m,r")
17128 (any_rotate:SWI12 (match_operand:SWI12 1 "nonimmediate_operand" "0,rm")
17129 (match_operand:QI 2 "nonmemory_operand" "c<S>,c<S>")))
17130 (clobber (reg:CC FLAGS_REG))]
17131 "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands, TARGET_APX_NDD)"
17132 {
17133 bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
/* Count-less spelling only for a rotate by 1 on the non-NDD alternative,
   under TARGET_SHIFT1 or when optimizing for size.  */
17134 if (operands[2] == const1_rtx
17135 && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
17136 && !use_ndd)
17137 return "<rotate>{<imodesuffix>}\t%0";
17138 else
17139 return use_ndd
17140 ? "<rotate>{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
17141 : "<rotate>{<imodesuffix>}\t{%2, %0|%0, %2}";
17142 }
17143 [(set_attr "isa" "*,apx_ndd")
17144 (set_attr "type" "rotate")
17145 (set (attr "length_immediate")
17146 (if_then_else
17147 (and (match_operand 2 "const1_operand")
17148 (ior (match_test "TARGET_SHIFT1")
17149 (match_test "optimize_function_for_size_p (cfun)")))
17150 (const_string "0")
17151 (const_string "*")))
17152 (set_attr "mode" "<MODE>")])
17153
;; Rotate (either direction) of an SWI12 register written through
;; strict_low_part.  Enabled unless TARGET_PARTIAL_REG_STALL is set, or
;; whenever the function is optimized for size.
;; Alternative 0 rotates in place.  Alternative 1 has an earlyclobber
;; destination and an untied source; it outputs "#" and is split after
;; reload, when the two operands differ, into a strict_low_part copy of
;; operand 1 followed by the in-place rotate.
17154 ;; Alternative 1 is needed to work around LRA limitation, see PR82524.
17155 (define_insn_and_split "*<insn><mode>3_1_slp"
17156 [(set (strict_low_part (match_operand:SWI12 0 "register_operand" "+<r>,&<r>"))
17157 (any_rotate:SWI12 (match_operand:SWI12 1 "register_operand" "0,!<r>")
17158 (match_operand:QI 2 "nonmemory_operand" "cI,cI")))
17159 (clobber (reg:CC FLAGS_REG))]
17160 "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
17161 {
17162 if (which_alternative)
17163 return "#";
17164
/* Rotate by 1 can drop the count operand under TARGET_SHIFT1 or -Os.  */
17165 if (operands[2] == const1_rtx
17166 && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
17167 return "<rotate>{<imodesuffix>}\t%0";
17168 else
17169 return "<rotate>{<imodesuffix>}\t{%2, %0|%0, %2}";
17170 }
17171 "&& reload_completed
17172 && !(rtx_equal_p (operands[0], operands[1]))"
17173 [(set (strict_low_part (match_dup 0)) (match_dup 1))
17174 (parallel
17175 [(set (strict_low_part (match_dup 0))
17176 (any_rotate:SWI12 (match_dup 0) (match_dup 2)))
17177 (clobber (reg:CC FLAGS_REG))])]
17178 ""
17179 [(set_attr "type" "rotate")
17180 (set (attr "length_immediate")
17181 (if_then_else
17182 (and (match_operand 2 "const1_operand")
17183 (ior (match_test "TARGET_SHIFT1")
17184 (match_test "optimize_function_for_size_p (cfun)")))
17185 (const_string "0")
17186 (const_string "*")))
17187 (set_attr "mode" "<MODE>")])
17188
;; After reload, an in-place HImode rotate by 8 (either direction) of a
;; QIreg_operand is rewritten as a strict_low_part bswap:HI of the same
;; register, when TARGET_USE_XCHGB is set or when optimizing for size.
17189 (define_split
17190 [(set (match_operand:HI 0 "QIreg_operand")
17191 (any_rotate:HI (match_dup 0) (const_int 8)))
17192 (clobber (reg:CC FLAGS_REG))]
17193 "reload_completed
17194 && (TARGET_USE_XCHGB || optimize_function_for_size_p (cfun))"
17195 [(parallel [(set (strict_low_part (match_dup 0))
17196 (bswap:HI (match_dup 0)))
17197 (clobber (reg:CC FLAGS_REG))])])
17198
17199 ;; Rotations through carry flag
;; SImode rotate-right-through-carry by one: the result is operand 1
;; logically shifted right by 1 plus the carry (ltu on the CCC flags)
;; shifted into bit 31.  Alternative 0 works in place; alternative 1 is the
;; two-operand form enabled by the apx_ndd isa.
;; NOTE(review): "memory" is fixed to "none" although alternative 1 accepts
;; a memory source ("rm"), and rcrdi2 does not set it -- confirm intended.
17200 (define_insn "rcrsi2"
17201 [(set (match_operand:SI 0 "register_operand" "=r,r")
17202 (plus:SI
17203 (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0,rm")
17204 (const_int 1))
17205 (ashift:SI (ltu:SI (reg:CCC FLAGS_REG) (const_int 0))
17206 (const_int 31))))
17207 (clobber (reg:CC FLAGS_REG))]
17208 ""
17209 "@
17210 rcr{l}\t%0
17211 rcr{l}\t{%1, %0|%0, %1}"
17212 [(set_attr "isa" "*,apx_ndd")
17213 (set_attr "type" "ishift1")
17214 (set_attr "memory" "none")
17215 (set_attr "length_immediate" "0")
17216 (set_attr "mode" "SI")])
17217
;; DImode rotate-right-through-carry by one, TARGET_64BIT only: the result
;; is operand 1 logically shifted right by 1 plus the carry (ltu on the CCC
;; flags) shifted into bit 63.  Alternative 0 works in place; alternative 1
;; is the two-operand form enabled by the apx_ndd isa.
17218 (define_insn "rcrdi2"
17219 [(set (match_operand:DI 0 "register_operand" "=r,r")
17220 (plus:DI
17221 (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0,rm")
17222 (const_int 1))
17223 (ashift:DI (ltu:DI (reg:CCC FLAGS_REG) (const_int 0))
17224 (const_int 63))))
17225 (clobber (reg:CC FLAGS_REG))]
17226 "TARGET_64BIT"
17227 "@
17228 rcr{q}\t%0
17229 rcr{q}\t{%1, %0|%0, %1}"
17230 [(set_attr "isa" "*,apx_ndd")
17231 (set_attr "type" "ishift1")
17232 (set_attr "length_immediate" "0")
17233 (set_attr "mode" "DI")])
17234
17235 ;; Versions of sar and shr that set the carry flag.
;; Right shift by exactly 1 of an SImode/DImode value.  The CCC flags are
;; described as an UNSPEC_CC_NE of (operand 1 & 1) against zero, i.e. they
;; depend on the bit that is shifted out.  Alternative 1 is the apx_ndd
;; three-operand form.
17236 (define_insn "<insn><mode>3_carry"
17237 [(set (reg:CCC FLAGS_REG)
17238 (unspec:CCC [(and:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "0,rm")
17239 (const_int 1))
17240 (const_int 0)] UNSPEC_CC_NE))
17241 (set (match_operand:SWI48 0 "register_operand" "=r,r")
17242 (any_shiftrt:SWI48 (match_dup 1) (const_int 1)))]
17243 ""
17244 {
17245 bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
/* The count is always 1, so only the target flags and the alternative
   select between the count-less and the explicit "$1" spelling.  */
17246 if ((TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
17247 && !use_ndd)
17248 return "<shift>{<imodesuffix>}\t%0";
17249 return use_ndd ? "<shift>{<imodesuffix>}\t{$1, %1, %0|%0, %1, 1}"
17250 : "<shift>{<imodesuffix>}\t{$1, %0|%0, 1}";
17251 }
17252 [(set_attr "isa" "*, apx_ndd")
17253 (set_attr "type" "ishift1")
17254 (set (attr "length_immediate")
17255 (if_then_else
17256 (ior (match_test "TARGET_SHIFT1")
17257 (match_test "optimize_function_for_size_p (cfun)"))
17258 (const_string "0")
17259 (const_string "*")))
17260 (set_attr "mode" "<MODE>")])
17261 \f
17262 ;; Bit set / bit test instructions
17263
17264 ;; %%% bts, btr, btc
17265
17266 ;; These instructions are *slow* when applied to memory.
17267
;; Mnemonic for the bit instruction matching an rtx code: ior -> bts,
;; xor -> btc.
17268 (define_code_attr btsc [(ior "bts") (xor "btc")])
17269
;; Register-only bts/btc: combines (1 << operand 2) into operand 1 with the
;; any_or code, in place, clobbering FLAGS_REG.  The mnemonic comes from the
;; btsc code attribute.  Enabled by TARGET_USE_BT.  The bit-number register
;; is printed with the %<k> modifier.
17270 (define_insn "*<btsc><mode>"
17271 [(set (match_operand:SWI48 0 "register_operand" "=r")
17272 (any_or:SWI48
17273 (ashift:SWI48 (const_int 1)
17274 (match_operand:QI 2 "register_operand" "r"))
17275 (match_operand:SWI48 1 "register_operand" "0")))
17276 (clobber (reg:CC FLAGS_REG))]
17277 "TARGET_USE_BT"
17278 "<btsc>{<imodesuffix>}\t{%<k>2, %0|%0, %<k>2}"
17279 [(set_attr "type" "alu1")
17280 (set_attr "prefix_0f" "1")
17281 (set_attr "znver1_decode" "double")
17282 (set_attr "mode" "<MODE>")])
17283
17284 ;; Avoid useless masking of count operand.
;; bts/btc form whose bit number is the QImode subreg of (and reg const),
;; where the constant keeps every bit of GET_MODE_BITSIZE (<MODE>mode) - 1.
;; Only valid before reload (ix86_pre_reload_split); it is split
;; unconditionally ("&& 1") into the plain form using the QImode low part
;; of operand 1 as the bit number, i.e. the AND is dropped.
17285 (define_insn_and_split "*<btsc><mode>_mask"
17286 [(set (match_operand:SWI48 0 "register_operand")
17287 (any_or:SWI48
17288 (ashift:SWI48
17289 (const_int 1)
17290 (subreg:QI
17291 (and
17292 (match_operand 1 "int248_register_operand")
17293 (match_operand 2 "const_int_operand")) 0))
17294 (match_operand:SWI48 3 "register_operand")))
17295 (clobber (reg:CC FLAGS_REG))]
17296 "TARGET_USE_BT
17297 && (INTVAL (operands[2]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
17298 == GET_MODE_BITSIZE (<MODE>mode)-1
17299 && ix86_pre_reload_split ()"
17300 "#"
17301 "&& 1"
17302 [(parallel
17303 [(set (match_dup 0)
17304 (any_or:SWI48
17305 (ashift:SWI48 (const_int 1)
17306 (match_dup 1))
17307 (match_dup 3)))
17308 (clobber (reg:CC FLAGS_REG))])]
17309 {
17310 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
17311 operands[1] = gen_lowpart (QImode, operands[1]);
17312 })
17313
;; Variant of the bts/btc _mask pattern where the bit number is already a
;; QImode (and:QI reg const).  When the constant keeps every bit of
;; GET_MODE_BITSIZE (<MODE>mode) - 1, the pattern is split before reload
;; into the plain form using operand 1 directly as the bit number.
17314 (define_insn_and_split "*<btsc><mode>_mask_1"
17315 [(set (match_operand:SWI48 0 "register_operand")
17316 (any_or:SWI48
17317 (ashift:SWI48
17318 (const_int 1)
17319 (and:QI
17320 (match_operand:QI 1 "register_operand")
17321 (match_operand:QI 2 "const_int_operand")))
17322 (match_operand:SWI48 3 "register_operand")))
17323 (clobber (reg:CC FLAGS_REG))]
17324 "TARGET_USE_BT
17325 && (INTVAL (operands[2]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
17326 == GET_MODE_BITSIZE (<MODE>mode)-1
17327 && ix86_pre_reload_split ()"
17328 "#"
17329 "&& 1"
17330 [(parallel
17331 [(set (match_dup 0)
17332 (any_or:SWI48
17333 (ashift:SWI48 (const_int 1)
17334 (match_dup 1))
17335 (match_dup 3)))
17336 (clobber (reg:CC FLAGS_REG))])])
17337
;; Bit reset with a variable bit number: operand 1 ANDed with the constant
;; -2 rotated by QImode operand 2 (a mask with a single zero bit).  Emits
;; "btr"; operand 0 is tied to operand 1 (constraint "0") and the flags
;; register is clobbered.  Only enabled under TARGET_USE_BT.
17338 (define_insn "*btr<mode>"
17339 [(set (match_operand:SWI48 0 "register_operand" "=r")
17340 (and:SWI48
17341 (rotate:SWI48 (const_int -2)
17342 (match_operand:QI 2 "register_operand" "r"))
17343 (match_operand:SWI48 1 "register_operand" "0")))
17344 (clobber (reg:CC FLAGS_REG))]
17345 "TARGET_USE_BT"
17346 "btr{<imodesuffix>}\t{%<k>2, %0|%0, %<k>2}"
17347 [(set_attr "type" "alu1")
17348 (set_attr "prefix_0f" "1")
17349 (set_attr "znver1_decode" "double")
17350 (set_attr "mode" "<MODE>")])
17351
17352 ;; Avoid useless masking of count operand.
;; Pre-reload pattern: the and/rotate(-2) bit-reset form whose rotate count
;; is a QImode subreg of (operand 1 AND constant operand 2).  The insn
;; condition requires the mask to keep all low count bits, so the split
;; removes the AND and uses the QImode low part of operand 1 as the count.
17353 (define_insn_and_split "*btr<mode>_mask"
17354 [(set (match_operand:SWI48 0 "register_operand")
17355 (and:SWI48
17356 (rotate:SWI48
17357 (const_int -2)
17358 (subreg:QI
17359 (and
17360 (match_operand 1 "int248_register_operand")
17361 (match_operand 2 "const_int_operand")) 0))
17362 (match_operand:SWI48 3 "register_operand")))
17363 (clobber (reg:CC FLAGS_REG))]
17364 "TARGET_USE_BT
17365 && (INTVAL (operands[2]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
17366 == GET_MODE_BITSIZE (<MODE>mode)-1
17367 && ix86_pre_reload_split ()"
17368 "#"
17369 "&& 1"
;; Replacement: the same and/rotate form with the unmasked QImode count.
17370 [(parallel
17371 [(set (match_dup 0)
17372 (and:SWI48
17373 (rotate:SWI48 (const_int -2)
17374 (match_dup 1))
17375 (match_dup 3)))
17376 (clobber (reg:CC FLAGS_REG))])]
17377 {
17378 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
17379 operands[1] = gen_lowpart (QImode, operands[1]);
17380 })
17381
;; Variant of the masked bit-reset pattern where the rotate count is
;; (and:QI operand 1, constant operand 2) directly in QImode.  Under the
;; same mask condition the split drops the AND and rotates by operand 1.
17382 (define_insn_and_split "*btr<mode>_mask_1"
17383 [(set (match_operand:SWI48 0 "register_operand")
17384 (and:SWI48
17385 (rotate:SWI48
17386 (const_int -2)
17387 (and:QI
17388 (match_operand:QI 1 "register_operand")
17389 (match_operand:QI 2 "const_int_operand")))
17390 (match_operand:SWI48 3 "register_operand")))
17391 (clobber (reg:CC FLAGS_REG))]
17392 "TARGET_USE_BT
17393 && (INTVAL (operands[2]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
17394 == GET_MODE_BITSIZE (<MODE>mode)-1
17395 && ix86_pre_reload_split ()"
17396 "#"
17397 "&& 1"
17398 [(parallel
17399 [(set (match_dup 0)
17400 (and:SWI48
17401 (rotate:SWI48 (const_int -2)
17402 (match_dup 1))
17403 (match_dup 3)))
17404 (clobber (reg:CC FLAGS_REG))])])
17405
;; Bit reset on an SWI12-mode value: the SImode rotate of -2 by operand 2
;; is narrowed with a subreg and ANDed with operand 1.  The split performs
;; the whole AND in SImode instead, using SImode lowpart subregs of
;; operand 0 and of operand 1 (operand 1 is first forced into a register,
;; since the predicate also accepts memory).
17406 (define_insn_and_split "*btr<mode>_1"
17407 [(set (match_operand:SWI12 0 "register_operand")
17408 (and:SWI12
17409 (subreg:SWI12
17410 (rotate:SI (const_int -2)
17411 (match_operand:QI 2 "register_operand")) 0)
17412 (match_operand:SWI12 1 "nonimmediate_operand")))
17413 (clobber (reg:CC FLAGS_REG))]
17414 "TARGET_USE_BT && ix86_pre_reload_split ()"
17415 "#"
17416 "&& 1"
17417 [(parallel
17418 [(set (match_dup 0)
17419 (and:SI (rotate:SI (const_int -2) (match_dup 2))
17420 (match_dup 1)))
17421 (clobber (reg:CC FLAGS_REG))])]
17422 {
17423 operands[0] = lowpart_subreg (SImode, operands[0], <MODE>mode);
17424 operands[1] = force_reg (<MODE>mode, operands[1]);
17425 operands[1] = lowpart_subreg (SImode, operands[1], <MODE>mode);
17426 })
17427
;; Clear a single bit (zero_extract of width 1 at position operand 1) of
;; an SWI12-mode operand 0.  The split condition adds MEM_P (operands[0]),
;; so this split only handles the memory case: load the value into a new
;; pseudo (operands[2]), do the SImode and/rotate(-2) bit reset into a
;; second new pseudo (operands[5], accessed through SImode subreg
;; operands[3]), then store operands[5] back to memory.
17428 (define_insn_and_split "*btr<mode>_2"
17429 [(set (zero_extract:HI
17430 (match_operand:SWI12 0 "nonimmediate_operand")
17431 (const_int 1)
17432 (match_operand:QI 1 "register_operand"))
17433 (const_int 0))
17434 (clobber (reg:CC FLAGS_REG))]
17435 "TARGET_USE_BT && ix86_pre_reload_split ()"
17436 "#"
17437 "&& MEM_P (operands[0])"
17438 [(set (match_dup 2) (match_dup 0))
17439 (parallel
17440 [(set (match_dup 3)
17441 (and:SI (rotate:SI (const_int -2) (match_dup 1))
17442 (match_dup 4)))
17443 (clobber (reg:CC FLAGS_REG))])
17444 (set (match_dup 0) (match_dup 5))]
17445 {
17446 operands[2] = gen_reg_rtx (<MODE>mode);
17447 operands[5] = gen_reg_rtx (<MODE>mode);
17448 operands[3] = lowpart_subreg (SImode, operands[5], <MODE>mode);
17449 operands[4] = lowpart_subreg (SImode, operands[2], <MODE>mode);
17450 })
17451
;; Register-only split for the single-bit clear (zero_extract set to 0)
;; form: rewrite it as an SImode AND of the register's SImode lowpart with
;; -2 rotated by operand 1.  operands[2] (source) and operands[0]
;; (destination) both become the SImode lowpart subreg of the original
;; operand 0.
17452 (define_split
17453 [(set (zero_extract:HI
17454 (match_operand:SWI12 0 "register_operand")
17455 (const_int 1)
17456 (match_operand:QI 1 "register_operand"))
17457 (const_int 0))
17458 (clobber (reg:CC FLAGS_REG))]
17459 "TARGET_USE_BT && ix86_pre_reload_split ()"
17460 [(parallel
17461 [(set (match_dup 0)
17462 (and:SI (rotate:SI (const_int -2) (match_dup 1))
17463 (match_dup 2)))
17464 (clobber (reg:CC FLAGS_REG))])]
17465 {
17466 operands[2] = lowpart_subreg (SImode, operands[0], <MODE>mode);
17467 operands[0] = lowpart_subreg (SImode, operands[0], <MODE>mode);
17468 })
17469
17470 ;; These instructions are never faster than the corresponding
17471 ;; and/ior/xor operations when using immediate operand, so with
17472 ;; 32-bit there's no point. But in 64-bit, we can't hold the
17473 ;; relevant immediates within the instruction itself, so operating
17474 ;; on bits in the high 32-bits of a register becomes easier.
17475 ;;
17476 ;; These are slow on Nocona, but fast on Athlon64. We do require the use
17477 ;; of btrq and btcq for corner cases of post-reload expansion of absdf and
17478 ;; negdf respectively, so they can never be disabled entirely.
17479
;; Set one bit of DImode operand 0 (register or memory) at constant bit
;; position operand 1, written as a width-1 zero_extract set to 1; emits
;; "bts{q}".  Available on 64-bit targets when TARGET_USE_BT holds or once
;; reload has completed.
17480 (define_insn "*btsq_imm"
17481 [(set (zero_extract:DI (match_operand:DI 0 "nonimmediate_operand" "+rm")
17482 (const_int 1)
17483 (match_operand:QI 1 "const_0_to_63_operand"))
17484 (const_int 1))
17485 (clobber (reg:CC FLAGS_REG))]
17486 "TARGET_64BIT && (TARGET_USE_BT || reload_completed)"
17487 "bts{q}\t{%1, %0|%0, %1}"
17488 [(set_attr "type" "alu1")
17489 (set_attr "prefix_0f" "1")
17490 (set_attr "znver1_decode" "double")
17491 (set_attr "mode" "DI")])
17492
;; Clear one bit of DImode operand 0 (register or memory) at constant bit
;; position operand 1, written as a width-1 zero_extract set to 0; emits
;; "btr{q}".  Same enabling condition as the bts form: 64-bit and either
;; TARGET_USE_BT or reload completed.
17493 (define_insn "*btrq_imm"
17494 [(set (zero_extract:DI (match_operand:DI 0 "nonimmediate_operand" "+rm")
17495 (const_int 1)
17496 (match_operand:QI 1 "const_0_to_63_operand"))
17497 (const_int 0))
17498 (clobber (reg:CC FLAGS_REG))]
17499 "TARGET_64BIT && (TARGET_USE_BT || reload_completed)"
17500 "btr{q}\t{%1, %0|%0, %1}"
17501 [(set_attr "type" "alu1")
17502 (set_attr "prefix_0f" "1")
17503 (set_attr "znver1_decode" "double")
17504 (set_attr "mode" "DI")])
17505
;; Complement one bit of DImode operand 0 at constant bit position
;; operand 1: the width-1 zero_extract is set to the NOT of itself (the
;; match_dups force source and destination to be the same bit).  Emits
;; "btc{q}" under the same condition as the bts/btr immediate forms.
17506 (define_insn "*btcq_imm"
17507 [(set (zero_extract:DI (match_operand:DI 0 "nonimmediate_operand" "+rm")
17508 (const_int 1)
17509 (match_operand:QI 1 "const_0_to_63_operand"))
17510 (not:DI (zero_extract:DI (match_dup 0) (const_int 1) (match_dup 1))))
17511 (clobber (reg:CC FLAGS_REG))]
17512 "TARGET_64BIT && (TARGET_USE_BT || reload_completed)"
17513 "btc{q}\t{%1, %0|%0, %1}"
17514 [(set_attr "type" "alu1")
17515 (set_attr "prefix_0f" "1")
17516 (set_attr "znver1_decode" "double")
17517 (set_attr "mode" "DI")])
17518
17519 ;; Allow Nocona to avoid these instructions if a register is available.
17520
;; When TARGET_USE_BT is off on a 64-bit target and a DImode scratch
;; register (operand 2) is available, replace the single-bit set
;; (zero_extract := 1) with an IOR of operand 0 and the mask 1 << bitpos.
;; If that mask does not satisfy x86_64_immediate_operand, it is first
;; moved into the scratch register and the register is used instead.
17521 (define_peephole2
17522 [(match_scratch:DI 2 "r")
17523 (parallel [(set (zero_extract:DI
17524 (match_operand:DI 0 "nonimmediate_operand")
17525 (const_int 1)
17526 (match_operand:QI 1 "const_0_to_63_operand"))
17527 (const_int 1))
17528 (clobber (reg:CC FLAGS_REG))])]
17529 "TARGET_64BIT && !TARGET_USE_BT"
17530 [(parallel [(set (match_dup 0)
17531 (ior:DI (match_dup 0) (match_dup 3)))
17532 (clobber (reg:CC FLAGS_REG))])]
17533 {
17534 int i = INTVAL (operands[1]);
17535
17536 operands[3] = gen_int_mode (HOST_WIDE_INT_1U << i, DImode);
17537
17538 if (!x86_64_immediate_operand (operands[3], DImode))
17539 {
17540 emit_move_insn (operands[2], operands[3]);
17541 operands[3] = operands[2];
17542 }
17543 })
17544
;; When TARGET_USE_BT is off on a 64-bit target and a DImode scratch
;; register (operand 2) is available, replace the single-bit clear
;; (zero_extract := 0) with an AND of operand 0 and the mask
;; ~(1 << bitpos).  If that mask does not satisfy
;; x86_64_immediate_operand, it is first moved into the scratch register.
17545 (define_peephole2
17546 [(match_scratch:DI 2 "r")
17547 (parallel [(set (zero_extract:DI
17548 (match_operand:DI 0 "nonimmediate_operand")
17549 (const_int 1)
17550 (match_operand:QI 1 "const_0_to_63_operand"))
17551 (const_int 0))
17552 (clobber (reg:CC FLAGS_REG))])]
17553 "TARGET_64BIT && !TARGET_USE_BT"
17554 [(parallel [(set (match_dup 0)
17555 (and:DI (match_dup 0) (match_dup 3)))
17556 (clobber (reg:CC FLAGS_REG))])]
17557 {
17558 int i = INTVAL (operands[1]);
17559
17560 operands[3] = gen_int_mode (~(HOST_WIDE_INT_1U << i), DImode);
17561
17562 if (!x86_64_immediate_operand (operands[3], DImode))
17563 {
17564 emit_move_insn (operands[2], operands[3]);
17565 operands[3] = operands[2];
17566 }
17567 })
17568
;; When TARGET_USE_BT is off on a 64-bit target and a DImode scratch
;; register (operand 2) is available, replace the single-bit complement
;; (zero_extract := NOT of itself) with an XOR of operand 0 and the mask
;; 1 << bitpos.  If that mask does not satisfy x86_64_immediate_operand,
;; it is first moved into the scratch register.
17569 (define_peephole2
17570 [(match_scratch:DI 2 "r")
17571 (parallel [(set (zero_extract:DI
17572 (match_operand:DI 0 "nonimmediate_operand")
17573 (const_int 1)
17574 (match_operand:QI 1 "const_0_to_63_operand"))
17575 (not:DI (zero_extract:DI
17576 (match_dup 0) (const_int 1) (match_dup 1))))
17577 (clobber (reg:CC FLAGS_REG))])]
17578 "TARGET_64BIT && !TARGET_USE_BT"
17579 [(parallel [(set (match_dup 0)
17580 (xor:DI (match_dup 0) (match_dup 3)))
17581 (clobber (reg:CC FLAGS_REG))])]
17582 {
17583 int i = INTVAL (operands[1]);
17584
17585 operands[3] = gen_int_mode (HOST_WIDE_INT_1U << i, DImode);
17586
17587 if (!x86_64_immediate_operand (operands[3], DImode))
17588 {
17589 emit_move_insn (operands[2], operands[3]);
17590 operands[3] = operands[2];
17591 }
17592 })
17593
17594 ;; %%% bt
17595
;; Bit test: compare a width-1 zero_extract of operand 0 at position
;; operand 1 against zero, setting the flags register in CCCmode.  The
;; output code picks "bt{l}" or "bt{q}" from the insn's "mode" attribute;
;; that attribute is SI when operand 1 is a constant below 32 and
;; <MODE> otherwise.  The insn has no enabling condition.
17596 (define_insn "*bt<mode>"
17597 [(set (reg:CCC FLAGS_REG)
17598 (compare:CCC
17599 (zero_extract:SWI48
17600 (match_operand:SWI48 0 "nonimmediate_operand" "r,m")
17601 (const_int 1)
17602 (match_operand:QI 1 "nonmemory_operand" "q<S>,<S>"))
17603 (const_int 0)))]
17604 ""
17605 {
17606 switch (get_attr_mode (insn))
17607 {
17608 case MODE_SI:
17609 return "bt{l}\t{%k1, %k0|%k0, %k1}";
17610
17611 case MODE_DI:
17612 return "bt{q}\t{%q1, %0|%0, %q1}";
17613
17614 default:
17615 gcc_unreachable ();
17616 }
17617 }
17618 [(set_attr "type" "alu1")
17619 (set_attr "prefix_0f" "1")
17620 (set (attr "mode")
17621 (if_then_else
17622 (and (match_test "CONST_INT_P (operands[1])")
17623 (match_test "INTVAL (operands[1]) < 32"))
17624 (const_string "SI")
17625 (const_string "<MODE>")))])
17626
;; Bit test whose bit position is a QImode subreg of (operand 1 AND
;; constant operand 2).  When the mask keeps all low position bits
;; (mask & (bitsize - 1) == bitsize - 1) the pre-reload split drops the
;; AND and tests with the QImode low part of operand 1 as the position.
17627 (define_insn_and_split "*bt<SWI48:mode>_mask"
17628 [(set (reg:CCC FLAGS_REG)
17629 (compare:CCC
17630 (zero_extract:SWI48
17631 (match_operand:SWI48 0 "nonimmediate_operand" "r,m")
17632 (const_int 1)
17633 (subreg:QI
17634 (and:SWI248
17635 (match_operand:SWI248 1 "register_operand")
17636 (match_operand 2 "const_int_operand")) 0))
17637 (const_int 0)))]
17638 "TARGET_USE_BT
17639 && (INTVAL (operands[2]) & (GET_MODE_BITSIZE (<SWI48:MODE>mode)-1))
17640 == GET_MODE_BITSIZE (<SWI48:MODE>mode)-1
17641 && ix86_pre_reload_split ()"
17642 "#"
17643 "&& 1"
17644 [(set (reg:CCC FLAGS_REG)
17645 (compare:CCC
17646 (zero_extract:SWI48 (match_dup 0) (const_int 1) (match_dup 1))
17647 (const_int 0)))]
17648 "operands[1] = gen_lowpart (QImode, operands[1]);")
17649
;; Conditional jump on a single-bit test (width-1 zero_extract compared
;; with zero by an operator accepted by bt_comparison_operator).
;; Enabled when TARGET_USE_BT holds or the function is optimized for size.
;; A constant bit position must be below the mode's bit size and at least
;; 8 when optimizing for size, 32 otherwise; a non-constant position
;; requires operand 1 not to be a memory operand.
;; The pre-reload split emits a CCCmode bit-test compare followed by a
;; jump on the flags; the preparation statements copy the comparison
;; operator and reverse its condition code.
17650 (define_insn_and_split "*jcc_bt<mode>"
17651 [(set (pc)
17652 (if_then_else (match_operator 0 "bt_comparison_operator"
17653 [(zero_extract:SWI48
17654 (match_operand:SWI48 1 "nonimmediate_operand")
17655 (const_int 1)
17656 (match_operand:QI 2 "nonmemory_operand"))
17657 (const_int 0)])
17658 (label_ref (match_operand 3))
17659 (pc)))
17660 (clobber (reg:CC FLAGS_REG))]
17661 "(TARGET_USE_BT || optimize_function_for_size_p (cfun))
17662 && (CONST_INT_P (operands[2])
17663 ? (INTVAL (operands[2]) < GET_MODE_BITSIZE (<MODE>mode)
17664 && INTVAL (operands[2])
17665 >= (optimize_function_for_size_p (cfun) ? 8 : 32))
17666 : !memory_operand (operands[1], <MODE>mode))
17667 && ix86_pre_reload_split ()"
17668 "#"
17669 "&& 1"
17670 [(set (reg:CCC FLAGS_REG)
17671 (compare:CCC
17672 (zero_extract:SWI48
17673 (match_dup 1)
17674 (const_int 1)
17675 (match_dup 2))
17676 (const_int 0)))
17677 (set (pc)
17678 (if_then_else (match_op_dup 0 [(reg:CCC FLAGS_REG) (const_int 0)])
17679 (label_ref (match_dup 3))
17680 (pc)))]
17681 {
17682 operands[0] = shallow_copy_rtx (operands[0]);
17683 PUT_CODE (operands[0], reverse_condition (GET_CODE (operands[0])));
17684 })
17685
17686 ;; Avoid useless masking of bit offset operand.
;; Bit-test-and-jump where the bit position is (and:QI operand 2,
;; constant operand 3).  When the mask keeps all low position bits
;; (mask & (bitsize - 1) == bitsize - 1) the pre-reload split drops the
;; AND, emits a CCCmode bit test on operand 2 directly, and jumps on the
;; flags using a copy of the operator with its condition code reversed.
17687 (define_insn_and_split "*jcc_bt<mode>_mask"
17688 [(set (pc)
17689 (if_then_else (match_operator 0 "bt_comparison_operator"
17690 [(zero_extract:SWI48
17691 (match_operand:SWI48 1 "register_operand")
17692 (const_int 1)
17693 (and:QI
17694 (match_operand:QI 2 "register_operand")
17695 (match_operand 3 "const_int_operand")))
17696 (const_int 0)])
17697 (label_ref (match_operand 4))
17698 (pc)))
17699 (clobber (reg:CC FLAGS_REG))]
17700 "(TARGET_USE_BT || optimize_function_for_size_p (cfun))
17701 && (INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
17702 == GET_MODE_BITSIZE (<MODE>mode)-1
17703 && ix86_pre_reload_split ()"
17704 "#"
17705 "&& 1"
17706 [(set (reg:CCC FLAGS_REG)
17707 (compare:CCC
17708 (zero_extract:SWI48
17709 (match_dup 1)
17710 (const_int 1)
17711 (match_dup 2))
17712 (const_int 0)))
17713 (set (pc)
17714 (if_then_else (match_op_dup 0 [(reg:CCC FLAGS_REG) (const_int 0)])
17715 (label_ref (match_dup 4))
17716 (pc)))]
17717 {
17718 operands[0] = shallow_copy_rtx (operands[0]);
17719 PUT_CODE (operands[0], reverse_condition (GET_CODE (operands[0])));
17720 })
17721
17722 ;; Avoid useless masking of bit offset operand.
;; Bit-test-and-jump where the bit position is a QImode subreg of
;; (operand 2 AND constant operand 3), with the AND done in an SWI248
;; mode.  Under the usual mask condition the pre-reload split drops the
;; AND, replaces operand 2 by its QImode low part, emits a CCCmode bit
;; test, and jumps on the flags with the operator's condition reversed.
17723 (define_insn_and_split "*jcc_bt<SWI48:mode>_mask_1"
17724 [(set (pc)
17725 (if_then_else (match_operator 0 "bt_comparison_operator"
17726 [(zero_extract:SWI48
17727 (match_operand:SWI48 1 "register_operand")
17728 (const_int 1)
17729 (subreg:QI
17730 (and:SWI248
17731 (match_operand:SWI248 2 "register_operand")
17732 (match_operand 3 "const_int_operand")) 0))
17733 (const_int 0)])
17734 (label_ref (match_operand 4))
17735 (pc)))
17736 (clobber (reg:CC FLAGS_REG))]
17737 "(TARGET_USE_BT || optimize_function_for_size_p (cfun))
17738 && (INTVAL (operands[3]) & (GET_MODE_BITSIZE (<SWI48:MODE>mode)-1))
17739 == GET_MODE_BITSIZE (<SWI48:MODE>mode)-1
17740 && ix86_pre_reload_split ()"
17741 "#"
17742 "&& 1"
17743 [(set (reg:CCC FLAGS_REG)
17744 (compare:CCC
17745 (zero_extract:SWI48
17746 (match_dup 1)
17747 (const_int 1)
17748 (match_dup 2))
17749 (const_int 0)))
17750 (set (pc)
17751 (if_then_else (match_op_dup 0 [(reg:CCC FLAGS_REG) (const_int 0)])
17752 (label_ref (match_dup 4))
17753 (pc)))]
17754 {
17755 operands[0] = shallow_copy_rtx (operands[0]);
17756 PUT_CODE (operands[0], reverse_condition (GET_CODE (operands[0])));
17757 operands[2] = gen_lowpart (QImode, operands[2]);
17758 })
17759
17760 ;; Help combine recognize bt followed by cmov
;; Split a conditional move selected by a single-bit test into a CCCmode
;; bit-test compare followed by an if_then_else on
;; (eq (reg:CCC FLAGS_REG) 0).  Requires TARGET_USE_BT and TARGET_CMOVE,
;; and rejects the case where both value operands are memory.  Because the
;; replacement always uses "eq" on the flags, the two value operands are
;; swapped when the original operator (operand 5) is EQ.
17761 (define_split
17762 [(set (match_operand:SWI248 0 "register_operand")
17763 (if_then_else:SWI248
17764 (match_operator 5 "bt_comparison_operator"
17765 [(zero_extract:SWI48
17766 (match_operand:SWI48 1 "register_operand")
17767 (const_int 1)
17768 (match_operand:QI 2 "register_operand"))
17769 (const_int 0)])
17770 (match_operand:SWI248 3 "nonimmediate_operand")
17771 (match_operand:SWI248 4 "nonimmediate_operand")))]
17772 "TARGET_USE_BT && TARGET_CMOVE
17773 && !(MEM_P (operands[3]) && MEM_P (operands[4]))
17774 && ix86_pre_reload_split ()"
17775 [(set (reg:CCC FLAGS_REG)
17776 (compare:CCC
17777 (zero_extract:SWI48 (match_dup 1) (const_int 1) (match_dup 2))
17778 (const_int 0)))
17779 (set (match_dup 0)
17780 (if_then_else:SWI248 (eq (reg:CCC FLAGS_REG) (const_int 0))
17781 (match_dup 3)
17782 (match_dup 4)))]
17783 {
17784 if (GET_CODE (operands[5]) == EQ)
17785 std::swap (operands[3], operands[4]);
17786 })
17787
17788 ;; Help combine recognize bt followed by setc
;; A width-1 zero_extract of operand 1 at position operand 2, stored
;; through an SWI48 subreg of QImode register operand 0.  The pre-reload
;; split emits a CCCmode bit-test compare and then sets operand 0 from
;; (eq:QI (reg:CCC FLAGS_REG) 0).
17789 (define_insn_and_split "*bt<mode>_setcqi"
17790 [(set (subreg:SWI48 (match_operand:QI 0 "register_operand") 0)
17791 (zero_extract:SWI48
17792 (match_operand:SWI48 1 "register_operand")
17793 (const_int 1)
17794 (match_operand:QI 2 "register_operand")))
17795 (clobber (reg:CC FLAGS_REG))]
17796 "TARGET_USE_BT && ix86_pre_reload_split ()"
17797 "#"
17798 "&& 1"
17799 [(set (reg:CCC FLAGS_REG)
17800 (compare:CCC
17801 (zero_extract:SWI48 (match_dup 1) (const_int 1) (match_dup 2))
17802 (const_int 0)))
17803 (set (match_dup 0)
17804 (eq:QI (reg:CCC FLAGS_REG) (const_int 0)))])
17805
17806 ;; Help combine recognize bt followed by setnc
;; QImode result of ((NOT (operand 1 >> operand 2)) AND 1), i.e. the
;; inverse of bit operand 2 of operand 1.  The pre-reload split emits a
;; CCCmode bit-test compare and then sets operand 0 from
;; (ne:QI (reg:CCC FLAGS_REG) 0).
17807 (define_insn_and_split "*bt<mode>_setncqi"
17808 [(set (match_operand:QI 0 "register_operand")
17809 (and:QI
17810 (not:QI
17811 (subreg:QI
17812 (lshiftrt:SWI48 (match_operand:SWI48 1 "register_operand")
17813 (match_operand:QI 2 "register_operand")) 0))
17814 (const_int 1)))
17815 (clobber (reg:CC FLAGS_REG))]
17816 "TARGET_USE_BT && ix86_pre_reload_split ()"
17817 "#"
17818 "&& 1"
17819 [(set (reg:CCC FLAGS_REG)
17820 (compare:CCC
17821 (zero_extract:SWI48 (match_dup 1) (const_int 1) (match_dup 2))
17822 (const_int 0)))
17823 (set (match_dup 0)
17824 (ne:QI (reg:CCC FLAGS_REG) (const_int 0)))])
17825
;; Full-width (SWI48) form of the inverted single-bit extraction:
;; ((NOT (operand 1 >> operand 2)) AND 1).  The pre-reload split emits a
;; CCCmode bit-test compare, materializes (ne:QI flags 0) in a new QImode
;; pseudo (operands[3]) and zero-extends that into operand 0.
17826 (define_insn_and_split "*bt<mode>_setnc<mode>"
17827 [(set (match_operand:SWI48 0 "register_operand")
17828 (and:SWI48
17829 (not:SWI48
17830 (lshiftrt:SWI48 (match_operand:SWI48 1 "register_operand")
17831 (match_operand:QI 2 "register_operand")))
17832 (const_int 1)))
17833 (clobber (reg:CC FLAGS_REG))]
17834 "TARGET_USE_BT && ix86_pre_reload_split ()"
17835 "#"
17836 "&& 1"
17837 [(set (reg:CCC FLAGS_REG)
17838 (compare:CCC
17839 (zero_extract:SWI48 (match_dup 1) (const_int 1) (match_dup 2))
17840 (const_int 0)))
17841 (set (match_dup 3)
17842 (ne:QI (reg:CCC FLAGS_REG) (const_int 0)))
17843 (set (match_dup 0) (zero_extend:SWI48 (match_dup 3)))]
17844 "operands[3] = gen_reg_rtx (QImode);")
17845
17846 ;; Help combine recognize bt followed by setnc (PR target/110588)
;; QImode result of comparing a width-1 zero_extract (bit operand 2 of
;; operand 1) for equality with zero.  The pre-reload split emits a
;; CCCmode bit-test compare and then sets operand 0 from
;; (ne:QI (reg:CCC FLAGS_REG) 0).
17847 (define_insn_and_split "*bt<mode>_setncqi_2"
17848 [(set (match_operand:QI 0 "register_operand")
17849 (eq:QI
17850 (zero_extract:SWI48
17851 (match_operand:SWI48 1 "register_operand")
17852 (const_int 1)
17853 (match_operand:QI 2 "register_operand"))
17854 (const_int 0)))
17855 (clobber (reg:CC FLAGS_REG))]
17856 "TARGET_USE_BT && ix86_pre_reload_split ()"
17857 "#"
17858 "&& 1"
17859 [(set (reg:CCC FLAGS_REG)
17860 (compare:CCC
17861 (zero_extract:SWI48 (match_dup 1) (const_int 1) (match_dup 2))
17862 (const_int 0)))
17863 (set (match_dup 0)
17864 (ne:QI (reg:CCC FLAGS_REG) (const_int 0)))])
17865
17866 ;; Help combine recognize bt followed by setc
;; Single-bit extraction into an SWI48 register where the bit position is
;; a QImode subreg of (operand 2 AND constant operand 3).  Under the usual
;; mask condition the pre-reload split drops the AND: it bit-tests with
;; the QImode low part of operand 2, sets a QImode temporary from
;; (eq:QI flags 0) and zero-extends it into operand 0.
;; Note that operands[3] (the mask constant, only needed by the insn
;; condition) is overwritten in the preparation statements with a new
;; QImode pseudo that serves as that temporary.
17867 (define_insn_and_split "*bt<mode>_setc<mode>_mask"
17868 [(set (match_operand:SWI48 0 "register_operand")
17869 (zero_extract:SWI48
17870 (match_operand:SWI48 1 "register_operand")
17871 (const_int 1)
17872 (subreg:QI
17873 (and:SWI48
17874 (match_operand:SWI48 2 "register_operand")
17875 (match_operand 3 "const_int_operand")) 0)))
17876 (clobber (reg:CC FLAGS_REG))]
17877 "TARGET_USE_BT
17878 && (INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
17879 == GET_MODE_BITSIZE (<MODE>mode)-1
17880 && ix86_pre_reload_split ()"
17881 "#"
17882 "&& 1"
17883 [(set (reg:CCC FLAGS_REG)
17884 (compare:CCC
17885 (zero_extract:SWI48 (match_dup 1) (const_int 1) (match_dup 2))
17886 (const_int 0)))
17887 (set (match_dup 3)
17888 (eq:QI (reg:CCC FLAGS_REG) (const_int 0)))
17889 (set (match_dup 0) (zero_extend:SWI48 (match_dup 3)))]
17890 {
17891 operands[2] = gen_lowpart (QImode, operands[2]);
17892 operands[3] = gen_reg_rtx (QImode);
17893 })
17894 \f
17895 ;; Store-flag instructions.
17896
;; Store-flag of a comparison (operator accepted by
;; add_comparison_operator) between (NOT operand 2) and operand 3.
;; Rewritten as a CCCmode compare of (operand 2 + operand 3) against
;; operand 2, followed by a store of the same operator applied to the
;; flags register.
17897 (define_split
17898 [(set (match_operand:QI 0 "nonimmediate_operand")
17899 (match_operator:QI 1 "add_comparison_operator"
17900 [(not:SWI (match_operand:SWI 2 "register_operand"))
17901 (match_operand:SWI 3 "nonimmediate_operand")]))]
17902 ""
17903 [(set (reg:CCC FLAGS_REG)
17904 (compare:CCC
17905 (plus:SWI (match_dup 2) (match_dup 3))
17906 (match_dup 2)))
17907 (set (match_dup 0)
17908 (match_op_dup 1 [(reg:CCC FLAGS_REG) (const_int 0)]))])
17909
;; Store-flag of a DImode register compared (operator accepted by
;; shr_comparison_operator) with a constant C for which
;; exact_log2 (C + 1) lies in 32..63, i.e. C = 2^k - 1 with k in that
;; range.  Rewritten as a CCZmode compare of (operand 2 >> k) with zero;
;; the operator code is mapped GTU -> NE and LEU -> EQ (any other code is
;; treated as unreachable).  operands[4] holds the shift count k.
17910 (define_split
17911 [(set (match_operand:QI 0 "nonimmediate_operand")
17912 (match_operator:QI 1 "shr_comparison_operator"
17913 [(match_operand:DI 2 "register_operand")
17914 (match_operand 3 "const_int_operand")]))]
17915 "TARGET_64BIT
17916 && IN_RANGE (exact_log2 (UINTVAL (operands[3]) + 1), 32, 63)"
17917 [(set (reg:CCZ FLAGS_REG)
17918 (compare:CCZ
17919 (lshiftrt:DI (match_dup 2) (match_dup 4))
17920 (const_int 0)))
17921 (set (match_dup 0)
17922 (match_op_dup 1 [(reg:CCZ FLAGS_REG) (const_int 0)]))]
17923 {
17924 enum rtx_code new_code;
17925
17926 operands[1] = shallow_copy_rtx (operands[1]);
17927 switch (GET_CODE (operands[1]))
17928 {
17929 case GTU: new_code = NE; break;
17930 case LEU: new_code = EQ; break;
17931 default: gcc_unreachable ();
17932 }
17933 PUT_CODE (operands[1], new_code);
17934
17935 operands[4] = GEN_INT (exact_log2 (UINTVAL (operands[3]) + 1));
17936 })
17937
17938 ;; For all sCOND expanders, also expand the compare or test insn that
17939 ;; generates cc0. Generate an equality comparison if `seq' or `sne'.
17940
;; DImode store-flag from the flags register (64-bit only, not used when
;; TARGET_PARTIAL_REG_STALL).  After reload it is split into a QImode
;; store-flag into the low byte of operand 0 (operands[2]) followed by a
;; zero-extension to DImode.  The comparison rtx is copied and its mode
;; changed to QImode for the first insn.
17941 (define_insn_and_split "*setcc_di_1"
17942 [(set (match_operand:DI 0 "register_operand" "=q")
17943 (match_operator:DI 1 "ix86_comparison_operator"
17944 [(reg FLAGS_REG) (const_int 0)]))]
17945 "TARGET_64BIT && !TARGET_PARTIAL_REG_STALL"
17946 "#"
17947 "&& reload_completed"
17948 [(set (match_dup 2) (match_dup 1))
17949 (set (match_dup 0) (zero_extend:DI (match_dup 2)))]
17950 {
17951 operands[1] = shallow_copy_rtx (operands[1]);
17952 PUT_MODE (operands[1], QImode);
17953 operands[2] = gen_lowpart (QImode, operands[0]);
17954 })
17955
;; SWI24 store-flag from the flags register, used when
;; TARGET_ZERO_EXTEND_WITH_AND holds and the function is optimized for
;; speed (and not under TARGET_PARTIAL_REG_STALL).  After reload it is
;; split into a QImode store-flag into the low byte of operand 0 followed
;; by a zero-extension that, in this variant, is wrapped in a parallel
;; with a flags clobber.
17956 (define_insn_and_split "*setcc_<mode>_1_and"
17957 [(set (match_operand:SWI24 0 "register_operand" "=q")
17958 (match_operator:SWI24 1 "ix86_comparison_operator"
17959 [(reg FLAGS_REG) (const_int 0)]))
17960 (clobber (reg:CC FLAGS_REG))]
17961 "!TARGET_PARTIAL_REG_STALL
17962 && TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun)"
17963 "#"
17964 "&& reload_completed"
17965 [(set (match_dup 2) (match_dup 1))
17966 (parallel [(set (match_dup 0) (zero_extend:SWI24 (match_dup 2)))
17967 (clobber (reg:CC FLAGS_REG))])]
17968 {
17969 operands[1] = shallow_copy_rtx (operands[1]);
17970 PUT_MODE (operands[1], QImode);
17971 operands[2] = gen_lowpart (QImode, operands[0]);
17972 })
17973
;; SWI24 store-flag from the flags register, used when
;; TARGET_ZERO_EXTEND_WITH_AND is off or the function is optimized for
;; size (and not under TARGET_PARTIAL_REG_STALL).  After reload it is
;; split into a QImode store-flag into the low byte of operand 0 followed
;; by a plain zero-extension without a flags clobber.
17974 (define_insn_and_split "*setcc_<mode>_1_movzbl"
17975 [(set (match_operand:SWI24 0 "register_operand" "=q")
17976 (match_operator:SWI24 1 "ix86_comparison_operator"
17977 [(reg FLAGS_REG) (const_int 0)]))]
17978 "!TARGET_PARTIAL_REG_STALL
17979 && (!TARGET_ZERO_EXTEND_WITH_AND || optimize_function_for_size_p (cfun))"
17980 "#"
17981 "&& reload_completed"
17982 [(set (match_dup 2) (match_dup 1))
17983 (set (match_dup 0) (zero_extend:SWI24 (match_dup 2)))]
17984 {
17985 operands[1] = shallow_copy_rtx (operands[1]);
17986 PUT_MODE (operands[1], QImode);
17987 operands[2] = gen_lowpart (QImode, operands[0]);
17988 })
17989
;; QImode store-flag: set operand 0 (register or memory) from a comparison
;; of the flags register against zero.  Emits "set<cc>" with the condition
;; suffix printed from operand 1 via %C.
17990 (define_insn "*setcc_qi"
17991 [(set (match_operand:QI 0 "nonimmediate_operand" "=qm")
17992 (match_operator:QI 1 "ix86_comparison_operator"
17993 [(reg FLAGS_REG) (const_int 0)]))]
17994 ""
17995 "set%C1\t%0"
17996 [(set_attr "type" "setcc")
17997 (set_attr "mode" "QI")])
17998
;; Same store-flag as the plain QImode form, but the destination is a
;; strict_low_part of a QImode register (constraint "+q"), so only the low
;; part of the containing register is written.
17999 (define_insn "*setcc_qi_slp"
18000 [(set (strict_low_part (match_operand:QI 0 "register_operand" "+q"))
18001 (match_operator:QI 1 "ix86_comparison_operator"
18002 [(reg FLAGS_REG) (const_int 0)]))]
18003 ""
18004 "set%C1\t%0"
18005 [(set_attr "type" "setcc")
18006 (set_attr "mode" "QI")])
18007
18008 ;; In general it is not safe to assume too much about CCmode registers,
18009 ;; so simplify-rtx stops when it sees a second one. Under certain
18010 ;; conditions this is safe on x86, so help combine not create
18011 ;;
18012 ;; seta %al
18013 ;; testb %al, %al
18014 ;; sete %al
18015
;; (ne:QI (flags comparison) 0) stored to operand 0 is the flags
;; comparison itself: replace it with a direct store of a copy of the
;; comparison whose mode is changed to QImode.
18016 (define_split
18017 [(set (match_operand:QI 0 "nonimmediate_operand")
18018 (ne:QI (match_operator 1 "ix86_comparison_operator"
18019 [(reg FLAGS_REG) (const_int 0)])
18020 (const_int 0)))]
18021 ""
18022 [(set (match_dup 0) (match_dup 1))]
18023 {
18024 operands[1] = shallow_copy_rtx (operands[1]);
18025 PUT_MODE (operands[1], QImode);
18026 })
18027
;; strict_low_part variant: (ne:QI (flags comparison) 0) stored into the
;; low part of a register is replaced with a direct store of a QImode
;; copy of the flags comparison.
18028 (define_split
18029 [(set (strict_low_part (match_operand:QI 0 "register_operand"))
18030 (ne:QI (match_operator 1 "ix86_comparison_operator"
18031 [(reg FLAGS_REG) (const_int 0)])
18032 (const_int 0)))]
18033 ""
18034 [(set (match_dup 0) (match_dup 1))]
18035 {
18036 operands[1] = shallow_copy_rtx (operands[1]);
18037 PUT_MODE (operands[1], QImode);
18038 })
18039
;; (eq:QI (flags comparison) 0) stored to operand 0 is the reversed flags
;; comparison: replace it with a QImode copy of the comparison whose code
;; is reversed by ix86_reverse_condition (given the mode of the flags
;; operand).  The split FAILs if the reversed comparison is not accepted
;; by ix86_comparison_operator.
18040 (define_split
18041 [(set (match_operand:QI 0 "nonimmediate_operand")
18042 (eq:QI (match_operator 1 "ix86_comparison_operator"
18043 [(reg FLAGS_REG) (const_int 0)])
18044 (const_int 0)))]
18045 ""
18046 [(set (match_dup 0) (match_dup 1))]
18047 {
18048 operands[1] = shallow_copy_rtx (operands[1]);
18049 PUT_MODE (operands[1], QImode);
18050 PUT_CODE (operands[1],
18051 ix86_reverse_condition (GET_CODE (operands[1]),
18052 GET_MODE (XEXP (operands[1], 0))));
18053
18054 /* Make sure that (a) the CCmode we have for the flags is strong
18055 enough for the reversed compare or (b) we have a valid FP compare. */
18056 if (! ix86_comparison_operator (operands[1], VOIDmode))
18057 FAIL;
18058 })
18059
;; strict_low_part variant of the (eq:QI (flags comparison) 0) split:
;; store a QImode copy of the comparison with its code reversed by
;; ix86_reverse_condition, and FAIL if the reversed comparison is not
;; accepted by ix86_comparison_operator.
18060 (define_split
18061 [(set (strict_low_part (match_operand:QI 0 "register_operand"))
18062 (eq:QI (match_operator 1 "ix86_comparison_operator"
18063 [(reg FLAGS_REG) (const_int 0)])
18064 (const_int 0)))]
18065 ""
18066 [(set (match_dup 0) (match_dup 1))]
18067 {
18068 operands[1] = shallow_copy_rtx (operands[1]);
18069 PUT_MODE (operands[1], QImode);
18070 PUT_CODE (operands[1],
18071 ix86_reverse_condition (GET_CODE (operands[1]),
18072 GET_MODE (XEXP (operands[1], 0))));
18073
18074 /* Make sure that (a) the CCmode we have for the flags is strong
18075 enough for the reversed compare or (b) we have a valid FP compare. */
18076 if (! ix86_comparison_operator (operands[1], VOIDmode))
18077 FAIL;
18078 })
18079
18080 ;; Eliminate redundant compare between set{z,nz} and j{z,nz}:
18081 ;; setz %al; test %al,%al; jz <...> -> setz %al; jnz <...> and
18082 ;; setnz %al; test %al,%al; jz <...> -> setnz %al; jz <...>.
;; Matches three consecutive insns: a QImode store-flag of a CCZmode flags
;; test into operand 0, a CCZmode compare of operand 0 with zero, and a
;; conditional jump on that compare.  If the flags register is dead after
;; the third insn, the middle compare is removed and the jump tests the
;; original flags directly.  When the store-flag operator is EQ, the
;; jump's operator is copied and its condition reversed; otherwise it is
;; kept as is.
18083 (define_peephole2
18084 [(set (match_operand:QI 0 "nonimmediate_operand")
18085 (match_operator:QI 1 "bt_comparison_operator"
18086 [(reg:CCZ FLAGS_REG) (const_int 0)]))
18087 (set (reg:CCZ FLAGS_REG)
18088 (compare:CCZ (match_dup 0) (const_int 0)))
18089 (set (pc)
18090 (if_then_else (match_operator 2 "bt_comparison_operator"
18091 [(reg:CCZ FLAGS_REG) (const_int 0)])
18092 (match_operand 3)
18093 (pc)))]
18094 "peep2_regno_dead_p (3, FLAGS_REG)"
18095 [(set (match_dup 0)
18096 (match_op_dup 1 [(reg:CCZ FLAGS_REG) (const_int 0)]))
18097 (set (pc)
18098 (if_then_else (match_dup 2)
18099 (match_dup 3)
18100 (pc)))]
18101 {
18102 if (GET_CODE (operands[1]) == EQ)
18103 {
18104 operands[2] = shallow_copy_rtx (operands[2]);
18105 PUT_CODE (operands[2], reverse_condition (GET_CODE (operands[2])));
18106 }
18107 })
18108
18109 ;; The SSE store flag instructions save 0 or 0xffffffff to the result.
18110 ;; Subsequent logical operations are used to imitate conditional moves.
18111 ;; 0xffffffff is NaN, but not in normalized form, so we can't represent
18112 ;; it directly.
18113
;; Scalar floating-point (MODEF) compare producing its result in an SSE
;; register, using an operator accepted by sse_comparison_operator.
;; Two alternatives, selected by the "isa" attribute: the non-AVX form
;; "cmp<cond>" with operand 1 tied to the destination, and the AVX form
;; "vcmp<cond>" with three distinct operands.
18114 (define_insn "setcc_<mode>_sse"
18115 [(set (match_operand:MODEF 0 "register_operand" "=x,x")
18116 (match_operator:MODEF 3 "sse_comparison_operator"
18117 [(match_operand:MODEF 1 "register_operand" "0,x")
18118 (match_operand:MODEF 2 "nonimmediate_operand" "xm,xjm")]))]
18119 "SSE_FLOAT_MODE_P (<MODE>mode)"
18120 "@
18121 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
18122 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
18123 [(set_attr "isa" "noavx,avx")
18124 (set_attr "addr" "*,gpr16")
18125 (set_attr "type" "ssecmp")
18126 (set_attr "length_immediate" "1")
18127 (set_attr "prefix" "orig,vex")
18128 (set_attr "mode" "<MODE>")])
18129
;; HFmode scalar compare under TARGET_AVX512FP16, modelled as an
;; UNSPEC_PCMP of the two HF operands and a constant predicate (operand 3,
;; 0..31 per its operand predicate).  Emits "vcmpsh" and writes a QImode
;; result to operand 0 (constraint "k").
18130 (define_insn "setcc_hf_mask"
18131 [(set (match_operand:QI 0 "register_operand" "=k")
18132 (unspec:QI
18133 [(match_operand:HF 1 "register_operand" "v")
18134 (match_operand:HF 2 "nonimmediate_operand" "vm")
18135 (match_operand:SI 3 "const_0_to_31_operand")]
18136 UNSPEC_PCMP))]
18137 "TARGET_AVX512FP16"
18138 "vcmpsh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
18139 [(set_attr "type" "ssecmp")
18140 (set_attr "prefix" "evex")
18141 (set_attr "mode" "HF")])
18142
18143 \f
18144 ;; Basic conditional jump instructions.
18145
;; Conditional jump on a comparison (operator accepted by
;; add_comparison_operator) between (NOT operand 2) and operand 3.
;; Rewritten as a CCCmode compare of (operand 2 + operand 3) against
;; operand 2, followed by a jump on the same operator applied to the
;; flags register.  The replacement refers back to the label with
;; match_dup, like the other jump splits in this file.
18146 (define_split
18147 [(set (pc)
18148 (if_then_else
18149 (match_operator 1 "add_comparison_operator"
18150 [(not:SWI (match_operand:SWI 2 "register_operand"))
18151 (match_operand:SWI 3 "nonimmediate_operand")])
18152 (label_ref (match_operand 0))
18153 (pc)))]
18154 ""
18155 [(set (reg:CCC FLAGS_REG)
18156 (compare:CCC
18157 (plus:SWI (match_dup 2) (match_dup 3))
18158 (match_dup 2)))
18159 (set (pc)
18160 (if_then_else (match_op_dup 1 [(reg:CCC FLAGS_REG) (const_int 0)])
18161 (label_ref (match_dup 0))
18162 (pc)))])
18163
;; Conditional jump on a DImode register compared (operator accepted by
;; shr_comparison_operator) with a constant C for which
;; exact_log2 (C + 1) lies in 32..63, i.e. C = 2^k - 1 with k in that
;; range.  Rewritten as a CCZmode compare of (operand 2 >> k) with zero;
;; the operator code is mapped GTU -> NE and LEU -> EQ (any other code is
;; treated as unreachable).  operands[4] holds the shift count k.  The
;; replacement refers back to the label with match_dup, like the other
;; jump splits in this file.
18164 (define_split
18165 [(set (pc)
18166 (if_then_else
18167 (match_operator 1 "shr_comparison_operator"
18168 [(match_operand:DI 2 "register_operand")
18169 (match_operand 3 "const_int_operand")])
18170 (label_ref (match_operand 0))
18171 (pc)))]
18172 "TARGET_64BIT
18173 && IN_RANGE (exact_log2 (UINTVAL (operands[3]) + 1), 32, 63)"
18174 [(set (reg:CCZ FLAGS_REG)
18175 (compare:CCZ
18176 (lshiftrt:DI (match_dup 2) (match_dup 4))
18177 (const_int 0)))
18178 (set (pc)
18179 (if_then_else (match_op_dup 1 [(reg:CCZ FLAGS_REG) (const_int 0)])
18180 (label_ref (match_dup 0))
18181 (pc)))]
18182 {
18183 enum rtx_code new_code;
18184
18185 operands[1] = shallow_copy_rtx (operands[1]);
18186 switch (GET_CODE (operands[1]))
18187 {
18188 case GTU: new_code = NE; break;
18189 case LEU: new_code = EQ; break;
18190 default: gcc_unreachable ();
18191 }
18192 PUT_CODE (operands[1], new_code);
18193
18194 operands[4] = GEN_INT (exact_log2 (UINTVAL (operands[3]) + 1));
18195 })
18196
18197 ;; We ignore the overflow flag for signed branch instructions.
18198
;; Basic conditional jump to label operand 0 on a flags comparison
;; (operand 1); the condition suffix is printed from operand 1 via %C.
;; The "length" attribute is 2 when the displacement (label - pc) is in
;; [-126, 128) and 6 otherwise.
18199 (define_insn "*jcc"
18200 [(set (pc)
18201 (if_then_else (match_operator 1 "ix86_comparison_operator"
18202 [(reg FLAGS_REG) (const_int 0)])
18203 (label_ref (match_operand 0))
18204 (pc)))]
18205 ""
18206 "%!%+j%C1\t%l0"
18207 [(set_attr "type" "ibr")
18208 (set_attr "modrm" "0")
18209 (set (attr "length")
18210 (if_then_else
18211 (and (ge (minus (match_dup 0) (pc))
18212 (const_int -126))
18213 (lt (minus (match_dup 0) (pc))
18214 (const_int 128)))
18215 (const_int 2)
18216 (const_int 6)))])
18217
18218 ;; In general it is not safe to assume too much about CCmode registers,
18219 ;; so simplify-rtx stops when it sees a second one. Under certain
18220 ;; conditions this is safe on x86, so help combine not create
18221 ;;
18222 ;; seta %al
18223 ;; testb %al, %al
18224 ;; je Lfoo
18225
;; A jump on (ne (flags comparison) 0) is a jump on the flags comparison
;; itself: replace the condition with a copy of the inner comparison
;; whose mode is set to VOIDmode.
18226 (define_split
18227 [(set (pc)
18228 (if_then_else (ne (match_operator 0 "ix86_comparison_operator"
18229 [(reg FLAGS_REG) (const_int 0)])
18230 (const_int 0))
18231 (label_ref (match_operand 1))
18232 (pc)))]
18233 ""
18234 [(set (pc)
18235 (if_then_else (match_dup 0)
18236 (label_ref (match_dup 1))
18237 (pc)))]
18238 {
18239 operands[0] = shallow_copy_rtx (operands[0]);
18240 PUT_MODE (operands[0], VOIDmode);
18241 })
18242
;; A jump on (eq (flags comparison) 0) is a jump on the reversed flags
;; comparison: replace the condition with a VOIDmode copy of the inner
;; comparison whose code is reversed by ix86_reverse_condition (given the
;; mode of the flags operand).  The split FAILs if the reversed comparison
;; is not accepted by ix86_comparison_operator.
18243 (define_split
18244 [(set (pc)
18245 (if_then_else (eq (match_operator 0 "ix86_comparison_operator"
18246 [(reg FLAGS_REG) (const_int 0)])
18247 (const_int 0))
18248 (label_ref (match_operand 1))
18249 (pc)))]
18250 ""
18251 [(set (pc)
18252 (if_then_else (match_dup 0)
18253 (label_ref (match_dup 1))
18254 (pc)))]
18255 {
18256 operands[0] = shallow_copy_rtx (operands[0]);
18257 PUT_MODE (operands[0], VOIDmode);
18258 PUT_CODE (operands[0],
18259 ix86_reverse_condition (GET_CODE (operands[0]),
18260 GET_MODE (XEXP (operands[0], 0))));
18261
18262 /* Make sure that (a) the CCmode we have for the flags is strong
18263 enough for the reversed compare or (b) we have a valid FP compare. */
18264 if (! ix86_comparison_operator (operands[0], VOIDmode))
18265 FAIL;
18266 })
18267 \f
18268 ;; Unconditional and other jump instructions
18269
;; Unconditional direct jump to label operand 0; emits "jmp".
;; The "length" attribute is 2 when the displacement (label - pc) is in
;; [-126, 128) and 5 otherwise.
18270 (define_insn "jump"
18271 [(set (pc)
18272 (label_ref (match_operand 0)))]
18273 ""
18274 "%!jmp\t%l0"
18275 [(set_attr "type" "ibr")
18276 (set_attr "modrm" "0")
18277 (set (attr "length")
18278 (if_then_else
18279 (and (ge (minus (match_dup 0) (pc))
18280 (const_int -126))
18281 (lt (minus (match_dup 0) (pc))
18282 (const_int 128)))
18283 (const_int 2)
18284 (const_int 5)))])
18285
;; Expander for an indirect jump through operand 0.  Under TARGET_X32 or
;; TARGET_INDIRECT_BRANCH_REGISTER the address is converted to word_mode
;; first.  Always records in cfun->machine that the function contains a
;; local indirect jump.
18286 (define_expand "indirect_jump"
18287 [(set (pc) (match_operand 0 "indirect_branch_operand"))]
18288 ""
18289 {
18290 if (TARGET_X32 || TARGET_INDIRECT_BRANCH_REGISTER)
18291 operands[0] = convert_memory_address (word_mode, operands[0]);
18292 cfun->machine->has_local_indirect_jump = true;
18293 })
18294
;; Insn for an indirect jump through the word-mode (W) operand 0.
;; The assembly text comes from ix86_output_indirect_jmp.  "type" is
;; "multi" when cfun->machine->indirect_branch_type differs from
;; indirect_branch_keep, and "ibr" otherwise.
18295 (define_insn "*indirect_jump"
18296 [(set (pc) (match_operand:W 0 "indirect_branch_operand" "rBw"))]
18297 ""
18298 "* return ix86_output_indirect_jmp (operands[0]);"
18299 [(set (attr "type")
18300 (if_then_else (match_test "(cfun->machine->indirect_branch_type
18301 != indirect_branch_keep)")
18302 (const_string "multi")
18303 (const_string "ibr")))
18304 (set_attr "length_immediate" "0")])
18305
;; Expander for a jump through operand 0 with the label in operand 1
;; recorded in a USE.  Under flag_pic the incoming operand 0 is combined
;; (PLUS or MINUS, chosen per target below) with either a label_ref to
;; operand 1 or pic_offset_table_rtx, and the result becomes the new
;; operand 0.  Afterwards the address is converted to word_mode for
;; TARGET_X32 or TARGET_INDIRECT_BRANCH_REGISTER, and the function is
;; marked as having a local indirect jump.
18306 (define_expand "tablejump"
18307 [(parallel [(set (pc) (match_operand 0 "indirect_branch_operand"))
18308 (use (label_ref (match_operand 1)))])]
18309 ""
18310 {
18311 /* In PIC mode, the table entries are stored GOT (32-bit) or PC (64-bit)
18312 relative. Convert the relative address to an absolute address. */
18313 if (flag_pic)
18314 {
18315 rtx op0, op1;
18316 enum rtx_code code;
18317
18318 /* We can't use @GOTOFF for text labels on VxWorks;
18319 see gotoff_operand. */
18320 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
18321 {
18322 code = PLUS;
18323 op0 = operands[0];
18324 op1 = gen_rtx_LABEL_REF (Pmode, operands[1]);
18325 }
18326 else if (TARGET_MACHO || HAVE_AS_GOTOFF_IN_DATA)
18327 {
18328 code = PLUS;
18329 op0 = operands[0];
18330 op1 = pic_offset_table_rtx;
18331 }
18332 else
18333 {
18334 code = MINUS;
18335 op0 = pic_offset_table_rtx;
18336 op1 = operands[0];
18337 }
18338
18339 operands[0] = expand_simple_binop (Pmode, code, op0, op1, NULL_RTX, 0,
18340 OPTAB_DIRECT);
18341 }
18342
18343 if (TARGET_X32 || TARGET_INDIRECT_BRANCH_REGISTER)
18344 operands[0] = convert_memory_address (word_mode, operands[0]);
18345 cfun->machine->has_local_indirect_jump = true;
18346 })
18347
;; Insn for an indirect jump through the word-mode (W) operand 0 that also
;; carries a USE of the label in operand 1.  The assembly text comes from
;; ix86_output_indirect_jmp; "type" is "multi" when
;; cfun->machine->indirect_branch_type differs from indirect_branch_keep,
;; and "ibr" otherwise.
18348 (define_insn "*tablejump_1"
18349 [(set (pc) (match_operand:W 0 "indirect_branch_operand" "rBw"))
18350 (use (label_ref (match_operand 1)))]
18351 ""
18352 "* return ix86_output_indirect_jmp (operands[0]);"
18353 [(set (attr "type")
18354 (if_then_else (match_test "(cfun->machine->indirect_branch_type
18355 != indirect_branch_keep)")
18356 (const_string "multi")
18357 (const_string "ibr")))
18358 (set_attr "length_immediate" "0")])
18359 \f
18360 ;; Convert setcc + movzbl to xor + setcc if operands don't overlap.
18361
;; Window of three insns: a set of the flags register (operand 4) from
;; operand 0, a QImode setcc into operand 1, and a zero_extend of
;; operand 1 into operand 3.
;; Conditions: operand 1 satisfies peep2_reg_dead_p at position 3 or
;; matches operand 3; operand 3 does not overlap anything mentioned in
;; operand 0; and peep2_regno_dead_p (0, FLAGS_REG) holds.
;; The preparation code calls ix86_expand_clear on operand 3; the
;; replacement re-emits the flags set and then stores the comparison into
;; the strict_low_part of the QImode lowpart of operand 3 (operand 5).
18362 (define_peephole2
18363 [(set (match_operand 4 "flags_reg_operand") (match_operand 0))
18364 (set (match_operand:QI 1 "register_operand")
18365 (match_operator:QI 2 "ix86_comparison_operator"
18366 [(reg FLAGS_REG) (const_int 0)]))
18367 (set (match_operand 3 "any_QIreg_operand")
18368 (zero_extend (match_dup 1)))]
18369 "(peep2_reg_dead_p (3, operands[1])
18370 || operands_match_p (operands[1], operands[3]))
18371 && ! reg_overlap_mentioned_p (operands[3], operands[0])
18372 && peep2_regno_dead_p (0, FLAGS_REG)"
18373 [(set (match_dup 4) (match_dup 0))
18374 (set (strict_low_part (match_dup 5))
18375 (match_dup 2))]
18376 {
18377 operands[5] = gen_lowpart (QImode, operands[3]);
18378 ix86_expand_clear (operands[3]);
18379 })
18380
;; setcc + zero_extend peephole for the case where the flags-setting insn
;; is a PARALLEL: a set of the flags register (operand 5) from operand 0
;; plus one more element (operand 4).
;; In addition to the operand 1 / operand 3 liveness test and the
;; FLAGS_REG test, operand 3 must not overlap anything mentioned in
;; operand 0 or operand 4 and must not be set by operand 4.
;; The preparation code calls ix86_expand_clear on operand 3; the
;; replacement keeps the PARALLEL and stores the comparison into the
;; strict_low_part of the QImode lowpart of operand 3 (operand 6).
18381 (define_peephole2
18382 [(parallel [(set (match_operand 5 "flags_reg_operand") (match_operand 0))
18383 (match_operand 4)])
18384 (set (match_operand:QI 1 "register_operand")
18385 (match_operator:QI 2 "ix86_comparison_operator"
18386 [(reg FLAGS_REG) (const_int 0)]))
18387 (set (match_operand 3 "any_QIreg_operand")
18388 (zero_extend (match_dup 1)))]
18389 "(peep2_reg_dead_p (3, operands[1])
18390 || operands_match_p (operands[1], operands[3]))
18391 && ! reg_overlap_mentioned_p (operands[3], operands[0])
18392 && ! reg_overlap_mentioned_p (operands[3], operands[4])
18393 && ! reg_set_p (operands[3], operands[4])
18394 && peep2_regno_dead_p (0, FLAGS_REG)"
18395 [(parallel [(set (match_dup 5) (match_dup 0))
18396 (match_dup 4)])
18397 (set (strict_low_part (match_dup 6))
18398 (match_dup 2))]
18399 {
18400 operands[6] = gen_lowpart (QImode, operands[3]);
18401 ix86_expand_clear (operands[3]);
18402 })
18403
;; setcc + zero_extend peephole for a four-insn window with two leading
;; flags setters: a plain set of the flags register (operand 6) from
;; operand 0, then a PARALLEL setting the flags register (operand 7) from
;; operand 1 together with one more element (operand 5).
;; Operand 4 (the zero_extend destination) must not overlap anything
;; mentioned in operands 0, 1 or 5 and must not be set by operand 5;
;; operand 1 must itself refer to FLAGS_REG.  The setcc destination
;; (operand 2) must satisfy peep2_reg_dead_p at position 4 or match
;; operand 4, and peep2_regno_dead_p (0, FLAGS_REG) must hold.
;; The preparation code calls ix86_expand_clear on operand 4; the
;; comparison is then stored into the strict_low_part of its QImode
;; lowpart (operand 8).
18404 (define_peephole2
18405 [(set (match_operand 6 "flags_reg_operand") (match_operand 0))
18406 (parallel [(set (match_operand 7 "flags_reg_operand") (match_operand 1))
18407 (match_operand 5)])
18408 (set (match_operand:QI 2 "register_operand")
18409 (match_operator:QI 3 "ix86_comparison_operator"
18410 [(reg FLAGS_REG) (const_int 0)]))
18411 (set (match_operand 4 "any_QIreg_operand")
18412 (zero_extend (match_dup 2)))]
18413 "(peep2_reg_dead_p (4, operands[2])
18414 || operands_match_p (operands[2], operands[4]))
18415 && ! reg_overlap_mentioned_p (operands[4], operands[0])
18416 && ! reg_overlap_mentioned_p (operands[4], operands[1])
18417 && ! reg_overlap_mentioned_p (operands[4], operands[5])
18418 && ! reg_set_p (operands[4], operands[5])
18419 && refers_to_regno_p (FLAGS_REG, operands[1], (rtx *)NULL)
18420 && peep2_regno_dead_p (0, FLAGS_REG)"
18421 [(set (match_dup 6) (match_dup 0))
18422 (parallel [(set (match_dup 7) (match_dup 1))
18423 (match_dup 5)])
18424 (set (strict_low_part (match_dup 8))
18425 (match_dup 3))]
18426 {
18427 operands[8] = gen_lowpart (QImode, operands[4]);
18428 ix86_expand_clear (operands[4]);
18429 })
18430
18431 ;; Similar, but match zero extend with andsi3.
18432
;; Window of three insns: a set of the flags register (operand 4) from
;; operand 0, a QImode setcc into operand 1, and an SImode AND of
;; operand 3 with 255 that clobbers FLAGS_REG.
;; Conditions: operands 1 and 3 have the same REGNO; operand 3 does not
;; overlap anything mentioned in operand 0; and
;; peep2_regno_dead_p (0, FLAGS_REG) holds.
;; The preparation code calls ix86_expand_clear on operand 3; the
;; comparison is then stored into the strict_low_part of its QImode
;; lowpart (operand 5).
18433 (define_peephole2
18434 [(set (match_operand 4 "flags_reg_operand") (match_operand 0))
18435 (set (match_operand:QI 1 "register_operand")
18436 (match_operator:QI 2 "ix86_comparison_operator"
18437 [(reg FLAGS_REG) (const_int 0)]))
18438 (parallel [(set (match_operand:SI 3 "any_QIreg_operand")
18439 (and:SI (match_dup 3) (const_int 255)))
18440 (clobber (reg:CC FLAGS_REG))])]
18441 "REGNO (operands[1]) == REGNO (operands[3])
18442 && ! reg_overlap_mentioned_p (operands[3], operands[0])
18443 && peep2_regno_dead_p (0, FLAGS_REG)"
18444 [(set (match_dup 4) (match_dup 0))
18445 (set (strict_low_part (match_dup 5))
18446 (match_dup 2))]
18447 {
18448 operands[5] = gen_lowpart (QImode, operands[3]);
18449 ix86_expand_clear (operands[3]);
18450 })
18451
;; setcc + zero_extend peephole where the flags-setting insn is a PARALLEL
;; (flags register operand 5 set from operand 0, plus operand 4) and the
;; zero_extend of operand 1 into operand 3 is itself in a PARALLEL with a
;; clobber of FLAGS_REG.
;; Operand 3 must not overlap anything mentioned in operand 0 or
;; operand 4 and must not be set by operand 4; operand 1 must satisfy
;; peep2_reg_dead_p at position 3 or match operand 3; and
;; peep2_regno_dead_p (0, FLAGS_REG) must hold.
;; The preparation code calls ix86_expand_clear on operand 3; the
;; comparison is then stored into the strict_low_part of its QImode
;; lowpart (operand 6).
18452 (define_peephole2
18453 [(parallel [(set (match_operand 5 "flags_reg_operand") (match_operand 0))
18454 (match_operand 4)])
18455 (set (match_operand:QI 1 "register_operand")
18456 (match_operator:QI 2 "ix86_comparison_operator"
18457 [(reg FLAGS_REG) (const_int 0)]))
18458 (parallel [(set (match_operand 3 "any_QIreg_operand")
18459 (zero_extend (match_dup 1)))
18460 (clobber (reg:CC FLAGS_REG))])]
18461 "(peep2_reg_dead_p (3, operands[1])
18462 || operands_match_p (operands[1], operands[3]))
18463 && ! reg_overlap_mentioned_p (operands[3], operands[0])
18464 && ! reg_overlap_mentioned_p (operands[3], operands[4])
18465 && ! reg_set_p (operands[3], operands[4])
18466 && peep2_regno_dead_p (0, FLAGS_REG)"
18467 [(parallel [(set (match_dup 5) (match_dup 0))
18468 (match_dup 4)])
18469 (set (strict_low_part (match_dup 6))
18470 (match_dup 2))]
18471 {
18472 operands[6] = gen_lowpart (QImode, operands[3]);
18473 ix86_expand_clear (operands[3]);
18474 })
18475
;; Four-insn setcc + zero_extend peephole: a plain set of the flags
;; register (operand 6) from operand 0, a PARALLEL setting the flags
;; register (operand 7) from operand 1 plus operand 5, a QImode setcc into
;; operand 2, and a zero_extend of operand 2 into operand 4 in a PARALLEL
;; with a clobber of FLAGS_REG.
;; Operand 4 must not overlap anything mentioned in operands 0, 1 or 5
;; and must not be set by operand 5; operand 1 must refer to FLAGS_REG;
;; operand 2 must satisfy peep2_reg_dead_p at position 4 or match
;; operand 4; and peep2_regno_dead_p (0, FLAGS_REG) must hold.
;; The preparation code calls ix86_expand_clear on operand 4; the
;; comparison is then stored into the strict_low_part of its QImode
;; lowpart (operand 8).
18476 (define_peephole2
18477 [(set (match_operand 6 "flags_reg_operand") (match_operand 0))
18478 (parallel [(set (match_operand 7 "flags_reg_operand") (match_operand 1))
18479 (match_operand 5)])
18480 (set (match_operand:QI 2 "register_operand")
18481 (match_operator:QI 3 "ix86_comparison_operator"
18482 [(reg FLAGS_REG) (const_int 0)]))
18483 (parallel [(set (match_operand 4 "any_QIreg_operand")
18484 (zero_extend (match_dup 2)))
18485 (clobber (reg:CC FLAGS_REG))])]
18486 "(peep2_reg_dead_p (4, operands[2])
18487 || operands_match_p (operands[2], operands[4]))
18488 && ! reg_overlap_mentioned_p (operands[4], operands[0])
18489 && ! reg_overlap_mentioned_p (operands[4], operands[1])
18490 && ! reg_overlap_mentioned_p (operands[4], operands[5])
18491 && ! reg_set_p (operands[4], operands[5])
18492 && refers_to_regno_p (FLAGS_REG, operands[1], (rtx *)NULL)
18493 && peep2_regno_dead_p (0, FLAGS_REG)"
18494 [(set (match_dup 6) (match_dup 0))
18495 (parallel [(set (match_dup 7) (match_dup 1))
18496 (match_dup 5)])
18497 (set (strict_low_part (match_dup 8))
18498 (match_dup 3))]
18499 {
18500 operands[8] = gen_lowpart (QImode, operands[4]);
18501 ix86_expand_clear (operands[4]);
18502 })
18503 \f
18504 ;; Call instructions.
18505
18506 ;; The predicates normally associated with named expanders are not properly
18507 ;; checked for calls. This is a bug in the generic code, but it isn't that
18508 ;; easy to fix. Ignore it for now and be prepared to fix things up.
18509
18510 ;; P6 processors will jump to the address after the decrement when %esp
18511 ;; is used as a call operand, so they will execute the return address as code.
18512 ;; See Pentium Pro errata 70, Pentium 2 errata A33 and Pentium 3 errata E17.
18513
18514 ;; Register constraint for call instruction.
;; Maps SI to "l" and DI to "r".  Referenced as <c> in the constraint
;; strings of the *call and *call_value patterns.
18515 (define_mode_attr c [(SI "l") (DI "r")])
18516
18517 ;; Call subroutine returning no value.
18518
;; Expander for a call that returns no value.  All work is delegated to
;; ix86_expand_call with a NULL first argument, operands 0-2, a NULL fifth
;; argument, and false as the last argument (the sibcall expander passes
;; true there).
18519 (define_expand "call"
18520 [(call (match_operand:QI 0)
18521 (match_operand 1))
18522 (use (match_operand 2))]
18523 ""
18524 {
18525 ix86_expand_call (NULL, operands[0], operands[1],
18526 operands[2], NULL, false);
18527 DONE;
18528 })
18529
;; Expander for a sibling call that returns no value.  All work is
;; delegated to ix86_expand_call with a NULL first argument, operands 0-2,
;; a NULL fifth argument, and true as the last argument (the call
;; expander passes false there).
18530 (define_expand "sibcall"
18531 [(call (match_operand:QI 0)
18532 (match_operand 1))
18533 (use (match_operand 2))]
18534 ""
18535 {
18536 ix86_expand_call (NULL, operands[0], operands[1],
18537 operands[2], NULL, true);
18538 DONE;
18539 })
18540
;; Insn for a call without a return value that is not a sibling call.
;; The callee address is the word-mode (W) operand 0; the assembly text
;; comes from ix86_output_call_insn.
18541 (define_insn "*call"
18542 [(call (mem:QI (match_operand:W 0 "call_insn_operand" "<c>BwBz"))
18543 (match_operand 1))]
18544 "!SIBLING_CALL_P (insn)"
18545 "* return ix86_output_call_insn (insn, operands[0]);"
18546 [(set_attr "type" "call")])
18547
18548 ;; This covers both call and sibcall since only GOT slot is allowed.
;; TARGET_X32 only.  Operand 0 is an SImode GOT memory operand that the
;; pattern zero-extends to DImode.  The output code builds a DImode const
;; MEM from the address of operand 0 and passes that to
;; ix86_output_call_insn.
18549 (define_insn "*call_got_x32"
18550 [(call (mem:QI (zero_extend:DI
18551 (match_operand:SI 0 "GOT_memory_operand" "Bg")))
18552 (match_operand 1))]
18553 "TARGET_X32"
18554 {
18555 rtx fnaddr = gen_const_mem (DImode, XEXP (operands[0], 0));
18556 return ix86_output_call_insn (insn, fnaddr);
18557 }
18558 [(set_attr "type" "call")])
18559
18560 ;; Since sibcall never returns, we can only use call-clobbered register
18561 ;; as GOT base.
;; Sibling call whose target address is loaded from SImode memory at
;; (operand 0 + operand 1): a register plus a GOT32 symbol.
;; Enabled only for non-Mach-O, non-64-bit targets without
;; TARGET_INDIRECT_BRANCH_REGISTER.  The output code rebuilds the
;; const MEM for that address and passes it to ix86_output_call_insn.
18562 (define_insn "*sibcall_GOT_32"
18563 [(call (mem:QI
18564 (mem:SI (plus:SI
18565 (match_operand:SI 0 "register_no_elim_operand" "U")
18566 (match_operand:SI 1 "GOT32_symbol_operand"))))
18567 (match_operand 2))]
18568 "!TARGET_MACHO
18569 && !TARGET_64BIT
18570 && !TARGET_INDIRECT_BRANCH_REGISTER
18571 && SIBLING_CALL_P (insn)"
18572 {
18573 rtx fnaddr = gen_rtx_PLUS (SImode, operands[0], operands[1]);
18574 fnaddr = gen_const_mem (SImode, fnaddr);
18575 return ix86_output_call_insn (insn, fnaddr);
18576 }
18577 [(set_attr "type" "call")])
18578
;; Insn for a sibling call without a return value.  The callee address is
;; the word-mode (W) operand 0; the assembly text comes from
;; ix86_output_call_insn.
18579 (define_insn "*sibcall"
18580 [(call (mem:QI (match_operand:W 0 "sibcall_insn_operand" "UBsBz"))
18581 (match_operand 1))]
18582 "SIBLING_CALL_P (insn)"
18583 "* return ix86_output_call_insn (insn, operands[0]);"
18584 [(set_attr "type" "call")])
18585
;; Call through a word-mode memory operand, marked with UNSPEC_PEEPSIB.
;; Disabled for TARGET_X32 and TARGET_INDIRECT_BRANCH_REGISTER.  This is
;; the shape produced by the peephole2 patterns in this file that fold a
;; preceding load into a sibling call.
18586 (define_insn "*sibcall_memory"
18587 [(call (mem:QI (match_operand:W 0 "memory_operand" "m"))
18588 (match_operand 1))
18589 (unspec [(const_int 0)] UNSPEC_PEEPSIB)]
18590 "!TARGET_X32 && !TARGET_INDIRECT_BRANCH_REGISTER"
18591 "* return ix86_output_call_insn (insn, operands[0]);"
18592 [(set_attr "type" "call")])
18593
;; Fold a load of the call address into the sibling call that follows it:
;; (set reg mem) + call through reg becomes a call through mem, marked
;; with UNSPEC_PEEPSIB.
;; Requires the second insn to be a sibling call and the loaded register
;; not to be mentioned in that call's CALL_INSN_FUNCTION_USAGE.  Disabled
;; for TARGET_X32 and TARGET_INDIRECT_BRANCH_REGISTER.
18594 (define_peephole2
18595 [(set (match_operand:W 0 "register_operand")
18596 (match_operand:W 1 "memory_operand"))
18597 (call (mem:QI (match_dup 0))
18598 (match_operand 3))]
18599 "!TARGET_X32
18600 && !TARGET_INDIRECT_BRANCH_REGISTER
18601 && SIBLING_CALL_P (peep2_next_insn (1))
18602 && !reg_mentioned_p (operands[0],
18603 CALL_INSN_FUNCTION_USAGE (peep2_next_insn (1)))"
18604 [(parallel [(call (mem:QI (match_dup 1))
18605 (match_dup 3))
18606 (unspec [(const_int 0)] UNSPEC_PEEPSIB)])])
18607
;; Fold a load of the call address into a sibling call when an
;; UNSPECV_BLOCKAGE insn sits between the two.  The blockage is re-emitted
;; first, followed by the call through memory marked with UNSPEC_PEEPSIB.
;; The sibling-call and CALL_INSN_FUNCTION_USAGE tests are applied to the
;; insn at position 2.
18608 (define_peephole2
18609 [(set (match_operand:W 0 "register_operand")
18610 (match_operand:W 1 "memory_operand"))
18611 (unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)
18612 (call (mem:QI (match_dup 0))
18613 (match_operand 3))]
18614 "!TARGET_X32
18615 && !TARGET_INDIRECT_BRANCH_REGISTER
18616 && SIBLING_CALL_P (peep2_next_insn (2))
18617 && !reg_mentioned_p (operands[0],
18618 CALL_INSN_FUNCTION_USAGE (peep2_next_insn (2)))"
18619 [(unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)
18620 (parallel [(call (mem:QI (match_dup 1))
18621 (match_dup 3))
18622 (unspec [(const_int 0)] UNSPEC_PEEPSIB)])])
18623
;; Expander for a call without a return value that also adds operand 3 to
;; the stack pointer; 32-bit only.  All work is delegated to
;; ix86_expand_call with operand 3 as the fifth argument.
;; NOTE(review): operands[2] is passed to ix86_expand_call but is not named
;; in the pattern; presumably it is supplied by the caller of this named
;; expander -- confirm.
18624 (define_expand "call_pop"
18625 [(parallel [(call (match_operand:QI 0)
18626 (match_operand:SI 1))
18627 (set (reg:SI SP_REG)
18628 (plus:SI (reg:SI SP_REG)
18629 (match_operand:SI 3)))])]
18630 "!TARGET_64BIT"
18631 {
18632 ix86_expand_call (NULL, operands[0], operands[1],
18633 operands[2], operands[3], false);
18634 DONE;
18635 })
18636
;; Insn for a 32-bit call that is not a sibling call and that also adds
;; the immediate operand 2 to the stack pointer.  The assembly text comes
;; from ix86_output_call_insn.
18637 (define_insn "*call_pop"
18638 [(call (mem:QI (match_operand:SI 0 "call_insn_operand" "lBwBz"))
18639 (match_operand 1))
18640 (set (reg:SI SP_REG)
18641 (plus:SI (reg:SI SP_REG)
18642 (match_operand:SI 2 "immediate_operand" "i")))]
18643 "!TARGET_64BIT && !SIBLING_CALL_P (insn)"
18644 "* return ix86_output_call_insn (insn, operands[0]);"
18645 [(set_attr "type" "call")])
18646
;; Insn for a 32-bit sibling call that also adds the immediate operand 2
;; to the stack pointer.  The assembly text comes from
;; ix86_output_call_insn.
18647 (define_insn "*sibcall_pop"
18648 [(call (mem:QI (match_operand:SI 0 "sibcall_insn_operand" "UBsBz"))
18649 (match_operand 1))
18650 (set (reg:SI SP_REG)
18651 (plus:SI (reg:SI SP_REG)
18652 (match_operand:SI 2 "immediate_operand" "i")))]
18653 "!TARGET_64BIT && SIBLING_CALL_P (insn)"
18654 "* return ix86_output_call_insn (insn, operands[0]);"
18655 [(set_attr "type" "call")])
18656
;; 32-bit call through an SImode memory operand that also adds the
;; immediate operand 2 to the stack pointer and is marked with
;; UNSPEC_PEEPSIB.
;; NOTE(review): this pattern uses constraint "Bs" for the memory operand
;; while *sibcall_value_pop_memory uses "m" -- confirm this is intended.
18657 (define_insn "*sibcall_pop_memory"
18658 [(call (mem:QI (match_operand:SI 0 "memory_operand" "Bs"))
18659 (match_operand 1))
18660 (set (reg:SI SP_REG)
18661 (plus:SI (reg:SI SP_REG)
18662 (match_operand:SI 2 "immediate_operand" "i")))
18663 (unspec [(const_int 0)] UNSPEC_PEEPSIB)]
18664 "!TARGET_64BIT"
18665 "* return ix86_output_call_insn (insn, operands[0]);"
18666 [(set_attr "type" "call")])
18667
;; 32-bit only: fold a load of the call address into the following
;; sibling call that also adjusts the stack pointer by operand 4.  The
;; result calls through memory, keeps the stack adjustment, and is marked
;; with UNSPEC_PEEPSIB.  The loaded register must not be mentioned in the
;; call's CALL_INSN_FUNCTION_USAGE.
18668 (define_peephole2
18669 [(set (match_operand:SI 0 "register_operand")
18670 (match_operand:SI 1 "memory_operand"))
18671 (parallel [(call (mem:QI (match_dup 0))
18672 (match_operand 3))
18673 (set (reg:SI SP_REG)
18674 (plus:SI (reg:SI SP_REG)
18675 (match_operand:SI 4 "immediate_operand")))])]
18676 "!TARGET_64BIT && SIBLING_CALL_P (peep2_next_insn (1))
18677 && !reg_mentioned_p (operands[0],
18678 CALL_INSN_FUNCTION_USAGE (peep2_next_insn (1)))"
18679 [(parallel [(call (mem:QI (match_dup 1))
18680 (match_dup 3))
18681 (set (reg:SI SP_REG)
18682 (plus:SI (reg:SI SP_REG)
18683 (match_dup 4)))
18684 (unspec [(const_int 0)] UNSPEC_PEEPSIB)])])
18685
;; 32-bit only: fold a load of the call address into a stack-adjusting
;; sibling call when an UNSPECV_BLOCKAGE insn sits between the two.  The
;; blockage is re-emitted first, followed by the call through memory with
;; the stack adjustment and the UNSPEC_PEEPSIB marker.  The sibling-call
;; and CALL_INSN_FUNCTION_USAGE tests are applied to the insn at
;; position 2.
18686 (define_peephole2
18687 [(set (match_operand:SI 0 "register_operand")
18688 (match_operand:SI 1 "memory_operand"))
18689 (unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)
18690 (parallel [(call (mem:QI (match_dup 0))
18691 (match_operand 3))
18692 (set (reg:SI SP_REG)
18693 (plus:SI (reg:SI SP_REG)
18694 (match_operand:SI 4 "immediate_operand")))])]
18695 "!TARGET_64BIT && SIBLING_CALL_P (peep2_next_insn (2))
18696 && !reg_mentioned_p (operands[0],
18697 CALL_INSN_FUNCTION_USAGE (peep2_next_insn (2)))"
18698 [(unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)
18699 (parallel [(call (mem:QI (match_dup 1))
18700 (match_dup 3))
18701 (set (reg:SI SP_REG)
18702 (plus:SI (reg:SI SP_REG)
18703 (match_dup 4)))
18704 (unspec [(const_int 0)] UNSPEC_PEEPSIB)])])
18705
18706 ;; Combining simple memory jump instruction
18707
;; (set reg mem) followed by a jump through reg becomes a jump through
;; mem, provided reg satisfies peep2_reg_dead_p at position 2.  Disabled
;; for TARGET_X32 and TARGET_INDIRECT_BRANCH_REGISTER.
18708 (define_peephole2
18709 [(set (match_operand:W 0 "register_operand")
18710 (match_operand:W 1 "memory_operand"))
18711 (set (pc) (match_dup 0))]
18712 "!TARGET_X32
18713 && !TARGET_INDIRECT_BRANCH_REGISTER
18714 && peep2_reg_dead_p (2, operands[0])"
18715 [(set (pc) (match_dup 1))])
18716
18717 ;; Call subroutine, returning value in operand 0
18718
;; Expander for a call whose value is stored in operand 0.  All work is
;; delegated to ix86_expand_call with operands 0-3, a NULL fifth argument,
;; and false as the last argument (the sibcall_value expander passes true
;; there).
18719 (define_expand "call_value"
18720 [(set (match_operand 0)
18721 (call (match_operand:QI 1)
18722 (match_operand 2)))
18723 (use (match_operand 3))]
18724 ""
18725 {
18726 ix86_expand_call (operands[0], operands[1], operands[2],
18727 operands[3], NULL, false);
18728 DONE;
18729 })
18730
;; Expander for a sibling call whose value is stored in operand 0.  All
;; work is delegated to ix86_expand_call with operands 0-3, a NULL fifth
;; argument, and true as the last argument (the call_value expander passes
;; false there).
18731 (define_expand "sibcall_value"
18732 [(set (match_operand 0)
18733 (call (match_operand:QI 1)
18734 (match_operand 2)))
18735 (use (match_operand 3))]
18736 ""
18737 {
18738 ix86_expand_call (operands[0], operands[1], operands[2],
18739 operands[3], NULL, true);
18740 DONE;
18741 })
18742
;; Insn for a value-returning call that is not a sibling call.  The
;; callee address is the word-mode (W) operand 1; the assembly text comes
;; from ix86_output_call_insn.
18743 (define_insn "*call_value"
18744 [(set (match_operand 0)
18745 (call (mem:QI (match_operand:W 1 "call_insn_operand" "<c>BwBz"))
18746 (match_operand 2)))]
18747 "!SIBLING_CALL_P (insn)"
18748 "* return ix86_output_call_insn (insn, operands[1]);"
18749 [(set_attr "type" "callv")])
18750
18751 ;; This covers both call and sibcall since only GOT slot is allowed.
;; TARGET_X32 only; value-returning form.  Operand 1 is an SImode GOT
;; memory operand that the pattern zero-extends to DImode.  The output
;; code builds a DImode const MEM from the address of operand 1 and passes
;; that to ix86_output_call_insn.
18752 (define_insn "*call_value_got_x32"
18753 [(set (match_operand 0)
18754 (call (mem:QI
18755 (zero_extend:DI
18756 (match_operand:SI 1 "GOT_memory_operand" "Bg")))
18757 (match_operand 2)))]
18758 "TARGET_X32"
18759 {
18760 rtx fnaddr = gen_const_mem (DImode, XEXP (operands[1], 0));
18761 return ix86_output_call_insn (insn, fnaddr);
18762 }
18763 [(set_attr "type" "callv")])
18764
18765 ;; Since sibcall never returns, we can only use call-clobbered register
18766 ;; as GOT base.
;; Value-returning sibling call whose target address is loaded from SImode
;; memory at (operand 1 + operand 2): a register plus a GOT32 symbol.
;; Enabled only for non-Mach-O, non-64-bit targets without
;; TARGET_INDIRECT_BRANCH_REGISTER.  The output code rebuilds the
;; const MEM for that address and passes it to ix86_output_call_insn.
18767 (define_insn "*sibcall_value_GOT_32"
18768 [(set (match_operand 0)
18769 (call (mem:QI
18770 (mem:SI (plus:SI
18771 (match_operand:SI 1 "register_no_elim_operand" "U")
18772 (match_operand:SI 2 "GOT32_symbol_operand"))))
18773 (match_operand 3)))]
18774 "!TARGET_MACHO
18775 && !TARGET_64BIT
18776 && !TARGET_INDIRECT_BRANCH_REGISTER
18777 && SIBLING_CALL_P (insn)"
18778 {
18779 rtx fnaddr = gen_rtx_PLUS (SImode, operands[1], operands[2]);
18780 fnaddr = gen_const_mem (SImode, fnaddr);
18781 return ix86_output_call_insn (insn, fnaddr);
18782 }
18783 [(set_attr "type" "callv")])
18784
;; Insn for a value-returning sibling call.  The callee address is the
;; word-mode (W) operand 1; the assembly text comes from
;; ix86_output_call_insn.
18785 (define_insn "*sibcall_value"
18786 [(set (match_operand 0)
18787 (call (mem:QI (match_operand:W 1 "sibcall_insn_operand" "UBsBz"))
18788 (match_operand 2)))]
18789 "SIBLING_CALL_P (insn)"
18790 "* return ix86_output_call_insn (insn, operands[1]);"
18791 [(set_attr "type" "callv")])
18792
;; Value-returning call through a word-mode memory operand, marked with
;; UNSPEC_PEEPSIB.  Disabled for TARGET_X32 and
;; TARGET_INDIRECT_BRANCH_REGISTER.  This is the shape produced by the
;; peephole2 patterns in this file that fold a preceding load into a
;; value-returning sibling call.
18793 (define_insn "*sibcall_value_memory"
18794 [(set (match_operand 0)
18795 (call (mem:QI (match_operand:W 1 "memory_operand" "m"))
18796 (match_operand 2)))
18797 (unspec [(const_int 0)] UNSPEC_PEEPSIB)]
18798 "!TARGET_X32 && !TARGET_INDIRECT_BRANCH_REGISTER"
18799 "* return ix86_output_call_insn (insn, operands[1]);"
18800 [(set_attr "type" "callv")])
18801
;; Fold a load of the call address into the value-returning sibling call
;; that follows it: (set reg mem) + (set dest (call through reg)) becomes
;; a call through mem, marked with UNSPEC_PEEPSIB.
;; Requires the second insn to be a sibling call and the loaded register
;; not to be mentioned in that call's CALL_INSN_FUNCTION_USAGE.  Disabled
;; for TARGET_X32 and TARGET_INDIRECT_BRANCH_REGISTER.
18802 (define_peephole2
18803 [(set (match_operand:W 0 "register_operand")
18804 (match_operand:W 1 "memory_operand"))
18805 (set (match_operand 2)
18806 (call (mem:QI (match_dup 0))
18807 (match_operand 3)))]
18808 "!TARGET_X32
18809 && !TARGET_INDIRECT_BRANCH_REGISTER
18810 && SIBLING_CALL_P (peep2_next_insn (1))
18811 && !reg_mentioned_p (operands[0],
18812 CALL_INSN_FUNCTION_USAGE (peep2_next_insn (1)))"
18813 [(parallel [(set (match_dup 2)
18814 (call (mem:QI (match_dup 1))
18815 (match_dup 3)))
18816 (unspec [(const_int 0)] UNSPEC_PEEPSIB)])])
18817
;; Fold a load of the call address into a value-returning sibling call
;; when an UNSPECV_BLOCKAGE insn sits between the two.  The blockage is
;; re-emitted first, followed by the call through memory marked with
;; UNSPEC_PEEPSIB.  The sibling-call and CALL_INSN_FUNCTION_USAGE tests
;; are applied to the insn at position 2.
18818 (define_peephole2
18819 [(set (match_operand:W 0 "register_operand")
18820 (match_operand:W 1 "memory_operand"))
18821 (unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)
18822 (set (match_operand 2)
18823 (call (mem:QI (match_dup 0))
18824 (match_operand 3)))]
18825 "!TARGET_X32
18826 && !TARGET_INDIRECT_BRANCH_REGISTER
18827 && SIBLING_CALL_P (peep2_next_insn (2))
18828 && !reg_mentioned_p (operands[0],
18829 CALL_INSN_FUNCTION_USAGE (peep2_next_insn (2)))"
18830 [(unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)
18831 (parallel [(set (match_dup 2)
18832 (call (mem:QI (match_dup 1))
18833 (match_dup 3)))
18834 (unspec [(const_int 0)] UNSPEC_PEEPSIB)])])
18835
;; Expander for a value-returning call that also adds operand 4 to the
;; stack pointer; 32-bit only.  All work is delegated to ix86_expand_call
;; with operand 4 as the fifth argument.
;; NOTE(review): operands[3] is passed to ix86_expand_call but is not named
;; in the pattern; presumably it is supplied by the caller of this named
;; expander -- confirm.
18836 (define_expand "call_value_pop"
18837 [(parallel [(set (match_operand 0)
18838 (call (match_operand:QI 1)
18839 (match_operand:SI 2)))
18840 (set (reg:SI SP_REG)
18841 (plus:SI (reg:SI SP_REG)
18842 (match_operand:SI 4)))])]
18843 "!TARGET_64BIT"
18844 {
18845 ix86_expand_call (operands[0], operands[1], operands[2],
18846 operands[3], operands[4], false);
18847 DONE;
18848 })
18849
;; Insn for a 32-bit value-returning call that is not a sibling call and
;; that also adds the immediate operand 3 to the stack pointer.  The
;; assembly text comes from ix86_output_call_insn.
18850 (define_insn "*call_value_pop"
18851 [(set (match_operand 0)
18852 (call (mem:QI (match_operand:SI 1 "call_insn_operand" "lBwBz"))
18853 (match_operand 2)))
18854 (set (reg:SI SP_REG)
18855 (plus:SI (reg:SI SP_REG)
18856 (match_operand:SI 3 "immediate_operand" "i")))]
18857 "!TARGET_64BIT && !SIBLING_CALL_P (insn)"
18858 "* return ix86_output_call_insn (insn, operands[1]);"
18859 [(set_attr "type" "callv")])
18860
;; Insn for a 32-bit value-returning sibling call that also adds the
;; immediate operand 3 to the stack pointer.  The assembly text comes from
;; ix86_output_call_insn.
18861 (define_insn "*sibcall_value_pop"
18862 [(set (match_operand 0)
18863 (call (mem:QI (match_operand:SI 1 "sibcall_insn_operand" "UBsBz"))
18864 (match_operand 2)))
18865 (set (reg:SI SP_REG)
18866 (plus:SI (reg:SI SP_REG)
18867 (match_operand:SI 3 "immediate_operand" "i")))]
18868 "!TARGET_64BIT && SIBLING_CALL_P (insn)"
18869 "* return ix86_output_call_insn (insn, operands[1]);"
18870 [(set_attr "type" "callv")])
18871
;; 32-bit value-returning call through an SImode memory operand that also
;; adds the immediate operand 3 to the stack pointer and is marked with
;; UNSPEC_PEEPSIB.
18872 (define_insn "*sibcall_value_pop_memory"
18873 [(set (match_operand 0)
18874 (call (mem:QI (match_operand:SI 1 "memory_operand" "m"))
18875 (match_operand 2)))
18876 (set (reg:SI SP_REG)
18877 (plus:SI (reg:SI SP_REG)
18878 (match_operand:SI 3 "immediate_operand" "i")))
18879 (unspec [(const_int 0)] UNSPEC_PEEPSIB)]
18880 "!TARGET_64BIT"
18881 "* return ix86_output_call_insn (insn, operands[1]);"
18882 [(set_attr "type" "callv")])
18883
;; 32-bit only: fold a load of the call address into the following
;; value-returning sibling call that also adjusts the stack pointer by
;; operand 4.  The result calls through memory, keeps the stack
;; adjustment, and is marked with UNSPEC_PEEPSIB.  The loaded register
;; must not be mentioned in the call's CALL_INSN_FUNCTION_USAGE.
18884 (define_peephole2
18885 [(set (match_operand:SI 0 "register_operand")
18886 (match_operand:SI 1 "memory_operand"))
18887 (parallel [(set (match_operand 2)
18888 (call (mem:QI (match_dup 0))
18889 (match_operand 3)))
18890 (set (reg:SI SP_REG)
18891 (plus:SI (reg:SI SP_REG)
18892 (match_operand:SI 4 "immediate_operand")))])]
18893 "!TARGET_64BIT && SIBLING_CALL_P (peep2_next_insn (1))
18894 && !reg_mentioned_p (operands[0],
18895 CALL_INSN_FUNCTION_USAGE (peep2_next_insn (1)))"
18896 [(parallel [(set (match_dup 2)
18897 (call (mem:QI (match_dup 1))
18898 (match_dup 3)))
18899 (set (reg:SI SP_REG)
18900 (plus:SI (reg:SI SP_REG)
18901 (match_dup 4)))
18902 (unspec [(const_int 0)] UNSPEC_PEEPSIB)])])
18903
;; 32-bit only: fold a load of the call address into a stack-adjusting,
;; value-returning sibling call when an UNSPECV_BLOCKAGE insn sits between
;; the two.  The blockage is re-emitted first, followed by the call
;; through memory with the stack adjustment and the UNSPEC_PEEPSIB marker.
;; The sibling-call and CALL_INSN_FUNCTION_USAGE tests are applied to the
;; insn at position 2.
18904 (define_peephole2
18905 [(set (match_operand:SI 0 "register_operand")
18906 (match_operand:SI 1 "memory_operand"))
18907 (unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)
18908 (parallel [(set (match_operand 2)
18909 (call (mem:QI (match_dup 0))
18910 (match_operand 3)))
18911 (set (reg:SI SP_REG)
18912 (plus:SI (reg:SI SP_REG)
18913 (match_operand:SI 4 "immediate_operand")))])]
18914 "!TARGET_64BIT && SIBLING_CALL_P (peep2_next_insn (2))
18915 && !reg_mentioned_p (operands[0],
18916 CALL_INSN_FUNCTION_USAGE (peep2_next_insn (2)))"
18917 [(unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)
18918 (parallel [(set (match_dup 2)
18919 (call (mem:QI (match_dup 1))
18920 (match_dup 3)))
18921 (set (reg:SI SP_REG)
18922 (plus:SI (reg:SI SP_REG)
18923 (match_dup 4)))
18924 (unspec [(const_int 0)] UNSPEC_PEEPSIB)])])
18925
18926 ;; Call subroutine returning any type.
18927
;; Expander for a call whose return type is not known.  Operand 0 is the
;; callee.  Operand 2 is a vector of SETs; after the call one move is
;; emitted per element (SET_SRC into SET_DEST).  Operand 1 is not used by
;; the preparation code.
;; The call itself goes through ix86_expand_call with an XCmode register
;; at FIRST_FLOAT_REG as the return value when
;; TARGET_FLOAT_RETURNS_IN_80387 (NULL otherwise), and with the selected
;; *_SSE_REGPARM_MAX value minus 1 as its fourth argument.  A blockage
;; insn is emitted last.
18928 (define_expand "untyped_call"
18929 [(parallel [(call (match_operand 0)
18930 (const_int 0))
18931 (match_operand 1)
18932 (match_operand 2)])]
18933 ""
18934 {
18935 int i;
18936
18937 /* In order to give reg-stack an easier job in validating two
18938 coprocessor registers as containing a possible return value,
18939 simply pretend the untyped call returns a complex long double
18940 value.
18941
18942 We can't use SSE_REGPARM_MAX here since callee is unprototyped
18943 and should have the default ABI. */
18944
18945 ix86_expand_call ((TARGET_FLOAT_RETURNS_IN_80387
18946 ? gen_rtx_REG (XCmode, FIRST_FLOAT_REG) : NULL),
18947 operands[0], const0_rtx,
18948 GEN_INT ((TARGET_64BIT
18949 ? (ix86_abi == SYSV_ABI
18950 ? X86_64_SSE_REGPARM_MAX
18951 : X86_64_MS_SSE_REGPARM_MAX)
18952 : X86_32_SSE_REGPARM_MAX)
18953 - 1),
18954 NULL, false);
18955
18956 for (i = 0; i < XVECLEN (operands[2], 0); i++)
18957 {
18958 rtx set = XVECEXP (operands[2], 0, i);
18959 emit_move_insn (SET_DEST (set), SET_SRC (set));
18960 }
18961
18962 /* The optimizer does not know that the call sets the function value
18963 registers we stored in the result block. We avoid problems by
18964 claiming that all hard registers are used and clobbered at this
18965 point. */
18966 emit_insn (gen_blockage ());
18967
18968 DONE;
18969 })
18970 \f
18971 ;; Prologue and epilogue instructions
18972
18973 ;; UNSPEC_VOLATILE is considered to use and clobber all hard registers and
18974 ;; all of memory. This blocks insns from being moved across this point.
18975
;; The insn is an UNSPECV_BLOCKAGE unspec_volatile with an empty output
;; template and a "length" of 0.
18976 (define_insn "blockage"
18977 [(unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)]
18978 ""
18979 ""
18980 [(set_attr "length" "0")])
18981
18982 ;; Do not schedule instructions accessing memory across this point.
18983
;; Expander taking no operands.  The preparation code creates operand 0
;; as a BLKmode MEM whose address is a Pmode SCRATCH and marks it
;; volatile; the pattern then sets that MEM from an
;; UNSPEC_MEMORY_BLOCKAGE of itself.
18984 (define_expand "memory_blockage"
18985 [(set (match_dup 0)
18986 (unspec:BLK [(match_dup 0)] UNSPEC_MEMORY_BLOCKAGE))]
18987 ""
18988 {
18989 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
18990 MEM_VOLATILE_P (operands[0]) = 1;
18991 })
18992
;; Insn matching a BLKmode operand set from an UNSPEC_MEMORY_BLOCKAGE of
;; itself.  The output template is empty and "length" is 0.
18993 (define_insn "*memory_blockage"
18994 [(set (match_operand:BLK 0)
18995 (unspec:BLK [(match_dup 0)] UNSPEC_MEMORY_BLOCKAGE))]
18996 ""
18997 ""
18998 [(set_attr "length" "0")])
18999
19000 ;; As USE insns aren't meaningful after reload, this is used instead
19001 ;; to prevent deleting instructions setting registers for PIC code
;; The insn wraps operand 0 in an UNSPECV_PROLOGUE_USE unspec_volatile.
;; The output template is empty and "length" is 0.
19002 (define_insn "prologue_use"
19003 [(unspec_volatile [(match_operand 0)] UNSPECV_PROLOGUE_USE)]
19004 ""
19005 ""
19006 [(set_attr "length" "0")])
19007
19008 ;; Insn emitted into the body of a function to return from a function.
19009 ;; This is only done if the function's epilogue is known to be simple.
19010 ;; See comments for ix86_can_use_return_insn_p in i386.cc.
19011
;; Available only when ix86_can_use_return_insn_p () is true.  When
;; crtl->args.pops_args is nonzero, the expander emits
;; simple_return_pop_internal with that count and is DONE; otherwise the
;; (simple_return) template is emitted as written.
19012 (define_expand "return"
19013 [(simple_return)]
19014 "ix86_can_use_return_insn_p ()"
19015 {
19016 if (crtl->args.pops_args)
19017 {
19018 rtx popc = GEN_INT (crtl->args.pops_args);
19019 emit_jump_insn (gen_simple_return_pop_internal (popc));
19020 DONE;
19021 }
19022 })
19023
19024 ;; We need to disable this for TARGET_SEH, as otherwise
19025 ;; shrink-wrapped prologue gets enabled too. This might exceed
19026 ;; the maximum size of prologue in unwind information.
19027 ;; Also disallow shrink-wrapping if using stack slot to pass the
19028 ;; static chain pointer - the first instruction has to be pushl %esi
19029 ;; and it can't be moved around, as we use alternate entry points
19030 ;; in that case.
19031 ;; Also disallow for ms_hook_prologue functions which have frame
19032 ;; pointer set up in function label which is correctly handled in
19033 ;; ix86_expand_{prologue|epilogue}() only.
19034
;; Expander for (simple_return).  It is disabled for TARGET_SEH, when
;; ix86_static_chain_on_stack is set, and for functions for which
;; ix86_function_ms_hook_prologue is true.  When crtl->args.pops_args is
;; nonzero, it emits simple_return_pop_internal with that count and is
;; DONE; otherwise the (simple_return) template is emitted as written.
19035 (define_expand "simple_return"
19036 [(simple_return)]
19037 "!TARGET_SEH && !ix86_static_chain_on_stack && !ix86_function_ms_hook_prologue (cfun->decl)"
19038 {
19039 if (crtl->args.pops_args)
19040 {
19041 rtx popc = GEN_INT (crtl->args.pops_args);
19042 emit_jump_insn (gen_simple_return_pop_internal (popc));
19043 DONE;
19044 }
19045 })
19046
;; Plain (simple_return) insn, valid only after reload.  The assembly
;; text comes from ix86_output_function_return (false); "length" is 1.
19047 (define_insn "simple_return_internal"
19048 [(simple_return)]
19049 "reload_completed"
19050 "* return ix86_output_function_return (false);"
19051 [(set_attr "length" "1")
19052 (set_attr "atom_unit" "jeu")
19053 (set_attr "length_immediate" "0")
19054 (set_attr "modrm" "0")])
19055
;; (simple_return) marked with UNSPEC_INTERRUPT_RETURN, valid only after
;; reload.  Emits "uiret" when both TARGET_64BIT and TARGET_UINTR are
;; set, "iretq" when only TARGET_64BIT is set, and "iret" otherwise.
19056 (define_insn "interrupt_return"
19057 [(simple_return)
19058 (unspec [(const_int 0)] UNSPEC_INTERRUPT_RETURN)]
19059 "reload_completed"
19060 {
19061 return TARGET_64BIT ? (TARGET_UINTR ? "uiret" : "iretq") : "iret";
19062 })
19063
19064 ;; Used by x86_machine_dependent_reorg to avoid penalty on single byte RET
19065 ;; instruction Athlon and K8 have.
19066
;; (simple_return) marked with UNSPEC_REP, valid only after reload.  The
;; assembly text comes from ix86_output_function_return (true); "length"
;; is 2 and "prefix_rep" is 1.
19067 (define_insn "simple_return_internal_long"
19068 [(simple_return)
19069 (unspec [(const_int 0)] UNSPEC_REP)]
19070 "reload_completed"
19071 "* return ix86_output_function_return (true);"
19072 [(set_attr "length" "2")
19073 (set_attr "atom_unit" "jeu")
19074 (set_attr "length_immediate" "0")
19075 (set_attr "prefix_rep" "1")
19076 (set_attr "modrm" "0")])
19077
;; (simple_return) with a USE of the SImode constant operand 0, valid only
;; after reload; emitted as "ret %0".  When
;; cfun->machine->function_return_type is not indirect_branch_keep, the
;; insn is instead split by ix86_split_simple_return_pop_internal.
19078 (define_insn_and_split "simple_return_pop_internal"
19079 [(simple_return)
19080 (use (match_operand:SI 0 "const_int_operand"))]
19081 "reload_completed"
19082 "ret\t%0"
19083 "&& cfun->machine->function_return_type != indirect_branch_keep"
19084 [(const_int 0)]
19085 "ix86_split_simple_return_pop_internal (operands[0]); DONE;"
19086 [(set_attr "length" "3")
19087 (set_attr "atom_unit" "jeu")
19088 (set_attr "length_immediate" "2")
19089 (set_attr "modrm" "0")])
19090
;; Expander that emits a PARALLEL of (simple_return) and a USE of the
;; register in operand 0.  It has no condition and no preparation code.
19091 (define_expand "simple_return_indirect_internal"
19092 [(parallel
19093 [(simple_return)
19094 (use (match_operand 0 "register_operand"))])])
19095
;; (simple_return) with a USE of the word-mode (W) register operand 0,
;; valid only after reload.  The assembly text comes from
;; ix86_output_indirect_function_return.  "type" is "multi" when
;; cfun->machine->indirect_branch_type differs from indirect_branch_keep,
;; and "ibr" otherwise.
19096 (define_insn "*simple_return_indirect_internal<mode>"
19097 [(simple_return)
19098 (use (match_operand:W 0 "register_operand" "r"))]
19099 "reload_completed"
19100 "* return ix86_output_indirect_function_return (operands[0]);"
19101 [(set (attr "type")
19102 (if_then_else (match_test "(cfun->machine->indirect_branch_type
19103 != indirect_branch_keep)")
19104 (const_string "multi")
19105 (const_string "ibr")))
19106 (set_attr "length_immediate" "0")])
19107
;; Emits a single "nop"; the pattern is (const_int 0) and "length" is 1.
19108 (define_insn "nop"
19109 [(const_int 0)]
19110 ""
19111 "nop"
19112 [(set_attr "length" "1")
19113 (set_attr "length_immediate" "0")
19114 (set_attr "modrm" "0")])
19115
19116 ;; Generate nops. Operand 0 is the number of nops, up to 8.
;; Valid only after reload.  The output code asserts that operand 0 is in
;; the range 1..8, writes that many "nop" lines directly to asm_out_file,
;; and returns an empty template.  "length" is the value of operand 0.
19117 (define_insn "nops"
19118 [(unspec_volatile [(match_operand 0 "const_int_operand")]
19119 UNSPECV_NOPS)]
19120 "reload_completed"
19121 {
19122 int num = INTVAL (operands[0]);
19123
19124 gcc_assert (IN_RANGE (num, 1, 8));
19125
19126 while (num--)
19127 fputs ("\tnop\n", asm_out_file);
19128
19129 return "";
19130 }
19131 [(set (attr "length") (symbol_ref "INTVAL (operands[0])"))
19132 (set_attr "length_immediate" "0")
19133 (set_attr "modrm" "0")])
19134
19135 ;; Pad to 16-byte boundary, max skip in op0. Used to avoid
19136 ;; branch prediction penalty for the third jump in a 16-byte
19137 ;; block on K8.
19138
;; When ASM_OUTPUT_MAX_SKIP_ALIGN is defined, it is invoked on
;; asm_out_file with 4 as its second argument and the value of operand 0
;; as its third.  Otherwise nothing is written.  The returned template is
;; empty in both cases and "length" is fixed at 16.
19139 (define_insn "pad"
19140 [(unspec_volatile [(match_operand 0)] UNSPECV_ALIGN)]
19141 ""
19142 {
19143 #ifdef ASM_OUTPUT_MAX_SKIP_ALIGN
19144 ASM_OUTPUT_MAX_SKIP_ALIGN (asm_out_file, 4, (int)INTVAL (operands[0]));
19145 #else
19146 /* It is tempting to use ASM_OUTPUT_ALIGN here, but we don't want to do that.
19147 The align insn is used to avoid 3 jump instructions in the row to improve
19148 branch prediction and the benefits hardly outweigh the cost of extra 8
19149 nops on the average inserted by full alignment pseudo operation. */
19150 #endif
19151 return "";
19152 }
19153 [(set_attr "length" "16")])
19154
;; Expander for the function prologue; all work is done by
;; ix86_expand_prologue ().
19155 (define_expand "prologue"
19156 [(const_int 0)]
19157 ""
19158 "ix86_expand_prologue (); DONE;")
19159
;; 32-bit only.  Sets the SImode register operand 0 from an
;; UNSPEC_SET_GOT unspec, with a clobber of FLAGS_REG.  The preparation
;; code sets ix86_pc_thunk_call_expanded when flag_pic is set and the
;; target is not TARGET_VXWORKS_RTP.
19160 (define_expand "set_got"
19161 [(parallel
19162 [(set (match_operand:SI 0 "register_operand")
19163 (unspec:SI [(const_int 0)] UNSPEC_SET_GOT))
19164 (clobber (reg:CC FLAGS_REG))])]
19165 "!TARGET_64BIT"
19166 {
19167 if (flag_pic && !TARGET_VXWORKS_RTP)
19168 ix86_pc_thunk_call_expanded = true;
19169 })
19170
;; 32-bit insn for the UNSPEC_SET_GOT form that has no label.  The
;; assembly text comes from output_set_got (operands[0], NULL_RTX);
;; "type" is "multi" and "length" is 12.
19171 (define_insn "*set_got"
19172 [(set (match_operand:SI 0 "register_operand" "=r")
19173 (unspec:SI [(const_int 0)] UNSPEC_SET_GOT))
19174 (clobber (reg:CC FLAGS_REG))]
19175 "!TARGET_64BIT"
19176 "* return output_set_got (operands[0], NULL_RTX);"
19177 [(set_attr "type" "multi")
19178 (set_attr "length" "12")])
19179
;; 32-bit only.  Like set_got, except that the UNSPEC_SET_GOT unspec
;; wraps a label_ref to operand 1.  The preparation code sets
;; ix86_pc_thunk_call_expanded when flag_pic is set and the target is not
;; TARGET_VXWORKS_RTP.
19180 (define_expand "set_got_labelled"
19181 [(parallel
19182 [(set (match_operand:SI 0 "register_operand")
19183 (unspec:SI [(label_ref (match_operand 1))]
19184 UNSPEC_SET_GOT))
19185 (clobber (reg:CC FLAGS_REG))])]
19186 "!TARGET_64BIT"
19187 {
19188 if (flag_pic && !TARGET_VXWORKS_RTP)
19189 ix86_pc_thunk_call_expanded = true;
19190 })
19191
;; 32-bit insn for the UNSPEC_SET_GOT form that carries a label.  The
;; assembly text comes from output_set_got (operands[0], operands[1]);
;; "type" is "multi" and "length" is 12.
19192 (define_insn "*set_got_labelled"
19193 [(set (match_operand:SI 0 "register_operand" "=r")
19194 (unspec:SI [(label_ref (match_operand 1))]
19195 UNSPEC_SET_GOT))
19196 (clobber (reg:CC FLAGS_REG))]
19197 "!TARGET_64BIT"
19198 "* return output_set_got (operands[0], operands[1]);"
19199 [(set_attr "type" "multi")
19200 (set_attr "length" "12")])
19201
;; 64-bit only: a single RIP-relative lea that places the address of
;; _GLOBAL_OFFSET_TABLE_ in DImode operand 0.  No flags clobber is part of
;; the pattern.
19202 (define_insn "set_got_rex64"
19203 [(set (match_operand:DI 0 "register_operand" "=r")
19204 (unspec:DI [(const_int 0)] UNSPEC_SET_GOT))]
19205 "TARGET_64BIT"
19206 "lea{q}\t{_GLOBAL_OFFSET_TABLE_(%%rip), %0|%0, _GLOBAL_OFFSET_TABLE_[rip]}"
19207 [(set_attr "type" "lea")
19208 (set_attr "length_address" "4")
19209 (set_attr "mode" "DI")])
19210
;; 64-bit only: RIP-relative lea of the label given as operand 1 into
;; DImode operand 0.
19211 (define_insn "set_rip_rex64"
19212 [(set (match_operand:DI 0 "register_operand" "=r")
19213 (unspec:DI [(label_ref (match_operand 1))] UNSPEC_SET_RIP))]
19214 "TARGET_64BIT"
19215 "lea{q}\t{%l1(%%rip), %0|%0, %l1[rip]}"
19216 [(set_attr "type" "lea")
19217 (set_attr "length_address" "4")
19218 (set_attr "mode" "DI")])
19219
;; TARGET_LP64 only: movabs of the expression
;; _GLOBAL_OFFSET_TABLE_ - <label operand 1> into DImode operand 0.  The
;; constant is accounted for through length_address (8), with
;; length_immediate set to 0.
19220 (define_insn "set_got_offset_rex64"
19221 [(set (match_operand:DI 0 "register_operand" "=r")
19222 (unspec:DI
19223 [(label_ref (match_operand 1))]
19224 UNSPEC_SET_GOT_OFFSET))]
19225 "TARGET_LP64"
19226 "movabs{q}\t{$_GLOBAL_OFFSET_TABLE_-%l1, %0|%0, OFFSET FLAT:_GLOBAL_OFFSET_TABLE_-%l1}"
19227 [(set_attr "type" "imov")
19228 (set_attr "length_immediate" "0")
19229 (set_attr "length_address" "8")
19230 (set_attr "mode" "DI")])
19231
;; Standard "epilogue" expander.  All insns come from
;; ix86_expand_epilogue, called here with argument 1; the template is a
;; placeholder.
19232 (define_expand "epilogue"
19233 [(const_int 0)]
19234 ""
19235 "ix86_expand_epilogue (1); DONE;")
19236
;; Standard "sibcall_epilogue" expander.  Same as "epilogue" except that
;; ix86_expand_epilogue is called with argument 0.
19237 (define_expand "sibcall_epilogue"
19238 [(const_int 0)]
19239 ""
19240 "ix86_expand_epilogue (0); DONE;")
19241
;; Expander for "eh_return".  Operand 0 is a register holding the handler
;; address.  The expansion stores that address (converted to word_mode if
;; necessary) into the word located UNITS_PER_WORD below
;; arg_pointer + EH_RETURN_STACKADJ_RTX, then emits the
;; eh_return_internal jump insn followed by a barrier.
19242 (define_expand "eh_return"
19243 [(use (match_operand 0 "register_operand"))]
19244 ""
19245 {
19246 rtx tmp, sa = EH_RETURN_STACKADJ_RTX, ra = operands[0];
19247
19248 /* Tricky bit: we write the address of the handler to which we will
19249 be returning into someone else's stack frame, one word below the
19250 stack address we wish to restore. */
19251 tmp = gen_rtx_PLUS (Pmode, arg_pointer_rtx, sa);
19252 tmp = plus_constant (Pmode, tmp, -UNITS_PER_WORD);
19253 /* Return address is always in word_mode. */
19254 tmp = gen_rtx_MEM (word_mode, tmp);
19255 if (GET_MODE (ra) != word_mode)
19256 ra = convert_to_mode (word_mode, ra, 1);
19257 emit_move_insn (tmp, ra);
19258
19259 emit_jump_insn (gen_eh_return_internal ());
19260 emit_barrier ();
19261 DONE;
19262 })
19263
;; Placeholder (eh_return) insn emitted by the "eh_return" expander.  Its
;; output template is "#", so it must be split; the split is enabled once
;; epilogue_completed is set and is carried out by
;; ix86_expand_epilogue (2).
19264 (define_insn_and_split "eh_return_internal"
19265 [(eh_return)]
19266 ""
19267 "#"
19268 "epilogue_completed"
19269 [(const_int 0)]
19270 "ix86_expand_epilogue (2); DONE;")
19271
;; Parameterized expander for "leave" over the W mode iterator:
;; SP = BP + <MODE_SIZE>, BP = mem[BP], plus a clobber of a BLK-mode
;; scratch memory reference.  Operand 0 is not supplied by the caller; it
;; is filled in here with the mode size.
19272 (define_expand "@leave_<mode>"
19273 [(parallel
19274 [(set (reg:W SP_REG) (plus:W (reg:W BP_REG) (match_dup 0)))
19275 (set (reg:W BP_REG) (mem:W (reg:W BP_REG)))
19276 (clobber (mem:BLK (scratch)))])]
19277 ""
19278 "operands[0] = GEN_INT (<MODE_SIZE>);")
19279
;; 32-bit "leave": SP = BP + 4 and BP = mem[BP], with a BLK scratch
;; memory clobber.
19280 (define_insn "*leave"
19281 [(set (reg:SI SP_REG) (plus:SI (reg:SI BP_REG) (const_int 4)))
19282 (set (reg:SI BP_REG) (mem:SI (reg:SI BP_REG)))
19283 (clobber (mem:BLK (scratch)))]
19284 "!TARGET_64BIT"
19285 "leave"
19286 [(set_attr "type" "leave")])
19287
;; 64-bit "leave": SP = BP + 8 and BP = mem[BP], with a BLK scratch
;; memory clobber.
19288 (define_insn "*leave_rex64"
19289 [(set (reg:DI SP_REG) (plus:DI (reg:DI BP_REG) (const_int 8)))
19290 (set (reg:DI BP_REG) (mem:DI (reg:DI BP_REG)))
19291 (clobber (mem:BLK (scratch)))]
19292 "TARGET_64BIT"
19293 "leave"
19294 [(set_attr "type" "leave")])
19295 \f
19296 ;; Handle -fsplit-stack.
19297
;; Expander for "split_stack_prologue"; everything is emitted by
;; ix86_expand_split_stack_prologue and the template is a placeholder.
19298 (define_expand "split_stack_prologue"
19299 [(const_int 0)]
19300 ""
19301 {
19302 ix86_expand_split_stack_prologue ();
19303 DONE;
19304 })
19305
19306 ;; In order to support the call/return predictor, we use a return
19307 ;; instruction which the middle-end doesn't see.
;; Operand 0 is a const_int.  When it is zero a plain "ret" is output,
;; otherwise "ret %0".  The length attributes follow the same test:
;; length 1 / length_immediate 0 for the zero case, 3 / 2 otherwise.
19308 (define_insn "split_stack_return"
19309 [(unspec_volatile [(match_operand:SI 0 "const_int_operand")]
19310 UNSPECV_SPLIT_STACK_RETURN)]
19311 ""
19312 {
19313 if (operands[0] == const0_rtx)
19314 return "ret";
19315 else
19316 return "ret\t%0";
19317 }
19318 [(set_attr "atom_unit" "jeu")
19319 (set_attr "modrm" "0")
19320 (set (attr "length")
19321 (if_then_else (match_operand:SI 0 "const0_operand")
19322 (const_int 1)
19323 (const_int 3)))
19324 (set (attr "length_immediate")
19325 (if_then_else (match_operand:SI 0 "const0_operand")
19326 (const_int 0)
19327 (const_int 2)))])
19328
19329 ;; If there are operand 0 bytes available on the stack, jump to
19330 ;; operand 1.
;; The RTL template below is never emitted, because the preparation code
;; ends with DONE.  The code computes reg = SP - operand 0 in a fresh
;; Pmode pseudo and emits a branch to operand 1 taken when
;; reg >= ix86_split_stack_guard () (unsigned comparison, GEU).
19331
19332 (define_expand "split_stack_space_check"
19333 [(set (pc) (if_then_else
19334 (ltu (minus (reg SP_REG)
19335 (match_operand 0 "register_operand"))
19336 (match_dup 2))
19337 (label_ref (match_operand 1))
19338 (pc)))]
19339 ""
19340 {
19341 rtx reg = gen_reg_rtx (Pmode);
19342
19343 emit_insn (gen_sub3_insn (reg, stack_pointer_rtx, operands[0]));
19344
19345 operands[2] = ix86_split_stack_guard ();
19346 ix86_expand_branch (GEU, reg, operands[2], operands[1]);
19347
19348 DONE;
19349 })
19350 \f
19351 ;; Bit manipulation instructions.
19352
;; Expander for ffs in SImode/DImode (SWI48).  Emitted sequence:
;;   1. tmp (operand 2) = -1
;;   2. flags (operand 3) = compare of operand 1 with zero, in parallel
;;      with operand 0 = ctz (operand 1)
;;   3. operand 0 = (flags == 0) ? tmp : operand 0
;;   4. operand 0 = operand 0 + 1 (clobbers the flags)
;; The flags mode is CCCmode under TARGET_BMI and CCZmode otherwise.
;; SImode without TARGET_CMOVE is routed to ffssi2_no_cmove instead.
19353 (define_expand "ffs<mode>2"
19354 [(set (match_dup 2) (const_int -1))
19355 (parallel [(set (match_dup 3) (match_dup 4))
19356 (set (match_operand:SWI48 0 "register_operand")
19357 (ctz:SWI48
19358 (match_operand:SWI48 1 "nonimmediate_operand")))])
19359 (set (match_dup 0) (if_then_else:SWI48
19360 (eq (match_dup 3) (const_int 0))
19361 (match_dup 2)
19362 (match_dup 0)))
19363 (parallel [(set (match_dup 0) (plus:SWI48 (match_dup 0) (const_int 1)))
19364 (clobber (reg:CC FLAGS_REG))])]
19365 ""
19366 {
19367 machine_mode flags_mode;
19368
19369 if (<MODE>mode == SImode && !TARGET_CMOVE)
19370 {
19371 emit_insn (gen_ffssi2_no_cmove (operands[0], operands [1]));
19372 DONE;
19373 }
19374
19375 flags_mode = TARGET_BMI ? CCCmode : CCZmode;
19376
19377 operands[2] = gen_reg_rtx (<MODE>mode);
19378 operands[3] = gen_rtx_REG (flags_mode, FLAGS_REG);
19379 operands[4] = gen_rtx_COMPARE (flags_mode, operands[1], const0_rtx);
19380 })
19381
;; SImode ffs for targets without cmov (!TARGET_CMOVE).  The insn is
;; output as "#" and split after reload into:
;;   1. flags (operand 4) = compare of operand 1 with zero, in parallel
;;      with operand 0 = ctz (operand 1)
;;   2. low QImode part of scratch operand 2 = (flags == 0)
;;   3. scratch = -scratch
;;   4. operand 0 |= scratch
;;   5. operand 0 += 1
;; The preparation code calls ix86_expand_clear on the scratch and picks
;; CCCmode under TARGET_BMI, CCZmode otherwise.  The scratch uses the
;; earlyclobber "q" constraint because its QImode low part is written.
19382 (define_insn_and_split "ffssi2_no_cmove"
19383 [(set (match_operand:SI 0 "register_operand" "=r")
19384 (ffs:SI (match_operand:SI 1 "nonimmediate_operand" "rm")))
19385 (clobber (match_scratch:SI 2 "=&q"))
19386 (clobber (reg:CC FLAGS_REG))]
19387 "!TARGET_CMOVE"
19388 "#"
19389 "&& reload_completed"
19390 [(parallel [(set (match_dup 4) (match_dup 5))
19391 (set (match_dup 0) (ctz:SI (match_dup 1)))])
19392 (set (strict_low_part (match_dup 3))
19393 (eq:QI (match_dup 4) (const_int 0)))
19394 (parallel [(set (match_dup 2) (neg:SI (match_dup 2)))
19395 (clobber (reg:CC FLAGS_REG))])
19396 (parallel [(set (match_dup 0) (ior:SI (match_dup 0) (match_dup 2)))
19397 (clobber (reg:CC FLAGS_REG))])
19398 (parallel [(set (match_dup 0) (plus:SI (match_dup 0) (const_int 1)))
19399 (clobber (reg:CC FLAGS_REG))])]
19400 {
19401 machine_mode flags_mode = TARGET_BMI ? CCCmode : CCZmode;
19402
19403 operands[3] = gen_lowpart (QImode, operands[2]);
19404 operands[4] = gen_rtx_REG (flags_mode, FLAGS_REG);
19405 operands[5] = gen_rtx_COMPARE (flags_mode, operands[1], const0_rtx);
19406
19407 ix86_expand_clear (operands[2]);
19408 })
19409
;; tzcnt with its flags result exposed: FLAGS_REG is set in CCCmode from
;; the compare of operand 1 with zero, and operand 0 = ctz (operand 1).
;; Once epilogue_completed is set, and when TARGET_AVOID_FALSE_DEP_FOR_BMI
;; holds, the function is optimized for speed and operand 0 is not
;; mentioned in operand 1, the insn is split into the same parallel with
;; an added UNSPEC_INSN_FALSE_DEP use of operand 0; the split's
;; preparation statement is ix86_expand_clear (operands[0]).
19410 (define_insn_and_split "*tzcnt<mode>_1"
19411 [(set (reg:CCC FLAGS_REG)
19412 (compare:CCC (match_operand:SWI48 1 "nonimmediate_operand" "rm")
19413 (const_int 0)))
19414 (set (match_operand:SWI48 0 "register_operand" "=r")
19415 (ctz:SWI48 (match_dup 1)))]
19416 "TARGET_BMI"
19417 "tzcnt{<imodesuffix>}\t{%1, %0|%0, %1}"
19418 "&& TARGET_AVOID_FALSE_DEP_FOR_BMI && epilogue_completed
19419 && optimize_function_for_speed_p (cfun)
19420 && !reg_mentioned_p (operands[0], operands[1])"
19421 [(parallel
19422 [(set (reg:CCC FLAGS_REG)
19423 (compare:CCC (match_dup 1) (const_int 0)))
19424 (set (match_dup 0)
19425 (ctz:SWI48 (match_dup 1)))
19426 (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)])]
19427 "ix86_expand_clear (operands[0]);"
19428 [(set_attr "type" "alu1")
19429 (set_attr "prefix_0f" "1")
19430 (set_attr "prefix_rep" "1")
19431 (set_attr "btver2_decode" "double")
19432 (set_attr "mode" "<MODE>")])
19433
19434 ; False dependency happens when destination is only updated by tzcnt,
19435 ; lzcnt or popcnt. There is no false dependency when destination is
19436 ; also used in source.
;; Flags-setting tzcnt carrying an extra UNSPEC_INSN_FALSE_DEP use of
;; operand 2, which is tied to operand 0 through the "0" constraint.
19437 (define_insn "*tzcnt<mode>_1_falsedep"
19438 [(set (reg:CCC FLAGS_REG)
19439 (compare:CCC (match_operand:SWI48 1 "nonimmediate_operand" "rm")
19440 (const_int 0)))
19441 (set (match_operand:SWI48 0 "register_operand" "=r")
19442 (ctz:SWI48 (match_dup 1)))
19443 (unspec [(match_operand:SWI48 2 "register_operand" "0")]
19444 UNSPEC_INSN_FALSE_DEP)]
19445 "TARGET_BMI"
19446 "tzcnt{<imodesuffix>}\t{%1, %0|%0, %1}"
19447 [(set_attr "type" "alu1")
19448 (set_attr "prefix_0f" "1")
19449 (set_attr "prefix_rep" "1")
19450 (set_attr "btver2_decode" "double")
19451 (set_attr "mode" "<MODE>")])
19452
;; Flags-setting bsf: FLAGS_REG is set in CCZmode from the compare of
;; operand 1 with zero, and operand 0 = ctz (operand 1).  The insn has no
;; enabling condition.
19453 (define_insn "*bsf<mode>_1"
19454 [(set (reg:CCZ FLAGS_REG)
19455 (compare:CCZ (match_operand:SWI48 1 "nonimmediate_operand" "rm")
19456 (const_int 0)))
19457 (set (match_operand:SWI48 0 "register_operand" "=r")
19458 (ctz:SWI48 (match_dup 1)))]
19459 ""
19460 "bsf{<imodesuffix>}\t{%1, %0|%0, %1}"
19461 [(set_attr "type" "alu1")
19462 (set_attr "prefix_0f" "1")
19463 (set_attr "btver2_decode" "double")
19464 (set_attr "znver1_decode" "vector")
19465 (set_attr "mode" "<MODE>")])
19466
;; Count trailing zeros for SImode/DImode; the flags are clobbered.
;; Output selection, in order:
;;   - TARGET_BMI: tzcnt
;;   - optimizing the function for size: plain bsf
;;   - TARGET_CPU_P (GENERIC): "rep; bsf"
;;   - anything else: plain bsf
;; The prefix_rep attribute mirrors that choice.  After
;; epilogue_completed, for BMI or GENERIC tuning with
;; TARGET_AVOID_FALSE_DEP_FOR_BMI, when optimizing for speed and operand 0
;; is not mentioned in operand 1, the insn is split into the form with an
;; UNSPEC_INSN_FALSE_DEP use of operand 0, with
;; ix86_expand_clear (operands[0]) as preparation statement.
19467 (define_insn_and_split "ctz<mode>2"
19468 [(set (match_operand:SWI48 0 "register_operand" "=r")
19469 (ctz:SWI48
19470 (match_operand:SWI48 1 "nonimmediate_operand" "rm")))
19471 (clobber (reg:CC FLAGS_REG))]
19472 ""
19473 {
19474 if (TARGET_BMI)
19475 return "tzcnt{<imodesuffix>}\t{%1, %0|%0, %1}";
19476 else if (optimize_function_for_size_p (cfun))
19477 ;
19478 else if (TARGET_CPU_P (GENERIC))
19479 /* tzcnt expands to 'rep bsf' and we can use it even if !TARGET_BMI. */
19480 return "rep%; bsf{<imodesuffix>}\t{%1, %0|%0, %1}";
19481
19482 return "bsf{<imodesuffix>}\t{%1, %0|%0, %1}";
19483 }
19484 "(TARGET_BMI || TARGET_CPU_P (GENERIC))
19485 && TARGET_AVOID_FALSE_DEP_FOR_BMI && epilogue_completed
19486 && optimize_function_for_speed_p (cfun)
19487 && !reg_mentioned_p (operands[0], operands[1])"
19488 [(parallel
19489 [(set (match_dup 0)
19490 (ctz:SWI48 (match_dup 1)))
19491 (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)
19492 (clobber (reg:CC FLAGS_REG))])]
19493 "ix86_expand_clear (operands[0]);"
19494 [(set_attr "type" "alu1")
19495 (set_attr "prefix_0f" "1")
19496 (set (attr "prefix_rep")
19497 (if_then_else
19498 (ior (match_test "TARGET_BMI")
19499 (and (not (match_test "optimize_function_for_size_p (cfun)"))
19500 (match_test "TARGET_CPU_P (GENERIC)")))
19501 (const_string "1")
19502 (const_string "0")))
19503 (set_attr "mode" "<MODE>")])
19504
19505 ; False dependency happens when destination is only updated by tzcnt,
19506 ; lzcnt or popcnt. There is no false dependency when destination is
19507 ; also used in source.
;; ctz with an UNSPEC_INSN_FALSE_DEP use of operand 2 (tied to operand 0).
;; Only the tzcnt (TARGET_BMI) and "rep; bsf" (GENERIC tuning) outputs are
;; handled; any other configuration reaches gcc_unreachable ().
19508 (define_insn "*ctz<mode>2_falsedep"
19509 [(set (match_operand:SWI48 0 "register_operand" "=r")
19510 (ctz:SWI48
19511 (match_operand:SWI48 1 "nonimmediate_operand" "rm")))
19512 (unspec [(match_operand:SWI48 2 "register_operand" "0")]
19513 UNSPEC_INSN_FALSE_DEP)
19514 (clobber (reg:CC FLAGS_REG))]
19515 ""
19516 {
19517 if (TARGET_BMI)
19518 return "tzcnt{<imodesuffix>}\t{%1, %0|%0, %1}";
19519 else if (TARGET_CPU_P (GENERIC))
19520 /* tzcnt expands to 'rep bsf' and we can use it even if !TARGET_BMI. */
19521 return "rep%; bsf{<imodesuffix>}\t{%1, %0|%0, %1}";
19522 else
19523 gcc_unreachable ();
19524 }
19525 [(set_attr "type" "alu1")
19526 (set_attr "prefix_0f" "1")
19527 (set_attr "prefix_rep" "1")
19528 (set_attr "mode" "<MODE>")])
19529
;; SImode tzcnt whose result is taken as DImode through a subreg and
;; masked with 63, stored in DImode operand 0 (printed with the %k
;; modifier in the template).  Requires TARGET_BMI and TARGET_64BIT.
;; Split after epilogue_completed into the *_falsedep form under the same
;; conditions as the other tzcnt/lzcnt patterns in this file.
19530 (define_insn_and_split "*ctzsi2_zext"
19531 [(set (match_operand:DI 0 "register_operand" "=r")
19532 (and:DI
19533 (subreg:DI
19534 (ctz:SI
19535 (match_operand:SI 1 "nonimmediate_operand" "rm")) 0)
19536 (const_int 63)))
19537 (clobber (reg:CC FLAGS_REG))]
19538 "TARGET_BMI && TARGET_64BIT"
19539 "tzcnt{l}\t{%1, %k0|%k0, %1}"
19540 "&& TARGET_AVOID_FALSE_DEP_FOR_BMI && epilogue_completed
19541 && optimize_function_for_speed_p (cfun)
19542 && !reg_mentioned_p (operands[0], operands[1])"
19543 [(parallel
19544 [(set (match_dup 0)
19545 (and:DI (subreg:DI (ctz:SI (match_dup 1)) 0) (const_int 63)))
19546 (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)
19547 (clobber (reg:CC FLAGS_REG))])]
19548 "ix86_expand_clear (operands[0]);"
19549 [(set_attr "type" "alu1")
19550 (set_attr "prefix_0f" "1")
19551 (set_attr "prefix_rep" "1")
19552 (set_attr "mode" "SI")])
19553
19554 ; False dependency happens when destination is only updated by tzcnt,
19555 ; lzcnt or popcnt. There is no false dependency when destination is
19556 ; also used in source.
;; Form of *ctzsi2_zext with an UNSPEC_INSN_FALSE_DEP use of DImode
;; operand 2, tied to operand 0.
19557 (define_insn "*ctzsi2_zext_falsedep"
19558 [(set (match_operand:DI 0 "register_operand" "=r")
19559 (and:DI
19560 (subreg:DI
19561 (ctz:SI
19562 (match_operand:SI 1 "nonimmediate_operand" "rm")) 0)
19563 (const_int 63)))
19564 (unspec [(match_operand:DI 2 "register_operand" "0")]
19565 UNSPEC_INSN_FALSE_DEP)
19566 (clobber (reg:CC FLAGS_REG))]
19567 "TARGET_BMI && TARGET_64BIT"
19568 "tzcnt{l}\t{%1, %k0|%k0, %1}"
19569 [(set_attr "type" "alu1")
19570 (set_attr "prefix_0f" "1")
19571 (set_attr "prefix_rep" "1")
19572 (set_attr "mode" "SI")])
19573
;; SImode ctz whose result is sign- or zero-extended (any_extend) to
;; DImode; 64-bit only.  Output selection: tzcnt under TARGET_BMI;
;; "rep; bsf" for GENERIC tuning when not optimizing for size; plain bsf
;; otherwise.  All forms write the 32-bit name of operand 0 (%k0).  The
;; split into the *_falsedep form uses the same conditions as ctz<mode>2.
19574 (define_insn_and_split "*ctzsidi2_<s>ext"
19575 [(set (match_operand:DI 0 "register_operand" "=r")
19576 (any_extend:DI
19577 (ctz:SI
19578 (match_operand:SI 1 "nonimmediate_operand" "rm"))))
19579 (clobber (reg:CC FLAGS_REG))]
19580 "TARGET_64BIT"
19581 {
19582 if (TARGET_BMI)
19583 return "tzcnt{l}\t{%1, %k0|%k0, %1}";
19584 else if (TARGET_CPU_P (GENERIC)
19585 && !optimize_function_for_size_p (cfun))
19586 /* tzcnt expands to 'rep bsf' and we can use it even if !TARGET_BMI. */
19587 return "rep%; bsf{l}\t{%1, %k0|%k0, %1}";
19588 return "bsf{l}\t{%1, %k0|%k0, %1}";
19589 }
19590 "(TARGET_BMI || TARGET_CPU_P (GENERIC))
19591 && TARGET_AVOID_FALSE_DEP_FOR_BMI && epilogue_completed
19592 && optimize_function_for_speed_p (cfun)
19593 && !reg_mentioned_p (operands[0], operands[1])"
19594 [(parallel
19595 [(set (match_dup 0)
19596 (any_extend:DI (ctz:SI (match_dup 1))))
19597 (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)
19598 (clobber (reg:CC FLAGS_REG))])]
19599 "ix86_expand_clear (operands[0]);"
19600 [(set_attr "type" "alu1")
19601 (set_attr "prefix_0f" "1")
19602 (set (attr "prefix_rep")
19603 (if_then_else
19604 (ior (match_test "TARGET_BMI")
19605 (and (not (match_test "optimize_function_for_size_p (cfun)"))
19606 (match_test "TARGET_CPU_P (GENERIC)")))
19607 (const_string "1")
19608 (const_string "0")))
19609 (set_attr "mode" "SI")])
19610
;; Form of *ctzsidi2_<s>ext with an UNSPEC_INSN_FALSE_DEP use of DImode
;; operand 2, tied to operand 0.  Only the tzcnt (TARGET_BMI) and
;; "rep; bsf" (GENERIC tuning) outputs are handled; any other
;; configuration reaches gcc_unreachable ().
19611 (define_insn "*ctzsidi2_<s>ext_falsedep"
19612 [(set (match_operand:DI 0 "register_operand" "=r")
19613 (any_extend:DI
19614 (ctz:SI
19615 (match_operand:SI 1 "nonimmediate_operand" "rm"))))
19616 (unspec [(match_operand:DI 2 "register_operand" "0")]
19617 UNSPEC_INSN_FALSE_DEP)
19618 (clobber (reg:CC FLAGS_REG))]
19619 "TARGET_64BIT"
19620 {
19621 if (TARGET_BMI)
19622 return "tzcnt{l}\t{%1, %k0|%k0, %1}";
19623 else if (TARGET_CPU_P (GENERIC))
19624 /* tzcnt expands to 'rep bsf' and we can use it even if !TARGET_BMI. */
19625 return "rep%; bsf{l}\t{%1, %k0|%k0, %1}";
19626 else
19627 gcc_unreachable ();
19628 }
19629 [(set_attr "type" "alu1")
19630 (set_attr "prefix_0f" "1")
19631 (set_attr "prefix_rep" "1")
19632 (set_attr "mode" "SI")])
19633
;; 64-bit flags-setting bsr: FLAGS_REG is set in CCZmode from the compare
;; of operand 1 with zero, and operand 0 = 63 - clz (operand 1).
19634 (define_insn "bsr_rex64"
19635 [(set (reg:CCZ FLAGS_REG)
19636 (compare:CCZ (match_operand:DI 1 "nonimmediate_operand" "rm")
19637 (const_int 0)))
19638 (set (match_operand:DI 0 "register_operand" "=r")
19639 (minus:DI (const_int 63)
19640 (clz:DI (match_dup 1))))]
19641 "TARGET_64BIT"
19642 "bsr{q}\t{%1, %0|%0, %1}"
19643 [(set_attr "type" "alu1")
19644 (set_attr "prefix_0f" "1")
19645 (set_attr "znver1_decode" "vector")
19646 (set_attr "mode" "DI")])
19647
;; 64-bit bsr with the flags only clobbered:
;; operand 0 = 63 - clz (operand 1).  Enabled only without TARGET_LZCNT.
19648 (define_insn "bsr_rex64_1"
19649 [(set (match_operand:DI 0 "register_operand" "=r")
19650 (minus:DI (const_int 63)
19651 (clz:DI (match_operand:DI 1 "nonimmediate_operand" "rm"))))
19652 (clobber (reg:CC FLAGS_REG))]
19653 "!TARGET_LZCNT && TARGET_64BIT"
19654 "bsr{q}\t{%1, %0|%0, %1}"
19655 [(set_attr "type" "alu1")
19656 (set_attr "prefix_0f" "1")
19657 (set_attr "znver1_decode" "vector")
19658 (set_attr "mode" "DI")])
19659
;; Like bsr_rex64_1, but matching the form where 63 - clz is computed in
;; SImode on the low subreg of the DImode clz and then zero-extended to
;; DImode.  The emitted instruction is the same bsr{q}.
19660 (define_insn "bsr_rex64_1_zext"
19661 [(set (match_operand:DI 0 "register_operand" "=r")
19662 (zero_extend:DI
19663 (minus:SI (const_int 63)
19664 (subreg:SI
19665 (clz:DI (match_operand:DI 1 "nonimmediate_operand" "rm"))
19666 0))))
19667 (clobber (reg:CC FLAGS_REG))]
19668 "!TARGET_LZCNT && TARGET_64BIT"
19669 "bsr{q}\t{%1, %0|%0, %1}"
19670 [(set_attr "type" "alu1")
19671 (set_attr "prefix_0f" "1")
19672 (set_attr "znver1_decode" "vector")
19673 (set_attr "mode" "DI")])
19674
;; SImode flags-setting bsr: FLAGS_REG is set in CCZmode from the compare
;; of operand 1 with zero, and operand 0 = 31 - clz (operand 1).  The insn
;; has no enabling condition.
19675 (define_insn "bsr"
19676 [(set (reg:CCZ FLAGS_REG)
19677 (compare:CCZ (match_operand:SI 1 "nonimmediate_operand" "rm")
19678 (const_int 0)))
19679 (set (match_operand:SI 0 "register_operand" "=r")
19680 (minus:SI (const_int 31)
19681 (clz:SI (match_dup 1))))]
19682 ""
19683 "bsr{l}\t{%1, %0|%0, %1}"
19684 [(set_attr "type" "alu1")
19685 (set_attr "prefix_0f" "1")
19686 (set_attr "znver1_decode" "vector")
19687 (set_attr "mode" "SI")])
19688
;; SImode bsr with the flags only clobbered:
;; operand 0 = 31 - clz (operand 1).  Enabled only without TARGET_LZCNT.
19689 (define_insn "bsr_1"
19690 [(set (match_operand:SI 0 "register_operand" "=r")
19691 (minus:SI (const_int 31)
19692 (clz:SI (match_operand:SI 1 "nonimmediate_operand" "rm"))))
19693 (clobber (reg:CC FLAGS_REG))]
19694 "!TARGET_LZCNT"
19695 "bsr{l}\t{%1, %0|%0, %1}"
19696 [(set_attr "type" "alu1")
19697 (set_attr "prefix_0f" "1")
19698 (set_attr "znver1_decode" "vector")
19699 (set_attr "mode" "SI")])
19700
;; bsr_1 with the SImode result zero-extended into DImode operand 0; the
;; template writes the 32-bit name of operand 0 (%k0).  64-bit only and
;; only without TARGET_LZCNT.
19701 (define_insn "bsr_zext_1"
19702 [(set (match_operand:DI 0 "register_operand" "=r")
19703 (zero_extend:DI
19704 (minus:SI
19705 (const_int 31)
19706 (clz:SI (match_operand:SI 1 "nonimmediate_operand" "rm")))))
19707 (clobber (reg:CC FLAGS_REG))]
19708 "!TARGET_LZCNT && TARGET_64BIT"
19709 "bsr{l}\t{%1, %k0|%k0, %1}"
19710 [(set_attr "type" "alu1")
19711 (set_attr "prefix_0f" "1")
19712 (set_attr "znver1_decode" "vector")
19713 (set_attr "mode" "SI")])
19714
19715 ; As bsr is undefined behavior on zero and for other input
19716 ; values it is in range 0 to 63, we can optimize away sign-extends.
;; Pre-reload pattern (ix86_pre_reload_split) that always splits ("&& 1")
;; into two insns:
;;   1. a flags-setting parallel computing operand 2 = 63 - clz (operand 1)
;;      in a fresh DImode pseudo
;;   2. operand 0 = zero_extend of (SImode low part of operand 2) ^ 63
19717 (define_insn_and_split "*bsr_rex64_2"
19718 [(set (match_operand:DI 0 "register_operand")
19719 (xor:DI
19720 (sign_extend:DI
19721 (minus:SI
19722 (const_int 63)
19723 (subreg:SI (clz:DI (match_operand:DI 1 "nonimmediate_operand"))
19724 0)))
19725 (const_int 63)))
19726 (clobber (reg:CC FLAGS_REG))]
19727 "!TARGET_LZCNT && TARGET_64BIT && ix86_pre_reload_split ()"
19728 "#"
19729 "&& 1"
19730 [(parallel [(set (reg:CCZ FLAGS_REG)
19731 (compare:CCZ (match_dup 1) (const_int 0)))
19732 (set (match_dup 2)
19733 (minus:DI (const_int 63) (clz:DI (match_dup 1))))])
19734 (parallel [(set (match_dup 0)
19735 (zero_extend:DI (xor:SI (match_dup 3) (const_int 63))))
19736 (clobber (reg:CC FLAGS_REG))])]
19737 {
19738 operands[2] = gen_reg_rtx (DImode);
19739 operands[3] = lowpart_subreg (SImode, operands[2], DImode);
19740 })
19741
;; SImode counterpart of *bsr_rex64_2: matches the sign extension to
;; DImode of (31 - clz (operand 1)) ^ 31 and, before reload, splits it
;; into a flags-setting parallel computing a fresh SImode pseudo
;; (operand 2) = 31 - clz, followed by
;; operand 0 = zero_extend (operand 2 ^ 31).
19742 (define_insn_and_split "*bsr_2"
19743 [(set (match_operand:DI 0 "register_operand")
19744 (sign_extend:DI
19745 (xor:SI
19746 (minus:SI
19747 (const_int 31)
19748 (clz:SI (match_operand:SI 1 "nonimmediate_operand")))
19749 (const_int 31))))
19750 (clobber (reg:CC FLAGS_REG))]
19751 "!TARGET_LZCNT && TARGET_64BIT && ix86_pre_reload_split ()"
19752 "#"
19753 "&& 1"
19754 [(parallel [(set (reg:CCZ FLAGS_REG)
19755 (compare:CCZ (match_dup 1) (const_int 0)))
19756 (set (match_dup 2)
19757 (minus:SI (const_int 31) (clz:SI (match_dup 1))))])
19758 (parallel [(set (match_dup 0)
19759 (zero_extend:DI (xor:SI (match_dup 2) (const_int 31))))
19760 (clobber (reg:CC FLAGS_REG))])]
19761 "operands[2] = gen_reg_rtx (SImode);")
19762
19763 ; Splitters to optimize 64 - __builtin_clzl (x) or 32 - __builtin_clz (x).
19764 ; Again, as for !TARGET_LZCNT CLZ is UB at zero, CLZ is guaranteed to be
19765 ; in [0, 63] or [0, 31] range.
;; SImode result from a DImode clz: matches
;;   C - ((63 - lowpart (clz:DI (x))) ^ 63)
;; and rewrites it as tmp = 63 - clz:DI (x) in a fresh DImode pseudo
;; (operand 3), followed by operand 0 = lowpart (tmp) + (C - 63).  When C
;; is exactly 63 the addition is unnecessary: bsr_rex64_1_zext is emitted
;; into tmp and its SImode low part is moved to operand 0.
19766 (define_split
19767 [(set (match_operand:SI 0 "register_operand")
19768 (minus:SI
19769 (match_operand:SI 2 "const_int_operand")
19770 (xor:SI
19771 (minus:SI (const_int 63)
19772 (subreg:SI
19773 (clz:DI (match_operand:DI 1 "nonimmediate_operand"))
19774 0))
19775 (const_int 63))))]
19776 "!TARGET_LZCNT && TARGET_64BIT && ix86_pre_reload_split ()"
19777 [(set (match_dup 3)
19778 (minus:DI (const_int 63) (clz:DI (match_dup 1))))
19779 (set (match_dup 0)
19780 (plus:SI (match_dup 5) (match_dup 4)))]
19781 {
19782 operands[3] = gen_reg_rtx (DImode);
19783 operands[5] = lowpart_subreg (SImode, operands[3], DImode);
19784 if (INTVAL (operands[2]) == 63)
19785 {
19786 emit_insn (gen_bsr_rex64_1_zext (operands[3], operands[1]));
19787 emit_move_insn (operands[0], operands[5]);
19788 DONE;
19789 }
19790 operands[4] = gen_int_mode (UINTVAL (operands[2]) - 63, SImode);
19791 })
19792
;; SImode result from an SImode clz: matches
;;   C - ((31 - clz:SI (x)) ^ 31)
;; and rewrites it as tmp = 31 - clz:SI (x) in a fresh SImode pseudo
;; (operand 3), followed by operand 0 = tmp + (C - 31).  When C is exactly
;; 31, a single bsr_1 into operand 0 is emitted instead.
19793 (define_split
19794 [(set (match_operand:SI 0 "register_operand")
19795 (minus:SI
19796 (match_operand:SI 2 "const_int_operand")
19797 (xor:SI
19798 (minus:SI (const_int 31)
19799 (clz:SI (match_operand:SI 1 "nonimmediate_operand")))
19800 (const_int 31))))]
19801 "!TARGET_LZCNT && ix86_pre_reload_split ()"
19802 [(set (match_dup 3)
19803 (minus:SI (const_int 31) (clz:SI (match_dup 1))))
19804 (set (match_dup 0)
19805 (plus:SI (match_dup 3) (match_dup 4)))]
19806 {
19807 if (INTVAL (operands[2]) == 31)
19808 {
19809 emit_insn (gen_bsr_1 (operands[0], operands[1]));
19810 DONE;
19811 }
19812 operands[3] = gen_reg_rtx (SImode);
19813 operands[4] = gen_int_mode (UINTVAL (operands[2]) - 31, SImode);
19814 })
19815
;; DImode result from a DImode clz: matches
;;   C - (sign_extend (63 - lowpart (clz:DI (x))) ^ 63)
;; and rewrites it as tmp = 63 - clz:DI (x) in a fresh DImode pseudo
;; (operand 3), followed by operand 0 = tmp + (C - 63).  The condition
;; additionally requires that C - 63 is unchanged by truncation to
;; SImode.  When C is exactly 63, a single bsr_rex64_1 into operand 0 is
;; emitted instead.
19816 (define_split
19817 [(set (match_operand:DI 0 "register_operand")
19818 (minus:DI
19819 (match_operand:DI 2 "const_int_operand")
19820 (xor:DI
19821 (sign_extend:DI
19822 (minus:SI (const_int 63)
19823 (subreg:SI
19824 (clz:DI (match_operand:DI 1 "nonimmediate_operand"))
19825 0)))
19826 (const_int 63))))]
19827 "!TARGET_LZCNT
19828 && TARGET_64BIT
19829 && ix86_pre_reload_split ()
19830 && ((unsigned HOST_WIDE_INT)
19831 trunc_int_for_mode (UINTVAL (operands[2]) - 63, SImode)
19832 == UINTVAL (operands[2]) - 63)"
19833 [(set (match_dup 3)
19834 (minus:DI (const_int 63) (clz:DI (match_dup 1))))
19835 (set (match_dup 0)
19836 (plus:DI (match_dup 3) (match_dup 4)))]
19837 {
19838 if (INTVAL (operands[2]) == 63)
19839 {
19840 emit_insn (gen_bsr_rex64_1 (operands[0], operands[1]));
19841 DONE;
19842 }
19843 operands[3] = gen_reg_rtx (DImode);
19844 operands[4] = GEN_INT (UINTVAL (operands[2]) - 63);
19845 })
19846
;; DImode result from an SImode clz: matches
;;   C - sign_extend ((31 - clz:SI (x)) ^ 31)
;; and rewrites it as tmp = zero_extend (31 - clz:SI (x)) in a fresh
;; DImode pseudo (operand 3), followed by operand 0 = tmp + (C - 31).  The
;; condition additionally requires that C - 31 is unchanged by truncation
;; to SImode.  When C is exactly 31, a single bsr_zext_1 into operand 0 is
;; emitted instead.
19847 (define_split
19848 [(set (match_operand:DI 0 "register_operand")
19849 (minus:DI
19850 (match_operand:DI 2 "const_int_operand")
19851 (sign_extend:DI
19852 (xor:SI
19853 (minus:SI (const_int 31)
19854 (clz:SI (match_operand:SI 1 "nonimmediate_operand")))
19855 (const_int 31)))))]
19856 "!TARGET_LZCNT
19857 && TARGET_64BIT
19858 && ix86_pre_reload_split ()
19859 && ((unsigned HOST_WIDE_INT)
19860 trunc_int_for_mode (UINTVAL (operands[2]) - 31, SImode)
19861 == UINTVAL (operands[2]) - 31)"
19862 [(set (match_dup 3)
19863 (zero_extend:DI (minus:SI (const_int 31) (clz:SI (match_dup 1)))))
19864 (set (match_dup 0)
19865 (plus:DI (match_dup 3) (match_dup 4)))]
19866 {
19867 if (INTVAL (operands[2]) == 31)
19868 {
19869 emit_insn (gen_bsr_zext_1 (operands[0], operands[1]));
19870 DONE;
19871 }
19872 operands[3] = gen_reg_rtx (DImode);
19873 operands[4] = GEN_INT (UINTVAL (operands[2]) - 31);
19874 })
19875
;; Expander for clz in SImode/DImode.  With TARGET_LZCNT it emits
;; clz<mode>2_lzcnt and stops.  Otherwise the template is used with
;; operand 2 = bitsize - 1 and operand 3 a fresh pseudo:
;;   1. flags-setting parallel: operand 3 = operand 2 - clz (operand 1)
;;   2. operand 0 = operand 3 ^ operand 2 (clobbers the flags)
19876 (define_expand "clz<mode>2"
19877 [(parallel
19878 [(set (reg:CCZ FLAGS_REG)
19879 (compare:CCZ (match_operand:SWI48 1 "nonimmediate_operand" "rm")
19880 (const_int 0)))
19881 (set (match_dup 3) (minus:SWI48
19882 (match_dup 2)
19883 (clz:SWI48 (match_dup 1))))])
19884 (parallel
19885 [(set (match_operand:SWI48 0 "register_operand")
19886 (xor:SWI48 (match_dup 3) (match_dup 2)))
19887 (clobber (reg:CC FLAGS_REG))])]
19888 ""
19889 {
19890 if (TARGET_LZCNT)
19891 {
19892 emit_insn (gen_clz<mode>2_lzcnt (operands[0], operands[1]));
19893 DONE;
19894 }
19895 operands[2] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode)-1);
19896 operands[3] = gen_reg_rtx (<MODE>mode);
19897 })
19898
;; clz implemented with lzcnt (TARGET_LZCNT); the flags are clobbered.
;; After epilogue_completed, with TARGET_AVOID_FALSE_DEP_FOR_BMI, when
;; optimizing for speed and operand 0 is not mentioned in operand 1, the
;; insn is split into the form with an UNSPEC_INSN_FALSE_DEP use of
;; operand 0, with ix86_expand_clear (operands[0]) as preparation
;; statement.
19899 (define_insn_and_split "clz<mode>2_lzcnt"
19900 [(set (match_operand:SWI48 0 "register_operand" "=r")
19901 (clz:SWI48
19902 (match_operand:SWI48 1 "nonimmediate_operand" "rm")))
19903 (clobber (reg:CC FLAGS_REG))]
19904 "TARGET_LZCNT"
19905 "lzcnt{<imodesuffix>}\t{%1, %0|%0, %1}"
19906 "&& TARGET_AVOID_FALSE_DEP_FOR_BMI && epilogue_completed
19907 && optimize_function_for_speed_p (cfun)
19908 && !reg_mentioned_p (operands[0], operands[1])"
19909 [(parallel
19910 [(set (match_dup 0)
19911 (clz:SWI48 (match_dup 1)))
19912 (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)
19913 (clobber (reg:CC FLAGS_REG))])]
19914 "ix86_expand_clear (operands[0]);"
19915 [(set_attr "prefix_rep" "1")
19916 (set_attr "type" "bitmanip")
19917 (set_attr "mode" "<MODE>")])
19918
19919 ; False dependency happens when destination is only updated by tzcnt,
19920 ; lzcnt or popcnt. There is no false dependency when destination is
19921 ; also used in source.
;; Form of clz<mode>2_lzcnt with an UNSPEC_INSN_FALSE_DEP use of
;; operand 2, tied to operand 0.
19922 (define_insn "*clz<mode>2_lzcnt_falsedep"
19923 [(set (match_operand:SWI48 0 "register_operand" "=r")
19924 (clz:SWI48
19925 (match_operand:SWI48 1 "nonimmediate_operand" "rm")))
19926 (unspec [(match_operand:SWI48 2 "register_operand" "0")]
19927 UNSPEC_INSN_FALSE_DEP)
19928 (clobber (reg:CC FLAGS_REG))]
19929 "TARGET_LZCNT"
19930 "lzcnt{<imodesuffix>}\t{%1, %0|%0, %1}"
19931 [(set_attr "prefix_rep" "1")
19932 (set_attr "type" "bitmanip")
19933 (set_attr "mode" "<MODE>")])
19934
;; SImode lzcnt whose result is taken as DImode through a subreg and
;; masked with 63, stored in DImode operand 0 (printed with the %k
;; modifier in the template).  Requires TARGET_LZCNT and TARGET_64BIT.
;; Split after epilogue_completed into the *_falsedep form under the same
;; conditions as clz<mode>2_lzcnt.
19935 (define_insn_and_split "*clzsi2_lzcnt_zext"
19936 [(set (match_operand:DI 0 "register_operand" "=r")
19937 (and:DI
19938 (subreg:DI
19939 (clz:SI
19940 (match_operand:SI 1 "nonimmediate_operand" "rm")) 0)
19941 (const_int 63)))
19942 (clobber (reg:CC FLAGS_REG))]
19943 "TARGET_LZCNT && TARGET_64BIT"
19944 "lzcnt{l}\t{%1, %k0|%k0, %1}"
19945 "&& TARGET_AVOID_FALSE_DEP_FOR_BMI && epilogue_completed
19946 && optimize_function_for_speed_p (cfun)
19947 && !reg_mentioned_p (operands[0], operands[1])"
19948 [(parallel
19949 [(set (match_dup 0)
19950 (and:DI (subreg:DI (clz:SI (match_dup 1)) 0) (const_int 63)))
19951 (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)
19952 (clobber (reg:CC FLAGS_REG))])]
19953 "ix86_expand_clear (operands[0]);"
19954 [(set_attr "prefix_rep" "1")
19955 (set_attr "type" "bitmanip")
19956 (set_attr "mode" "SI")])
19957
19958 ; False dependency happens when destination is only updated by tzcnt,
19959 ; lzcnt or popcnt. There is no false dependency when destination is
19960 ; also used in source.
;; Form of *clzsi2_lzcnt_zext with an UNSPEC_INSN_FALSE_DEP use of DImode
;; operand 2, tied to operand 0.  Operand 1 is SImode (the operand of
;; clz:SI) and the insn is 64-bit only, matching the pattern that
;; *clzsi2_lzcnt_zext splits into.
19961 (define_insn "*clzsi2_lzcnt_zext_falsedep"
19962 [(set (match_operand:DI 0 "register_operand" "=r")
19963 (and:DI
19964 (subreg:DI
19965 (clz:SI
19966 (match_operand:SI 1 "nonimmediate_operand" "rm")) 0)
19967 (const_int 63)))
19968 (unspec [(match_operand:DI 2 "register_operand" "0")]
19969 UNSPEC_INSN_FALSE_DEP)
19970 (clobber (reg:CC FLAGS_REG))]
19971 "TARGET_LZCNT && TARGET_64BIT"
19972 "lzcnt{l}\t{%1, %k0|%k0, %1}"
19973 [(set_attr "prefix_rep" "1")
19974 (set_attr "type" "bitmanip")
19975 (set_attr "mode" "SI")])
19976
;; SImode lzcnt zero-extended to DImode; the template writes the 32-bit
;; name of operand 0 (%k0).  Requires TARGET_LZCNT and TARGET_64BIT.
;; Split after epilogue_completed into the *_falsedep form under the same
;; conditions as clz<mode>2_lzcnt.
19977 (define_insn_and_split "*clzsi2_lzcnt_zext_2"
19978 [(set (match_operand:DI 0 "register_operand" "=r")
19979 (zero_extend:DI
19980 (clz:SI (match_operand:SI 1 "nonimmediate_operand" "rm"))))
19981 (clobber (reg:CC FLAGS_REG))]
19982 "TARGET_LZCNT && TARGET_64BIT"
19983 "lzcnt{l}\t{%1, %k0|%k0, %1}"
19984 "&& TARGET_AVOID_FALSE_DEP_FOR_BMI && epilogue_completed
19985 && optimize_function_for_speed_p (cfun)
19986 && !reg_mentioned_p (operands[0], operands[1])"
19987 [(parallel
19988 [(set (match_dup 0)
19989 (zero_extend:DI (clz:SI (match_dup 1))))
19990 (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)
19991 (clobber (reg:CC FLAGS_REG))])]
19992 "ix86_expand_clear (operands[0]);"
19993 [(set_attr "prefix_rep" "1")
19994 (set_attr "type" "bitmanip")
19995 (set_attr "mode" "SI")])
19996
19997 ; False dependency happens when destination is only updated by tzcnt,
19998 ; lzcnt or popcnt. There is no false dependency when destination is
19999 ; also used in source.
;; Form of *clzsi2_lzcnt_zext_2 with an UNSPEC_INSN_FALSE_DEP use of
;; DImode operand 2, tied to operand 0.  Operand 1 is SImode (the operand
;; of clz:SI) and the insn is 64-bit only, matching the pattern that
;; *clzsi2_lzcnt_zext_2 splits into.
20000 (define_insn "*clzsi2_lzcnt_zext_2_falsedep"
20001 [(set (match_operand:DI 0 "register_operand" "=r")
20002 (zero_extend:DI
20003 (clz:SI (match_operand:SI 1 "nonimmediate_operand" "rm"))))
20004 (unspec [(match_operand:DI 2 "register_operand" "0")]
20005 UNSPEC_INSN_FALSE_DEP)
20006 (clobber (reg:CC FLAGS_REG))]
20007 "TARGET_LZCNT && TARGET_64BIT"
20008 "lzcnt{l}\t{%1, %k0|%k0, %1}"
20009 [(set_attr "prefix_rep" "1")
20010 (set_attr "type" "bitmanip")
20011 (set_attr "mode" "SI")])
20012
;; Int iterator over the two intrinsic unspecs.  Each entry carries its
;; own enabling condition: TARGET_BMI for UNSPEC_TZCNT and TARGET_LZCNT
;; for UNSPEC_LZCNT.
20013 (define_int_iterator LT_ZCNT
20014 [(UNSPEC_TZCNT "TARGET_BMI")
20015 (UNSPEC_LZCNT "TARGET_LZCNT")])
20016
;; Mnemonic for each LT_ZCNT value; used both in pattern names and in the
;; output templates.
20017 (define_int_attr lt_zcnt
20018 [(UNSPEC_TZCNT "tzcnt")
20019 (UNSPEC_LZCNT "lzcnt")])
20020
;; Value of the "type" insn attribute for each LT_ZCNT value.
20021 (define_int_attr lt_zcnt_type
20022 [(UNSPEC_TZCNT "alu1")
20023 (UNSPEC_LZCNT "bitmanip")])
20024
20025 ;; Version of lzcnt/tzcnt that is expanded from intrinsics. This version
20026 ;; provides operand size as output when source operand is zero.
;; The operation is kept as an unspec (LT_ZCNT) rather than ctz/clz RTL.
;; The insn condition string is empty; the per-unspec conditions come
;; from the LT_ZCNT iterator.  Split after epilogue_completed into the
;; *_falsedep form under the same conditions as the other tzcnt/lzcnt
;; patterns in this file.
20027
20028 (define_insn_and_split "<lt_zcnt>_<mode>"
20029 [(set (match_operand:SWI48 0 "register_operand" "=r")
20030 (unspec:SWI48
20031 [(match_operand:SWI48 1 "nonimmediate_operand" "rm")] LT_ZCNT))
20032 (clobber (reg:CC FLAGS_REG))]
20033 ""
20034 "<lt_zcnt>{<imodesuffix>}\t{%1, %0|%0, %1}"
20035 "&& TARGET_AVOID_FALSE_DEP_FOR_BMI && epilogue_completed
20036 && optimize_function_for_speed_p (cfun)
20037 && !reg_mentioned_p (operands[0], operands[1])"
20038 [(parallel
20039 [(set (match_dup 0)
20040 (unspec:SWI48 [(match_dup 1)] LT_ZCNT))
20041 (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)
20042 (clobber (reg:CC FLAGS_REG))])]
20043 "ix86_expand_clear (operands[0]);"
20044 [(set_attr "type" "<lt_zcnt_type>")
20045 (set_attr "prefix_0f" "1")
20046 (set_attr "prefix_rep" "1")
20047 (set_attr "mode" "<MODE>")])
20048
20049 ; False dependency happens when destination is only updated by tzcnt,
20050 ; lzcnt or popcnt. There is no false dependency when destination is
20051 ; also used in source.
;; Form of <lt_zcnt>_<mode> with an UNSPEC_INSN_FALSE_DEP use of
;; operand 2, tied to operand 0.
20052 (define_insn "*<lt_zcnt>_<mode>_falsedep"
20053 [(set (match_operand:SWI48 0 "register_operand" "=r")
20054 (unspec:SWI48
20055 [(match_operand:SWI48 1 "nonimmediate_operand" "rm")] LT_ZCNT))
20056 (unspec [(match_operand:SWI48 2 "register_operand" "0")]
20057 UNSPEC_INSN_FALSE_DEP)
20058 (clobber (reg:CC FLAGS_REG))]
20059 ""
20060 "<lt_zcnt>{<imodesuffix>}\t{%1, %0|%0, %1}"
20061 [(set_attr "type" "<lt_zcnt_type>")
20062 (set_attr "prefix_0f" "1")
20063 (set_attr "prefix_rep" "1")
20064 (set_attr "mode" "<MODE>")])
20065
;; HImode variant of the intrinsic lzcnt/tzcnt pattern (16-bit "w"
;; suffix).  Unlike the SWI48 variant it is a plain define_insn with no
;; false-dependency split.
20066 (define_insn "<lt_zcnt>_hi"
20067 [(set (match_operand:HI 0 "register_operand" "=r")
20068 (unspec:HI
20069 [(match_operand:HI 1 "nonimmediate_operand" "rm")] LT_ZCNT))
20070 (clobber (reg:CC FLAGS_REG))]
20071 ""
20072 "<lt_zcnt>{w}\t{%1, %0|%0, %1}"
20073 [(set_attr "type" "<lt_zcnt_type>")
20074 (set_attr "prefix_0f" "1")
20075 (set_attr "prefix_rep" "1")
20076 (set_attr "mode" "HI")])
20077
20078 ;; BMI instructions.
20079
;; bextr, modelled as UNSPEC_BEXTR of operand 1 (register or memory) and
;; register operand 2; the flags are clobbered.  The two alternatives
;; differ only in whether operand 1 is a register or memory, which
;; selects the btver2_decode value ("direct" or "double").
20080 (define_insn "bmi_bextr_<mode>"
20081 [(set (match_operand:SWI48 0 "register_operand" "=r,r")
20082 (unspec:SWI48 [(match_operand:SWI48 1 "nonimmediate_operand" "r,m")
20083 (match_operand:SWI48 2 "register_operand" "r,r")]
20084 UNSPEC_BEXTR))
20085 (clobber (reg:CC FLAGS_REG))]
20086 "TARGET_BMI"
20087 "bextr\t{%2, %1, %0|%0, %1, %2}"
20088 [(set_attr "type" "bitmanip")
20089 (set_attr "btver2_decode" "direct, double")
20090 (set_attr "mode" "<MODE>")])
20091
;; bextr used only for its flags: FLAGS_REG is set in CCZmode from
;; comparing the UNSPEC_BEXTR result with zero, and the value itself goes
;; to a scratch register (operand 0).
20092 (define_insn "*bmi_bextr_<mode>_ccz"
20093 [(set (reg:CCZ FLAGS_REG)
20094 (compare:CCZ
20095 (unspec:SWI48 [(match_operand:SWI48 1 "nonimmediate_operand" "r,m")
20096 (match_operand:SWI48 2 "register_operand" "r,r")]
20097 UNSPEC_BEXTR)
20098 (const_int 0)))
20099 (clobber (match_scratch:SWI48 0 "=r,r"))]
20100 "TARGET_BMI"
20101 "bextr\t{%2, %1, %0|%0, %1, %2}"
20102 [(set_attr "type" "bitmanip")
20103 (set_attr "btver2_decode" "direct, double")
20104 (set_attr "mode" "<MODE>")])
20105
;; blsi: operand 0 = (-operand 1) & operand 1; the flags are clobbered.
20106 (define_insn "*bmi_blsi_<mode>"
20107 [(set (match_operand:SWI48 0 "register_operand" "=r")
20108 (and:SWI48
20109 (neg:SWI48
20110 (match_operand:SWI48 1 "nonimmediate_operand" "rm"))
20111 (match_dup 1)))
20112 (clobber (reg:CC FLAGS_REG))]
20113 "TARGET_BMI"
20114 "blsi\t{%1, %0|%0, %1}"
20115 [(set_attr "type" "bitmanip")
20116 (set_attr "btver2_decode" "double")
20117 (set_attr "mode" "<MODE>")])
20118
;; BLSI that keeps both results: the value (operand 1 & -operand 1) is
;; compared against zero to set the flags and is also stored in operand 0.
;; Besides TARGET_BMI, the insn only matches when ix86_match_ccmode
;; accepts CCNOmode for it.
20119 (define_insn "*bmi_blsi_<mode>_cmp"
20120 [(set (reg FLAGS_REG)
20121 (compare
20122 (and:SWI48
20123 (neg:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "rm"))
20124 (match_dup 1))
20125 (const_int 0)))
20126 (set (match_operand:SWI48 0 "register_operand" "=r")
20127 (and:SWI48 (neg:SWI48 (match_dup 1)) (match_dup 1)))]
20128 "TARGET_BMI && ix86_match_ccmode (insn, CCNOmode)"
20129 "blsi\t{%1, %0|%0, %1}"
20130 [(set_attr "type" "bitmanip")
20131 (set_attr "btver2_decode" "double")
20132 (set_attr "mode" "<MODE>")])
20133
;; BLSI used only for the flags: (operand 1 & -operand 1) is compared
;; against zero and the computed value is written to a scratch register.
;; Requires TARGET_BMI and ix86_match_ccmode (insn, CCNOmode).
20134 (define_insn "*bmi_blsi_<mode>_ccno"
20135 [(set (reg FLAGS_REG)
20136 (compare
20137 (and:SWI48
20138 (neg:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "rm"))
20139 (match_dup 1))
20140 (const_int 0)))
20141 (clobber (match_scratch:SWI48 0 "=r"))]
20142 "TARGET_BMI && ix86_match_ccmode (insn, CCNOmode)"
20143 "blsi\t{%1, %0|%0, %1}"
20144 [(set_attr "type" "bitmanip")
20145 (set_attr "btver2_decode" "double")
20146 (set_attr "mode" "<MODE>")])
20147
;; BLSMSK (TARGET_BMI): matches operand 0 = (operand 1 - 1) ^ operand 1.
;; The flags register is clobbered.
20148 (define_insn "*bmi_blsmsk_<mode>"
20149 [(set (match_operand:SWI48 0 "register_operand" "=r")
20150 (xor:SWI48
20151 (plus:SWI48
20152 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
20153 (const_int -1))
20154 (match_dup 1)))
20155 (clobber (reg:CC FLAGS_REG))]
20156 "TARGET_BMI"
20157 "blsmsk\t{%1, %0|%0, %1}"
20158 [(set_attr "type" "bitmanip")
20159 (set_attr "btver2_decode" "double")
20160 (set_attr "mode" "<MODE>")])
20161
;; BLSR (TARGET_BMI): matches operand 0 = (operand 1 - 1) & operand 1.
;; The flags register is clobbered.
20162 (define_insn "*bmi_blsr_<mode>"
20163 [(set (match_operand:SWI48 0 "register_operand" "=r")
20164 (and:SWI48
20165 (plus:SWI48
20166 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
20167 (const_int -1))
20168 (match_dup 1)))
20169 (clobber (reg:CC FLAGS_REG))]
20170 "TARGET_BMI"
20171 "blsr\t{%1, %0|%0, %1}"
20172 [(set_attr "type" "bitmanip")
20173 (set_attr "btver2_decode" "double")
20174 (set_attr "mode" "<MODE>")])
20175
;; BLSR that keeps both results: ((operand 1 - 1) & operand 1) is compared
;; against zero in CCZmode to set the flags, and the same value is stored
;; in operand 0.
20176 (define_insn "*bmi_blsr_<mode>_cmp"
20177 [(set (reg:CCZ FLAGS_REG)
20178 (compare:CCZ
20179 (and:SWI48
20180 (plus:SWI48
20181 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
20182 (const_int -1))
20183 (match_dup 1))
20184 (const_int 0)))
20185 (set (match_operand:SWI48 0 "register_operand" "=r")
20186 (and:SWI48
20187 (plus:SWI48
20188 (match_dup 1)
20189 (const_int -1))
20190 (match_dup 1)))]
20191 "TARGET_BMI"
20192 "blsr\t{%1, %0|%0, %1}"
20193 [(set_attr "type" "bitmanip")
20194 (set_attr "btver2_decode" "double")
20195 (set_attr "mode" "<MODE>")])
20196
;; BLSR used only for the flags: ((operand 1 - 1) & operand 1) is compared
;; against zero in CCZmode and the computed value is written to a scratch
;; register.
20197 (define_insn "*bmi_blsr_<mode>_ccz"
20198 [(set (reg:CCZ FLAGS_REG)
20199 (compare:CCZ
20200 (and:SWI48
20201 (plus:SWI48
20202 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
20203 (const_int -1))
20204 (match_dup 1))
20205 (const_int 0)))
20206 (clobber (match_scratch:SWI48 0 "=r"))]
20207 "TARGET_BMI"
20208 "blsr\t{%1, %0|%0, %1}"
20209 [(set_attr "type" "bitmanip")
20210 (set_attr "btver2_decode" "double")
20211 (set_attr "mode" "<MODE>")])
20212
20213 ;; BMI2 instructions.
;; Expander for BZHI (TARGET_BMI2).  The generated RTL is: if operand 2 is
;; nonzero, a zero_extract of operand 1 at position 0 whose size is
;; umin (operand 2, operand 3); otherwise 0.  The flags register is
;; clobbered.  The preparation code replaces operand 2 by its QImode low
;; part and sets operand 3 to the constant <MODE_SIZE> * BITS_PER_UNIT.
20214 (define_expand "bmi2_bzhi_<mode>3"
20215 [(parallel
20216 [(set (match_operand:SWI48 0 "register_operand")
20217 (if_then_else:SWI48
20218 (ne:QI (match_operand:QI 2 "register_operand")
20219 (const_int 0))
20220 (zero_extract:SWI48
20221 (match_operand:SWI48 1 "nonimmediate_operand")
20222 (umin:QI (match_dup 2) (match_dup 3))
20223 (const_int 0))
20224 (const_int 0)))
20225 (clobber (reg:CC FLAGS_REG))])]
20226 "TARGET_BMI2"
20227 {
20228 operands[2] = gen_lowpart (QImode, operands[2]);
20229 operands[3] = GEN_INT (<MODE_SIZE> * BITS_PER_UNIT);
20230 })
20231
;; BZHI insn with the if_then_else / zero_extract / umin shape that the
;; bmi2_bzhi_<mode>3 expander generates.  Operand 3 must be the constant
;; <MODE_SIZE> * BITS_PER_UNIT (checked in the insn condition).
;; Operand 2 is printed as %<k>2.
;; NOTE(review): operand 2 uses constraint "q" here, while the other BZHI
;; patterns in this group use "r" -- confirm this is intentional.
20232 (define_insn "*bmi2_bzhi_<mode>3"
20233 [(set (match_operand:SWI48 0 "register_operand" "=r")
20234 (if_then_else:SWI48
20235 (ne:QI (match_operand:QI 2 "register_operand" "q")
20236 (const_int 0))
20237 (zero_extract:SWI48
20238 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
20239 (umin:QI (match_dup 2)
20240 (match_operand:QI 3 "const_int_operand"))
20241 (const_int 0))
20242 (const_int 0)))
20243 (clobber (reg:CC FLAGS_REG))]
20244 "TARGET_BMI2 && INTVAL (operands[3]) == <MODE_SIZE> * BITS_PER_UNIT"
20245 "bzhi\t{%<k>2, %1, %0|%0, %1, %<k>2}"
20246 [(set_attr "type" "bitmanip")
20247 (set_attr "prefix" "vex")
20248 (set_attr "mode" "<MODE>")])
20249
;; BZHI used only for the flags: the same if_then_else / zero_extract /
;; umin expression as *bmi2_bzhi_<mode>3 is compared against zero in
;; CCZmode, and the computed value goes to a scratch register.
;; Operand 3 must be the constant <MODE_SIZE> * BITS_PER_UNIT.
20250 (define_insn "*bmi2_bzhi_<mode>3_1_ccz"
20251 [(set (reg:CCZ FLAGS_REG)
20252 (compare:CCZ
20253 (if_then_else:SWI48
20254 (ne:QI (match_operand:QI 2 "register_operand" "r") (const_int 0))
20255 (zero_extract:SWI48
20256 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
20257 (umin:QI (match_dup 2)
20258 (match_operand:QI 3 "const_int_operand"))
20259 (const_int 0))
20260 (const_int 0))
20261 (const_int 0)))
20262 (clobber (match_scratch:SWI48 0 "=r"))]
20263 "TARGET_BMI2 && INTVAL (operands[3]) == <MODE_SIZE> * BITS_PER_UNIT"
20264 "bzhi\t{%<k>2, %1, %0|%0, %1, %<k>2}"
20265 [(set_attr "type" "bitmanip")
20266 (set_attr "prefix" "vex")
20267 (set_attr "mode" "<MODE>")])
20268
;; BZHI matched from the mask form
;; operand 0 = ((1 << operand 2) - 1) & operand 1.
;; Operand 2 is a QImode register; the flags register is clobbered.
20269 (define_insn "*bmi2_bzhi_<mode>3_2"
20270 [(set (match_operand:SWI48 0 "register_operand" "=r")
20271 (and:SWI48
20272 (plus:SWI48
20273 (ashift:SWI48 (const_int 1)
20274 (match_operand:QI 2 "register_operand" "r"))
20275 (const_int -1))
20276 (match_operand:SWI48 1 "nonimmediate_operand" "rm")))
20277 (clobber (reg:CC FLAGS_REG))]
20278 "TARGET_BMI2"
20279 "bzhi\t{%<k>2, %1, %0|%0, %1, %<k>2}"
20280 [(set_attr "type" "bitmanip")
20281 (set_attr "prefix" "vex")
20282 (set_attr "mode" "<MODE>")])
20283
;; BZHI matched from the mask form
;; operand 0 = ~(-1 << operand 2) & operand 1.
;; Operand 2 is a QImode register; the flags register is clobbered.
20284 (define_insn "*bmi2_bzhi_<mode>3_3"
20285 [(set (match_operand:SWI48 0 "register_operand" "=r")
20286 (and:SWI48
20287 (not:SWI48
20288 (ashift:SWI48 (const_int -1)
20289 (match_operand:QI 2 "register_operand" "r")))
20290 (match_operand:SWI48 1 "nonimmediate_operand" "rm")))
20291 (clobber (reg:CC FLAGS_REG))]
20292 "TARGET_BMI2"
20293 "bzhi\t{%<k>2, %1, %0|%0, %1, %<k>2}"
20294 [(set_attr "type" "bitmanip")
20295 (set_attr "prefix" "vex")
20296 (set_attr "mode" "<MODE>")])
20297
;; 64-bit only (TARGET_64BIT && TARGET_BMI2): the SImode mask operation
;; ((1 << operand 2) - 1) & operand 1, zero-extended to DImode.
;; All three operands are printed with the %q modifier and the "mode"
;; attribute is DI.
20298 (define_insn "*bmi2_bzhi_zero_extendsidi_4"
20299 [(set (match_operand:DI 0 "register_operand" "=r")
20300 (zero_extend:DI
20301 (and:SI
20302 (plus:SI
20303 (ashift:SI (const_int 1)
20304 (match_operand:QI 2 "register_operand" "r"))
20305 (const_int -1))
20306 (match_operand:SI 1 "nonimmediate_operand" "rm"))))
20307 (clobber (reg:CC FLAGS_REG))]
20308 "TARGET_64BIT && TARGET_BMI2"
20309 "bzhi\t{%q2, %q1, %q0|%q0, %q1, %q2}"
20310 [(set_attr "type" "bitmanip")
20311 (set_attr "prefix" "vex")
20312 (set_attr "mode" "DI")])
20313
;; 64-bit only (TARGET_64BIT && TARGET_BMI2): a DImode AND of operand 1
;; with the zero-extended SImode mask ((1 << operand 2) - 1).
;; All three operands are printed with the %q modifier and the "mode"
;; attribute is DI.
20314 (define_insn "*bmi2_bzhi_zero_extendsidi_5"
20315 [(set (match_operand:DI 0 "register_operand" "=r")
20316 (and:DI
20317 (zero_extend:DI
20318 (plus:SI
20319 (ashift:SI (const_int 1)
20320 (match_operand:QI 2 "register_operand" "r"))
20321 (const_int -1)))
20322 (match_operand:DI 1 "nonimmediate_operand" "rm")))
20323 (clobber (reg:CC FLAGS_REG))]
20324 "TARGET_64BIT && TARGET_BMI2"
20325 "bzhi\t{%q2, %q1, %q0|%q0, %q1, %q2}"
20326 [(set_attr "type" "bitmanip")
20327 (set_attr "prefix" "vex")
20328 (set_attr "mode" "DI")])
20329
;; PDEP (TARGET_BMI2), modelled as UNSPEC_PDEP of operands 1 and 2.
;; Operand 1 must be a register; operand 2 may be a register or memory.
;; This pattern has no flags clobber.
20330 (define_insn "bmi2_pdep_<mode>3"
20331 [(set (match_operand:SWI48 0 "register_operand" "=r")
20332 (unspec:SWI48 [(match_operand:SWI48 1 "register_operand" "r")
20333 (match_operand:SWI48 2 "nonimmediate_operand" "rm")]
20334 UNSPEC_PDEP))]
20335 "TARGET_BMI2"
20336 "pdep\t{%2, %1, %0|%0, %1, %2}"
20337 [(set_attr "type" "bitmanip")
20338 (set_attr "prefix" "vex")
20339 (set_attr "mode" "<MODE>")])
20340
;; PEXT (TARGET_BMI2), modelled as UNSPEC_PEXT of operands 1 and 2.
;; Operand 1 must be a register; operand 2 may be a register or memory.
;; This pattern has no flags clobber.
20341 (define_insn "bmi2_pext_<mode>3"
20342 [(set (match_operand:SWI48 0 "register_operand" "=r")
20343 (unspec:SWI48 [(match_operand:SWI48 1 "register_operand" "r")
20344 (match_operand:SWI48 2 "nonimmediate_operand" "rm")]
20345 UNSPEC_PEXT))]
20346 "TARGET_BMI2"
20347 "pext\t{%2, %1, %0|%0, %1, %2}"
20348 [(set_attr "type" "bitmanip")
20349 (set_attr "prefix" "vex")
20350 (set_attr "mode" "<MODE>")])
20351
20352 ;; TBM instructions.
;; TBM form of BEXTR with an immediate control word (TARGET_TBM).
;; The pattern is a zero_extract of operand 1 whose size (operand 2) and
;; position (operand 3) are both constants matched by
;; const_0_to_255_operand.  The output code overwrites operands[2] with
;; the combined immediate (operand 2 << 8) | operand 3 before returning
;; the template.  The flags register is clobbered.
20353 (define_insn "@tbm_bextri_<mode>"
20354 [(set (match_operand:SWI48 0 "register_operand" "=r")
20355 (zero_extract:SWI48
20356 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
20357 (match_operand:QI 2 "const_0_to_255_operand")
20358 (match_operand:QI 3 "const_0_to_255_operand")))
20359 (clobber (reg:CC FLAGS_REG))]
20360 "TARGET_TBM"
20361 {
20362 operands[2] = GEN_INT (INTVAL (operands[2]) << 8 | INTVAL (operands[3]));
20363 return "bextr\t{%2, %1, %0|%0, %1, %2}";
20364 }
20365 [(set_attr "type" "bitmanip")
20366 (set_attr "mode" "<MODE>")])
20367
;; BLCFILL (TARGET_TBM): matches operand 0 = (operand 1 + 1) & operand 1.
;; The flags register is clobbered.
20368 (define_insn "*tbm_blcfill_<mode>"
20369 [(set (match_operand:SWI48 0 "register_operand" "=r")
20370 (and:SWI48
20371 (plus:SWI48
20372 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
20373 (const_int 1))
20374 (match_dup 1)))
20375 (clobber (reg:CC FLAGS_REG))]
20376 "TARGET_TBM"
20377 "blcfill\t{%1, %0|%0, %1}"
20378 [(set_attr "type" "bitmanip")
20379 (set_attr "mode" "<MODE>")])
20380
;; BLCI (TARGET_TBM): matches operand 0 = ~(operand 1 + 1) | operand 1.
;; The flags register is clobbered.
20381 (define_insn "*tbm_blci_<mode>"
20382 [(set (match_operand:SWI48 0 "register_operand" "=r")
20383 (ior:SWI48
20384 (not:SWI48
20385 (plus:SWI48
20386 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
20387 (const_int 1)))
20388 (match_dup 1)))
20389 (clobber (reg:CC FLAGS_REG))]
20390 "TARGET_TBM"
20391 "blci\t{%1, %0|%0, %1}"
20392 [(set_attr "type" "bitmanip")
20393 (set_attr "mode" "<MODE>")])
20394
;; BLCIC (TARGET_TBM): matches operand 0 = (operand 1 + 1) & ~operand 1.
;; The flags register is clobbered.
20395 (define_insn "*tbm_blcic_<mode>"
20396 [(set (match_operand:SWI48 0 "register_operand" "=r")
20397 (and:SWI48
20398 (plus:SWI48
20399 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
20400 (const_int 1))
20401 (not:SWI48
20402 (match_dup 1))))
20403 (clobber (reg:CC FLAGS_REG))]
20404 "TARGET_TBM"
20405 "blcic\t{%1, %0|%0, %1}"
20406 [(set_attr "type" "bitmanip")
20407 (set_attr "mode" "<MODE>")])
20408
;; BLCMSK (TARGET_TBM): matches operand 0 = (operand 1 + 1) ^ operand 1.
;; The flags register is clobbered.
20409 (define_insn "*tbm_blcmsk_<mode>"
20410 [(set (match_operand:SWI48 0 "register_operand" "=r")
20411 (xor:SWI48
20412 (plus:SWI48
20413 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
20414 (const_int 1))
20415 (match_dup 1)))
20416 (clobber (reg:CC FLAGS_REG))]
20417 "TARGET_TBM"
20418 "blcmsk\t{%1, %0|%0, %1}"
20419 [(set_attr "type" "bitmanip")
20420 (set_attr "mode" "<MODE>")])
20421
;; BLCS (TARGET_TBM): matches operand 0 = (operand 1 + 1) | operand 1.
;; The flags register is clobbered.
20422 (define_insn "*tbm_blcs_<mode>"
20423 [(set (match_operand:SWI48 0 "register_operand" "=r")
20424 (ior:SWI48
20425 (plus:SWI48
20426 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
20427 (const_int 1))
20428 (match_dup 1)))
20429 (clobber (reg:CC FLAGS_REG))]
20430 "TARGET_TBM"
20431 "blcs\t{%1, %0|%0, %1}"
20432 [(set_attr "type" "bitmanip")
20433 (set_attr "mode" "<MODE>")])
20434
;; BLSFILL (TARGET_TBM): matches operand 0 = (operand 1 - 1) | operand 1.
;; The flags register is clobbered.
20435 (define_insn "*tbm_blsfill_<mode>"
20436 [(set (match_operand:SWI48 0 "register_operand" "=r")
20437 (ior:SWI48
20438 (plus:SWI48
20439 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
20440 (const_int -1))
20441 (match_dup 1)))
20442 (clobber (reg:CC FLAGS_REG))]
20443 "TARGET_TBM"
20444 "blsfill\t{%1, %0|%0, %1}"
20445 [(set_attr "type" "bitmanip")
20446 (set_attr "mode" "<MODE>")])
20447
;; BLSIC (TARGET_TBM): matches operand 0 = (operand 1 - 1) | ~operand 1.
;; The flags register is clobbered.
20448 (define_insn "*tbm_blsic_<mode>"
20449 [(set (match_operand:SWI48 0 "register_operand" "=r")
20450 (ior:SWI48
20451 (plus:SWI48
20452 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
20453 (const_int -1))
20454 (not:SWI48
20455 (match_dup 1))))
20456 (clobber (reg:CC FLAGS_REG))]
20457 "TARGET_TBM"
20458 "blsic\t{%1, %0|%0, %1}"
20459 [(set_attr "type" "bitmanip")
20460 (set_attr "mode" "<MODE>")])
20461
;; T1MSKC (TARGET_TBM): matches operand 0 = (operand 1 + 1) | ~operand 1.
;; The flags register is clobbered.
20462 (define_insn "*tbm_t1mskc_<mode>"
20463 [(set (match_operand:SWI48 0 "register_operand" "=r")
20464 (ior:SWI48
20465 (plus:SWI48
20466 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
20467 (const_int 1))
20468 (not:SWI48
20469 (match_dup 1))))
20470 (clobber (reg:CC FLAGS_REG))]
20471 "TARGET_TBM"
20472 "t1mskc\t{%1, %0|%0, %1}"
20473 [(set_attr "type" "bitmanip")
20474 (set_attr "mode" "<MODE>")])
20475
;; TZMSK (TARGET_TBM): matches operand 0 = (operand 1 - 1) & ~operand 1.
;; The flags register is clobbered.
20476 (define_insn "*tbm_tzmsk_<mode>"
20477 [(set (match_operand:SWI48 0 "register_operand" "=r")
20478 (and:SWI48
20479 (plus:SWI48
20480 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
20481 (const_int -1))
20482 (not:SWI48
20483 (match_dup 1))))
20484 (clobber (reg:CC FLAGS_REG))]
20485 "TARGET_TBM"
20486 "tzmsk\t{%1, %0|%0, %1}"
20487 [(set_attr "type" "bitmanip")
20488 (set_attr "mode" "<MODE>")])
20489
;; POPCNT for the SWI48 modes (TARGET_POPCNT); clobbers the flags register.
;; Under TARGET_MACHO the mnemonic is printed without a size suffix,
;; otherwise with {<imodesuffix>}.
;; The split is taken only when TARGET_AVOID_FALSE_DEP_FOR_BMI holds,
;; epilogue_completed is set, the function is optimized for speed and
;; operand 0 is not mentioned in operand 1.  Its preparation statement
;; calls ix86_expand_clear on operand 0, and the replacement is the same
;; popcount with an extra UNSPEC_INSN_FALSE_DEP unspec on operand 0.
20490 (define_insn_and_split "popcount<mode>2"
20491 [(set (match_operand:SWI48 0 "register_operand" "=r")
20492 (popcount:SWI48
20493 (match_operand:SWI48 1 "nonimmediate_operand" "rm")))
20494 (clobber (reg:CC FLAGS_REG))]
20495 "TARGET_POPCNT"
20496 {
20497 #if TARGET_MACHO
20498 return "popcnt\t{%1, %0|%0, %1}";
20499 #else
20500 return "popcnt{<imodesuffix>}\t{%1, %0|%0, %1}";
20501 #endif
20502 }
20503 "&& TARGET_AVOID_FALSE_DEP_FOR_BMI && epilogue_completed
20504 && optimize_function_for_speed_p (cfun)
20505 && !reg_mentioned_p (operands[0], operands[1])"
20506 [(parallel
20507 [(set (match_dup 0)
20508 (popcount:SWI48 (match_dup 1)))
20509 (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)
20510 (clobber (reg:CC FLAGS_REG))])]
20511 "ix86_expand_clear (operands[0]);"
20512 [(set_attr "prefix_rep" "1")
20513 (set_attr "type" "bitmanip")
20514 (set_attr "mode" "<MODE>")])
20515
20516 ; False dependency happens when destination is only updated by tzcnt,
20517 ; lzcnt or popcnt. There is no false dependency when destination is
20518 ; also used in source.
;; POPCNT carrying an UNSPEC_INSN_FALSE_DEP unspec, the shape produced by
;; the popcount<mode>2 split.  Operand 2 of the unspec is tied to the
;; destination through constraint "0".  The emitted assembly is the same
;; popcnt as in popcount<mode>2.
20519 (define_insn "*popcount<mode>2_falsedep"
20520 [(set (match_operand:SWI48 0 "register_operand" "=r")
20521 (popcount:SWI48
20522 (match_operand:SWI48 1 "nonimmediate_operand" "rm")))
20523 (unspec [(match_operand:SWI48 2 "register_operand" "0")]
20524 UNSPEC_INSN_FALSE_DEP)
20525 (clobber (reg:CC FLAGS_REG))]
20526 "TARGET_POPCNT"
20527 {
20528 #if TARGET_MACHO
20529 return "popcnt\t{%1, %0|%0, %1}";
20530 #else
20531 return "popcnt{<imodesuffix>}\t{%1, %0|%0, %1}";
20532 #endif
20533 }
20534 [(set_attr "prefix_rep" "1")
20535 (set_attr "type" "bitmanip")
20536 (set_attr "mode" "<MODE>")])
20537
;; 64-bit only (TARGET_POPCNT && TARGET_64BIT): SImode popcount whose
;; DImode paradoxical subreg is masked with 63, producing a DImode result.
;; Emitted as a 32-bit popcnt writing %k0; the "mode" attribute is SI.
;; The split uses the same conditions as the other popcount splits
;; (TARGET_AVOID_FALSE_DEP_FOR_BMI, epilogue_completed, optimizing for
;; speed, operand 0 not mentioned in operand 1); it calls
;; ix86_expand_clear on operand 0 and adds an UNSPEC_INSN_FALSE_DEP unspec.
20538 (define_insn_and_split "*popcountsi2_zext"
20539 [(set (match_operand:DI 0 "register_operand" "=r")
20540 (and:DI
20541 (subreg:DI
20542 (popcount:SI
20543 (match_operand:SI 1 "nonimmediate_operand" "rm")) 0)
20544 (const_int 63)))
20545 (clobber (reg:CC FLAGS_REG))]
20546 "TARGET_POPCNT && TARGET_64BIT"
20547 {
20548 #if TARGET_MACHO
20549 return "popcnt\t{%1, %k0|%k0, %1}";
20550 #else
20551 return "popcnt{l}\t{%1, %k0|%k0, %1}";
20552 #endif
20553 }
20554 "&& TARGET_AVOID_FALSE_DEP_FOR_BMI && epilogue_completed
20555 && optimize_function_for_speed_p (cfun)
20556 && !reg_mentioned_p (operands[0], operands[1])"
20557 [(parallel
20558 [(set (match_dup 0)
20559 (and:DI (subreg:DI (popcount:SI (match_dup 1)) 0) (const_int 63)))
20560 (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)
20561 (clobber (reg:CC FLAGS_REG))])]
20562 "ix86_expand_clear (operands[0]);"
20563 [(set_attr "prefix_rep" "1")
20564 (set_attr "type" "bitmanip")
20565 (set_attr "mode" "SI")])
20566
20567 ; False dependency happens when destination is only updated by tzcnt,
20568 ; lzcnt or popcnt. There is no false dependency when destination is
20569 ; also used in source.
;; The (and:DI (subreg:DI (popcount:SI ...)) 63) popcount form carrying an
;; UNSPEC_INSN_FALSE_DEP unspec, the shape produced by the
;; *popcountsi2_zext split.  Operand 2 of the unspec is tied to the
;; destination through constraint "0".
20570 (define_insn "*popcountsi2_zext_falsedep"
20571 [(set (match_operand:DI 0 "register_operand" "=r")
20572 (and:DI
20573 (subreg:DI
20574 (popcount:SI
20575 (match_operand:SI 1 "nonimmediate_operand" "rm")) 0)
20576 (const_int 63)))
20577 (unspec [(match_operand:DI 2 "register_operand" "0")]
20578 UNSPEC_INSN_FALSE_DEP)
20579 (clobber (reg:CC FLAGS_REG))]
20580 "TARGET_POPCNT && TARGET_64BIT"
20581 {
20582 #if TARGET_MACHO
20583 return "popcnt\t{%1, %k0|%k0, %1}";
20584 #else
20585 return "popcnt{l}\t{%1, %k0|%k0, %1}";
20586 #endif
20587 }
20588 [(set_attr "prefix_rep" "1")
20589 (set_attr "type" "bitmanip")
20590 (set_attr "mode" "SI")])
20591
;; 64-bit only (TARGET_POPCNT && TARGET_64BIT): SImode popcount
;; zero-extended to DImode.  Emitted as a 32-bit popcnt writing %k0; the
;; "mode" attribute is SI.
;; The split uses the same conditions as the other popcount splits
;; (TARGET_AVOID_FALSE_DEP_FOR_BMI, epilogue_completed, optimizing for
;; speed, operand 0 not mentioned in operand 1); it calls
;; ix86_expand_clear on operand 0 and adds an UNSPEC_INSN_FALSE_DEP unspec.
20592 (define_insn_and_split "*popcountsi2_zext_2"
20593 [(set (match_operand:DI 0 "register_operand" "=r")
20594 (zero_extend:DI
20595 (popcount:SI (match_operand:SI 1 "nonimmediate_operand" "rm"))))
20596 (clobber (reg:CC FLAGS_REG))]
20597 "TARGET_POPCNT && TARGET_64BIT"
20598 {
20599 #if TARGET_MACHO
20600 return "popcnt\t{%1, %k0|%k0, %1}";
20601 #else
20602 return "popcnt{l}\t{%1, %k0|%k0, %1}";
20603 #endif
20604 }
20605 "&& TARGET_AVOID_FALSE_DEP_FOR_BMI && epilogue_completed
20606 && optimize_function_for_speed_p (cfun)
20607 && !reg_mentioned_p (operands[0], operands[1])"
20608 [(parallel
20609 [(set (match_dup 0)
20610 (zero_extend:DI (popcount:SI (match_dup 1))))
20611 (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)
20612 (clobber (reg:CC FLAGS_REG))])]
20613 "ix86_expand_clear (operands[0]);"
20614 [(set_attr "prefix_rep" "1")
20615 (set_attr "type" "bitmanip")
20616 (set_attr "mode" "SI")])
20617
20618 ; False dependency happens when destination is only updated by tzcnt,
20619 ; lzcnt or popcnt. There is no false dependency when destination is
20620 ; also used in source.
;; The (zero_extend:DI (popcount:SI ...)) form carrying an
;; UNSPEC_INSN_FALSE_DEP unspec, the shape produced by the
;; *popcountsi2_zext_2 split.  Operand 2 of the unspec is tied to the
;; destination through constraint "0".
20621 (define_insn "*popcountsi2_zext_2_falsedep"
20622 [(set (match_operand:DI 0 "register_operand" "=r")
20623 (zero_extend:DI
20624 (popcount:SI (match_operand:SI 1 "nonimmediate_operand" "rm"))))
20625 (unspec [(match_operand:DI 2 "register_operand" "0")]
20626 UNSPEC_INSN_FALSE_DEP)
20627 (clobber (reg:CC FLAGS_REG))]
20628 "TARGET_POPCNT && TARGET_64BIT"
20629 {
20630 #if TARGET_MACHO
20631 return "popcnt\t{%1, %k0|%k0, %1}";
20632 #else
20633 return "popcnt{l}\t{%1, %k0|%k0, %1}";
20634 #endif
20635 }
20636 [(set_attr "prefix_rep" "1")
20637 (set_attr "type" "bitmanip")
20638 (set_attr "mode" "SI")])
20639
;; Matches an SImode popcount of a zero-extended HImode operand while
;; ix86_pre_reload_split () holds, and always splits ("&& 1").
;; The split emits popcounthi2 into a fresh HImode pseudo and then
;; zero-extends that pseudo into the SImode destination.
20640 (define_insn_and_split "*popcounthi2_1"
20641 [(set (match_operand:SI 0 "register_operand")
20642 (popcount:SI
20643 (zero_extend:SI (match_operand:HI 1 "nonimmediate_operand"))))
20644 (clobber (reg:CC FLAGS_REG))]
20645 "TARGET_POPCNT
20646 && ix86_pre_reload_split ()"
20647 "#"
20648 "&& 1"
20649 [(const_int 0)]
20650 {
20651 rtx tmp = gen_reg_rtx (HImode);
20652
20653 emit_insn (gen_popcounthi2 (tmp, operands[1]));
20654 emit_insn (gen_zero_extendhisi2 (operands[0], tmp));
20655 DONE;
20656 })
20657
;; Matches the zero-extension to SImode of an HImode popcount while
;; ix86_pre_reload_split () holds, and always splits ("&& 1").
;; The split emits popcounthi2 into a fresh HImode pseudo and then
;; zero-extends that pseudo into the SImode destination.
20658 (define_insn_and_split "*popcounthi2_2"
20659 [(set (match_operand:SI 0 "register_operand")
20660 (zero_extend:SI
20661 (popcount:HI (match_operand:HI 1 "nonimmediate_operand"))))
20662 (clobber (reg:CC FLAGS_REG))]
20663 "TARGET_POPCNT
20664 && ix86_pre_reload_split ()"
20665 "#"
20666 "&& 1"
20667 [(const_int 0)]
20668 {
20669 rtx tmp = gen_reg_rtx (HImode);
20670
20671 emit_insn (gen_popcounthi2 (tmp, operands[1]));
20672 emit_insn (gen_zero_extendhisi2 (operands[0], tmp));
20673 DONE;
20674 })
20675
;; HImode POPCNT (TARGET_POPCNT); clobbers the flags register.
;; Under TARGET_MACHO the mnemonic is printed without a size suffix,
;; otherwise as popcnt{w}.
20676 (define_insn "popcounthi2"
20677 [(set (match_operand:HI 0 "register_operand" "=r")
20678 (popcount:HI
20679 (match_operand:HI 1 "nonimmediate_operand" "rm")))
20680 (clobber (reg:CC FLAGS_REG))]
20681 "TARGET_POPCNT"
20682 {
20683 #if TARGET_MACHO
20684 return "popcnt\t{%1, %0|%0, %1}";
20685 #else
20686 return "popcnt{w}\t{%1, %0|%0, %1}";
20687 #endif
20688 }
20689 [(set_attr "prefix_rep" "1")
20690 (set_attr "type" "bitmanip")
20691 (set_attr "mode" "HI")])
20692
;; DImode byte swap expander, available only for TARGET_64BIT.
;; Without TARGET_MOVBE the source operand is forced into a register;
;; with TARGET_MOVBE it is left as matched (register or memory).
20693 (define_expand "bswapdi2"
20694 [(set (match_operand:DI 0 "register_operand")
20695 (bswap:DI (match_operand:DI 1 "nonimmediate_operand")))]
20696 "TARGET_64BIT"
20697 {
20698 if (!TARGET_MOVBE)
20699 operands[1] = force_reg (DImode, operands[1]);
20700 })
20701
;; SImode byte swap expander (no enabling condition).  Three cases:
;;  - TARGET_MOVBE: the operands are used as matched.
;;  - otherwise TARGET_BSWAP: the source is forced into a register.
;;  - otherwise: the source is copied to the destination, then the
;;    sequence bswaphi_lowpart, rotate left by 16, bswaphi_lowpart is
;;    emitted on the destination, and the expander finishes with DONE.
20702 (define_expand "bswapsi2"
20703 [(set (match_operand:SI 0 "register_operand")
20704 (bswap:SI (match_operand:SI 1 "nonimmediate_operand")))]
20705 ""
20706 {
20707 if (TARGET_MOVBE)
20708 ;
20709 else if (TARGET_BSWAP)
20710 operands[1] = force_reg (SImode, operands[1]);
20711 else
20712 {
20713 rtx x = operands[0];
20714
20715 emit_move_insn (x, operands[1]);
20716 emit_insn (gen_bswaphi_lowpart (gen_lowpart (HImode, x)));
20717 emit_insn (gen_rotlsi3 (x, x, GEN_INT (16)));
20718 emit_insn (gen_bswaphi_lowpart (gen_lowpart (HImode, x)));
20719 DONE;
20720 }
20721 })
20722
;; Byte swap when TARGET_MOVBE is available; source and destination may
;; not both be memory.  Alternatives:
;;  0: register swapped in place (source tied to destination), bswap.
;;  1: memory source to register destination, movbe.
;;  2: register source to memory destination, movbe.
20723 (define_insn "*bswap<mode>2_movbe"
20724 [(set (match_operand:SWI48 0 "nonimmediate_operand" "=r,r,m")
20725 (bswap:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "0,m,r")))]
20726 "TARGET_MOVBE
20727 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
20728 "@
20729 bswap\t%0
20730 movbe{<imodesuffix>}\t{%1, %0|%0, %1}
20731 movbe{<imodesuffix>}\t{%1, %0|%0, %1}"
20732 [(set_attr "type" "bitmanip,imov,imov")
20733 (set_attr "modrm" "0,1,1")
20734 (set_attr "prefix_0f" "*,1,1")
20735 (set_attr "prefix_extra" "*,1,1")
20736 (set_attr "mode" "<MODE>")])
20737
;; In-place register byte swap (TARGET_BSWAP): the source is tied to the
;; destination by constraint "0" and only %0 is printed.
20738 (define_insn "*bswap<mode>2"
20739 [(set (match_operand:SWI48 0 "register_operand" "=r")
20740 (bswap:SWI48 (match_operand:SWI48 1 "register_operand" "0")))]
20741 "TARGET_BSWAP"
20742 "bswap\t%0"
20743 [(set_attr "type" "bitmanip")
20744 (set_attr "modrm" "0")
20745 (set_attr "mode" "<MODE>")])
20746
;; HImode byte swap expander, provided only when TARGET_MOVBE is set.
;; It has no preparation code; the RTL template is emitted as written.
20747 (define_expand "bswaphi2"
20748 [(set (match_operand:HI 0 "register_operand")
20749 (bswap:HI (match_operand:HI 1 "nonimmediate_operand")))]
20750 "TARGET_MOVBE")
20751
;; HImode byte swap when TARGET_MOVBE is available; source and
;; destination may not both be memory.  Alternatives:
;;  0: in place on a "Q" register, by exchanging its %h and %b byte
;;     halves with xchg{b} ("mode" attribute QI).
;;  1: memory source to register destination, movbe{w}.
;;  2: register source to memory destination, movbe{w}.
20752 (define_insn "*bswaphi2_movbe"
20753 [(set (match_operand:HI 0 "nonimmediate_operand" "=Q,r,m")
20754 (bswap:HI (match_operand:HI 1 "nonimmediate_operand" "0,m,r")))]
20755 "TARGET_MOVBE
20756 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
20757 "@
20758 xchg{b}\t{%h0, %b0|%b0, %h0}
20759 movbe{w}\t{%1, %0|%0, %1}
20760 movbe{w}\t{%1, %0|%0, %1}"
20761 [(set_attr "type" "imov")
20762 (set_attr "modrm" "*,1,1")
20763 (set_attr "prefix_0f" "*,1,1")
20764 (set_attr "prefix_extra" "*,1,1")
20765 (set_attr "pent_pair" "np,*,*")
20766 (set_attr "athlon_decode" "vector,*,*")
20767 (set_attr "amdfam10_decode" "double,*,*")
20768 (set_attr "bdver1_decode" "double,*,*")
20769 (set_attr "mode" "QI,HI,HI")])
20770
;; Rewrite an in-place HImode bswap of a general register as a rotate by
;; 8 with a flags clobber.  Applies only with TARGET_MOVBE, when neither
;; TARGET_USE_XCHGB nor optimizing for size holds, and when
;; peep2_regno_dead_p (0, FLAGS_REG) reports the flags register dead.
20771 (define_peephole2
20772 [(set (match_operand:HI 0 "general_reg_operand")
20773 (bswap:HI (match_dup 0)))]
20774 "TARGET_MOVBE
20775 && !(TARGET_USE_XCHGB || optimize_function_for_size_p (cfun))
20776 && peep2_regno_dead_p (0, FLAGS_REG)"
20777 [(parallel [(set (match_dup 0) (rotate:HI (match_dup 0) (const_int 8)))
20778 (clobber (reg:CC FLAGS_REG))])])
20779
;; Byte swap of the low HImode part of a register (strict_low_part, so
;; operand 0 is both read and written, constraint "+").  The flags
;; register is clobbered.  Alternatives:
;;  0: "Q" register, xchg{b} of the %h and %b halves (length 2).
;;  1: any "r" register, rol{w} by 8 (length 4).
;; Alternative 0 is the one preferred for size; for speed, alternative 0
;; is preferred when TARGET_USE_XCHGB is set and alternative 1 otherwise.
20780 (define_insn "bswaphi_lowpart"
20781 [(set (strict_low_part (match_operand:HI 0 "register_operand" "+Q,r"))
20782 (bswap:HI (match_dup 0)))
20783 (clobber (reg:CC FLAGS_REG))]
20784 ""
20785 "@
20786 xchg{b}\t{%h0, %b0|%b0, %h0}
20787 rol{w}\t{$8, %0|%0, 8}"
20788 [(set (attr "preferred_for_size")
20789 (cond [(eq_attr "alternative" "0")
20790 (symbol_ref "true")]
20791 (symbol_ref "false")))
20792 (set (attr "preferred_for_speed")
20793 (cond [(eq_attr "alternative" "0")
20794 (symbol_ref "TARGET_USE_XCHGB")]
20795 (symbol_ref "!TARGET_USE_XCHGB")))
20796 (set_attr "length" "2,4")
20797 (set_attr "mode" "QI,HI")])
20798
;; DImode parity expander, used only when TARGET_POPCNT is not set.
;; Steps emitted:
;;  1. Obtain the high 32 bits of operand 1 (logical shift right by 32
;;     on TARGET_64BIT, gen_highpart otherwise) and XOR them with the
;;     low 32 bits.
;;  2. XOR the upper and lower 16-bit halves of that SImode value.
;;  3. Emit parityhi2_cmp on the HImode result.
;;  4. Call ix86_expand_setcc with code ORDERED on the CCmode flags
;;     register to produce a QImode scratch.
;;  5. Zero-extend the scratch to DImode (directly on TARGET_64BIT,
;;     via an SImode temporary otherwise).
20799 (define_expand "paritydi2"
20800 [(set (match_operand:DI 0 "register_operand")
20801 (parity:DI (match_operand:DI 1 "register_operand")))]
20802 "! TARGET_POPCNT"
20803 {
20804 rtx scratch = gen_reg_rtx (QImode);
20805 rtx hipart1 = gen_reg_rtx (SImode);
20806 rtx lopart1 = gen_reg_rtx (SImode);
20807 rtx xor1 = gen_reg_rtx (SImode);
20808 rtx shift2 = gen_reg_rtx (SImode);
20809 rtx hipart2 = gen_reg_rtx (HImode);
20810 rtx lopart2 = gen_reg_rtx (HImode);
20811 rtx xor2 = gen_reg_rtx (HImode);
20812
20813 if (TARGET_64BIT)
20814 {
20815 rtx shift1 = gen_reg_rtx (DImode);
20816 emit_insn (gen_lshrdi3 (shift1, operands[1], GEN_INT (32)));
20817 emit_move_insn (hipart1, gen_lowpart (SImode, shift1));
20818 }
20819 else
20820 emit_move_insn (hipart1, gen_highpart (SImode, operands[1]));
20821
20822 emit_move_insn (lopart1, gen_lowpart (SImode, operands[1]));
20823 emit_insn (gen_xorsi3 (xor1, hipart1, lopart1));
20824
20825 emit_insn (gen_lshrsi3 (shift2, xor1, GEN_INT (16)));
20826 emit_move_insn (hipart2, gen_lowpart (HImode, shift2));
20827 emit_move_insn (lopart2, gen_lowpart (HImode, xor1));
20828 emit_insn (gen_xorhi3 (xor2, hipart2, lopart2));
20829
20830 emit_insn (gen_parityhi2_cmp (xor2));
20831
20832 ix86_expand_setcc (scratch, ORDERED,
20833 gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
20834
20835 if (TARGET_64BIT)
20836 emit_insn (gen_zero_extendqidi2 (operands[0], scratch));
20837 else
20838 {
20839 rtx tmp = gen_reg_rtx (SImode);
20840
20841 emit_insn (gen_zero_extendqisi2 (tmp, scratch));
20842 emit_insn (gen_zero_extendsidi2 (operands[0], tmp));
20843 }
20844 DONE;
20845 })
20846
;; SImode parity expander, used only when TARGET_POPCNT is not set.
;; The upper 16 bits of operand 1 (obtained with a logical shift right by
;; 16) are XORed with the lower 16 bits, parityhi2_cmp is emitted on the
;; HImode result, ix86_expand_setcc is called with code ORDERED on the
;; CCmode flags register to fill a QImode scratch, and the scratch is
;; zero-extended into operand 0.
20847 (define_expand "paritysi2"
20848 [(set (match_operand:SI 0 "register_operand")
20849 (parity:SI (match_operand:SI 1 "register_operand")))]
20850 "! TARGET_POPCNT"
20851 {
20852 rtx scratch = gen_reg_rtx (QImode);
20853 rtx shift = gen_reg_rtx (SImode);
20854 rtx hipart = gen_reg_rtx (HImode);
20855 rtx lopart = gen_reg_rtx (HImode);
20856 rtx tmp = gen_reg_rtx (HImode);
20857
20858 emit_insn (gen_lshrsi3 (shift, operands[1], GEN_INT (16)));
20859 emit_move_insn (hipart, gen_lowpart (HImode, shift));
20860 emit_move_insn (lopart, gen_lowpart (HImode, operands[1]));
20861 emit_insn (gen_xorhi3 (tmp, hipart, lopart));
20862
20863 emit_insn (gen_parityhi2_cmp (tmp));
20864
20865 ix86_expand_setcc (scratch, ORDERED,
20866 gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
20867
20868 emit_insn (gen_zero_extendqisi2 (operands[0], scratch));
20869 DONE;
20870 })
20871
;; HImode parity expander, used only when TARGET_POPCNT is not set.
;; Operand 1 is first copied into a fresh HImode pseudo, which is then
;; passed to parityhi2_cmp (that insn clobbers its operand).
;; ix86_expand_setcc is called with code ORDERED on the CCmode flags
;; register to fill a QImode scratch, which is zero-extended into
;; operand 0.
20872 (define_expand "parityhi2"
20873 [(set (match_operand:HI 0 "register_operand")
20874 (parity:HI (match_operand:HI 1 "register_operand")))]
20875 "! TARGET_POPCNT"
20876 {
20877 rtx scratch = gen_reg_rtx (QImode);
20878 rtx tmp = gen_reg_rtx (HImode);
20879
20880 emit_move_insn (tmp, operands[1]);
20881 emit_insn (gen_parityhi2_cmp (tmp));
20882
20883 ix86_expand_setcc (scratch, ORDERED,
20884 gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
20885
20886 emit_insn (gen_zero_extendqihi2 (operands[0], scratch));
20887 DONE;
20888 })
20889
;; QImode parity expander, used only when TARGET_POPCNT is not set.
;; Emits parityqi2_cmp on operand 1 and then calls ix86_expand_setcc with
;; code ORDERED on the CCmode flags register, writing directly into
;; operand 0 (no intermediate scratch or extension).
20890 (define_expand "parityqi2"
20891 [(set (match_operand:QI 0 "register_operand")
20892 (parity:QI (match_operand:QI 1 "register_operand")))]
20893 "! TARGET_POPCNT"
20894 {
20895 emit_insn (gen_parityqi2_cmp (operands[1]));
20896
20897 ix86_expand_setcc (operands[0], ORDERED,
20898 gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
20899 DONE;
20900 })
20901
;; Sets the CC flags register to UNSPEC_PARITY of an HImode operand.
;; Implemented as xor{b} of the %h byte into the %b byte of the same
;; register, so the operand needs a "Q" register and is clobbered.
20902 (define_insn "parityhi2_cmp"
20903 [(set (reg:CC FLAGS_REG)
20904 (unspec:CC [(match_operand:HI 0 "register_operand" "+Q")]
20905 UNSPEC_PARITY))
20906 (clobber (match_dup 0))]
20907 ""
20908 "xor{b}\t{%h0, %b0|%b0, %h0}"
20909 [(set_attr "length" "2")
20910 (set_attr "mode" "QI")])
20911
;; Sets the CC flags register to UNSPEC_PARITY of a QImode operand.
;; Implemented as test{b} of the register with itself; the operand needs
;; a "q" register and is not modified (input-only constraint).
20912 (define_insn "parityqi2_cmp"
20913 [(set (reg:CC FLAGS_REG)
20914 (unspec:CC [(match_operand:QI 0 "register_operand" "q")]
20915 UNSPEC_PARITY))]
20916 ""
20917 "test{b}\t%0, %0"
20918 [(set_attr "mode" "QI")])
20919
20920 ;; Replace zero_extend:HI followed by parityhi2_cmp with parityqi2_cmp
;; Matched sequence: a zero-extension of QImode operand 1 into HImode
;; operand 0, followed by the HImode UNSPEC_PARITY flags computation that
;; also clobbers operand 0.  Replacement: the QImode UNSPEC_PARITY flags
;; computation applied directly to operand 1.  No extra condition.
20921 (define_peephole2
20922 [(set (match_operand:HI 0 "register_operand")
20923 (zero_extend:HI (match_operand:QI 1 "general_reg_operand")))
20924 (parallel [(set (reg:CC FLAGS_REG)
20925 (unspec:CC [(match_dup 0)] UNSPEC_PARITY))
20926 (clobber (match_dup 0))])]
20927 ""
20928 [(set (reg:CC FLAGS_REG)
20929 (unspec:CC [(match_dup 1)] UNSPEC_PARITY))])
20930
20931 ;; Eliminate QImode popcount&1 using parity flag
;; Matched sequence:
;;  insn 0: zero-extend QImode operand 1 into SImode operand 0;
;;  insn 1: popcount of operand 0 into operand 2 (flags clobbered);
;;  insn 2: CCZ test of (operand 3 & 1) against zero;
;;  insn 3: conditional jump on that CCZ result (operator 4, label 5).
;; Conditions: operands 2 and 3 have the same register number, the peep2
;; liveness queries report operands 0 and 2 dead at position 3 and the
;; flags register dead at position 4.
;; Replacement: an UNSPEC_PARITY flags computation on operand 1 followed
;; by the jump.  The preparation code works on a shallow copy of operator
;; 4 and changes its code to UNORDERED if it was EQ, otherwise to ORDERED.
20932 (define_peephole2
20933 [(set (match_operand:SI 0 "register_operand")
20934 (zero_extend:SI (match_operand:QI 1 "general_reg_operand")))
20935 (parallel [(set (match_operand:SI 2 "register_operand")
20936 (popcount:SI (match_dup 0)))
20937 (clobber (reg:CC FLAGS_REG))])
20938 (set (reg:CCZ FLAGS_REG)
20939 (compare:CCZ (and:QI (match_operand:QI 3 "register_operand")
20940 (const_int 1))
20941 (const_int 0)))
20942 (set (pc) (if_then_else (match_operator 4 "bt_comparison_operator"
20943 [(reg:CCZ FLAGS_REG)
20944 (const_int 0)])
20945 (label_ref (match_operand 5))
20946 (pc)))]
20947 "REGNO (operands[2]) == REGNO (operands[3])
20948 && peep2_reg_dead_p (3, operands[0])
20949 && peep2_reg_dead_p (3, operands[2])
20950 && peep2_regno_dead_p (4, FLAGS_REG)"
20951 [(set (reg:CC FLAGS_REG)
20952 (unspec:CC [(match_dup 1)] UNSPEC_PARITY))
20953 (set (pc) (if_then_else (match_op_dup 4 [(reg:CC FLAGS_REG)
20954 (const_int 0)])
20955 (label_ref (match_dup 5))
20956 (pc)))]
20957 {
20958 operands[4] = shallow_copy_rtx (operands[4]);
20959 PUT_CODE (operands[4], GET_CODE (operands[4]) == EQ ? UNORDERED : ORDERED);
20960 })
20961
20962 ;; Eliminate HImode popcount&1 using parity flag
;; Requests an HImode scratch in a "Q" register (operand 0), then matches:
;;  insn 0: HImode popcount of operand 2 into operand 1 (flags clobbered);
;;  insn 1: zero-extension of operand 1 into operand 3;
;;  insn 2: CCZ test of (operand 4 & 1) against zero;
;;  insn 3: conditional jump on that CCZ result (operator 5, label 6).
;; Conditions: operands 3 and 4 have the same register number, the peep2
;; liveness queries report operands 1 and 3 dead at position 3 and the
;; flags register dead at position 4.
;; Replacement: copy operand 2 into the scratch, compute UNSPEC_PARITY
;; flags from the scratch (clobbering it), then the jump.  The
;; preparation code works on a shallow copy of operator 5 and changes its
;; code to UNORDERED if it was EQ, otherwise to ORDERED.
20963 (define_peephole2
20964 [(match_scratch:HI 0 "Q")
20965 (parallel [(set (match_operand:HI 1 "register_operand")
20966 (popcount:HI
20967 (match_operand:HI 2 "nonimmediate_operand")))
20968 (clobber (reg:CC FLAGS_REG))])
20969 (set (match_operand 3 "register_operand")
20970 (zero_extend (match_dup 1)))
20971 (set (reg:CCZ FLAGS_REG)
20972 (compare:CCZ (and:QI (match_operand:QI 4 "register_operand")
20973 (const_int 1))
20974 (const_int 0)))
20975 (set (pc) (if_then_else (match_operator 5 "bt_comparison_operator"
20976 [(reg:CCZ FLAGS_REG)
20977 (const_int 0)])
20978 (label_ref (match_operand 6))
20979 (pc)))]
20980 "REGNO (operands[3]) == REGNO (operands[4])
20981 && peep2_reg_dead_p (3, operands[1])
20982 && peep2_reg_dead_p (3, operands[3])
20983 && peep2_regno_dead_p (4, FLAGS_REG)"
20984 [(set (match_dup 0) (match_dup 2))
20985 (parallel [(set (reg:CC FLAGS_REG)
20986 (unspec:CC [(match_dup 0)] UNSPEC_PARITY))
20987 (clobber (match_dup 0))])
20988 (set (pc) (if_then_else (match_op_dup 5 [(reg:CC FLAGS_REG)
20989 (const_int 0)])
20990 (label_ref (match_dup 6))
20991 (pc)))]
20992 {
20993 operands[5] = shallow_copy_rtx (operands[5]);
20994 PUT_CODE (operands[5], GET_CODE (operands[5]) == EQ ? UNORDERED : ORDERED);
20995 })
20996
20997 ;; Eliminate HImode popcount&1 using parity flag (variant 2)
;; Same idea as the preceding HImode peephole but without an intervening
;; zero-extension.  Requests an HImode "Q" scratch (operand 0), then
;; matches:
;;  insn 0: HImode popcount of operand 2 into operand 1 (flags clobbered);
;;  insn 1: CCZ test of (operand 3 & 1) against zero;
;;  insn 2: conditional jump on that CCZ result (operator 4, label 5).
;; Conditions: operands 1 and 3 have the same register number, the peep2
;; liveness queries report operands 1 and 3 dead at position 2 and the
;; flags register dead at position 3.
;; Replacement: copy operand 2 into the scratch, compute UNSPEC_PARITY
;; flags from the scratch (clobbering it), then the jump.  The
;; preparation code works on a shallow copy of operator 4 and changes its
;; code to UNORDERED if it was EQ, otherwise to ORDERED.
20998 (define_peephole2
20999 [(match_scratch:HI 0 "Q")
21000 (parallel [(set (match_operand:HI 1 "register_operand")
21001 (popcount:HI
21002 (match_operand:HI 2 "nonimmediate_operand")))
21003 (clobber (reg:CC FLAGS_REG))])
21004 (set (reg:CCZ FLAGS_REG)
21005 (compare:CCZ (and:QI (match_operand:QI 3 "register_operand")
21006 (const_int 1))
21007 (const_int 0)))
21008 (set (pc) (if_then_else (match_operator 4 "bt_comparison_operator"
21009 [(reg:CCZ FLAGS_REG)
21010 (const_int 0)])
21011 (label_ref (match_operand 5))
21012 (pc)))]
21013 "REGNO (operands[1]) == REGNO (operands[3])
21014 && peep2_reg_dead_p (2, operands[1])
21015 && peep2_reg_dead_p (2, operands[3])
21016 && peep2_regno_dead_p (3, FLAGS_REG)"
21017 [(set (match_dup 0) (match_dup 2))
21018 (parallel [(set (reg:CC FLAGS_REG)
21019 (unspec:CC [(match_dup 0)] UNSPEC_PARITY))
21020 (clobber (match_dup 0))])
21021 (set (pc) (if_then_else (match_op_dup 4 [(reg:CC FLAGS_REG)
21022 (const_int 0)])
21023 (label_ref (match_dup 5))
21024 (pc)))]
21025 {
21026 operands[4] = shallow_copy_rtx (operands[4]);
21027 PUT_CODE (operands[4], GET_CODE (operands[4]) == EQ ? UNORDERED : ORDERED);
21028 })
21029
21030 \f
21031 ;; Thread-local storage patterns for ELF.
21032 ;;
21033 ;; Note that these code sequences must appear exactly as shown
21034 ;; in order to allow linker relaxation.
21035
;; 32-bit TLS global-dynamic sequence (!TARGET_64BIT && TARGET_GNU_TLS),
;; modelled as UNSPEC_TLS_GD.  Operand 0 uses constraint "=a", operand 1
;; "Yb"; two SImode scratches ("=d" and "=c") and the flags register are
;; clobbered.  Operand 2 is the TLS symbol, operand 3 the call target.
;; Output:
;;  - an lea of %E2@tlsgd, using the (,%1,1) index form when
;;    TARGET_SUN_TLS, flag_plt or !HAVE_AS_IX86_TLS_GET_ADDR_GOT holds,
;;    and the plain (%1) base form otherwise;
;;  - a call: for TARGET_SUN_TLS either %a2@tlsgdplt (when
;;    HAVE_AS_IX86_TLSGDPLT is defined) or %p3@plt; otherwise a direct
;;    call to %P3 when flag_plt or !HAVE_AS_IX86_TLS_GET_ADDR_GOT holds;
;;    otherwise an indirect call through %p3@GOT(%1).
;; The "length" attribute is fixed at 12.
21036 (define_insn "*tls_global_dynamic_32_gnu"
21037 [(set (match_operand:SI 0 "register_operand" "=a")
21038 (unspec:SI
21039 [(match_operand:SI 1 "register_operand" "Yb")
21040 (match_operand 2 "tls_symbolic_operand")
21041 (match_operand 3 "constant_call_address_operand" "Bz")
21042 (reg:SI SP_REG)]
21043 UNSPEC_TLS_GD))
21044 (clobber (match_scratch:SI 4 "=d"))
21045 (clobber (match_scratch:SI 5 "=c"))
21046 (clobber (reg:CC FLAGS_REG))]
21047 "!TARGET_64BIT && TARGET_GNU_TLS"
21048 {
21049 if (TARGET_SUN_TLS || flag_plt || !HAVE_AS_IX86_TLS_GET_ADDR_GOT)
21050 output_asm_insn
21051 ("lea{l}\t{%E2@tlsgd(,%1,1), %0|%0, %E2@tlsgd[%1*1]}", operands);
21052 else
21053 output_asm_insn
21054 ("lea{l}\t{%E2@tlsgd(%1), %0|%0, %E2@tlsgd[%1]}", operands);
21055 if (TARGET_SUN_TLS)
21056 #ifdef HAVE_AS_IX86_TLSGDPLT
21057 return "call\t%a2@tlsgdplt";
21058 #else
21059 return "call\t%p3@plt";
21060 #endif
21061 if (flag_plt || !HAVE_AS_IX86_TLS_GET_ADDR_GOT)
21062 return "call\t%P3";
21063 return "call\t{*%p3@GOT(%1)|[DWORD PTR %p3@GOT[%1]]}";
21064 }
21065 [(set_attr "type" "multi")
21066 (set_attr "length" "12")])
21067
;; Expander for the 32-bit global-dynamic TLS sequence.  It builds a parallel
;; with an UNSPEC_TLS_GD set, two SImode scratch clobbers and a flags clobber.
;; Note the operand numbering: the TLS symbol is operand 1 and the base
;; register is operand 2, although the register comes first in the unspec
;; vector.  The expander has no enabling condition of its own.  Its
;; preparation statement records that a TLS descriptor call was expanded in
;; the current function.
21068 (define_expand "tls_global_dynamic_32"
21069 [(parallel
21070 [(set (match_operand:SI 0 "register_operand")
21071 (unspec:SI [(match_operand:SI 2 "register_operand")
21072 (match_operand 1 "tls_symbolic_operand")
21073 (match_operand 3 "constant_call_address_operand")
21074 (reg:SI SP_REG)]
21075 UNSPEC_TLS_GD))
21076 (clobber (scratch:SI))
21077 (clobber (scratch:SI))
21078 (clobber (reg:CC FLAGS_REG))])]
21079 ""
21080 "ix86_tls_descriptor_calls_expanded_in_cfun = true;")
21081
;; 64-bit global-dynamic TLS sequence, expressed as a call through operand 2
;; whose value lands in the "a" register, paired with an UNSPEC_TLS_GD that
;; names the TLS symbol (operand 1) and the stack pointer.
;; Output: for !TARGET_X32 a "data16" prefix is written first, then a LEA of
;; %E1@tlsgd(%rip) into %rdi.  Next comes ASM_SHORT 0x6666 (Sun TLS, PLT
;; calls, or no assembler GOT support) or ASM_BYTE 0x66, then a "rex64" line,
;; and finally the call: @plt, direct %P2, or indirect through
;; %p2@GOTPCREL(%rip).
;; The length attribute is 15 for TARGET_X32 and 16 otherwise.
21082 (define_insn "*tls_global_dynamic_64_<mode>"
21083 [(set (match_operand:P 0 "register_operand" "=a")
21084 (call:P
21085 (mem:QI (match_operand 2 "constant_call_address_operand" "Bz"))
21086 (match_operand 3)))
21087 (unspec:P [(match_operand 1 "tls_symbolic_operand")
21088 (reg:P SP_REG)]
21089 UNSPEC_TLS_GD)]
21090 "TARGET_64BIT"
21091 {
21092 if (!TARGET_X32)
21093 /* The .loc directive has effect for 'the immediately following assembly
21094 instruction'. So for a sequence:
21095 .loc f l
21096 .byte x
21097 insn1
21098 the 'immediately following assembly instruction' is insn1.
21099 We want to emit an insn prefix here, but if we use .byte (as shown in
21100 'ELF Handling For Thread-Local Storage'), a preceding .loc will point
21101 inside the insn sequence, rather than to the start. After relaxation
21102 of the sequence by the linker, the .loc might point inside an insn.
21103 Use data16 prefix instead, which doesn't have this problem. */
21104 fputs ("\tdata16", asm_out_file);
21105 output_asm_insn
21106 ("lea{q}\t{%E1@tlsgd(%%rip), %%rdi|rdi, %E1@tlsgd[rip]}", operands);
21107 if (TARGET_SUN_TLS || flag_plt || !HAVE_AS_IX86_TLS_GET_ADDR_GOT)
21108 fputs (ASM_SHORT "0x6666\n", asm_out_file);
21109 else
21110 fputs (ASM_BYTE "0x66\n", asm_out_file);
21111 fputs ("\trex64\n", asm_out_file);
21112 if (TARGET_SUN_TLS)
21113 return "call\t%p2@plt";
21114 if (flag_plt || !HAVE_AS_IX86_TLS_GET_ADDR_GOT)
21115 return "call\t%P2";
21116 return "call\t{*%p2@GOTPCREL(%%rip)|[QWORD PTR %p2@GOTPCREL[rip]]}";
21117 }
21118 [(set_attr "type" "multi")
21119 (set (attr "length")
21120 (symbol_ref "TARGET_X32 ? 15 : 16"))])
21121
;; Large-PIC-model variant of the 64-bit global-dynamic TLS sequence.  The
;; call target is register operand 2 ("b") plus immediate operand 3, and the
;; insn condition requires operand 3 to be a CONST wrapping an UNSPEC_PLTOFF.
;; Only for TARGET_64BIT with ix86_cmodel == CM_LARGE_PIC and !TARGET_PECOFF.
;; Output: LEA of %E1@tlsgd(%rip) into %rdi, MOVABS of operand 3 into %rax,
;; ADD of operand 2 to %rax, then an indirect call through %rax.
21122 (define_insn "*tls_global_dynamic_64_largepic"
21123 [(set (match_operand:DI 0 "register_operand" "=a")
21124 (call:DI
21125 (mem:QI (plus:DI (match_operand:DI 2 "register_operand" "b")
21126 (match_operand:DI 3 "immediate_operand" "i")))
21127 (match_operand 4)))
21128 (unspec:DI [(match_operand 1 "tls_symbolic_operand")
21129 (reg:DI SP_REG)]
21130 UNSPEC_TLS_GD)]
21131 "TARGET_64BIT && ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF
21132 && GET_CODE (operands[3]) == CONST
21133 && GET_CODE (XEXP (operands[3], 0)) == UNSPEC
21134 && XINT (XEXP (operands[3], 0), 1) == UNSPEC_PLTOFF"
21135 {
21136 output_asm_insn
21137 ("lea{q}\t{%E1@tlsgd(%%rip), %%rdi|rdi, %E1@tlsgd[rip]}", operands);
21138 output_asm_insn ("movabs{q}\t{%3, %%rax|rax, %3}", operands);
21139 output_asm_insn ("add{q}\t{%2, %%rax|rax, %2}", operands);
21140 return "call\t{*%%rax|rax}";
21141 }
21142 [(set_attr "type" "multi")
21143 (set_attr "length" "22")])
21144
;; Expander for the 64-bit global-dynamic TLS sequence.  It builds a parallel
;; of a call through operand 2 (with a (const_int 0) second call operand) and
;; an UNSPEC_TLS_GD naming the TLS symbol (operand 1) and the stack pointer.
;; The preparation statement records that a TLS descriptor call was expanded
;; in the current function.
21145 (define_expand "@tls_global_dynamic_64_<mode>"
21146 [(parallel
21147 [(set (match_operand:P 0 "register_operand")
21148 (call:P
21149 (mem:QI (match_operand 2))
21150 (const_int 0)))
21151 (unspec:P [(match_operand 1 "tls_symbolic_operand")
21152 (reg:P SP_REG)]
21153 UNSPEC_TLS_GD)])]
21154 "TARGET_64BIT"
21155 "ix86_tls_descriptor_calls_expanded_in_cfun = true;")
21156
;; 32-bit GNU-TLS local-dynamic base sequence: a LEA of %&@tlsldm(%1) into the
;; result, followed by a call through operand 2.  Enabled only for
;; !TARGET_64BIT && TARGET_GNU_TLS.  The result is constrained to "a"; two
;; SImode scratches ("d" and "c") and the flags register are clobbered.
;; Under TARGET_SUN_TLS the call is @tlsldmplt when HAVE_AS_IX86_TLSLDMPLT is
;; nonzero and %p2@plt otherwise.  Without Sun TLS it is a direct %P2 call
;; (flag_plt or no assembler GOT support) or an indirect call through
;; %p2@GOT(%1).
21157 (define_insn "*tls_local_dynamic_base_32_gnu"
21158 [(set (match_operand:SI 0 "register_operand" "=a")
21159 (unspec:SI
21160 [(match_operand:SI 1 "register_operand" "Yb")
21161 (match_operand 2 "constant_call_address_operand" "Bz")
21162 (reg:SI SP_REG)]
21163 UNSPEC_TLS_LD_BASE))
21164 (clobber (match_scratch:SI 3 "=d"))
21165 (clobber (match_scratch:SI 4 "=c"))
21166 (clobber (reg:CC FLAGS_REG))]
21167 "!TARGET_64BIT && TARGET_GNU_TLS"
21168 {
21169 output_asm_insn
21170 ("lea{l}\t{%&@tlsldm(%1), %0|%0, %&@tlsldm[%1]}", operands);
21171 if (TARGET_SUN_TLS)
21172 {
21173 if (HAVE_AS_IX86_TLSLDMPLT)
21174 return "call\t%&@tlsldmplt";
21175 else
21176 return "call\t%p2@plt";
21177 }
21178 if (flag_plt || !HAVE_AS_IX86_TLS_GET_ADDR_GOT)
21179 return "call\t%P2";
21180 return "call\t{*%p2@GOT(%1)|[DWORD PTR %p2@GOT[%1]]}";
21181 }
21182 [(set_attr "type" "multi")
21183 (set_attr "length" "11")])
21184
;; Expander for the 32-bit local-dynamic TLS base sequence.  It builds a
;; parallel with an UNSPEC_TLS_LD_BASE set (base register operand 1, call
;; address operand 2, stack pointer), two SImode scratch clobbers and a flags
;; clobber.  The expander has no enabling condition of its own.  Its
;; preparation statement records that a TLS descriptor call was expanded in
;; the current function.
21185 (define_expand "tls_local_dynamic_base_32"
21186 [(parallel
21187 [(set (match_operand:SI 0 "register_operand")
21188 (unspec:SI
21189 [(match_operand:SI 1 "register_operand")
21190 (match_operand 2 "constant_call_address_operand")
21191 (reg:SI SP_REG)]
21192 UNSPEC_TLS_LD_BASE))
21193 (clobber (scratch:SI))
21194 (clobber (scratch:SI))
21195 (clobber (reg:CC FLAGS_REG))])]
21196 ""
21197 "ix86_tls_descriptor_calls_expanded_in_cfun = true;")
21198
;; 64-bit local-dynamic TLS base sequence, expressed as a call through
;; operand 1 whose value lands in the "a" register, paired with an
;; UNSPEC_TLS_LD_BASE that uses the stack pointer.
;; Output: LEA of %&@tlsld(%rip) into %rdi, then the call: %p1@plt under
;; TARGET_SUN_TLS, a direct %P1 call under flag_plt or without assembler GOT
;; support, and otherwise an indirect call through %p1@GOTPCREL(%rip).
21199 (define_insn "*tls_local_dynamic_base_64_<mode>"
21200 [(set (match_operand:P 0 "register_operand" "=a")
21201 (call:P
21202 (mem:QI (match_operand 1 "constant_call_address_operand" "Bz"))
21203 (match_operand 2)))
21204 (unspec:P [(reg:P SP_REG)] UNSPEC_TLS_LD_BASE)]
21205 "TARGET_64BIT"
21206 {
21207 output_asm_insn
21208 ("lea{q}\t{%&@tlsld(%%rip), %%rdi|rdi, %&@tlsld[rip]}", operands);
21209 if (TARGET_SUN_TLS)
21210 return "call\t%p1@plt";
21211 if (flag_plt || !HAVE_AS_IX86_TLS_GET_ADDR_GOT)
21212 return "call\t%P1";
21213 return "call\t{*%p1@GOTPCREL(%%rip)|[QWORD PTR %p1@GOTPCREL[rip]]}";
21214 }
21215 [(set_attr "type" "multi")
21216 (set_attr "length" "12")])
21217
;; Large-PIC-model variant of the 64-bit local-dynamic TLS base sequence.  The
;; call target is register operand 1 ("b") plus immediate operand 2, and the
;; insn condition requires operand 2 to be a CONST wrapping an UNSPEC_PLTOFF.
;; Only for TARGET_64BIT with ix86_cmodel == CM_LARGE_PIC and !TARGET_PECOFF.
;; Output: LEA of %&@tlsld(%rip) into %rdi, MOVABS of operand 2 into %rax,
;; ADD of operand 1 to %rax, then an indirect call through %rax.
21218 (define_insn "*tls_local_dynamic_base_64_largepic"
21219 [(set (match_operand:DI 0 "register_operand" "=a")
21220 (call:DI
21221 (mem:QI (plus:DI (match_operand:DI 1 "register_operand" "b")
21222 (match_operand:DI 2 "immediate_operand" "i")))
21223 (match_operand 3)))
21224 (unspec:DI [(reg:DI SP_REG)] UNSPEC_TLS_LD_BASE)]
21225 "TARGET_64BIT && ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF
21226 && GET_CODE (operands[2]) == CONST
21227 && GET_CODE (XEXP (operands[2], 0)) == UNSPEC
21228 && XINT (XEXP (operands[2], 0), 1) == UNSPEC_PLTOFF"
21229 {
21230 output_asm_insn
21231 ("lea{q}\t{%&@tlsld(%%rip), %%rdi|rdi, %&@tlsld[rip]}", operands);
21232 output_asm_insn ("movabs{q}\t{%2, %%rax|rax, %2}", operands);
21233 output_asm_insn ("add{q}\t{%1, %%rax|rax, %1}", operands);
21234 return "call\t{*%%rax|rax}";
21235 }
21236 [(set_attr "type" "multi")
21237 (set_attr "length" "22")])
21238
;; Expander for the 64-bit local-dynamic TLS base sequence.  It builds a
;; parallel of a call through operand 1 (with a (const_int 0) second call
;; operand) and an UNSPEC_TLS_LD_BASE that uses the stack pointer.  The
;; preparation statement records that a TLS descriptor call was expanded in
;; the current function.
21239 (define_expand "@tls_local_dynamic_base_64_<mode>"
21240 [(parallel
21241 [(set (match_operand:P 0 "register_operand")
21242 (call:P
21243 (mem:QI (match_operand 1))
21244 (const_int 0)))
21245 (unspec:P [(reg:P SP_REG)] UNSPEC_TLS_LD_BASE)])]
21246 "TARGET_64BIT"
21247 "ix86_tls_descriptor_calls_expanded_in_cfun = true;")
21248
21249 ;; Local dynamic of a single variable is a loss. Show combine how
21250 ;; to convert that back to global dynamic.
21251
;; Matches a 32-bit local-dynamic base (UNSPEC_TLS_LD_BASE) added to the
;; @dtpoff constant of a single TLS symbol (operand 3).  The output template
;; is "#", so the insn is never emitted as-is: it is split into an
;; UNSPEC_TLS_GD parallel over the base register, the symbol and the call
;; address, reusing the same two scratches and the flags clobber.  Both the
;; insn condition and the split condition are empty.
21252 (define_insn_and_split "*tls_local_dynamic_32_once"
21253 [(set (match_operand:SI 0 "register_operand" "=a")
21254 (plus:SI
21255 (unspec:SI [(match_operand:SI 1 "register_operand" "b")
21256 (match_operand 2 "constant_call_address_operand" "Bz")
21257 (reg:SI SP_REG)]
21258 UNSPEC_TLS_LD_BASE)
21259 (const:SI (unspec:SI
21260 [(match_operand 3 "tls_symbolic_operand")]
21261 UNSPEC_DTPOFF))))
21262 (clobber (match_scratch:SI 4 "=d"))
21263 (clobber (match_scratch:SI 5 "=c"))
21264 (clobber (reg:CC FLAGS_REG))]
21265 ""
21266 "#"
21267 ""
21268 [(parallel
21269 [(set (match_dup 0)
21270 (unspec:SI [(match_dup 1) (match_dup 3) (match_dup 2)
21271 (reg:SI SP_REG)]
21272 UNSPEC_TLS_GD))
21273 (clobber (match_dup 4))
21274 (clobber (match_dup 5))
21275 (clobber (reg:CC FLAGS_REG))])])
21276
21277 ;; Load and add the thread base pointer from %<tp_seg>:0.
;; Named expander that sets a PTR-mode register to UNSPEC_TP.  The TLS
;; availability check lives in the preparation statement rather than in the
;; condition string (see the in-body comment): when targetm.have_tls is false
;; an error mentioning __builtin_thread_pointer is reported.
21278 (define_expand "get_thread_pointer<mode>"
21279 [(set (match_operand:PTR 0 "register_operand")
21280 (unspec:PTR [(const_int 0)] UNSPEC_TP))]
21281 ""
21282 {
21283 /* targetm is not visible in the scope of the condition. */
21284 if (!targetm.have_tls)
21285 error ("%<__builtin_thread_pointer%> is not supported on this target");
21286 })
21287
;; Load the thread pointer (UNSPEC_TP) into a general register.  The output
;; template is "#", so the insn is always split: it becomes a plain move from
;; a constant MEM at address 0 placed in the DEFAULT_TLS_SEG_REG address
;; space.
21288 (define_insn_and_split "*load_tp_<mode>"
21289 [(set (match_operand:PTR 0 "register_operand" "=r")
21290 (unspec:PTR [(const_int 0)] UNSPEC_TP))]
21291 ""
21292 "#"
21293 ""
21294 [(set (match_dup 0)
21295 (match_dup 1))]
21296 {
21297 rtx tp_mem = gen_const_mem (<MODE>mode, const0_rtx);
21298
21299 set_mem_addr_space (tp_mem, DEFAULT_TLS_SEG_REG);
21300 operands[1] = tp_mem;
21301 })
21302
;; TARGET_X32 only: load the SImode thread pointer (UNSPEC_TP) zero-extended
;; into a DImode register.  Always split ("#" template, "&& 1" split
;; condition) into a zero-extending load from an SImode constant MEM at
;; address 0 placed in the DEFAULT_TLS_SEG_REG address space.
21303 (define_insn_and_split "*load_tp_x32_zext"
21304 [(set (match_operand:DI 0 "register_operand" "=r")
21305 (zero_extend:DI
21306 (unspec:SI [(const_int 0)] UNSPEC_TP)))]
21307 "TARGET_X32"
21308 "#"
21309 "&& 1"
21310 [(set (match_dup 0)
21311 (zero_extend:DI (match_dup 1)))]
21312 {
21313 rtx tp_mem = gen_const_mem (SImode, const0_rtx);
21314
21315 set_mem_addr_space (tp_mem, DEFAULT_TLS_SEG_REG);
21316 operands[1] = tp_mem;
21317 })
21318
;; Add the thread pointer (UNSPEC_TP) to a register that is tied to the
;; destination ("0").  Always split ("#" template) into an ordinary
;; flags-clobbering PLUS whose second addend is a constant MEM at address 0
;; placed in the DEFAULT_TLS_SEG_REG address space.
21319 (define_insn_and_split "*add_tp_<mode>"
21320 [(set (match_operand:PTR 0 "register_operand" "=r")
21321 (plus:PTR
21322 (unspec:PTR [(const_int 0)] UNSPEC_TP)
21323 (match_operand:PTR 1 "register_operand" "0")))
21324 (clobber (reg:CC FLAGS_REG))]
21325 ""
21326 "#"
21327 ""
21328 [(parallel
21329 [(set (match_dup 0)
21330 (plus:PTR (match_dup 1) (match_dup 2)))
21331 (clobber (reg:CC FLAGS_REG))])]
21332 {
21333 rtx tp_mem = gen_const_mem (<MODE>mode, const0_rtx);
21334
21335 set_mem_addr_space (tp_mem, DEFAULT_TLS_SEG_REG);
21336 operands[2] = tp_mem;
21337 })
21338
;; TARGET_X32 only: add the SImode thread pointer (UNSPEC_TP) to an SImode
;; register tied to the destination and zero-extend the sum to DImode.
;; Always split ("#" template, "&& 1" split condition) into a zero-extended,
;; flags-clobbering SImode PLUS whose second addend is a constant MEM at
;; address 0 placed in the DEFAULT_TLS_SEG_REG address space.
21339 (define_insn_and_split "*add_tp_x32_zext"
21340 [(set (match_operand:DI 0 "register_operand" "=r")
21341 (zero_extend:DI
21342 (plus:SI (unspec:SI [(const_int 0)] UNSPEC_TP)
21343 (match_operand:SI 1 "register_operand" "0"))))
21344 (clobber (reg:CC FLAGS_REG))]
21345 "TARGET_X32"
21346 "#"
21347 "&& 1"
21348 [(parallel
21349 [(set (match_dup 0)
21350 (zero_extend:DI
21351 (plus:SI (match_dup 1) (match_dup 2))))
21352 (clobber (reg:CC FLAGS_REG))])]
21353 {
21354 rtx tp_mem = gen_const_mem (SImode, const0_rtx);
21355
21356 set_mem_addr_space (tp_mem, DEFAULT_TLS_SEG_REG);
21357 operands[2] = tp_mem;
21358 })
21359
21360 ;; The Sun linker took the AMD64 TLS spec literally and can only handle
21361 ;; %rax as destination of the initial executable code sequence.
;; Initial-exec TLS access for TARGET_64BIT && TARGET_SUN_TLS.  The
;; destination is constrained to "a" and the flags register is clobbered.
;; Output is two instructions: a MOV of %fs:0 into the destination, then an
;; ADD of the symbol's @gottpoff(%rip) operand to it.
21362 (define_insn "tls_initial_exec_64_sun"
21363 [(set (match_operand:DI 0 "register_operand" "=a")
21364 (unspec:DI
21365 [(match_operand 1 "tls_symbolic_operand")]
21366 UNSPEC_TLS_IE_SUN))
21367 (clobber (reg:CC FLAGS_REG))]
21368 "TARGET_64BIT && TARGET_SUN_TLS"
21369 {
21370 output_asm_insn
21371 ("mov{q}\t{%%fs:0, %0|%0, QWORD PTR fs:0}", operands);
21372 return "add{q}\t{%a1@gottpoff(%%rip), %0|%0, %a1@gottpoff[rip]}";
21373 }
21374 [(set_attr "type" "multi")])
21375
21376 ;; GNU2 TLS patterns can be split.
21377
;; Expander for the 32-bit GNU2 (TLS descriptor) sequence, emitted as two
;; insns:
;;   1. operand 3 = operand 2 + const (UNSPEC_TLSDESC of symbol operand 1);
;;   2. operand 0 = UNSPEC_TLSDESC [symbol, operand 3, operand 2, SP], with a
;;      flags clobber.
;; Operand 3 is not supplied by the caller: the preparation statement makes
;; it a fresh Pmode pseudo when pseudos can still be created, and otherwise
;; reuses operand 0.  It also records that a TLS descriptor call was expanded
;; in the current function.
21378 (define_expand "tls_dynamic_gnu2_32"
21379 [(set (match_dup 3)
21380 (plus:SI (match_operand:SI 2 "register_operand")
21381 (const:SI
21382 (unspec:SI [(match_operand 1 "tls_symbolic_operand")]
21383 UNSPEC_TLSDESC))))
21384 (parallel
21385 [(set (match_operand:SI 0 "register_operand")
21386 (unspec:SI [(match_dup 1) (match_dup 3)
21387 (match_dup 2) (reg:SI SP_REG)]
21388 UNSPEC_TLSDESC))
21389 (clobber (reg:CC FLAGS_REG))])]
21390 "!TARGET_64BIT && TARGET_GNU2_TLS"
21391 {
21392 operands[3] = can_create_pseudo_p () ? gen_reg_rtx (Pmode) : operands[0];
21393 ix86_tls_descriptor_calls_expanded_in_cfun = true;
21394 })
21395
;; First insn of the 32-bit GNU2 TLS sequence: a LEA computing register
;; operand 1 ("b") plus the symbol's @TLSDESC constant into any general
;; register.  Enabled for !TARGET_64BIT && TARGET_GNU2_TLS.
21396 (define_insn "*tls_dynamic_gnu2_lea_32"
21397 [(set (match_operand:SI 0 "register_operand" "=r")
21398 (plus:SI (match_operand:SI 1 "register_operand" "b")
21399 (const:SI
21400 (unspec:SI [(match_operand 2 "tls_symbolic_operand")]
21401 UNSPEC_TLSDESC))))]
21402 "!TARGET_64BIT && TARGET_GNU2_TLS"
21403 "lea{l}\t{%E2@TLSDESC(%1), %0|%0, %E2@TLSDESC[%1]}"
21404 [(set_attr "type" "lea")
21405 (set_attr "mode" "SI")
21406 (set_attr "length" "6")
21407 (set_attr "length_address" "4")])
21408
;; Second insn of the 32-bit GNU2 TLS sequence: an indirect call through
;; %a1@TLSCALL(%2).  Operand 2 is tied to the "a"-constrained result ("0").
;; Operand 3 ("b") does not appear in the output template; it is listed as an
;; input for the reason given in the inline comment below.  The flags
;; register is clobbered.
21409 (define_insn "*tls_dynamic_gnu2_call_32"
21410 [(set (match_operand:SI 0 "register_operand" "=a")
21411 (unspec:SI [(match_operand 1 "tls_symbolic_operand")
21412 (match_operand:SI 2 "register_operand" "0")
21413 ;; we have to make sure %ebx still points to the GOT
21414 (match_operand:SI 3 "register_operand" "b")
21415 (reg:SI SP_REG)]
21416 UNSPEC_TLSDESC))
21417 (clobber (reg:CC FLAGS_REG))]
21418 "!TARGET_64BIT && TARGET_GNU2_TLS"
21419 "call\t{*%a1@TLSCALL(%2)|[DWORD PTR [%2+%a1@TLSCALL]]}"
21420 [(set_attr "type" "call")
21421 (set_attr "length" "2")
21422 (set_attr "length_address" "0")])
21423
;; Matches a 32-bit UNSPEC_TLSDESC on a module-base operand (operand 3) added
;; to the @dtpoff constant of a TLS symbol (operand 1).  The output template
;; is "#" and the split condition is "&& 1", so the insn is always split.
;; The preparation statement emits the tls_dynamic_gnu2_32 sequence for
;; symbol operand 1 with base register operand 2 into operand 5, and the
;; split pattern then copies operand 5 into operand 0.  Operand 5 is a fresh
;; Pmode pseudo when pseudos can still be created, and operand 0 otherwise.
21424 (define_insn_and_split "*tls_dynamic_gnu2_combine_32"
21425 [(set (match_operand:SI 0 "register_operand" "=&a")
21426 (plus:SI
21427 (unspec:SI [(match_operand 3 "tls_modbase_operand")
21428 (match_operand:SI 4)
21429 (match_operand:SI 2 "register_operand" "b")
21430 (reg:SI SP_REG)]
21431 UNSPEC_TLSDESC)
21432 (const:SI (unspec:SI
21433 [(match_operand 1 "tls_symbolic_operand")]
21434 UNSPEC_DTPOFF))))
21435 (clobber (reg:CC FLAGS_REG))]
21436 "!TARGET_64BIT && TARGET_GNU2_TLS"
21437 "#"
21438 "&& 1"
21439 [(set (match_dup 0) (match_dup 5))]
21440 {
21441 operands[5] = can_create_pseudo_p () ? gen_reg_rtx (Pmode) : operands[0];
21442 emit_insn (gen_tls_dynamic_gnu2_32 (operands[5], operands[1], operands[2]));
21443 })
21444
;; Expander for the 64-bit GNU2 (TLS descriptor) sequence, emitted as two
;; insns:
;;   1. operand 2 = UNSPEC_TLSDESC of symbol operand 1;
;;   2. operand 0 = UNSPEC_TLSDESC [symbol, operand 2, SP], with a flags
;;      clobber.
;; Operand 2 is not supplied by the caller: the preparation statement makes
;; it a fresh ptr_mode pseudo when pseudos can still be created, and
;; otherwise reuses operand 0.  It also records that a TLS descriptor call
;; was expanded in the current function.
21445 (define_expand "@tls_dynamic_gnu2_64_<mode>"
21446 [(set (match_dup 2)
21447 (unspec:PTR [(match_operand 1 "tls_symbolic_operand")]
21448 UNSPEC_TLSDESC))
21449 (parallel
21450 [(set (match_operand:PTR 0 "register_operand")
21451 (unspec:PTR [(match_dup 1) (match_dup 2) (reg:PTR SP_REG)]
21452 UNSPEC_TLSDESC))
21453 (clobber (reg:CC FLAGS_REG))])]
21454 "TARGET_64BIT && TARGET_GNU2_TLS"
21455 {
21456 operands[2] = can_create_pseudo_p () ? gen_reg_rtx (ptr_mode) : operands[0];
21457 ix86_tls_descriptor_calls_expanded_in_cfun = true;
21458 })
21459
;; First insn of the 64-bit GNU2 TLS sequence: a RIP-relative LEA of the
;; symbol's @TLSDESC operand into any general register.  The opcode suffix is
;; taken from operand 0's size (%z0).  Enabled for
;; TARGET_64BIT && TARGET_GNU2_TLS.
21460 (define_insn "*tls_dynamic_gnu2_lea_64_<mode>"
21461 [(set (match_operand:PTR 0 "register_operand" "=r")
21462 (unspec:PTR [(match_operand 1 "tls_symbolic_operand")]
21463 UNSPEC_TLSDESC))]
21464 "TARGET_64BIT && TARGET_GNU2_TLS"
21465 "lea%z0\t{%E1@TLSDESC(%%rip), %0|%0, %E1@TLSDESC[rip]}"
21466 [(set_attr "type" "lea")
21467 (set_attr "mode" "<MODE>")
21468 (set_attr "length" "7")
21469 (set_attr "length_address" "4")])
21470
;; Second insn of the 64-bit GNU2 TLS sequence: an indirect call through
;; %a1@TLSCALL(%2).  Operand 2 is tied to the "a"-constrained result ("0"),
;; and the flags register is clobbered.  Enabled for
;; TARGET_64BIT && TARGET_GNU2_TLS.
21471 (define_insn "*tls_dynamic_gnu2_call_64_<mode>"
21472 [(set (match_operand:PTR 0 "register_operand" "=a")
21473 (unspec:PTR [(match_operand 1 "tls_symbolic_operand")
21474 (match_operand:PTR 2 "register_operand" "0")
21475 (reg:PTR SP_REG)]
21476 UNSPEC_TLSDESC))
21477 (clobber (reg:CC FLAGS_REG))]
21478 "TARGET_64BIT && TARGET_GNU2_TLS"
21479 "call\t{*%a1@TLSCALL(%2)|[QWORD PTR [%2+%a1@TLSCALL]]}"
21480 [(set_attr "type" "call")
21481 (set_attr "length" "2")
21482 (set_attr "length_address" "0")])
21483
;; Matches a 64-bit UNSPEC_TLSDESC on a module-base operand (operand 2) added
;; to the @dtpoff constant of a TLS symbol (operand 1).  The output template
;; is "#" and the split condition is "&& 1", so the insn is always split.
;; The preparation statement emits the tls_dynamic_gnu2_64 sequence (in
;; ptr_mode) for symbol operand 1 into operand 4, and the split pattern then
;; copies operand 4 into operand 0.  Operand 4 is a fresh ptr_mode pseudo
;; when pseudos can still be created, and operand 0 otherwise.
21484 (define_insn_and_split "*tls_dynamic_gnu2_combine_64_<mode>"
21485 [(set (match_operand:PTR 0 "register_operand" "=&a")
21486 (plus:PTR
21487 (unspec:PTR [(match_operand 2 "tls_modbase_operand")
21488 (match_operand:PTR 3)
21489 (reg:PTR SP_REG)]
21490 UNSPEC_TLSDESC)
21491 (const:PTR (unspec:PTR
21492 [(match_operand 1 "tls_symbolic_operand")]
21493 UNSPEC_DTPOFF))))
21494 (clobber (reg:CC FLAGS_REG))]
21495 "TARGET_64BIT && TARGET_GNU2_TLS"
21496 "#"
21497 "&& 1"
21498 [(set (match_dup 0) (match_dup 4))]
21499 {
21500 operands[4] = can_create_pseudo_p () ? gen_reg_rtx (ptr_mode) : operands[0];
21501 emit_insn (gen_tls_dynamic_gnu2_64 (ptr_mode, operands[4], operands[1]));
21502 })
21503
;; When TARGET_TLS_DIRECT_SEG_REFS holds, any insn pattern accepted by the
;; tls_address_pattern predicate is replaced by the result of
;; ix86_rewrite_tls_address on that whole pattern.
21504 (define_split
21505 [(match_operand 0 "tls_address_pattern")]
21506 "TARGET_TLS_DIRECT_SEG_REFS"
21507 [(match_dup 0)]
21508 "operands[0] = ix86_rewrite_tls_address (operands[0]);")
21509
21510 \f
21511 ;; These patterns match the binary 387 instructions for addM3, subM3,
21512 ;; mulM3 and divM3. There are three patterns for each of DFmode and
21513 ;; SFmode. The first is the normal insn, the second the same insn but
21514 ;; with one operand a conversion, and the third the same insn but with
21515 ;; the other operand a conversion. The conversion may be from SFmode or
21516 ;; SImode if the target mode is DFmode, but only from SImode if the target
21517 ;; mode is SFmode.
21518
21519 ;; GCC is somewhat smarter about handling normal two-address instructions,
21520 ;; so use special patterns for the commutative operations (add and mul).
21521
;; XFmode commutative binary floating-point operation on x87 registers.
;; Operand 1 is tied to the output and marked commutative ("%0"); operand 2
;; is another "f" register.  Requires TARGET_80387 and a commutative
;; operator.  The assembly comes from output_387_binary_op.  The type
;; attribute is "fmul" for a mult_operator and "fop" otherwise.
21522 (define_insn "*fop_xf_comm_i387"
21523 [(set (match_operand:XF 0 "register_operand" "=f")
21524 (match_operator:XF 3 "binary_fp_operator"
21525 [(match_operand:XF 1 "register_operand" "%0")
21526 (match_operand:XF 2 "register_operand" "f")]))]
21527 "TARGET_80387
21528 && COMMUTATIVE_ARITH_P (operands[3])"
21529 "* return output_387_binary_op (insn, operands);"
21530 [(set (attr "type")
21531 (if_then_else (match_operand:XF 3 "mult_operator")
21532 (const_string "fmul")
21533 (const_string "fop")))
21534 (set_attr "mode" "XF")])
21535
;; SFmode/DFmode commutative binary floating-point operation with three
;; alternatives: 0 uses "f" registers, 1 uses "x" registers with operand 1
;; tied to the output (isa noavx), 2 uses "v" registers (isa avx).  The insn
;; requires SSE math for the mode or x87 arithmetic for the mode, a
;; commutative operator, and at most one memory input.
;; The "type" attribute is ssemul/sseadd for alternatives 1 and 2, and
;; fmul/fop for alternative 0.
;; The "enabled" attribute: when SSE math handles the mode, alternative 0 is
;; enabled only with TARGET_MIX_SSE_I387 && X87_ENABLE_ARITH and the others
;; keep the default ("*"); otherwise only alternative 0 is enabled.
21536 (define_insn "*fop_<mode>_comm"
21537 [(set (match_operand:MODEF 0 "register_operand" "=f,x,v")
21538 (match_operator:MODEF 3 "binary_fp_operator"
21539 [(match_operand:MODEF 1 "nonimmediate_operand" "%0,0,v")
21540 (match_operand:MODEF 2 "nonimmediate_operand" "fm,xm,vm")]))]
21541 "((SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
21542 || (TARGET_80387 && X87_ENABLE_ARITH (<MODE>mode)))
21543 && COMMUTATIVE_ARITH_P (operands[3])
21544 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
21545 "* return output_387_binary_op (insn, operands);"
21546 [(set (attr "type")
21547 (if_then_else (eq_attr "alternative" "1,2")
21548 (if_then_else (match_operand:MODEF 3 "mult_operator")
21549 (const_string "ssemul")
21550 (const_string "sseadd"))
21551 (if_then_else (match_operand:MODEF 3 "mult_operator")
21552 (const_string "fmul")
21553 (const_string "fop"))))
21554 (set_attr "isa" "*,noavx,avx")
21555 (set_attr "prefix" "orig,orig,vex")
21556 (set_attr "mode" "<MODE>")
21557 (set (attr "enabled")
21558 (if_then_else
21559 (match_test ("SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"))
21560 (if_then_else
21561 (eq_attr "alternative" "0")
21562 (symbol_ref "TARGET_MIX_SSE_I387
21563 && X87_ENABLE_ARITH (<MODE>mode)")
21564 (const_string "*"))
21565 (if_then_else
21566 (eq_attr "alternative" "0")
21567 (symbol_ref "true")
21568 (symbol_ref "false"))))])
21569
;; Scalar HFmode arithmetic, instantiated over the plusminusmultdiv code
;; iterator and emitted as v<insn>sh.  Requires TARGET_AVX512FP16 and at most
;; one memory input.  Operand 1's constraint carries the iterator's <comm>
;; marker.
21570 (define_insn "*<insn>hf"
21571 [(set (match_operand:HF 0 "register_operand" "=v")
21572 (plusminusmultdiv:HF
21573 (match_operand:HF 1 "nonimmediate_operand" "<comm>v")
21574 (match_operand:HF 2 "nonimmediate_operand" "vm")))]
21575 "TARGET_AVX512FP16
21576 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
21577 "v<insn>sh\t{%2, %1, %0|%0, %1, %2}"
21578 [(set_attr "prefix" "evex")
21579 (set_attr "mode" "HF")])
21580
;; Scalar SFmode reciprocal approximation (UNSPEC_RCP) for
;; TARGET_SSE && TARGET_SSE_MATH.  Four alternatives: input tied to the
;; output, input in another "x" register, memory input without AVX, and
;; "ja" memory input with AVX (addr attribute gpr16).
;; "preferred_for_speed": always true with TARGET_AVX; otherwise
;; alternatives 1-3 are preferred only when
;; !TARGET_SSE_PARTIAL_REG_DEPENDENCY, and alternative 0 always is.
21581 (define_insn "*rcpsf2_sse"
21582 [(set (match_operand:SF 0 "register_operand" "=x,x,x,x")
21583 (unspec:SF [(match_operand:SF 1 "nonimmediate_operand" "0,x,m,ja")]
21584 UNSPEC_RCP))]
21585 "TARGET_SSE && TARGET_SSE_MATH"
21586 "@
21587 %vrcpss\t{%d1, %0|%0, %d1}
21588 %vrcpss\t{%d1, %0|%0, %d1}
21589 rcpss\t{%1, %d0|%d0, %1}
21590 vrcpss\t{%1, %d0|%d0, %1}"
21591 [(set_attr "isa" "*,*,noavx,avx")
21592 (set_attr "addr" "*,*,*,gpr16")
21593 (set_attr "type" "sse")
21594 (set_attr "atom_sse_attr" "rcp")
21595 (set_attr "btver2_sse_attr" "rcp")
21596 (set_attr "prefix" "maybe_vex")
21597 (set_attr "mode" "SF")
21598 (set_attr "avx_partial_xmm_update" "false,false,true,true")
21599 (set (attr "preferred_for_speed")
21600 (cond [(match_test "TARGET_AVX")
21601 (symbol_ref "true")
21602 (eq_attr "alternative" "1,2,3")
21603 (symbol_ref "!TARGET_SSE_PARTIAL_REG_DEPENDENCY")
21604 ]
21605 (symbol_ref "true")))])
21606
;; Scalar HFmode reciprocal approximation (UNSPEC_RCP), emitted as vrcpsh.
;; Requires TARGET_AVX512FP16.  Two alternatives: register input and memory
;; input.  Only the memory alternative is flagged avx_partial_xmm_update.
21607 (define_insn "rcphf2"
21608 [(set (match_operand:HF 0 "register_operand" "=v,v")
21609 (unspec:HF [(match_operand:HF 1 "nonimmediate_operand" "v,m")]
21610 UNSPEC_RCP))]
21611 "TARGET_AVX512FP16"
21612 "@
21613 vrcpsh\t{%d1, %0|%0, %d1}
21614 vrcpsh\t{%1, %d0|%d0, %1}"
21615 [(set_attr "type" "sse")
21616 (set_attr "prefix" "evex")
21617 (set_attr "mode" "HF")
21618 (set_attr "avx_partial_xmm_update" "false,true")])
21619
;; XFmode non-commutative binary floating-point operation on x87 registers.
;; The two alternatives allow either input to be tied to the output, with
;; the other in an "f" register.  Requires TARGET_80387 and a
;; non-commutative operator.  The assembly comes from output_387_binary_op.
;; The type attribute is "fdiv" for a div_operator and "fop" otherwise.
21620 (define_insn "*fop_xf_1_i387"
21621 [(set (match_operand:XF 0 "register_operand" "=f,f")
21622 (match_operator:XF 3 "binary_fp_operator"
21623 [(match_operand:XF 1 "register_operand" "0,f")
21624 (match_operand:XF 2 "register_operand" "f,0")]))]
21625 "TARGET_80387
21626 && !COMMUTATIVE_ARITH_P (operands[3])"
21627 "* return output_387_binary_op (insn, operands);"
21628 [(set (attr "type")
21629 (if_then_else (match_operand:XF 3 "div_operator")
21630 (const_string "fdiv")
21631 (const_string "fop")))
21632 (set_attr "mode" "XF")])
21633
;; SFmode/DFmode non-commutative binary floating-point operation with four
;; alternatives: 0 and 1 use "f" registers (either input tied to the output,
;; the other "fm"), 2 uses "x" registers with operand 1 tied to the output
;; (isa noavx), 3 uses "v" registers (isa avx).  The insn requires SSE math
;; for the mode or x87 arithmetic for the mode, a non-commutative operator,
;; and at most one memory input.
;; The "type" attribute is ssediv/sseadd for alternatives 2 and 3, and
;; fdiv/fop for alternatives 0 and 1.
;; The "enabled" attribute: when SSE math handles the mode, alternatives 0
;; and 1 are enabled only with TARGET_MIX_SSE_I387 && X87_ENABLE_ARITH and
;; the others keep the default ("*"); otherwise only alternatives 0 and 1
;; are enabled.
21634 (define_insn "*fop_<mode>_1"
21635 [(set (match_operand:MODEF 0 "register_operand" "=f,f,x,v")
21636 (match_operator:MODEF 3 "binary_fp_operator"
21637 [(match_operand:MODEF 1
21638 "x87nonimm_ssenomem_operand" "0,fm,0,v")
21639 (match_operand:MODEF 2
21640 "nonimmediate_operand" "fm,0,xm,vm")]))]
21641 "((SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
21642 || (TARGET_80387 && X87_ENABLE_ARITH (<MODE>mode)))
21643 && !COMMUTATIVE_ARITH_P (operands[3])
21644 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
21645 "* return output_387_binary_op (insn, operands);"
21646 [(set (attr "type")
21647 (if_then_else (eq_attr "alternative" "2,3")
21648 (if_then_else (match_operand:MODEF 3 "div_operator")
21649 (const_string "ssediv")
21650 (const_string "sseadd"))
21651 (if_then_else (match_operand:MODEF 3 "div_operator")
21652 (const_string "fdiv")
21653 (const_string "fop"))))
21654 (set_attr "isa" "*,*,noavx,avx")
21655 (set_attr "prefix" "orig,orig,orig,vex")
21656 (set_attr "mode" "<MODE>")
21657 (set (attr "enabled")
21658 (if_then_else
21659 (match_test ("SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"))
21660 (if_then_else
21661 (eq_attr "alternative" "0,1")
21662 (symbol_ref "TARGET_MIX_SSE_I387
21663 && X87_ENABLE_ARITH (<MODE>mode)")
21664 (const_string "*"))
21665 (if_then_else
21666 (eq_attr "alternative" "0,1")
21667 (symbol_ref "true")
21668 (symbol_ref "false"))))])
21669
;; x87 binary operation whose FIRST input is an integer (SWI24) memory
;; operand converted with FLOAT; the second input is tied to the output.
;; Requires TARGET_80387 with X87_ENABLE_FLOAT for the mode pair, no SSE math
;; for the FP mode, and either the TARGET_USE_*MODE_FIOP tuning for the
;; integer mode or optimizing the function for size.
;; The type attribute is fmul, fdiv or fop according to the operator.  The
;; "mode" attribute is the integer mode and fp_int_src is set.
21670 (define_insn "*fop_<X87MODEF:mode>_2_i387"
21671 [(set (match_operand:X87MODEF 0 "register_operand" "=f")
21672 (match_operator:X87MODEF 3 "binary_fp_operator"
21673 [(float:X87MODEF
21674 (match_operand:SWI24 1 "nonimmediate_operand" "m"))
21675 (match_operand:X87MODEF 2 "register_operand" "0")]))]
21676 "TARGET_80387 && X87_ENABLE_FLOAT (<X87MODEF:MODE>mode, <SWI24:MODE>mode)
21677 && !(SSE_FLOAT_MODE_P (<X87MODEF:MODE>mode) && TARGET_SSE_MATH)
21678 && (TARGET_USE_<SWI24:MODE>MODE_FIOP
21679 || optimize_function_for_size_p (cfun))"
21680 "* return output_387_binary_op (insn, operands);"
21681 [(set (attr "type")
21682 (cond [(match_operand:X87MODEF 3 "mult_operator")
21683 (const_string "fmul")
21684 (match_operand:X87MODEF 3 "div_operator")
21685 (const_string "fdiv")
21686 ]
21687 (const_string "fop")))
21688 (set_attr "fp_int_src" "true")
21689 (set_attr "mode" "<SWI24:MODE>")])
21690
;; x87 binary operation whose SECOND input is an integer (SWI24) memory
;; operand converted with FLOAT; the first input is tied to the output.
;; Same enabling condition as the variant with the integer as first input:
;; TARGET_80387 with X87_ENABLE_FLOAT for the mode pair, no SSE math for the
;; FP mode, and either the TARGET_USE_*MODE_FIOP tuning or optimizing for
;; size.
;; The type attribute is fmul, fdiv or fop according to the operator.  The
;; "mode" attribute is the integer mode and fp_int_src is set.
21691 (define_insn "*fop_<X87MODEF:mode>_3_i387"
21692 [(set (match_operand:X87MODEF 0 "register_operand" "=f")
21693 (match_operator:X87MODEF 3 "binary_fp_operator"
21694 [(match_operand:X87MODEF 1 "register_operand" "0")
21695 (float:X87MODEF
21696 (match_operand:SWI24 2 "nonimmediate_operand" "m"))]))]
21697 "TARGET_80387 && X87_ENABLE_FLOAT (<X87MODEF:MODE>mode, <SWI24:MODE>mode)
21698 && !(SSE_FLOAT_MODE_P (<X87MODEF:MODE>mode) && TARGET_SSE_MATH)
21699 && (TARGET_USE_<SWI24:MODE>MODE_FIOP
21700 || optimize_function_for_size_p (cfun))"
21701 "* return output_387_binary_op (insn, operands);"
21702 [(set (attr "type")
21703 (cond [(match_operand:X87MODEF 3 "mult_operator")
21704 (const_string "fmul")
21705 (match_operand:X87MODEF 3 "div_operator")
21706 (const_string "fdiv")
21707 ]
21708 (const_string "fop")))
21709 (set_attr "fp_int_src" "true")
21710 (set_attr "mode" "<SWI24:MODE>")])
21711
;; XFmode x87 binary operation whose FIRST input is an SFmode/DFmode operand
;; widened with FLOAT_EXTEND.  Alternative 0 takes the narrow operand from
;; "fm" with operand 2 tied to the output; alternative 1 ties the narrow
;; operand to the output.  Requires TARGET_80387.  The type attribute is
;; fmul, fdiv or fop according to the operator.  The "mode" attribute is the
;; narrow mode.
21712 (define_insn "*fop_xf_4_i387"
21713 [(set (match_operand:XF 0 "register_operand" "=f,f")
21714 (match_operator:XF 3 "binary_fp_operator"
21715 [(float_extend:XF
21716 (match_operand:MODEF 1 "nonimmediate_operand" "fm,0"))
21717 (match_operand:XF 2 "register_operand" "0,f")]))]
21718 "TARGET_80387"
21719 "* return output_387_binary_op (insn, operands);"
21720 [(set (attr "type")
21721 (cond [(match_operand:XF 3 "mult_operator")
21722 (const_string "fmul")
21723 (match_operand:XF 3 "div_operator")
21724 (const_string "fdiv")
21725 ]
21726 (const_string "fop")))
21727 (set_attr "mode" "<MODE>")])
21728
;; DFmode x87 binary operation whose FIRST input is an SFmode operand widened
;; with FLOAT_EXTEND.  Requires TARGET_80387 with X87_ENABLE_ARITH (DFmode)
;; and no SSE math for DFmode.  The type attribute is fmul, fdiv or fop
;; according to the operator.  The "mode" attribute is SF.
21729 (define_insn "*fop_df_4_i387"
21730 [(set (match_operand:DF 0 "register_operand" "=f,f")
21731 (match_operator:DF 3 "binary_fp_operator"
21732 [(float_extend:DF
21733 (match_operand:SF 1 "nonimmediate_operand" "fm,0"))
21734 (match_operand:DF 2 "register_operand" "0,f")]))]
21735 "TARGET_80387 && X87_ENABLE_ARITH (DFmode)
21736 && !(SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH)"
21737 "* return output_387_binary_op (insn, operands);"
21738 [(set (attr "type")
21739 (cond [(match_operand:DF 3 "mult_operator")
21740 (const_string "fmul")
21741 (match_operand:DF 3 "div_operator")
21742 (const_string "fdiv")
21743 ]
21744 (const_string "fop")))
21745 (set_attr "mode" "SF")])
21746
;; XFmode x87 binary operation whose SECOND input is an SFmode/DFmode operand
;; widened with FLOAT_EXTEND.  Alternative 0 ties operand 1 to the output and
;; takes the narrow operand from "fm"; alternative 1 ties the narrow operand
;; to the output.  Requires TARGET_80387.  The type attribute is fmul, fdiv
;; or fop according to the operator.  The "mode" attribute is the narrow
;; mode.
21747 (define_insn "*fop_xf_5_i387"
21748 [(set (match_operand:XF 0 "register_operand" "=f,f")
21749 (match_operator:XF 3 "binary_fp_operator"
21750 [(match_operand:XF 1 "register_operand" "0,f")
21751 (float_extend:XF
21752 (match_operand:MODEF 2 "nonimmediate_operand" "fm,0"))]))]
21753 "TARGET_80387"
21754 "* return output_387_binary_op (insn, operands);"
21755 [(set (attr "type")
21756 (cond [(match_operand:XF 3 "mult_operator")
21757 (const_string "fmul")
21758 (match_operand:XF 3 "div_operator")
21759 (const_string "fdiv")
21760 ]
21761 (const_string "fop")))
21762 (set_attr "mode" "<MODE>")])
21763
;; DFmode x87 binary operation whose SECOND input is an SFmode operand
;; widened with FLOAT_EXTEND.  Requires TARGET_80387 with
;; X87_ENABLE_ARITH (DFmode) and no SSE math for DFmode.  The type attribute
;; is fmul, fdiv or fop according to the operator.  The "mode" attribute is
;; SF.
21764 (define_insn "*fop_df_5_i387"
21765 [(set (match_operand:DF 0 "register_operand" "=f,f")
21766 (match_operator:DF 3 "binary_fp_operator"
21767 [(match_operand:DF 1 "register_operand" "0,f")
21768 (float_extend:DF
21769 (match_operand:SF 2 "nonimmediate_operand" "fm,0"))]))]
21770 "TARGET_80387 && X87_ENABLE_ARITH (DFmode)
21771 && !(SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH)"
21772 "* return output_387_binary_op (insn, operands);"
21773 [(set (attr "type")
21774 (cond [(match_operand:DF 3 "mult_operator")
21775 (const_string "fmul")
21776 (match_operand:DF 3 "div_operator")
21777 (const_string "fdiv")
21778 ]
21779 (const_string "fop")))
21780 (set_attr "mode" "SF")])
21781
;; XFmode x87 binary operation where BOTH inputs are SFmode/DFmode operands
;; widened with FLOAT_EXTEND.  Operand 1 must be a register; operand 2 may be
;; memory in alternative 0.  Requires TARGET_80387.  The type attribute is
;; fmul, fdiv or fop according to the operator.  The "mode" attribute is the
;; narrow mode.
21782 (define_insn "*fop_xf_6_i387"
21783 [(set (match_operand:XF 0 "register_operand" "=f,f")
21784 (match_operator:XF 3 "binary_fp_operator"
21785 [(float_extend:XF
21786 (match_operand:MODEF 1 "register_operand" "0,f"))
21787 (float_extend:XF
21788 (match_operand:MODEF 2 "nonimmediate_operand" "fm,0"))]))]
21789 "TARGET_80387"
21790 "* return output_387_binary_op (insn, operands);"
21791 [(set (attr "type")
21792 (cond [(match_operand:XF 3 "mult_operator")
21793 (const_string "fmul")
21794 (match_operand:XF 3 "div_operator")
21795 (const_string "fdiv")
21796 ]
21797 (const_string "fop")))
21798 (set_attr "mode" "<MODE>")])
21799
;; DFmode x87 binary operation where BOTH inputs are SFmode operands widened
;; with FLOAT_EXTEND.  Operand 1 must be a register; operand 2 may be memory
;; in alternative 0.  Requires TARGET_80387 with X87_ENABLE_ARITH (DFmode)
;; and no SSE math for DFmode.  The type attribute is fmul, fdiv or fop
;; according to the operator.  The "mode" attribute is SF.
21800 (define_insn "*fop_df_6_i387"
21801 [(set (match_operand:DF 0 "register_operand" "=f,f")
21802 (match_operator:DF 3 "binary_fp_operator"
21803 [(float_extend:DF
21804 (match_operand:SF 1 "register_operand" "0,f"))
21805 (float_extend:DF
21806 (match_operand:SF 2 "nonimmediate_operand" "fm,0"))]))]
21807 "TARGET_80387 && X87_ENABLE_ARITH (DFmode)
21808 && !(SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH)"
21809 "* return output_387_binary_op (insn, operands);"
21810 [(set (attr "type")
21811 (cond [(match_operand:DF 3 "mult_operator")
21812 (const_string "fmul")
21813 (match_operand:DF 3 "div_operator")
21814 (const_string "fdiv")
21815 ]
21816 (const_string "fop")))
21817 (set_attr "mode" "SF")])
21818 \f
21819 ;; FPU special functions.
21820
21821 ;; This pattern implements a no-op XFmode truncation for
21822 ;; all fancy i386 XFmode math functions.
21823
;; XFmode -> SFmode/DFmode truncation wrapped in UNSPEC_TRUNC_NOOP.  The
;; source is an "f" register and the destination may be memory or an "f"
;; register ("=mf").  The assembly comes from output_387_reg_move.  Requires
;; TARGET_USE_FANCY_MATH_387.
21824 (define_insn "truncxf<mode>2_i387_noop_unspec"
21825 [(set (match_operand:MODEF 0 "nonimmediate_operand" "=mf")
21826 (unspec:MODEF [(match_operand:XF 1 "register_operand" "f")]
21827 UNSPEC_TRUNC_NOOP))]
21828 "TARGET_USE_FANCY_MATH_387"
21829 "* return output_387_reg_move (insn, operands);"
21830 [(set_attr "type" "fmov")
21831 (set_attr "mode" "<MODE>")])
21832
;; XFmode square root, emitted as a single "fsqrt".  The input is tied to the
;; output register ("0").  Requires TARGET_USE_FANCY_MATH_387.
21833 (define_insn "sqrtxf2"
21834 [(set (match_operand:XF 0 "register_operand" "=f")
21835 (sqrt:XF (match_operand:XF 1 "register_operand" "0")))]
21836 "TARGET_USE_FANCY_MATH_387"
21837 "fsqrt"
21838 [(set_attr "type" "fpspc")
21839 (set_attr "mode" "XF")
21840 (set_attr "athlon_decode" "direct")
21841 (set_attr "amdfam10_decode" "direct")
21842 (set_attr "bdver1_decode" "direct")])
21843
;; Scalar SFmode reciprocal square-root approximation (UNSPEC_RSQRT) for
;; TARGET_SSE && TARGET_SSE_MATH.  Four alternatives: input tied to the
;; output, input in another "x" register, memory input without AVX, and
;; "ja" memory input with AVX (addr attribute gpr16).
;; "preferred_for_speed": always true with TARGET_AVX; otherwise
;; alternatives 1-3 are preferred only when
;; !TARGET_SSE_PARTIAL_REG_DEPENDENCY, and alternative 0 always is.
21844 (define_insn "*rsqrtsf2_sse"
21845 [(set (match_operand:SF 0 "register_operand" "=x,x,x,x")
21846 (unspec:SF [(match_operand:SF 1 "nonimmediate_operand" "0,x,m,ja")]
21847 UNSPEC_RSQRT))]
21848 "TARGET_SSE && TARGET_SSE_MATH"
21849 "@
21850 %vrsqrtss\t{%d1, %0|%0, %d1}
21851 %vrsqrtss\t{%d1, %0|%0, %d1}
21852 rsqrtss\t{%1, %d0|%d0, %1}
21853 vrsqrtss\t{%1, %d0|%d0, %1}"
21854 [(set_attr "isa" "*,*,noavx,avx")
21855 (set_attr "addr" "*,*,*,gpr16")
21856 (set_attr "type" "sse")
21857 (set_attr "atom_sse_attr" "rcp")
21858 (set_attr "btver2_sse_attr" "rcp")
21859 (set_attr "prefix" "maybe_vex")
21860 (set_attr "mode" "SF")
21861 (set_attr "avx_partial_xmm_update" "false,false,true,true")
21862 (set (attr "preferred_for_speed")
21863 (cond [(match_test "TARGET_AVX")
21864 (symbol_ref "true")
21865 (eq_attr "alternative" "1,2,3")
21866 (symbol_ref "!TARGET_SSE_PARTIAL_REG_DEPENDENCY")
21867 ]
21868 (symbol_ref "true")))])
21869
;; Named expander for SFmode reciprocal square root (UNSPEC_RSQRT) under
;; TARGET_SSE && TARGET_SSE_MATH.  The RTL template is never emitted: the
;; preparation statement hands the operands to ix86_emit_swsqrtsf with a
;; final argument of 1 and finishes with DONE.
21870 (define_expand "rsqrtsf2"
21871 [(set (match_operand:SF 0 "register_operand")
21872 (unspec:SF [(match_operand:SF 1 "nonimmediate_operand")]
21873 UNSPEC_RSQRT))]
21874 "TARGET_SSE && TARGET_SSE_MATH"
21875 {
21876 ix86_emit_swsqrtsf (operands[0], operands[1], SFmode, 1);
21877 DONE;
21878 })
21879
;; Scalar HFmode reciprocal square-root approximation (UNSPEC_RSQRT),
;; emitted as vrsqrtsh.  Requires TARGET_AVX512FP16.  Two alternatives:
;; register input and memory input.  Only the memory alternative is flagged
;; avx_partial_xmm_update.
21880 (define_insn "rsqrthf2"
21881 [(set (match_operand:HF 0 "register_operand" "=v,v")
21882 (unspec:HF [(match_operand:HF 1 "nonimmediate_operand" "v,m")]
21883 UNSPEC_RSQRT))]
21884 "TARGET_AVX512FP16"
21885 "@
21886 vrsqrtsh\t{%d1, %0|%0, %d1}
21887 vrsqrtsh\t{%1, %d0|%d0, %1}"
21888 [(set_attr "type" "sse")
21889 (set_attr "prefix" "evex")
21890 (set_attr "avx_partial_xmm_update" "false,true")
21891 (set_attr "mode" "HF")])
21892
;; Scalar HFmode square root, emitted as vsqrtsh.  Requires
;; TARGET_AVX512FP16.  Two alternatives: register input and memory input.
;; Only the memory alternative is flagged avx_partial_xmm_update.
21893 (define_insn "sqrthf2"
21894 [(set (match_operand:HF 0 "register_operand" "=v,v")
21895 (sqrt:HF
21896 (match_operand:HF 1 "nonimmediate_operand" "v,m")))]
21897 "TARGET_AVX512FP16"
21898 "@
21899 vsqrtsh\t{%d1, %0|%0, %d1}
21900 vsqrtsh\t{%1, %d0|%d0, %1}"
21901 [(set_attr "type" "sse")
21902 (set_attr "prefix" "evex")
21903 (set_attr "avx_partial_xmm_update" "false,true")
21904 (set_attr "mode" "HF")])
21905
;; Scalar SFmode/DFmode square root using SSE, enabled when SSE math handles
;; the mode.  Three alternatives: input tied to the output, input in another
;; "v" register, and memory input.
;; "preferred_for_speed": always true with TARGET_AVX; otherwise
;; alternatives 1 and 2 are preferred only when
;; !TARGET_SSE_PARTIAL_REG_DEPENDENCY, and alternative 0 always is.
21906 (define_insn "*sqrt<mode>2_sse"
21907 [(set (match_operand:MODEF 0 "register_operand" "=v,v,v")
21908 (sqrt:MODEF
21909 (match_operand:MODEF 1 "nonimmediate_operand" "0,v,m")))]
21910 "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
21911 "@
21912 %vsqrt<ssemodesuffix>\t{%d1, %0|%0, %d1}
21913 %vsqrt<ssemodesuffix>\t{%d1, %0|%0, %d1}
21914 %vsqrt<ssemodesuffix>\t{%1, %d0|%d0, %1}"
21915 [(set_attr "type" "sse")
21916 (set_attr "atom_sse_attr" "sqrt")
21917 (set_attr "btver2_sse_attr" "sqrt")
21918 (set_attr "prefix" "maybe_vex")
21919 (set_attr "avx_partial_xmm_update" "false,false,true")
21920 (set_attr "mode" "<MODE>")
21921 (set (attr "preferred_for_speed")
21922 (cond [(match_test "TARGET_AVX")
21923 (symbol_ref "true")
21924 (eq_attr "alternative" "1,2")
21925 (symbol_ref "!TARGET_SSE_PARTIAL_REG_DEPENDENCY")
21926 ]
21927 (symbol_ref "true")))])
21928
;; Named expander for SFmode/DFmode square root.  It is available when x87
;; fancy math can do arithmetic in the mode, or when SSE math handles the
;; mode.  The preparation statement picks one of three strategies:
;;   * SFmode with SSE math, TARGET_RECIP_SQRT, not optimizing for size, and
;;     finite-only, non-trapping, unsafe math all enabled: hand the operands
;;     to ix86_emit_swsqrtsf (final argument 0) and stop;
;;   * SSE math does not handle the mode: extend the input to XFmode, take
;;     the XFmode square root, and truncate back with the no-op truncation
;;     pattern;
;;   * otherwise fall through and let the RTL template above be emitted.
21929 (define_expand "sqrt<mode>2"
21930 [(set (match_operand:MODEF 0 "register_operand")
21931 (sqrt:MODEF
21932 (match_operand:MODEF 1 "nonimmediate_operand")))]
21933 "(TARGET_USE_FANCY_MATH_387 && X87_ENABLE_ARITH (<MODE>mode))
21934 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
21935 {
21936 if (<MODE>mode == SFmode
21937 && TARGET_SSE && TARGET_SSE_MATH
21938 && TARGET_RECIP_SQRT
21939 && !optimize_function_for_size_p (cfun)
21940 && flag_finite_math_only && !flag_trapping_math
21941 && flag_unsafe_math_optimizations)
21942 {
21943 ix86_emit_swsqrtsf (operands[0], operands[1], SFmode, 0);
21944 DONE;
21945 }
21946
21947 if (!SSE_FLOAT_MODE_P (<MODE>mode) || !TARGET_SSE_MATH)
21948 {
21949 rtx xf_root = gen_reg_rtx (XFmode);
21950 rtx xf_arg = gen_reg_rtx (XFmode);
21951
21952 emit_insn (gen_extend<mode>xf2 (xf_arg, operands[1]));
21953 emit_insn (gen_sqrtxf2 (xf_root, xf_arg));
21954 emit_insn (gen_truncxf<mode>2_i387_noop_unspec (operands[0], xf_root));
21955 DONE;
21956 }
21957 })
21958
;; hypot for the MODEF modes, computed in XFmode as sqrt (x*x + y*y).
;; Enabled with TARGET_USE_FANCY_MATH_387 when <MODE> is not an SSE-math
;; mode (or TARGET_MIX_SSE_I387), and only with both flag_finite_math_only
;; and flag_unsafe_math_optimizations.
21959 (define_expand "hypot<mode>3"
21960 [(use (match_operand:MODEF 0 "register_operand"))
21961 (use (match_operand:MODEF 1 "general_operand"))
21962 (use (match_operand:MODEF 2 "general_operand"))]
21963 "TARGET_USE_FANCY_MATH_387
21964 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
21965 || TARGET_MIX_SSE_I387)
21966 && flag_finite_math_only
21967 && flag_unsafe_math_optimizations"
21968 {
21969 rtx op0 = gen_reg_rtx (XFmode);
21970 rtx op1 = gen_reg_rtx (XFmode);
21971 rtx op2 = gen_reg_rtx (XFmode);
21972
/* Widen both inputs to XFmode.  */
21973 emit_insn (gen_extend<mode>xf2 (op2, operands[2]));
21974 emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
21975
/* Square each input in place, sum the squares, then take the root.  */
21976 emit_insn (gen_mulxf3 (op1, op1, op1));
21977 emit_insn (gen_mulxf3 (op2, op2, op2));
21978 emit_insn (gen_addxf3 (op0, op2, op1));
21979 emit_insn (gen_sqrtxf2 (op0, op0));
21980
21981 emit_insn (gen_truncxf<mode>2 (operands[0], op0));
21982 DONE;
21983 })
21984
;; Copy the x87 status (FPSR_REG, wrapped in UNSPEC_FNSTSW) into an HImode
;; register restricted to the "a" constraint, using fnstsw.  Requires
;; TARGET_80387.
21985 (define_insn "x86_fnstsw_1"
21986 [(set (match_operand:HI 0 "register_operand" "=a")
21987 (unspec:HI [(reg:CCFP FPSR_REG)] UNSPEC_FNSTSW))]
21988 "TARGET_80387"
21989 "fnstsw\t%0"
21990 [(set_attr "length" "2")
21991 (set_attr "mode" "SI")
21992 (set_attr "unit" "i387")])
21993
;; x87 fprem.  Inputs are operands 2 and 3, tied to outputs 0 and 1
;; respectively.  The pattern models three results: UNSPEC_FPREM_F in
;; operand 0, UNSPEC_FPREM_U in operand 1, and an update of FPSR_REG
;; (UNSPEC_C2_FLAG).
21994 (define_insn "fpremxf4_i387"
21995 [(set (match_operand:XF 0 "register_operand" "=f")
21996 (unspec:XF [(match_operand:XF 2 "register_operand" "0")
21997 (match_operand:XF 3 "register_operand" "1")]
21998 UNSPEC_FPREM_F))
21999 (set (match_operand:XF 1 "register_operand" "=f")
22000 (unspec:XF [(match_dup 2) (match_dup 3)]
22001 UNSPEC_FPREM_U))
22002 (set (reg:CCFP FPSR_REG)
22003 (unspec:CCFP [(match_dup 2) (match_dup 3)]
22004 UNSPEC_C2_FLAG))]
22005 "TARGET_USE_FANCY_MATH_387"
22006 "fprem"
22007 [(set_attr "type" "fpspc")
22008 (set_attr "znver1_decode" "vector")
22009 (set_attr "mode" "XF")])
22010
;; fmod for XFmode.  Copies both inputs into fresh pseudos and emits a loop
;; made of a label, fpremxf4_i387 and a conditional jump back to the label;
;; the first fprem output is then moved to operand 0.
;; NOTE(review): ix86_emit_fp_unordered_jump presumably loops while fprem
;; reports an incomplete reduction through the C2 flag -- confirm there.
22011 (define_expand "fmodxf3"
22012 [(use (match_operand:XF 0 "register_operand"))
22013 (use (match_operand:XF 1 "general_operand"))
22014 (use (match_operand:XF 2 "general_operand"))]
22015 "TARGET_USE_FANCY_MATH_387"
22016 {
22017 rtx_code_label *label = gen_label_rtx ();
22018
22019 rtx op1 = gen_reg_rtx (XFmode);
22020 rtx op2 = gen_reg_rtx (XFmode);
22021
22022 emit_move_insn (op2, operands[2]);
22023 emit_move_insn (op1, operands[1]);
22024
/* Loop body: fprem rewrites op1/op2 in place on every iteration.  */
22025 emit_label (label);
22026 emit_insn (gen_fpremxf4_i387 (op1, op2, op1, op2));
22027 ix86_emit_fp_unordered_jump (label);
/* Record the single reference to the label.  */
22028 LABEL_NUSES (label) = 1;
22029
22030 emit_move_insn (operands[0], op1);
22031 DONE;
22032 })
22033
;; fmod for the MODEF modes.  Widens both inputs to XFmode, runs the same
;; label / fpremxf4_i387 / conditional-jump loop as fmodxf3, then narrows
;; the result to <MODE>.  The narrowing pattern depends on whether <MODE>
;; is handled strictly by SSE math (see the selection in the body).
22034 (define_expand "fmod<mode>3"
22035 [(use (match_operand:MODEF 0 "register_operand"))
22036 (use (match_operand:MODEF 1 "general_operand"))
22037 (use (match_operand:MODEF 2 "general_operand"))]
22038 "TARGET_USE_FANCY_MATH_387"
22039 {
/* Generator for the final XFmode -> <MODE> conversion, chosen below.  */
22040 rtx (*gen_truncxf) (rtx, rtx);
22041
22042 rtx_code_label *label = gen_label_rtx ();
22043
22044 rtx op1 = gen_reg_rtx (XFmode);
22045 rtx op2 = gen_reg_rtx (XFmode);
22046
22047 emit_insn (gen_extend<mode>xf2 (op2, operands[2]));
22048 emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
22049
/* Loop body: fprem rewrites op1/op2 in place on every iteration.  */
22050 emit_label (label);
22051 emit_insn (gen_fpremxf4_i387 (op1, op2, op1, op2));
22052 ix86_emit_fp_unordered_jump (label);
/* Record the single reference to the label.  */
22053 LABEL_NUSES (label) = 1;
22054
22055 /* Truncate the result properly for strict SSE math. */
22056 if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
22057 && !TARGET_MIX_SSE_I387)
22058 gen_truncxf = gen_truncxf<mode>2;
22059 else
22060 gen_truncxf = gen_truncxf<mode>2_i387_noop_unspec;
22061
22062 emit_insn (gen_truncxf (operands[0], op1));
22063 DONE;
22064 })
22065
;; x87 fprem1.  Same operand layout as fpremxf4_i387: inputs 2 and 3 are
;; tied to outputs 0 and 1.  Models UNSPEC_FPREM1_F in operand 0,
;; UNSPEC_FPREM1_U in operand 1, and an update of FPSR_REG (UNSPEC_C2_FLAG).
22066 (define_insn "fprem1xf4_i387"
22067 [(set (match_operand:XF 0 "register_operand" "=f")
22068 (unspec:XF [(match_operand:XF 2 "register_operand" "0")
22069 (match_operand:XF 3 "register_operand" "1")]
22070 UNSPEC_FPREM1_F))
22071 (set (match_operand:XF 1 "register_operand" "=f")
22072 (unspec:XF [(match_dup 2) (match_dup 3)]
22073 UNSPEC_FPREM1_U))
22074 (set (reg:CCFP FPSR_REG)
22075 (unspec:CCFP [(match_dup 2) (match_dup 3)]
22076 UNSPEC_C2_FLAG))]
22077 "TARGET_USE_FANCY_MATH_387"
22078 "fprem1"
22079 [(set_attr "type" "fpspc")
22080 (set_attr "znver1_decode" "vector")
22081 (set_attr "mode" "XF")])
22082
;; remainder for XFmode.  Same structure as fmodxf3, but the loop body uses
;; fprem1xf4_i387: copy the inputs into fresh pseudos, loop over
;; label / fprem1 / conditional jump, then move the result to operand 0.
;; NOTE(review): ix86_emit_fp_unordered_jump presumably loops while fprem1
;; reports an incomplete reduction through the C2 flag -- confirm there.
22083 (define_expand "remainderxf3"
22084 [(use (match_operand:XF 0 "register_operand"))
22085 (use (match_operand:XF 1 "general_operand"))
22086 (use (match_operand:XF 2 "general_operand"))]
22087 "TARGET_USE_FANCY_MATH_387"
22088 {
22089 rtx_code_label *label = gen_label_rtx ();
22090
22091 rtx op1 = gen_reg_rtx (XFmode);
22092 rtx op2 = gen_reg_rtx (XFmode);
22093
22094 emit_move_insn (op2, operands[2]);
22095 emit_move_insn (op1, operands[1]);
22096
/* Loop body: fprem1 rewrites op1/op2 in place on every iteration.  */
22097 emit_label (label);
22098 emit_insn (gen_fprem1xf4_i387 (op1, op2, op1, op2));
22099 ix86_emit_fp_unordered_jump (label);
/* Record the single reference to the label.  */
22100 LABEL_NUSES (label) = 1;
22101
22102 emit_move_insn (operands[0], op1);
22103 DONE;
22104 })
22105
;; remainder for the MODEF modes.  Widens both inputs to XFmode, runs the
;; label / fprem1xf4_i387 / conditional-jump loop, then narrows the result
;; to <MODE> with a truncation pattern chosen by the SSE-math settings.
22106 (define_expand "remainder<mode>3"
22107 [(use (match_operand:MODEF 0 "register_operand"))
22108 (use (match_operand:MODEF 1 "general_operand"))
22109 (use (match_operand:MODEF 2 "general_operand"))]
22110 "TARGET_USE_FANCY_MATH_387"
22111 {
/* Generator for the final XFmode -> <MODE> conversion, chosen below.  */
22112 rtx (*gen_truncxf) (rtx, rtx);
22113
22114 rtx_code_label *label = gen_label_rtx ();
22115
22116 rtx op1 = gen_reg_rtx (XFmode);
22117 rtx op2 = gen_reg_rtx (XFmode);
22118
22119 emit_insn (gen_extend<mode>xf2 (op2, operands[2]));
22120 emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
22121
22122 emit_label (label);
22123
/* Loop body: fprem1 rewrites op1/op2 in place on every iteration.  */
22124 emit_insn (gen_fprem1xf4_i387 (op1, op2, op1, op2));
22125 ix86_emit_fp_unordered_jump (label);
/* Record the single reference to the label.  */
22126 LABEL_NUSES (label) = 1;
22127
22128 /* Truncate the result properly for strict SSE math. */
22129 if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
22130 && !TARGET_MIX_SSE_I387)
22131 gen_truncxf = gen_truncxf<mode>2;
22132 else
22133 gen_truncxf = gen_truncxf<mode>2_i387_noop_unspec;
22134
22135 emit_insn (gen_truncxf (operands[0], op1));
22136 DONE;
22137 })
22138
;; Int iterator over the two unspecs that share the sin/cos patterns below.
22139 (define_int_iterator SINCOS
22140 [UNSPEC_SIN
22141 UNSPEC_COS])
22142
;; Maps each SINCOS unspec to the lower-case name substituted for <sincos>
;; in pattern names and in the "f<sincos>" output template.
22143 (define_int_attr sincos
22144 [(UNSPEC_SIN "sin")
22145 (UNSPEC_COS "cos")])
22146
;; sinxf2 / cosxf2: x87 fsin / fcos on an XFmode register, with the input
;; tied to the output.  Requires TARGET_USE_FANCY_MATH_387 and
;; flag_unsafe_math_optimizations.
22147 (define_insn "<sincos>xf2"
22148 [(set (match_operand:XF 0 "register_operand" "=f")
22149 (unspec:XF [(match_operand:XF 1 "register_operand" "0")]
22150 SINCOS))]
22151 "TARGET_USE_FANCY_MATH_387
22152 && flag_unsafe_math_optimizations"
22153 "f<sincos>"
22154 [(set_attr "type" "fpspc")
22155 (set_attr "znver1_decode" "vector")
22156 (set_attr "mode" "XF")])
22157
;; sin / cos for the MODEF modes: widen operand 1 to XFmode, apply the
;; XFmode <sincos>xf2 insn, then narrow with truncxf<mode>2.  Enabled with
;; TARGET_USE_FANCY_MATH_387 and flag_unsafe_math_optimizations when <MODE>
;; is not an SSE-math mode (or TARGET_MIX_SSE_I387).
22158 (define_expand "<sincos><mode>2"
22159 [(set (match_operand:MODEF 0 "register_operand")
22160 (unspec:MODEF [(match_operand:MODEF 1 "general_operand")]
22161 SINCOS))]
22162 "TARGET_USE_FANCY_MATH_387
22163 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
22164 || TARGET_MIX_SSE_I387)
22165 && flag_unsafe_math_optimizations"
22166 {
22167 rtx op0 = gen_reg_rtx (XFmode);
22168 rtx op1 = gen_reg_rtx (XFmode);
22169
22170 emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
22171 emit_insn (gen_<sincos>xf2 (op0, op1));
22172 emit_insn (gen_truncxf<mode>2 (operands[0], op0));
22173 DONE;
22174 })
22175
;; x87 fsincos.  Operand 2 is the input, tied to operand 0.  Operand 0
;; receives UNSPEC_SINCOS_COS and operand 1 receives UNSPEC_SINCOS_SIN.
22176 (define_insn "sincosxf3"
22177 [(set (match_operand:XF 0 "register_operand" "=f")
22178 (unspec:XF [(match_operand:XF 2 "register_operand" "0")]
22179 UNSPEC_SINCOS_COS))
22180 (set (match_operand:XF 1 "register_operand" "=f")
22181 (unspec:XF [(match_dup 2)] UNSPEC_SINCOS_SIN))]
22182 "TARGET_USE_FANCY_MATH_387
22183 && flag_unsafe_math_optimizations"
22184 "fsincos"
22185 [(set_attr "type" "fpspc")
22186 (set_attr "znver1_decode" "vector")
22187 (set_attr "mode" "XF")])
22188
;; sincos for the MODEF modes.  Operand 2 is the input; it is widened to
;; XFmode and fed to sincosxf3, whose two XFmode results are narrowed into
;; operand 0 (the UNSPEC_SINCOS_COS result) and operand 1 (the
;; UNSPEC_SINCOS_SIN result).
22189 (define_expand "sincos<mode>3"
22190 [(use (match_operand:MODEF 0 "register_operand"))
22191 (use (match_operand:MODEF 1 "register_operand"))
22192 (use (match_operand:MODEF 2 "general_operand"))]
22193 "TARGET_USE_FANCY_MATH_387
22194 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
22195 || TARGET_MIX_SSE_I387)
22196 && flag_unsafe_math_optimizations"
22197 {
22198 rtx op0 = gen_reg_rtx (XFmode);
22199 rtx op1 = gen_reg_rtx (XFmode);
22200 rtx op2 = gen_reg_rtx (XFmode);
22201
22202 emit_insn (gen_extend<mode>xf2 (op2, operands[2]));
22203 emit_insn (gen_sincosxf3 (op0, op1, op2));
22204 emit_insn (gen_truncxf<mode>2 (operands[0], op0));
22205 emit_insn (gen_truncxf<mode>2 (operands[1], op1));
22206 DONE;
22207 })
22208
;; x87 fptan.  Operand 2 is the XFmode input, tied to operand 0's register.
;; The pattern has two outputs: SFmode operand 0 is set to operand 3, which
;; must satisfy const1_operand, and XFmode operand 1 receives UNSPEC_TAN of
;; the input.
22209 (define_insn "fptanxf4_i387"
22210 [(set (match_operand:SF 0 "register_operand" "=f")
22211 (match_operand:SF 3 "const1_operand"))
22212 (set (match_operand:XF 1 "register_operand" "=f")
22213 (unspec:XF [(match_operand:XF 2 "register_operand" "0")]
22214 UNSPEC_TAN))]
22215 "TARGET_USE_FANCY_MATH_387
22216 && flag_unsafe_math_optimizations"
22217 "fptan"
22218 [(set_attr "type" "fpspc")
22219 (set_attr "znver1_decode" "vector")
22220 (set_attr "mode" "XF")])
22221
;; tan for XFmode via fptanxf4_i387.  The insn's extra SFmode output is
;; directed to a fresh pseudo that this expander does not use afterwards.
22222 (define_expand "tanxf2"
22223 [(use (match_operand:XF 0 "register_operand"))
22224 (use (match_operand:XF 1 "register_operand"))]
22225 "TARGET_USE_FANCY_MATH_387
22226 && flag_unsafe_math_optimizations"
22227 {
/* Receives the constant-1 output of fptanxf4_i387.  */
22228 rtx one = gen_reg_rtx (SFmode);
22229 emit_insn (gen_fptanxf4_i387 (one, operands[0], operands[1],
22230 CONST1_RTX (SFmode)));
22231 DONE;
22232 })
22233
;; tan for the MODEF modes: widen operand 1 to XFmode, expand tanxf2, then
;; narrow with truncxf<mode>2.  Enabled with TARGET_USE_FANCY_MATH_387 and
;; flag_unsafe_math_optimizations when <MODE> is not an SSE-math mode (or
;; TARGET_MIX_SSE_I387).
22234 (define_expand "tan<mode>2"
22235 [(use (match_operand:MODEF 0 "register_operand"))
22236 (use (match_operand:MODEF 1 "general_operand"))]
22237 "TARGET_USE_FANCY_MATH_387
22238 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
22239 || TARGET_MIX_SSE_I387)
22240 && flag_unsafe_math_optimizations"
22241 {
22242 rtx op0 = gen_reg_rtx (XFmode);
22243 rtx op1 = gen_reg_rtx (XFmode);
22244
22245 emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
22246 emit_insn (gen_tanxf2 (op0, op1));
22247 emit_insn (gen_truncxf<mode>2 (operands[0], op0));
22248 DONE;
22249 })
22250
;; x87 fpatan.  Operand 2 is tied to the output register, operand 1 lives in
;; another x87 register.  The scratch (constraint "=1") marks operand 1's
;; register as clobbered by the instruction.
22251 (define_insn "atan2xf3"
22252 [(set (match_operand:XF 0 "register_operand" "=f")
22253 (unspec:XF [(match_operand:XF 2 "register_operand" "0")
22254 (match_operand:XF 1 "register_operand" "f")]
22255 UNSPEC_FPATAN))
22256 (clobber (match_scratch:XF 3 "=1"))]
22257 "TARGET_USE_FANCY_MATH_387
22258 && flag_unsafe_math_optimizations"
22259 "fpatan"
22260 [(set_attr "type" "fpspc")
22261 (set_attr "znver1_decode" "vector")
22262 (set_attr "mode" "XF")])
22263
;; atan2 for the MODEF modes: widen both inputs to XFmode, emit atan2xf3,
;; then narrow the result with truncxf<mode>2.  Enabled with
;; TARGET_USE_FANCY_MATH_387 and flag_unsafe_math_optimizations when <MODE>
;; is not an SSE-math mode (or TARGET_MIX_SSE_I387).
22264 (define_expand "atan2<mode>3"
22265 [(use (match_operand:MODEF 0 "register_operand"))
22266 (use (match_operand:MODEF 1 "general_operand"))
22267 (use (match_operand:MODEF 2 "general_operand"))]
22268 "TARGET_USE_FANCY_MATH_387
22269 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
22270 || TARGET_MIX_SSE_I387)
22271 && flag_unsafe_math_optimizations"
22272 {
22273 rtx op0 = gen_reg_rtx (XFmode);
22274 rtx op1 = gen_reg_rtx (XFmode);
22275 rtx op2 = gen_reg_rtx (XFmode);
22276
22277 emit_insn (gen_extend<mode>xf2 (op2, operands[2]));
22278 emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
22279
22280 emit_insn (gen_atan2xf3 (op0, op1, op2));
22281 emit_insn (gen_truncxf<mode>2 (operands[0], op0));
22282 DONE;
22283 })
22284
;; atan for XFmode, expressed as the UNSPEC_FPATAN form used by atan2xf3
;; with the first unspec element (operand 2) forced to a register holding
;; the XFmode constant 1.
22285 (define_expand "atanxf2"
22286 [(parallel [(set (match_operand:XF 0 "register_operand")
22287 (unspec:XF [(match_dup 2)
22288 (match_operand:XF 1 "register_operand")]
22289 UNSPEC_FPATAN))
22290 (clobber (scratch:XF))])]
22291 "TARGET_USE_FANCY_MATH_387
22292 && flag_unsafe_math_optimizations"
22293 "operands[2] = force_reg (XFmode, CONST1_RTX (XFmode));")
22294
;; atan for the MODEF modes: widen operand 1 to XFmode, expand atanxf2, then
;; narrow with truncxf<mode>2.  Enabled with TARGET_USE_FANCY_MATH_387 and
;; flag_unsafe_math_optimizations when <MODE> is not an SSE-math mode (or
;; TARGET_MIX_SSE_I387).
22295 (define_expand "atan<mode>2"
22296 [(use (match_operand:MODEF 0 "register_operand"))
22297 (use (match_operand:MODEF 1 "general_operand"))]
22298 "TARGET_USE_FANCY_MATH_387
22299 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
22300 || TARGET_MIX_SSE_I387)
22301 && flag_unsafe_math_optimizations"
22302 {
22303 rtx op0 = gen_reg_rtx (XFmode);
22304 rtx op1 = gen_reg_rtx (XFmode);
22305
22306 emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
22307 emit_insn (gen_atanxf2 (op0, op1));
22308 emit_insn (gen_truncxf<mode>2 (operands[0], op0));
22309 DONE;
22310 })
22311
;; asin for XFmode, built from four steps with x = operand 1:
;;   op2 = x * x;  op4 = op3 - op2 (op3 holds 1.0);  op5 = sqrt (op4);
;;   result = UNSPEC_FPATAN [op5, x].
;; NOTE(review): with the operand order of atan2xf3 this reads as
;; atan (x / sqrt (1 - x*x)) -- confirm against the fpatan definition.
22312 (define_expand "asinxf2"
22313 [(set (match_dup 2)
22314 (mult:XF (match_operand:XF 1 "register_operand")
22315 (match_dup 1)))
22316 (set (match_dup 4) (minus:XF (match_dup 3) (match_dup 2)))
22317 (set (match_dup 5) (sqrt:XF (match_dup 4)))
22318 (parallel [(set (match_operand:XF 0 "register_operand")
22319 (unspec:XF [(match_dup 5) (match_dup 1)]
22320 UNSPEC_FPATAN))
22321 (clobber (scratch:XF))])]
22322 "TARGET_USE_FANCY_MATH_387
22323 && flag_unsafe_math_optimizations"
22324 {
22325 int i;
22326
/* Fresh XFmode pseudos for the temporaries, operands 2..5.  */
22327 for (i = 2; i < 6; i++)
22328 operands[i] = gen_reg_rtx (XFmode);
22329
/* operands[3] is the constant 1.0 used by the subtraction.  */
22330 emit_move_insn (operands[3], CONST1_RTX (XFmode));
22331 })
22332
;; asin for the MODEF modes: widen operand 1 to XFmode, expand asinxf2, then
;; narrow with truncxf<mode>2.  Enabled with TARGET_USE_FANCY_MATH_387 and
;; flag_unsafe_math_optimizations when <MODE> is not an SSE-math mode (or
;; TARGET_MIX_SSE_I387).
22333 (define_expand "asin<mode>2"
22334 [(use (match_operand:MODEF 0 "register_operand"))
22335 (use (match_operand:MODEF 1 "general_operand"))]
22336 "TARGET_USE_FANCY_MATH_387
22337 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
22338 || TARGET_MIX_SSE_I387)
22339 && flag_unsafe_math_optimizations"
22340 {
22341 rtx op0 = gen_reg_rtx (XFmode);
22342 rtx op1 = gen_reg_rtx (XFmode);
22343
22344 emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
22345 emit_insn (gen_asinxf2 (op0, op1));
22346 emit_insn (gen_truncxf<mode>2 (operands[0], op0));
22347 DONE;
22348 })
22349
;; acos for XFmode.  Same intermediate steps as asinxf2 with x = operand 1:
;;   op2 = x * x;  op4 = op3 - op2 (op3 holds 1.0);  op5 = sqrt (op4);
;; but the final UNSPEC_FPATAN takes its arguments swapped: [x, op5].
;; NOTE(review): with the operand order of atan2xf3 this reads as
;; atan (sqrt (1 - x*x) / x) -- confirm against the fpatan definition.
22350 (define_expand "acosxf2"
22351 [(set (match_dup 2)
22352 (mult:XF (match_operand:XF 1 "register_operand")
22353 (match_dup 1)))
22354 (set (match_dup 4) (minus:XF (match_dup 3) (match_dup 2)))
22355 (set (match_dup 5) (sqrt:XF (match_dup 4)))
22356 (parallel [(set (match_operand:XF 0 "register_operand")
22357 (unspec:XF [(match_dup 1) (match_dup 5)]
22358 UNSPEC_FPATAN))
22359 (clobber (scratch:XF))])]
22360 "TARGET_USE_FANCY_MATH_387
22361 && flag_unsafe_math_optimizations"
22362 {
22363 int i;
22364
/* Fresh XFmode pseudos for the temporaries, operands 2..5.  */
22365 for (i = 2; i < 6; i++)
22366 operands[i] = gen_reg_rtx (XFmode);
22367
/* operands[3] is the constant 1.0 used by the subtraction.  */
22368 emit_move_insn (operands[3], CONST1_RTX (XFmode));
22369 })
22370
;; acos for the MODEF modes: widen operand 1 to XFmode, expand acosxf2, then
;; narrow with truncxf<mode>2.  Enabled with TARGET_USE_FANCY_MATH_387 and
;; flag_unsafe_math_optimizations when <MODE> is not an SSE-math mode (or
;; TARGET_MIX_SSE_I387).
22371 (define_expand "acos<mode>2"
22372 [(use (match_operand:MODEF 0 "register_operand"))
22373 (use (match_operand:MODEF 1 "general_operand"))]
22374 "TARGET_USE_FANCY_MATH_387
22375 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
22376 || TARGET_MIX_SSE_I387)
22377 && flag_unsafe_math_optimizations"
22378 {
22379 rtx op0 = gen_reg_rtx (XFmode);
22380 rtx op1 = gen_reg_rtx (XFmode);
22381
22382 emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
22383 emit_insn (gen_acosxf2 (op0, op1));
22384 emit_insn (gen_truncxf<mode>2 (operands[0], op0));
22385 DONE;
22386 })
22387
;; sinh for XFmode.  The whole sequence is produced by ix86_emit_i387_sinh.
;; Requires flag_finite_math_only in addition to
;; flag_unsafe_math_optimizations.
22388 (define_expand "sinhxf2"
22389 [(use (match_operand:XF 0 "register_operand"))
22390 (use (match_operand:XF 1 "register_operand"))]
22391 "TARGET_USE_FANCY_MATH_387
22392 && flag_finite_math_only
22393 && flag_unsafe_math_optimizations"
22394 {
22395 ix86_emit_i387_sinh (operands[0], operands[1]);
22396 DONE;
22397 })
22398
;; sinh for the MODEF modes: widen operand 1 to XFmode, expand sinhxf2, then
;; narrow with truncxf<mode>2.  Carries the same flag_finite_math_only
;; requirement as sinhxf2, and is enabled only when <MODE> is not an
;; SSE-math mode (or TARGET_MIX_SSE_I387).
22399 (define_expand "sinh<mode>2"
22400 [(use (match_operand:MODEF 0 "register_operand"))
22401 (use (match_operand:MODEF 1 "general_operand"))]
22402 "TARGET_USE_FANCY_MATH_387
22403 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
22404 || TARGET_MIX_SSE_I387)
22405 && flag_finite_math_only
22406 && flag_unsafe_math_optimizations"
22407 {
22408 rtx op0 = gen_reg_rtx (XFmode);
22409 rtx op1 = gen_reg_rtx (XFmode);
22410
22411 emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
22412 emit_insn (gen_sinhxf2 (op0, op1));
22413 emit_insn (gen_truncxf<mode>2 (operands[0], op0));
22414 DONE;
22415 })
22416
;; cosh for XFmode.  The whole sequence is produced by ix86_emit_i387_cosh.
22417 (define_expand "coshxf2"
22418 [(use (match_operand:XF 0 "register_operand"))
22419 (use (match_operand:XF 1 "register_operand"))]
22420 "TARGET_USE_FANCY_MATH_387
22421 && flag_unsafe_math_optimizations"
22422 {
22423 ix86_emit_i387_cosh (operands[0], operands[1]);
22424 DONE;
22425 })
22426
;; cosh for the MODEF modes: widen operand 1 to XFmode, expand coshxf2, then
;; narrow with truncxf<mode>2.  Enabled with TARGET_USE_FANCY_MATH_387 and
;; flag_unsafe_math_optimizations when <MODE> is not an SSE-math mode (or
;; TARGET_MIX_SSE_I387).
22427 (define_expand "cosh<mode>2"
22428 [(use (match_operand:MODEF 0 "register_operand"))
22429 (use (match_operand:MODEF 1 "general_operand"))]
22430 "TARGET_USE_FANCY_MATH_387
22431 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
22432 || TARGET_MIX_SSE_I387)
22433 && flag_unsafe_math_optimizations"
22434 {
22435 rtx op0 = gen_reg_rtx (XFmode);
22436 rtx op1 = gen_reg_rtx (XFmode);
22437
22438 emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
22439 emit_insn (gen_coshxf2 (op0, op1));
22440 emit_insn (gen_truncxf<mode>2 (operands[0], op0));
22441 DONE;
22442 })
22443
;; tanh for XFmode.  The whole sequence is produced by ix86_emit_i387_tanh.
22444 (define_expand "tanhxf2"
22445 [(use (match_operand:XF 0 "register_operand"))
22446 (use (match_operand:XF 1 "register_operand"))]
22447 "TARGET_USE_FANCY_MATH_387
22448 && flag_unsafe_math_optimizations"
22449 {
22450 ix86_emit_i387_tanh (operands[0], operands[1]);
22451 DONE;
22452 })
22453
;; tanh for the MODEF modes: widen operand 1 to XFmode, expand tanhxf2, then
;; narrow with truncxf<mode>2.  Enabled with TARGET_USE_FANCY_MATH_387 and
;; flag_unsafe_math_optimizations when <MODE> is not an SSE-math mode (or
;; TARGET_MIX_SSE_I387).
22454 (define_expand "tanh<mode>2"
22455 [(use (match_operand:MODEF 0 "register_operand"))
22456 (use (match_operand:MODEF 1 "general_operand"))]
22457 "TARGET_USE_FANCY_MATH_387
22458 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
22459 || TARGET_MIX_SSE_I387)
22460 && flag_unsafe_math_optimizations"
22461 {
22462 rtx op0 = gen_reg_rtx (XFmode);
22463 rtx op1 = gen_reg_rtx (XFmode);
22464
22465 emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
22466 emit_insn (gen_tanhxf2 (op0, op1));
22467 emit_insn (gen_truncxf<mode>2 (operands[0], op0));
22468 DONE;
22469 })
22470
;; asinh for XFmode.  The whole sequence is produced by
;; ix86_emit_i387_asinh.  Requires flag_finite_math_only in addition to
;; flag_unsafe_math_optimizations.
22471 (define_expand "asinhxf2"
22472 [(use (match_operand:XF 0 "register_operand"))
22473 (use (match_operand:XF 1 "register_operand"))]
22474 "TARGET_USE_FANCY_MATH_387
22475 && flag_finite_math_only
22476 && flag_unsafe_math_optimizations"
22477 {
22478 ix86_emit_i387_asinh (operands[0], operands[1]);
22479 DONE;
22480 })
22481
;; asinh for the MODEF modes: widen operand 1 to XFmode, expand asinhxf2,
;; then narrow with truncxf<mode>2.  Carries the same flag_finite_math_only
;; requirement as asinhxf2, and is enabled only when <MODE> is not an
;; SSE-math mode (or TARGET_MIX_SSE_I387).
22482 (define_expand "asinh<mode>2"
22483 [(use (match_operand:MODEF 0 "register_operand"))
22484 (use (match_operand:MODEF 1 "general_operand"))]
22485 "TARGET_USE_FANCY_MATH_387
22486 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
22487 || TARGET_MIX_SSE_I387)
22488 && flag_finite_math_only
22489 && flag_unsafe_math_optimizations"
22490 {
22491 rtx op0 = gen_reg_rtx (XFmode);
22492 rtx op1 = gen_reg_rtx (XFmode);
22493
22494 emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
22495 emit_insn (gen_asinhxf2 (op0, op1));
22496 emit_insn (gen_truncxf<mode>2 (operands[0], op0));
22497 DONE;
22498 })
22499
;; acosh for XFmode.  The whole sequence is produced by
;; ix86_emit_i387_acosh.
22500 (define_expand "acoshxf2"
22501 [(use (match_operand:XF 0 "register_operand"))
22502 (use (match_operand:XF 1 "register_operand"))]
22503 "TARGET_USE_FANCY_MATH_387
22504 && flag_unsafe_math_optimizations"
22505 {
22506 ix86_emit_i387_acosh (operands[0], operands[1]);
22507 DONE;
22508 })
22509
;; acosh for the MODEF modes: widen operand 1 to XFmode, expand acoshxf2,
;; then narrow with truncxf<mode>2.  Enabled with TARGET_USE_FANCY_MATH_387
;; and flag_unsafe_math_optimizations when <MODE> is not an SSE-math mode
;; (or TARGET_MIX_SSE_I387).
22510 (define_expand "acosh<mode>2"
22511 [(use (match_operand:MODEF 0 "register_operand"))
22512 (use (match_operand:MODEF 1 "general_operand"))]
22513 "TARGET_USE_FANCY_MATH_387
22514 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
22515 || TARGET_MIX_SSE_I387)
22516 && flag_unsafe_math_optimizations"
22517 {
22518 rtx op0 = gen_reg_rtx (XFmode);
22519 rtx op1 = gen_reg_rtx (XFmode);
22520
22521 emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
22522 emit_insn (gen_acoshxf2 (op0, op1));
22523 emit_insn (gen_truncxf<mode>2 (operands[0], op0));
22524 DONE;
22525 })
22526
;; atanh for XFmode.  The whole sequence is produced by
;; ix86_emit_i387_atanh.
22527 (define_expand "atanhxf2"
22528 [(use (match_operand:XF 0 "register_operand"))
22529 (use (match_operand:XF 1 "register_operand"))]
22530 "TARGET_USE_FANCY_MATH_387
22531 && flag_unsafe_math_optimizations"
22532 {
22533 ix86_emit_i387_atanh (operands[0], operands[1]);
22534 DONE;
22535 })
22536
;; atanh for the MODEF modes: widen operand 1 to XFmode, expand atanhxf2,
;; then narrow with truncxf<mode>2.  Enabled with TARGET_USE_FANCY_MATH_387
;; and flag_unsafe_math_optimizations when <MODE> is not an SSE-math mode
;; (or TARGET_MIX_SSE_I387).
22537 (define_expand "atanh<mode>2"
22538 [(use (match_operand:MODEF 0 "register_operand"))
22539 (use (match_operand:MODEF 1 "general_operand"))]
22540 "TARGET_USE_FANCY_MATH_387
22541 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
22542 || TARGET_MIX_SSE_I387)
22543 && flag_unsafe_math_optimizations"
22544 {
22545 rtx op0 = gen_reg_rtx (XFmode);
22546 rtx op1 = gen_reg_rtx (XFmode);
22547
22548 emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
22549 emit_insn (gen_atanhxf2 (op0, op1));
22550 emit_insn (gen_truncxf<mode>2 (operands[0], op0));
22551 DONE;
22552 })
22553
;; x87 fyl2x.  Operand 1 is tied to the output register, operand 2 lives in
;; another x87 register.  The scratch (constraint "=2") marks operand 2's
;; register as clobbered by the instruction.
22554 (define_insn "fyl2xxf3_i387"
22555 [(set (match_operand:XF 0 "register_operand" "=f")
22556 (unspec:XF [(match_operand:XF 1 "register_operand" "0")
22557 (match_operand:XF 2 "register_operand" "f")]
22558 UNSPEC_FYL2X))
22559 (clobber (match_scratch:XF 3 "=2"))]
22560 "TARGET_USE_FANCY_MATH_387
22561 && flag_unsafe_math_optimizations"
22562 "fyl2x"
22563 [(set_attr "type" "fpspc")
22564 (set_attr "znver1_decode" "vector")
22565 (set_attr "mode" "XF")])
22566
;; Natural log for XFmode as an UNSPEC_FYL2X of operand 1 and a register
;; loaded with x87 standard constant 4 (the fldln2 constant).
22567 (define_expand "logxf2"
22568 [(parallel [(set (match_operand:XF 0 "register_operand")
22569 (unspec:XF [(match_operand:XF 1 "register_operand")
22570 (match_dup 2)] UNSPEC_FYL2X))
22571 (clobber (scratch:XF))])]
22572 "TARGET_USE_FANCY_MATH_387
22573 && flag_unsafe_math_optimizations"
22574 {
22575 operands[2]
22576 = force_reg (XFmode, standard_80387_constant_rtx (4)); /* fldln2 */
22577 })
22578
;; log for the MODEF modes: widen operand 1 to XFmode, expand logxf2, then
;; narrow with truncxf<mode>2.  Enabled with TARGET_USE_FANCY_MATH_387 and
;; flag_unsafe_math_optimizations when <MODE> is not an SSE-math mode (or
;; TARGET_MIX_SSE_I387).
22579 (define_expand "log<mode>2"
22580 [(use (match_operand:MODEF 0 "register_operand"))
22581 (use (match_operand:MODEF 1 "general_operand"))]
22582 "TARGET_USE_FANCY_MATH_387
22583 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
22584 || TARGET_MIX_SSE_I387)
22585 && flag_unsafe_math_optimizations"
22586 {
22587 rtx op0 = gen_reg_rtx (XFmode);
22588 rtx op1 = gen_reg_rtx (XFmode);
22589
22590 emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
22591 emit_insn (gen_logxf2 (op0, op1));
22592 emit_insn (gen_truncxf<mode>2 (operands[0], op0));
22593 DONE;
22594 })
22595
;; Base-10 log for XFmode as an UNSPEC_FYL2X of operand 1 and a register
;; loaded with x87 standard constant 3 (the fldlg2 constant).
22596 (define_expand "log10xf2"
22597 [(parallel [(set (match_operand:XF 0 "register_operand")
22598 (unspec:XF [(match_operand:XF 1 "register_operand")
22599 (match_dup 2)] UNSPEC_FYL2X))
22600 (clobber (scratch:XF))])]
22601 "TARGET_USE_FANCY_MATH_387
22602 && flag_unsafe_math_optimizations"
22603 {
22604 operands[2]
22605 = force_reg (XFmode, standard_80387_constant_rtx (3)); /* fldlg2 */
22606 })
22607
;; log10 for the MODEF modes: widen operand 1 to XFmode, expand log10xf2,
;; then narrow with truncxf<mode>2.  Enabled with TARGET_USE_FANCY_MATH_387
;; and flag_unsafe_math_optimizations when <MODE> is not an SSE-math mode
;; (or TARGET_MIX_SSE_I387).
22608 (define_expand "log10<mode>2"
22609 [(use (match_operand:MODEF 0 "register_operand"))
22610 (use (match_operand:MODEF 1 "general_operand"))]
22611 "TARGET_USE_FANCY_MATH_387
22612 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
22613 || TARGET_MIX_SSE_I387)
22614 && flag_unsafe_math_optimizations"
22615 {
22616 rtx op0 = gen_reg_rtx (XFmode);
22617 rtx op1 = gen_reg_rtx (XFmode);
22618
22619 emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
22620 emit_insn (gen_log10xf2 (op0, op1));
22621 emit_insn (gen_truncxf<mode>2 (operands[0], op0));
22622 DONE;
22623 })
22624
;; Base-2 log for XFmode as an UNSPEC_FYL2X of operand 1 and a register
;; holding the XFmode constant 1.
22625 (define_expand "log2xf2"
22626 [(parallel [(set (match_operand:XF 0 "register_operand")
22627 (unspec:XF [(match_operand:XF 1 "register_operand")
22628 (match_dup 2)] UNSPEC_FYL2X))
22629 (clobber (scratch:XF))])]
22630 "TARGET_USE_FANCY_MATH_387
22631 && flag_unsafe_math_optimizations"
22632 "operands[2] = force_reg (XFmode, CONST1_RTX (XFmode));")
22633
;; log2 for the MODEF modes: widen operand 1 to XFmode, expand log2xf2, then
;; narrow with truncxf<mode>2.  Enabled with TARGET_USE_FANCY_MATH_387 and
;; flag_unsafe_math_optimizations when <MODE> is not an SSE-math mode (or
;; TARGET_MIX_SSE_I387).
22634 (define_expand "log2<mode>2"
22635 [(use (match_operand:MODEF 0 "register_operand"))
22636 (use (match_operand:MODEF 1 "general_operand"))]
22637 "TARGET_USE_FANCY_MATH_387
22638 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
22639 || TARGET_MIX_SSE_I387)
22640 && flag_unsafe_math_optimizations"
22641 {
22642 rtx op0 = gen_reg_rtx (XFmode);
22643 rtx op1 = gen_reg_rtx (XFmode);
22644
22645 emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
22646 emit_insn (gen_log2xf2 (op0, op1));
22647 emit_insn (gen_truncxf<mode>2 (operands[0], op0));
22648 DONE;
22649 })
22650
;; x87 fyl2xp1.  Same operand layout as fyl2xxf3_i387: operand 1 is tied to
;; the output, operand 2 is in another x87 register that the scratch
;; (constraint "=2") marks as clobbered.
22651 (define_insn "fyl2xp1xf3_i387"
22652 [(set (match_operand:XF 0 "register_operand" "=f")
22653 (unspec:XF [(match_operand:XF 1 "register_operand" "0")
22654 (match_operand:XF 2 "register_operand" "f")]
22655 UNSPEC_FYL2XP1))
22656 (clobber (match_scratch:XF 3 "=2"))]
22657 "TARGET_USE_FANCY_MATH_387
22658 && flag_unsafe_math_optimizations"
22659 "fyl2xp1"
22660 [(set_attr "type" "fpspc")
22661 (set_attr "znver1_decode" "vector")
22662 (set_attr "mode" "XF")])
22663
;; log1p for XFmode.  The whole sequence is produced by
;; ix86_emit_i387_log1p.
22664 (define_expand "log1pxf2"
22665 [(use (match_operand:XF 0 "register_operand"))
22666 (use (match_operand:XF 1 "register_operand"))]
22667 "TARGET_USE_FANCY_MATH_387
22668 && flag_unsafe_math_optimizations"
22669 {
22670 ix86_emit_i387_log1p (operands[0], operands[1]);
22671 DONE;
22672 })
22673
;; log1p for the MODEF modes: widen operand 1 to XFmode, expand log1pxf2,
;; then narrow with truncxf<mode>2.  Enabled with TARGET_USE_FANCY_MATH_387
;; and flag_unsafe_math_optimizations when <MODE> is not an SSE-math mode
;; (or TARGET_MIX_SSE_I387).
22674 (define_expand "log1p<mode>2"
22675 [(use (match_operand:MODEF 0 "register_operand"))
22676 (use (match_operand:MODEF 1 "general_operand"))]
22677 "TARGET_USE_FANCY_MATH_387
22678 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
22679 || TARGET_MIX_SSE_I387)
22680 && flag_unsafe_math_optimizations"
22681 {
22682 rtx op0 = gen_reg_rtx (XFmode);
22683 rtx op1 = gen_reg_rtx (XFmode);
22684
22685 emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
22686 emit_insn (gen_log1pxf2 (op0, op1));
22687 emit_insn (gen_truncxf<mode>2 (operands[0], op0));
22688 DONE;
22689 })
22690
;; x87 fxtract.  Operand 2 is the input, tied to operand 0.  Operand 0
;; receives UNSPEC_XTRACT_FRACT and operand 1 receives UNSPEC_XTRACT_EXP.
22691 (define_insn "fxtractxf3_i387"
22692 [(set (match_operand:XF 0 "register_operand" "=f")
22693 (unspec:XF [(match_operand:XF 2 "register_operand" "0")]
22694 UNSPEC_XTRACT_FRACT))
22695 (set (match_operand:XF 1 "register_operand" "=f")
22696 (unspec:XF [(match_dup 2)] UNSPEC_XTRACT_EXP))]
22697 "TARGET_USE_FANCY_MATH_387
22698 && flag_unsafe_math_optimizations"
22699 "fxtract"
22700 [(set_attr "type" "fpspc")
22701 (set_attr "znver1_decode" "vector")
22702 (set_attr "mode" "XF")])
22703
;; logb for XFmode using the two-output fxtract form.  Operand 0 receives
;; the UNSPEC_XTRACT_EXP result of operand 1; the UNSPEC_XTRACT_FRACT result
;; goes to a fresh pseudo (operand 2) that is not otherwise used here.
22704 (define_expand "logbxf2"
22705 [(parallel [(set (match_dup 2)
22706 (unspec:XF [(match_operand:XF 1 "register_operand")]
22707 UNSPEC_XTRACT_FRACT))
22708 (set (match_operand:XF 0 "register_operand")
22709 (unspec:XF [(match_dup 1)] UNSPEC_XTRACT_EXP))])]
22710 "TARGET_USE_FANCY_MATH_387
22711 && flag_unsafe_math_optimizations"
22712 "operands[2] = gen_reg_rtx (XFmode);")
22713
;; logb for the MODEF modes: widen operand 1 to XFmode, expand logbxf2, then
;; narrow the exponent result with truncxf<mode>2.  Enabled with
;; TARGET_USE_FANCY_MATH_387 and flag_unsafe_math_optimizations when <MODE>
;; is not an SSE-math mode (or TARGET_MIX_SSE_I387).
22714 (define_expand "logb<mode>2"
22715 [(use (match_operand:MODEF 0 "register_operand"))
22716 (use (match_operand:MODEF 1 "general_operand"))]
22717 "TARGET_USE_FANCY_MATH_387
22718 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
22719 || TARGET_MIX_SSE_I387)
22720 && flag_unsafe_math_optimizations"
22721 {
22722 rtx op0 = gen_reg_rtx (XFmode);
22723 rtx op1 = gen_reg_rtx (XFmode);
22724
22725 emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
22726 emit_insn (gen_logbxf2 (op0, op1));
/* logbxf2 writes the exponent to its operand 0, i.e. op0 here; op1 is
   only the widened input, so it must not be the value truncated.  */
22727 emit_insn (gen_truncxf<mode>2 (operands[0], op0));
22728 DONE;
22729 })
22730
;; ilogb for XFmode with an SImode result: run fxtractxf3_i387 on operand 1
;; and convert its UNSPEC_XTRACT_EXP output to SImode with fix_truncxfsi2.
;; The expansion FAILs when optimizing the insn for size.
22731 (define_expand "ilogbxf2"
22732 [(use (match_operand:SI 0 "register_operand"))
22733 (use (match_operand:XF 1 "register_operand"))]
22734 "TARGET_USE_FANCY_MATH_387
22735 && flag_unsafe_math_optimizations"
22736 {
22737 rtx op0, op1;
22738
22739 if (optimize_insn_for_size_p ())
22740 FAIL;
22741
22742 op0 = gen_reg_rtx (XFmode);
22743 op1 = gen_reg_rtx (XFmode);
22744
/* op0 takes the FRACT output and is unused; op1 takes the EXP output.  */
22745 emit_insn (gen_fxtractxf3_i387 (op0, op1, operands[1]));
22746 emit_insn (gen_fix_truncxfsi2 (operands[0], op1));
22747 DONE;
22748 })
22749
;; ilogb for the MODEF modes with an SImode result: widen operand 1 to
;; XFmode, run fxtractxf3_i387, and convert its UNSPEC_XTRACT_EXP output to
;; SImode with fix_truncxfsi2.  The expansion FAILs when optimizing the insn
;; for size.
22750 (define_expand "ilogb<mode>2"
22751 [(use (match_operand:SI 0 "register_operand"))
22752 (use (match_operand:MODEF 1 "general_operand"))]
22753 "TARGET_USE_FANCY_MATH_387
22754 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
22755 || TARGET_MIX_SSE_I387)
22756 && flag_unsafe_math_optimizations"
22757 {
22758 rtx op0, op1, op2;
22759
22760 if (optimize_insn_for_size_p ())
22761 FAIL;
22762
22763 op0 = gen_reg_rtx (XFmode);
22764 op1 = gen_reg_rtx (XFmode);
22765 op2 = gen_reg_rtx (XFmode);
22766
22767 emit_insn (gen_extend<mode>xf2 (op2, operands[1]));
/* op0 takes the FRACT output and is unused; op1 takes the EXP output.  */
22768 emit_insn (gen_fxtractxf3_i387 (op0, op1, op2));
22769 emit_insn (gen_fix_truncxfsi2 (operands[0], op1));
22770 DONE;
22771 })
22772
;; x87 f2xm1 on an XFmode register, with the input tied to the output
;; (UNSPEC_F2XM1).  Requires TARGET_USE_FANCY_MATH_387 and
;; flag_unsafe_math_optimizations.
22773 (define_insn "*f2xm1xf2_i387"
22774 [(set (match_operand:XF 0 "register_operand" "=f")
22775 (unspec:XF [(match_operand:XF 1 "register_operand" "0")]
22776 UNSPEC_F2XM1))]
22777 "TARGET_USE_FANCY_MATH_387
22778 && flag_unsafe_math_optimizations"
22779 "f2xm1"
22780 [(set_attr "type" "fpspc")
22781 (set_attr "znver1_decode" "vector")
22782 (set_attr "mode" "XF")])
22783
;; x87 fscale.  Inputs are operands 2 and 3, tied to outputs 0 and 1
;; respectively.  Operand 0 receives UNSPEC_FSCALE_FRACT and operand 1
;; receives UNSPEC_FSCALE_EXP.
22784 (define_insn "fscalexf4_i387"
22785 [(set (match_operand:XF 0 "register_operand" "=f")
22786 (unspec:XF [(match_operand:XF 2 "register_operand" "0")
22787 (match_operand:XF 3 "register_operand" "1")]
22788 UNSPEC_FSCALE_FRACT))
22789 (set (match_operand:XF 1 "register_operand" "=f")
22790 (unspec:XF [(match_dup 2) (match_dup 3)]
22791 UNSPEC_FSCALE_EXP))]
22792 "TARGET_USE_FANCY_MATH_387
22793 && flag_unsafe_math_optimizations"
22794 "fscale"
22795 [(set_attr "type" "fpspc")
22796 (set_attr "znver1_decode" "vector")
22797 (set_attr "mode" "XF")])
22798
;; Shared core of the XFmode exp expanders.  With x = operand 1 and
;; c = operand 2 it emits:
;;   op3 = x * c;  op4 = FRNDINT (op3);  op5 = op3 - op4;
;;   op6 = F2XM1 (op5);  op8 = op6 + op7 (op7 holds 1.0);
;;   result = FSCALE_FRACT [op8, op4], with FSCALE_EXP going to op9.
;; NOTE(review): this looks like 2**(x*c), split into the rounded and
;; residual parts of the product -- confirm against the f2xm1/fscale
;; instruction definitions.
22799 (define_expand "expNcorexf3"
22800 [(set (match_dup 3) (mult:XF (match_operand:XF 1 "register_operand")
22801 (match_operand:XF 2 "register_operand")))
22802 (set (match_dup 4) (unspec:XF [(match_dup 3)] UNSPEC_FRNDINT))
22803 (set (match_dup 5) (minus:XF (match_dup 3) (match_dup 4)))
22804 (set (match_dup 6) (unspec:XF [(match_dup 5)] UNSPEC_F2XM1))
22805 (set (match_dup 8) (plus:XF (match_dup 6) (match_dup 7)))
22806 (parallel [(set (match_operand:XF 0 "register_operand")
22807 (unspec:XF [(match_dup 8) (match_dup 4)]
22808 UNSPEC_FSCALE_FRACT))
22809 (set (match_dup 9)
22810 (unspec:XF [(match_dup 8) (match_dup 4)]
22811 UNSPEC_FSCALE_EXP))])]
22812 "TARGET_USE_FANCY_MATH_387
22813 && flag_unsafe_math_optimizations"
22814 {
22815 int i;
22816
/* Fresh XFmode pseudos for the temporaries, operands 3..9.  */
22817 for (i = 3; i < 10; i++)
22818 operands[i] = gen_reg_rtx (XFmode);
22819
/* operands[7] is the constant 1.0 added back after f2xm1.  */
22820 emit_move_insn (operands[7], CONST1_RTX (XFmode));
22821 })
22822
;; exp for XFmode: expNcorexf3 with the multiplier set to x87 standard
;; constant 5 (the fldl2e constant).
22823 (define_expand "expxf2"
22824 [(use (match_operand:XF 0 "register_operand"))
22825 (use (match_operand:XF 1 "register_operand"))]
22826 "TARGET_USE_FANCY_MATH_387
22827 && flag_unsafe_math_optimizations"
22828 {
22829 rtx op2 = force_reg (XFmode, standard_80387_constant_rtx (5)); /* fldl2e */
22830
22831 emit_insn (gen_expNcorexf3 (operands[0], operands[1], op2));
22832 DONE;
22833 })
22834
;; exp for the MODEF modes: widen operand 1 to XFmode, expand expxf2, then
;; narrow with truncxf<mode>2.  Enabled with TARGET_USE_FANCY_MATH_387 and
;; flag_unsafe_math_optimizations when <MODE> is not an SSE-math mode (or
;; TARGET_MIX_SSE_I387).
22835 (define_expand "exp<mode>2"
22836 [(use (match_operand:MODEF 0 "register_operand"))
22837 (use (match_operand:MODEF 1 "general_operand"))]
22838 "TARGET_USE_FANCY_MATH_387
22839 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
22840 || TARGET_MIX_SSE_I387)
22841 && flag_unsafe_math_optimizations"
22842 {
22843 rtx op0 = gen_reg_rtx (XFmode);
22844 rtx op1 = gen_reg_rtx (XFmode);
22845
22846 emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
22847 emit_insn (gen_expxf2 (op0, op1));
22848 emit_insn (gen_truncxf<mode>2 (operands[0], op0));
22849 DONE;
22850 })
22851
;; exp10 for XFmode: expNcorexf3 with the multiplier set to x87 standard
;; constant 6 (the fldl2t constant).
22852 (define_expand "exp10xf2"
22853 [(use (match_operand:XF 0 "register_operand"))
22854 (use (match_operand:XF 1 "register_operand"))]
22855 "TARGET_USE_FANCY_MATH_387
22856 && flag_unsafe_math_optimizations"
22857 {
22858 rtx op2 = force_reg (XFmode, standard_80387_constant_rtx (6)); /* fldl2t */
22859
22860 emit_insn (gen_expNcorexf3 (operands[0], operands[1], op2));
22861 DONE;
22862 })
22863
;; exp10 for the MODEF modes: widen operand 1 to XFmode, expand exp10xf2,
;; then narrow with truncxf<mode>2.  Enabled with TARGET_USE_FANCY_MATH_387
;; and flag_unsafe_math_optimizations when <MODE> is not an SSE-math mode
;; (or TARGET_MIX_SSE_I387).
22864 (define_expand "exp10<mode>2"
22865 [(use (match_operand:MODEF 0 "register_operand"))
22866 (use (match_operand:MODEF 1 "general_operand"))]
22867 "TARGET_USE_FANCY_MATH_387
22868 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
22869 || TARGET_MIX_SSE_I387)
22870 && flag_unsafe_math_optimizations"
22871 {
22872 rtx op0 = gen_reg_rtx (XFmode);
22873 rtx op1 = gen_reg_rtx (XFmode);
22874
22875 emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
22876 emit_insn (gen_exp10xf2 (op0, op1));
22877 emit_insn (gen_truncxf<mode>2 (operands[0], op0));
22878 DONE;
22879 })
22880
;; Expand XFmode exp2.  Passes operands 0 and 1 to gen_expNcorexf3 with
;; CONST1_RTX (XFmode) forced into a register as the third argument.
22881 (define_expand "exp2xf2"
22882 [(use (match_operand:XF 0 "register_operand"))
22883 (use (match_operand:XF 1 "register_operand"))]
22884 "TARGET_USE_FANCY_MATH_387
22885 && flag_unsafe_math_optimizations"
22886 {
22887 rtx op2 = force_reg (XFmode, CONST1_RTX (XFmode));
22888
22889 emit_insn (gen_expNcorexf3 (operands[0], operands[1], op2));
22890 DONE;
22891 })
22892
;; Expand exp2 for the MODEF modes via XFmode: extend operand 1 into a
;; new XFmode register, call gen_exp2xf2, then truncate the XFmode
;; result into operand 0.
22893 (define_expand "exp2<mode>2"
22894 [(use (match_operand:MODEF 0 "register_operand"))
22895 (use (match_operand:MODEF 1 "general_operand"))]
22896 "TARGET_USE_FANCY_MATH_387
22897 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
22898 || TARGET_MIX_SSE_I387)
22899 && flag_unsafe_math_optimizations"
22900 {
22901 rtx op0 = gen_reg_rtx (XFmode);
22902 rtx op1 = gen_reg_rtx (XFmode);
22903
22904 emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
22905 emit_insn (gen_exp2xf2 (op0, op1));
22906 emit_insn (gen_truncxf<mode>2 (operands[0], op0));
22907 DONE;
22908 })
22909
;; Expand XFmode expm1 as the RTL sequence spelled out in the template:
;;   op3 = op1 * op2              (op2 is loaded with constant 5, fldl2e)
;;   op4 = FRNDINT (op3)
;;   op5 = op3 - op4
;;   op6 = F2XM1 (op5)
;;   {op7, op8}   = FSCALE_FRACT / FSCALE_EXP of (op6, op4)
;;   {op10, op11} = FSCALE_FRACT / FSCALE_EXP of (op9, op8)   (op9 is 1.0)
;;   op12 = op10 - op9
;;   op0  = op12 + op7
;; The preparation statements allocate new XFmode registers for operands
;; 2 through 12 and emit the two constant loads.  The template is then
;; emitted as written, since the body does not end in DONE.
22910 (define_expand "expm1xf2"
22911 [(set (match_dup 3) (mult:XF (match_operand:XF 1 "register_operand")
22912 (match_dup 2)))
22913 (set (match_dup 4) (unspec:XF [(match_dup 3)] UNSPEC_FRNDINT))
22914 (set (match_dup 5) (minus:XF (match_dup 3) (match_dup 4)))
22915 (set (match_dup 6) (unspec:XF [(match_dup 5)] UNSPEC_F2XM1))
22916 (parallel [(set (match_dup 7)
22917 (unspec:XF [(match_dup 6) (match_dup 4)]
22918 UNSPEC_FSCALE_FRACT))
22919 (set (match_dup 8)
22920 (unspec:XF [(match_dup 6) (match_dup 4)]
22921 UNSPEC_FSCALE_EXP))])
22922 (parallel [(set (match_dup 10)
22923 (unspec:XF [(match_dup 9) (match_dup 8)]
22924 UNSPEC_FSCALE_FRACT))
22925 (set (match_dup 11)
22926 (unspec:XF [(match_dup 9) (match_dup 8)]
22927 UNSPEC_FSCALE_EXP))])
22928 (set (match_dup 12) (minus:XF (match_dup 10) (match_dup 9)))
22929 (set (match_operand:XF 0 "register_operand")
22930 (plus:XF (match_dup 12) (match_dup 7)))]
22931 "TARGET_USE_FANCY_MATH_387
22932 && flag_unsafe_math_optimizations"
22933 {
22934 int i;
22935
22936 for (i = 2; i < 13; i++)
22937 operands[i] = gen_reg_rtx (XFmode);
22938
22939 emit_move_insn (operands[2], standard_80387_constant_rtx (5)); /* fldl2e */
22940 emit_move_insn (operands[9], CONST1_RTX (XFmode));
22941 })
22942
;; Expand expm1 for the MODEF modes via XFmode: extend operand 1 into a
;; new XFmode register, call gen_expm1xf2, then truncate the XFmode
;; result into operand 0.
22943 (define_expand "expm1<mode>2"
22944 [(use (match_operand:MODEF 0 "register_operand"))
22945 (use (match_operand:MODEF 1 "general_operand"))]
22946 "TARGET_USE_FANCY_MATH_387
22947 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
22948 || TARGET_MIX_SSE_I387)
22949 && flag_unsafe_math_optimizations"
22950 {
22951 rtx op0 = gen_reg_rtx (XFmode);
22952 rtx op1 = gen_reg_rtx (XFmode);
22953
22954 emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
22955 emit_insn (gen_expm1xf2 (op0, op1));
22956 emit_insn (gen_truncxf<mode>2 (operands[0], op0));
22957 DONE;
22958 })
22959
;; Scalar vscalef instruction for the MODEF modes, modelled as
;; UNSPEC_SCALEF of operands 1 and 2.  Operand 1 must be a register;
;; operand 2 may be a register or memory ("vm").  Available under
;; TARGET_AVX512F and marked with the evex prefix attribute.
22960 (define_insn "avx512f_scalef<mode>2"
22961 [(set (match_operand:MODEF 0 "register_operand" "=v")
22962 (unspec:MODEF
22963 [(match_operand:MODEF 1 "register_operand" "v")
22964 (match_operand:MODEF 2 "nonimmediate_operand" "vm")]
22965 UNSPEC_SCALEF))]
22966 "TARGET_AVX512F"
22967 "vscalef<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
22968 [(set_attr "prefix" "evex")
22969 (set_attr "mode" "<MODE>")])
22970
;; Expand XFmode ldexp.  The SImode exponent (operand 2) is converted to
;; XFmode with gen_floatsixf2 into tmp1, then gen_fscalexf4_i387 is
;; emitted with operand 0 and tmp2 as its first two arguments and
;; operand 1 and tmp1 as its last two.
;; NOTE(review): tmp2 is not read again in this expander; presumably it
;; receives the second output of the fscale pattern -- confirm against
;; fscalexf4_i387.
22971 (define_expand "ldexpxf3"
22972 [(match_operand:XF 0 "register_operand")
22973 (match_operand:XF 1 "register_operand")
22974 (match_operand:SI 2 "register_operand")]
22975 "TARGET_USE_FANCY_MATH_387
22976 && flag_unsafe_math_optimizations"
22977 {
22978 rtx tmp1 = gen_reg_rtx (XFmode);
22979 rtx tmp2 = gen_reg_rtx (XFmode);
22980
22981 emit_insn (gen_floatsixf2 (tmp1, operands[2]));
22982 emit_insn (gen_fscalexf4_i387 (operands[0], tmp2,
22983 operands[1], tmp1));
22984 DONE;
22985 })
22986
;; Expand ldexp for the MODEF modes.  Two strategies, both requiring
;; flag_unsafe_math_optimizations:
;;  - TARGET_AVX512F && TARGET_SSE_MATH: operand 1 is forced into a
;;    register, the SImode exponent is converted to the same float mode,
;;    and avx512f_scalef<mode>2 is emitted.
;;  - otherwise (x87): operand 1 is extended to XFmode, gen_ldexpxf3 does
;;    the work, and the XFmode result is truncated into operand 0.
22987 (define_expand "ldexp<mode>3"
22988 [(use (match_operand:MODEF 0 "register_operand"))
22989 (use (match_operand:MODEF 1 "general_operand"))
22990 (use (match_operand:SI 2 "register_operand"))]
22991 "((TARGET_USE_FANCY_MATH_387
22992 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
22993 || TARGET_MIX_SSE_I387))
22994 || (TARGET_AVX512F && TARGET_SSE_MATH))
22995 && flag_unsafe_math_optimizations"
22996 {
22997 /* Prefer avx512f version. */
22998 if (TARGET_AVX512F && TARGET_SSE_MATH)
22999 {
23000 rtx op2 = gen_reg_rtx (<MODE>mode);
23001 operands[1] = force_reg (<MODE>mode, operands[1]);
23002
23003 emit_insn (gen_floatsi<mode>2 (op2, operands[2]));
23004 emit_insn (gen_avx512f_scalef<mode>2 (operands[0], operands[1], op2));
23005 }
23006 else
23007 {
23008 rtx op0 = gen_reg_rtx (XFmode);
23009 rtx op1 = gen_reg_rtx (XFmode);
23010
23011 emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
23012 emit_insn (gen_ldexpxf3 (op0, op1, operands[2]));
23013 emit_insn (gen_truncxf<mode>2 (operands[0], op0));
23014 }
23015 DONE;
23016 })
23017
;; Expand XFmode scalb as a single parallel: operand 0 receives
;; UNSPEC_FSCALE_FRACT of (operand 1, operand 2) and operand 3 receives
;; UNSPEC_FSCALE_EXP of the same inputs.  Operand 3 is a new XFmode
;; register allocated by the preparation statement; it is not one of the
;; expander's named inputs.  Enabled with TARGET_USE_FANCY_MATH_387 and
;; flag_unsafe_math_optimizations.
23018 (define_expand "scalbxf3"
23019 [(parallel [(set (match_operand:XF 0 "register_operand")
23020 (unspec:XF [(match_operand:XF 1 "register_operand")
23021 (match_operand:XF 2 "register_operand")]
23022 UNSPEC_FSCALE_FRACT))
23023 (set (match_dup 3)
23024 (unspec:XF [(match_dup 1) (match_dup 2)]
23025 UNSPEC_FSCALE_EXP))])]
23026 "TARGET_USE_FANCY_MATH_387
23027 && flag_unsafe_math_optimizations"
23028 "operands[3] = gen_reg_rtx (XFmode);")
23029
;; Expand scalb for the MODEF modes via XFmode: both inputs (operands 1
;; and 2) are extended into new XFmode registers, gen_scalbxf3 computes
;; the XFmode result, and that result is truncated into operand 0.
23030 (define_expand "scalb<mode>3"
23031 [(use (match_operand:MODEF 0 "register_operand"))
23032 (use (match_operand:MODEF 1 "general_operand"))
23033 (use (match_operand:MODEF 2 "general_operand"))]
23034 "TARGET_USE_FANCY_MATH_387
23035 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
23036 || TARGET_MIX_SSE_I387)
23037 && flag_unsafe_math_optimizations"
23038 {
23039 rtx op0 = gen_reg_rtx (XFmode);
23040 rtx op1 = gen_reg_rtx (XFmode);
23041 rtx op2 = gen_reg_rtx (XFmode);
23042
23043 emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
23044 emit_insn (gen_extend<mode>xf2 (op2, operands[2]));
23045 emit_insn (gen_scalbxf3 (op0, op1, op2));
23046 emit_insn (gen_truncxf<mode>2 (operands[0], op0));
23047 DONE;
23048 })
23049
;; Expand XFmode significand as a single parallel: operand 0 receives
;; UNSPEC_XTRACT_FRACT of operand 1 and operand 2 receives
;; UNSPEC_XTRACT_EXP of operand 1.  Operand 2 is a new XFmode register
;; allocated by the preparation statement.
23050 (define_expand "significandxf2"
23051 [(parallel [(set (match_operand:XF 0 "register_operand")
23052 (unspec:XF [(match_operand:XF 1 "register_operand")]
23053 UNSPEC_XTRACT_FRACT))
23054 (set (match_dup 2)
23055 (unspec:XF [(match_dup 1)] UNSPEC_XTRACT_EXP))])]
23056 "TARGET_USE_FANCY_MATH_387
23057 && flag_unsafe_math_optimizations"
23058 "operands[2] = gen_reg_rtx (XFmode);")
23059
;; Expand significand for the MODEF modes via XFmode: extend operand 1
;; into a new XFmode register, call gen_significandxf2, then truncate
;; the XFmode result into operand 0.
23060 (define_expand "significand<mode>2"
23061 [(use (match_operand:MODEF 0 "register_operand"))
23062 (use (match_operand:MODEF 1 "general_operand"))]
23063 "TARGET_USE_FANCY_MATH_387
23064 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
23065 || TARGET_MIX_SSE_I387)
23066 && flag_unsafe_math_optimizations"
23067 {
23068 rtx op0 = gen_reg_rtx (XFmode);
23069 rtx op1 = gen_reg_rtx (XFmode);
23070
23071 emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
23072 emit_insn (gen_significandxf2 (op0, op1));
23073 emit_insn (gen_truncxf<mode>2 (operands[0], op0));
23074 DONE;
23075 })
23076 \f
23077
;; Scalar rounding instruction for the MODEFH modes, modelled as
;; UNSPEC_ROUND of operand 1 with the immediate in operand 2
;; (const_0_to_15_operand).  Five alternatives:
;;   0-2: %vround..., restricted to "noavx512f" by the isa attribute;
;;   3-4: vrndscale..., restricted to "avx512f".
;; Alternatives 2 and 4 take their source from memory ("jm" / "m") and
;; are the ones flagged avx_partial_xmm_update "true".
;; preferred_for_speed: always true under TARGET_AVX; otherwise
;; alternatives 1 and 2 are preferred only when
;; !TARGET_SSE_PARTIAL_REG_DEPENDENCY, and the rest are always preferred.
23078 (define_insn "sse4_1_round<mode>2"
23079 [(set (match_operand:MODEFH 0 "register_operand" "=x,x,x,v,v")
23080 (unspec:MODEFH
23081 [(match_operand:MODEFH 1 "nonimmediate_operand" "0,x,jm,v,m")
23082 (match_operand:SI 2 "const_0_to_15_operand")]
23083 UNSPEC_ROUND))]
23084 "TARGET_SSE4_1"
23085 "@
23086 %vround<ssemodesuffix>\t{%2, %d1, %0|%0, %d1, %2}
23087 %vround<ssemodesuffix>\t{%2, %d1, %0|%0, %d1, %2}
23088 %vround<ssemodesuffix>\t{%2, %1, %d0|%d0, %1, %2}
23089 vrndscale<ssemodesuffix>\t{%2, %d1, %0|%0, %d1, %2}
23090 vrndscale<ssemodesuffix>\t{%2, %1, %d0|%d0, %1, %2}"
23091 [(set_attr "type" "ssecvt")
23092 (set_attr "prefix_extra" "1,1,1,*,*")
23093 (set_attr "length_immediate" "1")
23094 (set_attr "addr" "*,*,gpr16,*,*")
23095 (set_attr "prefix" "maybe_vex,maybe_vex,maybe_vex,evex,evex")
23096 (set_attr "isa" "noavx512f,noavx512f,noavx512f,avx512f,avx512f")
23097 (set_attr "avx_partial_xmm_update" "false,false,true,false,true")
23098 (set_attr "mode" "<MODE>")
23099 (set (attr "preferred_for_speed")
23100 (cond [(match_test "TARGET_AVX")
23101 (symbol_ref "true")
23102 (eq_attr "alternative" "1,2")
23103 (symbol_ref "!TARGET_SSE_PARTIAL_REG_DEPENDENCY")
23104 ]
23105 (symbol_ref "true")))])
23106
;; XFmode rint: emits "frndint" for UNSPEC_FRNDINT.  The "0" constraint
;; ties input operand 1 to output operand 0.  Unlike the expanders above,
;; the condition is only TARGET_USE_FANCY_MATH_387.
23107 (define_insn "rintxf2"
23108 [(set (match_operand:XF 0 "register_operand" "=f")
23109 (unspec:XF [(match_operand:XF 1 "register_operand" "0")]
23110 UNSPEC_FRNDINT))]
23111 "TARGET_USE_FANCY_MATH_387"
23112 "frndint"
23113 [(set_attr "type" "fpspc")
23114 (set_attr "znver1_decode" "vector")
23115 (set_attr "mode" "XF")])
23116
;; HFmode rint under TARGET_AVX512FP16: emits sse4_1_roundhf2 with the
;; immediate ROUND_MXCSR.
23117 (define_expand "rinthf2"
23118 [(match_operand:HF 0 "register_operand")
23119 (match_operand:HF 1 "nonimmediate_operand")]
23120 "TARGET_AVX512FP16"
23121 {
23122 emit_insn (gen_sse4_1_roundhf2 (operands[0],
23123 operands[1],
23124 GEN_INT (ROUND_MXCSR)));
23125 DONE;
23126 })
23127
;; Expand rint for the MODEF modes.
;;  - When the mode is handled by SSE math: with TARGET_SSE4_1 emit
;;    sse4_1_round<mode>2 with ROUND_MXCSR, otherwise call
;;    ix86_expand_rint.
;;  - Otherwise (x87): extend operand 1 to XFmode, emit rintxf2, and
;;    convert back with truncxf<mode>2_i387_noop_unspec.
23128 (define_expand "rint<mode>2"
23129 [(use (match_operand:MODEF 0 "register_operand"))
23130 (use (match_operand:MODEF 1 "nonimmediate_operand"))]
23131 "TARGET_USE_FANCY_MATH_387
23132 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
23133 {
23134 if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
23135 {
23136 if (TARGET_SSE4_1)
23137 emit_insn (gen_sse4_1_round<mode>2
23138 (operands[0], operands[1], GEN_INT (ROUND_MXCSR)));
23139 else
23140 ix86_expand_rint (operands[0], operands[1]);
23141 }
23142 else
23143 {
23144 rtx op0 = gen_reg_rtx (XFmode);
23145 rtx op1 = gen_reg_rtx (XFmode);
23146
23147 emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
23148 emit_insn (gen_rintxf2 (op0, op1));
23149 emit_insn (gen_truncxf<mode>2_i387_noop_unspec (operands[0], op0));
23150 }
23151 DONE;
23152 })
23153
;; XFmode nearbyint: generates the same UNSPEC_FRNDINT set as rintxf2,
;; but is only enabled when !flag_trapping_math (in addition to
;; TARGET_USE_FANCY_MATH_387).  There are no preparation statements.
23154 (define_expand "nearbyintxf2"
23155 [(set (match_operand:XF 0 "register_operand")
23156 (unspec:XF [(match_operand:XF 1 "register_operand")]
23157 UNSPEC_FRNDINT))]
23158 "TARGET_USE_FANCY_MATH_387
23159 && !flag_trapping_math")
23160
;; HFmode nearbyint under TARGET_AVX512FP16: emits sse4_1_roundhf2 with
;; the immediate ROUND_MXCSR | ROUND_NO_EXC.
23161 (define_expand "nearbyinthf2"
23162 [(match_operand:HF 0 "register_operand")
23163 (match_operand:HF 1 "nonimmediate_operand")]
23164 "TARGET_AVX512FP16"
23165 {
23166 emit_insn (gen_sse4_1_roundhf2 (operands[0],
23167 operands[1],
23168 GEN_INT (ROUND_MXCSR | ROUND_NO_EXC)));
23169 DONE;
23170 })
23171
;; Expand nearbyint for the MODEF modes.
;;  - TARGET_SSE4_1 && TARGET_SSE_MATH: emit sse4_1_round<mode>2 with
;;    ROUND_MXCSR | ROUND_NO_EXC.
;;  - Otherwise the x87 route, which the condition only allows when
;;    !flag_trapping_math: extend to XFmode, emit nearbyintxf2, and
;;    convert back with truncxf<mode>2_i387_noop_unspec.
23172 (define_expand "nearbyint<mode>2"
23173 [(use (match_operand:MODEF 0 "register_operand"))
23174 (use (match_operand:MODEF 1 "nonimmediate_operand"))]
23175 "(TARGET_USE_FANCY_MATH_387
23176 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
23177 || TARGET_MIX_SSE_I387)
23178 && !flag_trapping_math)
23179 || (TARGET_SSE4_1 && TARGET_SSE_MATH)"
23180 {
23181 if (TARGET_SSE4_1 && TARGET_SSE_MATH)
23182 emit_insn (gen_sse4_1_round<mode>2
23183 (operands[0], operands[1], GEN_INT (ROUND_MXCSR
23184 | ROUND_NO_EXC)));
23185 else
23186 {
23187 rtx op0 = gen_reg_rtx (XFmode);
23188 rtx op1 = gen_reg_rtx (XFmode);
23189
23190 emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
23191 emit_insn (gen_nearbyintxf2 (op0, op1));
23192 emit_insn (gen_truncxf<mode>2_i387_noop_unspec (operands[0], op0));
23193 }
23194 DONE;
23195 })
23196
;; HFmode round: delegates to ix86_expand_round_sse4.  Enabled under
;; TARGET_AVX512FP16 only when neither flag_trapping_math nor
;; flag_rounding_math is set.
23197 (define_expand "roundhf2"
23198 [(match_operand:HF 0 "register_operand")
23199 (match_operand:HF 1 "register_operand")]
23200 "TARGET_AVX512FP16 && !flag_trapping_math && !flag_rounding_math"
23201 {
23202 ix86_expand_round_sse4 (operands[0], operands[1]);
23203 DONE;
23204 })
23205
;; Expand round for the X87MODEF modes.
;;  - SSE route (mode handled by SSE math, !flag_trapping_math and
;;    !flag_rounding_math): with TARGET_SSE4_1 force operand 1 into a
;;    register and call ix86_expand_round_sse4; otherwise call
;;    ix86_expand_round, except for DFmode on a non-64-bit target, which
;;    uses ix86_expand_rounddf_32.
;;  - x87 route (requires flag_unsafe_math_optimizations and either
;;    flag_fp_int_builtin_inexact or !flag_trapping_math): force operand
;;    1 into a register and call ix86_emit_i387_round.
23206 (define_expand "round<mode>2"
23207 [(match_operand:X87MODEF 0 "register_operand")
23208 (match_operand:X87MODEF 1 "nonimmediate_operand")]
23209 "(TARGET_USE_FANCY_MATH_387
23210 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
23211 || TARGET_MIX_SSE_I387)
23212 && flag_unsafe_math_optimizations
23213 && (flag_fp_int_builtin_inexact || !flag_trapping_math))
23214 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
23215 && !flag_trapping_math && !flag_rounding_math)"
23216 {
23217 if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
23218 && !flag_trapping_math && !flag_rounding_math)
23219 {
23220 if (TARGET_SSE4_1)
23221 {
23222 operands[1] = force_reg (<MODE>mode, operands[1]);
23223 ix86_expand_round_sse4 (operands[0], operands[1]);
23224 }
23225 else if (TARGET_64BIT || (<MODE>mode != DFmode))
23226 ix86_expand_round (operands[0], operands[1]);
23227 else
23228 ix86_expand_rounddf_32 (operands[0], operands[1]);
23229 }
23230 else
23231 {
23232 operands[1] = force_reg (<MODE>mode, operands[1]);
23233 ix86_emit_i387_round (operands[0], operands[1]);
23234 }
23235 DONE;
23236 })
23237
;; XFmode -> DImode lrint, modelled as UNSPEC_FIST.  The destination is
;; constrained to memory ("=m") and the pattern clobbers an XFmode
;; scratch with an earlyclobber constraint ("=&f").  Assembly comes from
;; output_fix_trunc (insn, operands, false).
23238 (define_insn "lrintxfdi2"
23239 [(set (match_operand:DI 0 "nonimmediate_operand" "=m")
23240 (unspec:DI [(match_operand:XF 1 "register_operand" "f")]
23241 UNSPEC_FIST))
23242 (clobber (match_scratch:XF 2 "=&f"))]
23243 "TARGET_USE_FANCY_MATH_387"
23244 "* return output_fix_trunc (insn, operands, false);"
23245 [(set_attr "type" "fpspc")
23246 (set_attr "mode" "DI")])
23247
;; XFmode -> SWI24 lrint, modelled as UNSPEC_FIST with a memory
;; destination ("=m").  Same output routine as lrintxfdi2, but this
;; variant has no scratch clobber.
23248 (define_insn "lrintxf<mode>2"
23249 [(set (match_operand:SWI24 0 "nonimmediate_operand" "=m")
23250 (unspec:SWI24 [(match_operand:XF 1 "register_operand" "f")]
23251 UNSPEC_FIST))]
23252 "TARGET_USE_FANCY_MATH_387"
23253 "* return output_fix_trunc (insn, operands, false);"
23254 [(set_attr "type" "fpspc")
23255 (set_attr "mode" "<MODE>")])
23256
;; HFmode -> SWI248 lround: delegates to ix86_expand_lround and ends in
;; DONE, so the template itself is not emitted.  Enabled under
;; TARGET_AVX512FP16 only when neither flag_trapping_math nor
;; flag_rounding_math is set.
23257 (define_expand "lroundhf<mode>2"
23258 [(set (match_operand:SWI248 0 "register_operand")
23259 (unspec:SWI248 [(match_operand:HF 1 "nonimmediate_operand")]
23260 UNSPEC_FIX_NOTRUNC))]
23261 "TARGET_AVX512FP16 && !flag_trapping_math && !flag_rounding_math"
23262 {
23263 ix86_expand_lround (operands[0], operands[1]);
23264 DONE;
23265 })
23266
;; HFmode -> SWI48 lrint under TARGET_AVX512FP16.  No preparation
;; statements: the UNSPEC_FIX_NOTRUNC set in the template is what gets
;; generated.
23267 (define_expand "lrinthf<mode>2"
23268 [(set (match_operand:SWI48 0 "register_operand")
23269 (unspec:SWI48 [(match_operand:HF 1 "nonimmediate_operand")]
23270 UNSPEC_FIX_NOTRUNC))]
23271 "TARGET_AVX512FP16")
23272
;; MODEF -> SWI48 lrint when the float mode is handled by SSE math.  No
;; preparation statements: the UNSPEC_FIX_NOTRUNC set in the template is
;; what gets generated.
23273 (define_expand "lrint<MODEF:mode><SWI48:mode>2"
23274 [(set (match_operand:SWI48 0 "register_operand")
23275 (unspec:SWI48 [(match_operand:MODEF 1 "nonimmediate_operand")]
23276 UNSPEC_FIX_NOTRUNC))]
23277 "SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH")
23278
;; Expand lround from the X87MODEF modes to the SWI248x modes.
;;  - SSE route: float mode handled by SSE math, destination not HImode,
;;    DImode destination only on TARGET_64BIT, and neither
;;    flag_trapping_math nor flag_rounding_math set; calls
;;    ix86_expand_lround.
;;  - Otherwise the x87 route (flag_unsafe_math_optimizations), which
;;    calls ix86_emit_i387_round.
;; The C body repeats the SSE half of the enabling condition to pick the
;; route.
23279 (define_expand "lround<X87MODEF:mode><SWI248x:mode>2"
23280 [(match_operand:SWI248x 0 "nonimmediate_operand")
23281 (match_operand:X87MODEF 1 "register_operand")]
23282 "(TARGET_USE_FANCY_MATH_387
23283 && (!(SSE_FLOAT_MODE_P (<X87MODEF:MODE>mode) && TARGET_SSE_MATH)
23284 || TARGET_MIX_SSE_I387)
23285 && flag_unsafe_math_optimizations)
23286 || (SSE_FLOAT_MODE_P (<X87MODEF:MODE>mode) && TARGET_SSE_MATH
23287 && <SWI248x:MODE>mode != HImode
23288 && ((<SWI248x:MODE>mode != DImode) || TARGET_64BIT)
23289 && !flag_trapping_math && !flag_rounding_math)"
23290 {
23291 if (SSE_FLOAT_MODE_P (<X87MODEF:MODE>mode) && TARGET_SSE_MATH
23292 && <SWI248x:MODE>mode != HImode
23293 && ((<SWI248x:MODE>mode != DImode) || TARGET_64BIT)
23294 && !flag_trapping_math && !flag_rounding_math)
23295 ix86_expand_lround (operands[0], operands[1]);
23296 else
23297 ix86_emit_i387_round (operands[0], operands[1]);
23298 DONE;
23299 })
23300
;; Int iterator over the four UNSPEC_FRNDINT_* codes (roundeven, floor,
;; ceil, trunc).  Used by the frndint patterns and the <rounding_insn>
;; expanders below.
23301 (define_int_iterator FRNDINT_ROUNDING
23302 [UNSPEC_FRNDINT_ROUNDEVEN
23303 UNSPEC_FRNDINT_FLOOR
23304 UNSPEC_FRNDINT_CEIL
23305 UNSPEC_FRNDINT_TRUNC])
23306
;; Int iterator over the two UNSPEC_FIST_* codes (floor, ceil).  Used by
;; the fist patterns and the l<rounding_insn> expanders below.
23307 (define_int_iterator FIST_ROUNDING
23308 [UNSPEC_FIST_FLOOR
23309 UNSPEC_FIST_CEIL])
23310
23311 ;; Base name for define_insn
;; Maps each rounding unspec to the name stem used in pattern names.
;; Note that UNSPEC_FRNDINT_TRUNC maps to "btrunc" here, whereas the
;; "rounding" attribute maps it to "trunc".
23312 (define_int_attr rounding_insn
23313 [(UNSPEC_FRNDINT_ROUNDEVEN "roundeven")
23314 (UNSPEC_FRNDINT_FLOOR "floor")
23315 (UNSPEC_FRNDINT_CEIL "ceil")
23316 (UNSPEC_FRNDINT_TRUNC "btrunc")
23317 (UNSPEC_FIST_FLOOR "floor")
23318 (UNSPEC_FIST_CEIL "ceil")])
23319
;; Lower-case rounding name per unspec.  In this chunk it is used as a
;; suffix in insn names (frndintxf2_<rounding>, fist<mode>2_<rounding>)
;; and as the value of the "i387_cw" attribute.
23320 (define_int_attr rounding
23321 [(UNSPEC_FRNDINT_ROUNDEVEN "roundeven")
23322 (UNSPEC_FRNDINT_FLOOR "floor")
23323 (UNSPEC_FRNDINT_CEIL "ceil")
23324 (UNSPEC_FRNDINT_TRUNC "trunc")
23325 (UNSPEC_FIST_FLOOR "floor")
23326 (UNSPEC_FIST_CEIL "ceil")])
23327
;; Upper-case rounding name per unspec.  In this chunk it is pasted into
;; C identifiers: I387_<ROUNDING>, SLOT_CW_<ROUNDING> and
;; ROUND_<ROUNDING>.
23328 (define_int_attr ROUNDING
23329 [(UNSPEC_FRNDINT_ROUNDEVEN "ROUNDEVEN")
23330 (UNSPEC_FRNDINT_FLOOR "FLOOR")
23331 (UNSPEC_FRNDINT_CEIL "CEIL")
23332 (UNSPEC_FRNDINT_TRUNC "TRUNC")
23333 (UNSPEC_FIST_FLOOR "FLOOR")
23334 (UNSPEC_FIST_CEIL "CEIL")])
23335
23336 ;; Rounding mode control word calculation could clobber FLAGS_REG.
;; Placeholder form of the directed-rounding frndint, one per
;; FRNDINT_ROUNDING code.  It matches only while ix86_pre_reload_split ()
;; holds, outputs "#", and is always split ("&& 1").  The split:
;;   - sets ix86_optimize_mode_switching[I387_<ROUNDING>] to 1,
;;   - obtains two HImode stack slots, SLOT_CW_STORED and
;;     SLOT_CW_<ROUNDING>, as operands 2 and 3,
;;   - emits frndintxf2_<rounding>_i387 using those slots.
23337 (define_insn_and_split "frndintxf2_<rounding>"
23338 [(set (match_operand:XF 0 "register_operand")
23339 (unspec:XF [(match_operand:XF 1 "register_operand")]
23340 FRNDINT_ROUNDING))
23341 (clobber (reg:CC FLAGS_REG))]
23342 "TARGET_USE_FANCY_MATH_387
23343 && (flag_fp_int_builtin_inexact || !flag_trapping_math)
23344 && ix86_pre_reload_split ()"
23345 "#"
23346 "&& 1"
23347 [(const_int 0)]
23348 {
23349 ix86_optimize_mode_switching[I387_<ROUNDING>] = 1;
23350
23351 operands[2] = assign_386_stack_local (HImode, SLOT_CW_STORED);
23352 operands[3] = assign_386_stack_local (HImode, SLOT_CW_<ROUNDING>);
23353
23354 emit_insn (gen_frndintxf2_<rounding>_i387 (operands[0], operands[1],
23355 operands[2], operands[3]));
23356 DONE;
23357 }
23358 [(set_attr "type" "frndint")
23359 (set_attr "i387_cw" "<rounding>")
23360 (set_attr "mode" "XF")])
23361
;; Real form of the directed-rounding frndint.  Operands 2 and 3 are
;; HImode memory operands that are only "use"d.  The output template
;; loads the control word from operand 3, executes frndint, then loads
;; the control word from operand 2.  Input operand 1 is tied to output
;; operand 0 by the "0" constraint.
23362 (define_insn "frndintxf2_<rounding>_i387"
23363 [(set (match_operand:XF 0 "register_operand" "=f")
23364 (unspec:XF [(match_operand:XF 1 "register_operand" "0")]
23365 FRNDINT_ROUNDING))
23366 (use (match_operand:HI 2 "memory_operand" "m"))
23367 (use (match_operand:HI 3 "memory_operand" "m"))]
23368 "TARGET_USE_FANCY_MATH_387
23369 && (flag_fp_int_builtin_inexact || !flag_trapping_math)"
23370 "fldcw\t%3\n\tfrndint\n\tfldcw\t%2"
23371 [(set_attr "type" "frndint")
23372 (set_attr "i387_cw" "<rounding>")
23373 (set_attr "mode" "XF")])
23374
;; Named XFmode expanders (roundeven/floor/ceil/btrunc + "xf2").  No
;; preparation statements: each generates the parallel of the
;; FRNDINT_ROUNDING unspec set and a FLAGS_REG clobber, the same shape
;; as the frndintxf2_<rounding> pattern.
23375 (define_expand "<rounding_insn>xf2"
23376 [(parallel [(set (match_operand:XF 0 "register_operand")
23377 (unspec:XF [(match_operand:XF 1 "register_operand")]
23378 FRNDINT_ROUNDING))
23379 (clobber (reg:CC FLAGS_REG))])]
23380 "TARGET_USE_FANCY_MATH_387
23381 && (flag_fp_int_builtin_inexact || !flag_trapping_math)")
23382
;; Named HFmode expanders for the FRNDINT_ROUNDING codes under
;; TARGET_AVX512FP16.  The body emits sse4_1_roundhf2 with the immediate
;; ROUND_<ROUNDING> | ROUND_NO_EXC and ends in DONE, so the template
;; (including its FLAGS_REG clobber) is not emitted.
23383 (define_expand "<rounding_insn>hf2"
23384 [(parallel [(set (match_operand:HF 0 "register_operand")
23385 (unspec:HF [(match_operand:HF 1 "register_operand")]
23386 FRNDINT_ROUNDING))
23387 (clobber (reg:CC FLAGS_REG))])]
23388 "TARGET_AVX512FP16"
23389 {
23390 emit_insn (gen_sse4_1_roundhf2 (operands[0], operands[1],
23391 GEN_INT (ROUND_<ROUNDING> | ROUND_NO_EXC)));
23392 DONE;
23393 })
23394
;; Named MODEF expanders for the FRNDINT_ROUNDING codes
;; (roundeven/floor/ceil/btrunc).
;;  - SSE route, taken when the mode is handled by SSE math and either
;;    TARGET_SSE4_1 holds, or the operation is not ROUNDEVEN and
;;    (flag_fp_int_builtin_inexact || !flag_trapping_math):
;;      * TARGET_SSE4_1: sse4_1_round<mode>2 with
;;        ROUND_<ROUNDING> | ROUND_NO_EXC;
;;      * else, on TARGET_64BIT or for a non-DFmode mode:
;;        ix86_expand_floorceil (last argument true for floor, false for
;;        ceil) or ix86_expand_trunc;
;;      * else (DFmode on a non-64-bit target): the *df_32 variants.
;;    ROUNDEVEN cannot reach the non-SSE4.1 arms because the condition
;;    excludes it, hence the gcc_unreachable () calls.
;;  - x87 route: extend to XFmode, emit frndintxf2_<rounding>, and
;;    convert back with truncxf<mode>2_i387_noop_unspec.
23395 (define_expand "<rounding_insn><mode>2"
23396 [(parallel [(set (match_operand:MODEF 0 "register_operand")
23397 (unspec:MODEF [(match_operand:MODEF 1 "register_operand")]
23398 FRNDINT_ROUNDING))
23399 (clobber (reg:CC FLAGS_REG))])]
23400 "(TARGET_USE_FANCY_MATH_387
23401 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
23402 || TARGET_MIX_SSE_I387)
23403 && (flag_fp_int_builtin_inexact || !flag_trapping_math))
23404 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
23405 && (TARGET_SSE4_1
23406 || (ROUND_<ROUNDING> != ROUND_ROUNDEVEN
23407 && (flag_fp_int_builtin_inexact || !flag_trapping_math))))"
23408 {
23409 if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
23410 && (TARGET_SSE4_1
23411 || (ROUND_<ROUNDING> != ROUND_ROUNDEVEN
23412 && (flag_fp_int_builtin_inexact || !flag_trapping_math))))
23413 {
23414 if (TARGET_SSE4_1)
23415 emit_insn (gen_sse4_1_round<mode>2
23416 (operands[0], operands[1],
23417 GEN_INT (ROUND_<ROUNDING> | ROUND_NO_EXC)));
23418 else if (TARGET_64BIT || (<MODE>mode != DFmode))
23419 {
23420 if (ROUND_<ROUNDING> == ROUND_FLOOR)
23421 ix86_expand_floorceil (operands[0], operands[1], true);
23422 else if (ROUND_<ROUNDING> == ROUND_CEIL)
23423 ix86_expand_floorceil (operands[0], operands[1], false);
23424 else if (ROUND_<ROUNDING> == ROUND_TRUNC)
23425 ix86_expand_trunc (operands[0], operands[1]);
23426 else
23427 gcc_unreachable ();
23428 }
23429 else
23430 {
23431 if (ROUND_<ROUNDING> == ROUND_FLOOR)
23432 ix86_expand_floorceildf_32 (operands[0], operands[1], true);
23433 else if (ROUND_<ROUNDING> == ROUND_CEIL)
23434 ix86_expand_floorceildf_32 (operands[0], operands[1], false);
23435 else if (ROUND_<ROUNDING> == ROUND_TRUNC)
23436 ix86_expand_truncdf_32 (operands[0], operands[1]);
23437 else
23438 gcc_unreachable ();
23439 }
23440 }
23441 else
23442 {
23443 rtx op0 = gen_reg_rtx (XFmode);
23444 rtx op1 = gen_reg_rtx (XFmode);
23445
23446 emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
23447 emit_insn (gen_frndintxf2_<rounding> (op0, op1));
23448 emit_insn (gen_truncxf<mode>2_i387_noop_unspec (operands[0], op0));
23449 }
23450 DONE;
23451 })
23452
23453 ;; Rounding mode control word calculation could clobber FLAGS_REG.
;; Placeholder form of the directed-rounding float-to-integer store for
;; the SWI248x modes, one per FIST_ROUNDING code.  It matches only while
;; ix86_pre_reload_split () holds, outputs "#", and is always split.
;; The split sets ix86_optimize_mode_switching[I387_<ROUNDING>], obtains
;; the SLOT_CW_STORED and SLOT_CW_<ROUNDING> HImode stack slots as
;; operands 2 and 3, and emits fist<mode>2_<rounding> with them.
23454 (define_insn_and_split "*fist<mode>2_<rounding>_1"
23455 [(set (match_operand:SWI248x 0 "nonimmediate_operand")
23456 (unspec:SWI248x [(match_operand:XF 1 "register_operand")]
23457 FIST_ROUNDING))
23458 (clobber (reg:CC FLAGS_REG))]
23459 "TARGET_USE_FANCY_MATH_387
23460 && flag_unsafe_math_optimizations
23461 && ix86_pre_reload_split ()"
23462 "#"
23463 "&& 1"
23464 [(const_int 0)]
23465 {
23466 ix86_optimize_mode_switching[I387_<ROUNDING>] = 1;
23467
23468 operands[2] = assign_386_stack_local (HImode, SLOT_CW_STORED);
23469 operands[3] = assign_386_stack_local (HImode, SLOT_CW_<ROUNDING>);
23470
23471 emit_insn (gen_fist<mode>2_<rounding> (operands[0], operands[1],
23472 operands[2], operands[3]));
23473 DONE;
23474 }
23475 [(set_attr "type" "fistp")
23476 (set_attr "i387_cw" "<rounding>")
23477 (set_attr "mode" "<MODE>")])
23478
;; DImode directed-rounding store from an XFmode register.  The
;; destination is constrained to memory, operands 2 and 3 are HImode
;; memory operands that are only "use"d, and an XFmode scratch is
;; clobbered with an earlyclobber constraint.  Assembly comes from
;; output_fix_trunc (insn, operands, false).
23479 (define_insn "fistdi2_<rounding>"
23480 [(set (match_operand:DI 0 "nonimmediate_operand" "=m")
23481 (unspec:DI [(match_operand:XF 1 "register_operand" "f")]
23482 FIST_ROUNDING))
23483 (use (match_operand:HI 2 "memory_operand" "m"))
23484 (use (match_operand:HI 3 "memory_operand" "m"))
23485 (clobber (match_scratch:XF 4 "=&f"))]
23486 "TARGET_USE_FANCY_MATH_387
23487 && flag_unsafe_math_optimizations"
23488 "* return output_fix_trunc (insn, operands, false);"
23489 [(set_attr "type" "fistp")
23490 (set_attr "i387_cw" "<rounding>")
23491 (set_attr "mode" "DI")])
23492
;; SWI24 directed-rounding store from an XFmode register.  Same shape as
;; fistdi2_<rounding> (memory destination, two "use"d HImode memory
;; operands, same output routine) but without the XFmode scratch
;; clobber.
23493 (define_insn "fist<mode>2_<rounding>"
23494 [(set (match_operand:SWI24 0 "nonimmediate_operand" "=m")
23495 (unspec:SWI24 [(match_operand:XF 1 "register_operand" "f")]
23496 FIST_ROUNDING))
23497 (use (match_operand:HI 2 "memory_operand" "m"))
23498 (use (match_operand:HI 3 "memory_operand" "m"))]
23499 "TARGET_USE_FANCY_MATH_387
23500 && flag_unsafe_math_optimizations"
23501 "* return output_fix_trunc (insn, operands, false);"
23502 [(set_attr "type" "fistp")
23503 (set_attr "i387_cw" "<rounding>")
23504 (set_attr "mode" "<MODE>")])
23505
;; Named expanders lfloor/lceil from XFmode to the SWI248x modes.  No
;; preparation statements: each generates the parallel of the
;; FIST_ROUNDING unspec set and a FLAGS_REG clobber, the same shape as
;; *fist<mode>2_<rounding>_1.  Enabled for x87 fancy math when SSE math
;; is off or TARGET_MIX_SSE_I387 is set, under
;; flag_unsafe_math_optimizations.
23506 (define_expand "l<rounding_insn>xf<mode>2"
23507 [(parallel [(set (match_operand:SWI248x 0 "nonimmediate_operand")
23508 (unspec:SWI248x [(match_operand:XF 1 "register_operand")]
23509 FIST_ROUNDING))
23510 (clobber (reg:CC FLAGS_REG))])]
23511 "TARGET_USE_FANCY_MATH_387
23512 && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
23513 && flag_unsafe_math_optimizations")
23514
;; Named expanders lfloor/lceil from HFmode to the SWI48 modes under
;; TARGET_AVX512FP16.  Two steps: round operand 1 into a new HFmode
;; register with sse4_1_roundhf2 (ROUND_<ROUNDING> | ROUND_NO_EXC), then
;; convert that register to the integer destination with
;; fix_trunchf<mode>2.
23515 (define_expand "l<rounding_insn>hf<mode>2"
23516 [(set (match_operand:SWI48 0 "nonimmediate_operand")
23517 (unspec:SWI48 [(match_operand:HF 1 "register_operand")]
23518 FIST_ROUNDING))]
23519 "TARGET_AVX512FP16"
23520 {
23521 rtx tmp = gen_reg_rtx (HFmode);
23522 emit_insn (gen_sse4_1_roundhf2 (tmp, operands[1],
23523 GEN_INT (ROUND_<ROUNDING> | ROUND_NO_EXC)));
23524 emit_insn (gen_fix_trunchf<mode>2 (operands[0], tmp));
23525 DONE;
23526 })
23527
;; Named expanders lfloor/lceil from the MODEF modes to the SWI48 modes
;; when the float mode is handled by SSE math.
;;  - TARGET_SSE4_1: round into a new float register with
;;    sse4_1_round (ROUND_<ROUNDING> | ROUND_NO_EXC), then convert with
;;    fix_trunc.
;;  - Otherwise (the condition then requires !flag_trapping_math): call
;;    ix86_expand_lfloorceil, last argument true for floor and false for
;;    ceil.
23528 (define_expand "l<rounding_insn><MODEF:mode><SWI48:mode>2"
23529 [(parallel [(set (match_operand:SWI48 0 "nonimmediate_operand")
23530 (unspec:SWI48 [(match_operand:MODEF 1 "register_operand")]
23531 FIST_ROUNDING))
23532 (clobber (reg:CC FLAGS_REG))])]
23533 "SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH
23534 && (TARGET_SSE4_1 || !flag_trapping_math)"
23535 {
23536 if (TARGET_SSE4_1)
23537 {
23538 rtx tmp = gen_reg_rtx (<MODEF:MODE>mode);
23539
23540 emit_insn (gen_sse4_1_round<MODEF:mode>2
23541 (tmp, operands[1], GEN_INT (ROUND_<ROUNDING>
23542 | ROUND_NO_EXC)));
23543 emit_insn (gen_fix_trunc<MODEF:mode><SWI48:mode>2
23544 (operands[0], tmp));
23545 }
23546 else if (ROUND_<ROUNDING> == ROUND_FLOOR)
23547 ix86_expand_lfloorceil (operands[0], operands[1], true);
23548 else if (ROUND_<ROUNDING> == ROUND_CEIL)
23549 ix86_expand_lfloorceil (operands[0], operands[1], false);
23550 else
23551 gcc_unreachable ();
23552
23553 DONE;
23554 })
23555
;; Examine an X87MODEF register: emits "fxam" followed by "fnstsw %0",
;; modelled as UNSPEC_FXAM producing an HImode result.  The result
;; operand uses the "=a" constraint.  The "length" attribute is set to 4
;; for the two-instruction sequence.
23556 (define_insn "fxam<mode>2_i387"
23557 [(set (match_operand:HI 0 "register_operand" "=a")
23558 (unspec:HI
23559 [(match_operand:X87MODEF 1 "register_operand" "f")]
23560 UNSPEC_FXAM))]
23561 "TARGET_USE_FANCY_MATH_387"
23562 "fxam\n\tfnstsw\t%0"
23563 [(set_attr "type" "multi")
23564 (set_attr "length" "4")
23565 (set_attr "unit" "i387")
23566 (set_attr "mode" "<MODE>")])
23567
;; Expand signbit for TFmode into an SImode result (requires TARGET_SSE).
;;  - TARGET_SSE4_1: build a mask with ix86_build_signbit_mask (TFmode,
;;    0, 0), ptest operand 1 against it, materialize the NE condition on
;;    the CCZmode flags register into a QImode scratch, and zero-extend
;;    that into operand 0.
;;  - Otherwise: movmskps on operand 1 viewed as V4SFmode, then AND the
;;    result with 0x8.
;; Note the two routes leave different non-zero values in operand 0: the
;; setcc route comes from a zero-extended QImode flag, the movmskps
;; route leaves 0x8.
23568 (define_expand "signbittf2"
23569 [(use (match_operand:SI 0 "register_operand"))
23570 (use (match_operand:TF 1 "register_operand"))]
23571 "TARGET_SSE"
23572 {
23573 if (TARGET_SSE4_1)
23574 {
23575 rtx mask = ix86_build_signbit_mask (TFmode, 0, 0);
23576 rtx scratch = gen_reg_rtx (QImode);
23577
23578 emit_insn (gen_ptesttf2 (operands[1], mask));
23579 ix86_expand_setcc (scratch, NE,
23580 gen_rtx_REG (CCZmode, FLAGS_REG), const0_rtx);
23581
23582 emit_insn (gen_zero_extendqisi2 (operands[0], scratch));
23583 }
23584 else
23585 {
23586 emit_insn (gen_sse_movmskps (operands[0],
23587 gen_lowpart (V4SFmode, operands[1])));
23588 emit_insn (gen_andsi3 (operands[0], operands[0], GEN_INT (0x8)));
23589 }
23590 DONE;
23591 })
23592
;; Expand signbit for XFmode: run fxamxf2_i387 on operand 1 to get an
;; HImode status value in a scratch register, then AND its SImode
;; lowpart with 0x200 into operand 0.
;; NOTE(review): 0x200 is presumably the x87 status-word bit that fxam
;; sets from the sign -- confirm against the x87 documentation.
23593 (define_expand "signbitxf2"
23594 [(use (match_operand:SI 0 "register_operand"))
23595 (use (match_operand:XF 1 "register_operand"))]
23596 "TARGET_USE_FANCY_MATH_387"
23597 {
23598 rtx scratch = gen_reg_rtx (HImode);
23599
23600 emit_insn (gen_fxamxf2_i387 (scratch, operands[1]));
23601 emit_insn (gen_andsi3 (operands[0],
23602 gen_lowpart (SImode, scratch), GEN_INT (0x200)));
23603 DONE;
23604 })
23605
;; movmskpd on a scalar DFmode register, modelled as UNSPEC_MOVMSK with
;; an SImode result.  Two alternatives that differ only in the output
;; constraint ("=r" vs "=jr") and are gated to "noavx" and "avx"
;; respectively by the isa attribute.  Enabled when DFmode is handled by
;; SSE math.
23606 (define_insn "movmsk_df"
23607 [(set (match_operand:SI 0 "register_operand" "=r,jr")
23608 (unspec:SI
23609 [(match_operand:DF 1 "register_operand" "x,x")]
23610 UNSPEC_MOVMSK))]
23611 "SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH"
23612 "%vmovmskpd\t{%1, %0|%0, %1}"
23613 [(set_attr "isa" "noavx,avx")
23614 (set_attr "type" "ssemov")
23615 (set_attr "prefix" "maybe_evex")
23616 (set_attr "mode" "DF")])
23617
23618 ;; Use movmskpd in SSE mode to avoid store forwarding stall
23619 ;; for 32bit targets and movq+shrq sequence for 64bit targets.
;; Expand signbit for DFmode into an SImode result.
;;  - DFmode handled by SSE math: movmsk_df into operand 0, then AND
;;    with const1_rtx.
;;  - Otherwise (x87): fxamdf2_i387 into an HImode scratch, then AND its
;;    SImode lowpart with 0x200.
23620 (define_expand "signbitdf2"
23621 [(use (match_operand:SI 0 "register_operand"))
23622 (use (match_operand:DF 1 "register_operand"))]
23623 "TARGET_USE_FANCY_MATH_387
23624 || (SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH)"
23625 {
23626 if (SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH)
23627 {
23628 emit_insn (gen_movmsk_df (operands[0], operands[1]));
23629 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
23630 }
23631 else
23632 {
23633 rtx scratch = gen_reg_rtx (HImode);
23634
23635 emit_insn (gen_fxamdf2_i387 (scratch, operands[1]));
23636 emit_insn (gen_andsi3 (operands[0],
23637 gen_lowpart (SImode, scratch), GEN_INT (0x200)));
23638 }
23639 DONE;
23640 })
23641
;; Expand signbit for SFmode on x87 only: the condition excludes the
;; case where SFmode is handled by SSE math.  Uses fxamsf2_i387 into an
;; HImode scratch, then ANDs its SImode lowpart with 0x200 into
;; operand 0.
23642 (define_expand "signbitsf2"
23643 [(use (match_operand:SI 0 "register_operand"))
23644 (use (match_operand:SF 1 "register_operand"))]
23645 "TARGET_USE_FANCY_MATH_387
23646 && !(SSE_FLOAT_MODE_P (SFmode) && TARGET_SSE_MATH)"
23647 {
23648 rtx scratch = gen_reg_rtx (HImode);
23649
23650 emit_insn (gen_fxamsf2_i387 (scratch, operands[1]));
23651 emit_insn (gen_andsi3 (operands[0],
23652 gen_lowpart (SImode, scratch), GEN_INT (0x200)));
23653 DONE;
23654 })
23655 \f
23656 ;; Block operation instructions
23657
;; The "cld" instruction, modelled as an unspec_volatile (UNSPECV_CLD)
;; with an empty condition, so it is always available.  The attributes
;; record a 1-byte encoding with no immediate and no modrm byte.
23658 (define_insn "cld"
23659 [(unspec_volatile [(const_int 0)] UNSPECV_CLD)]
23660 ""
23661 "cld"
23662 [(set_attr "length" "1")
23663 (set_attr "length_immediate" "0")
23664 (set_attr "modrm" "0")])
23665
;; Block copy expander for the SWI48 modes.  All nine operands are
;; forwarded to ix86_expand_set_or_cpymem, with NULL as the fourth
;; argument and false as the last.  The expansion succeeds (DONE) when
;; that helper returns nonzero and FAILs otherwise.
23666 (define_expand "cpymem<mode>"
23667 [(use (match_operand:BLK 0 "memory_operand"))
23668 (use (match_operand:BLK 1 "memory_operand"))
23669 (use (match_operand:SWI48 2 "nonmemory_operand"))
23670 (use (match_operand:SWI48 3 "const_int_operand"))
23671 (use (match_operand:SI 4 "const_int_operand"))
23672 (use (match_operand:SI 5 "const_int_operand"))
23673 (use (match_operand:SI 6 ""))
23674 (use (match_operand:SI 7 ""))
23675 (use (match_operand:SI 8 ""))]
23676 ""
23677 {
23678 if (ix86_expand_set_or_cpymem (operands[0], operands[1],
23679 operands[2], NULL, operands[3],
23680 operands[4], operands[5],
23681 operands[6], operands[7],
23682 operands[8], false))
23683 DONE;
23684 else
23685 FAIL;
23686 })
23687
23688 ;; Most CPUs don't like single string operations
23689 ;; Handle this case here to simplify previous expander.
23690
;; Move one piece from memory operand 3 to memory operand 1 and advance
;; both pointer registers (operands 0 and 2) by the piece size.
;;  - FAILs when operand 3 is not in the generic address space.
;;  - piece_size is the size of operand 1's mode; operands 5 and 6 are
;;    built as operand 0 / operand 2 plus piece_size.
;;  - If TARGET_SINGLE_STRINGOP or optimizing for size, and neither
;;    SI_REG nor DI_REG is a fixed register, emit strmov_singleop and
;;    finish.
;;  - Otherwise operand 4 becomes a new register in operand 1's mode and
;;    the template is emitted: load into operand 4, store from it, then
;;    the two pointer additions (each with a FLAGS_REG clobber).
23691 (define_expand "strmov"
23692 [(set (match_dup 4) (match_operand 3 "memory_operand"))
23693 (set (match_operand 1 "memory_operand") (match_dup 4))
23694 (parallel [(set (match_operand 0 "register_operand") (match_dup 5))
23695 (clobber (reg:CC FLAGS_REG))])
23696 (parallel [(set (match_operand 2 "register_operand") (match_dup 6))
23697 (clobber (reg:CC FLAGS_REG))])]
23698 ""
23699 {
23700 /* Can't use this for non-default address spaces. */
23701 if (!ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (operands[3])))
23702 FAIL;
23703
23704 int piece_size = GET_MODE_SIZE (GET_MODE (operands[1]));
23705
23706 /* If .md ever supports :P for Pmode, these can be directly
23707 in the pattern above. */
23708 operands[5] = plus_constant (Pmode, operands[0], piece_size);
23709 operands[6] = plus_constant (Pmode, operands[2], piece_size);
23710
23711 /* Can't use this if the user has appropriated esi or edi. */
23712 if ((TARGET_SINGLE_STRINGOP || optimize_insn_for_size_p ())
23713 && !(fixed_regs[SI_REG] || fixed_regs[DI_REG]))
23714 {
23715 emit_insn (gen_strmov_singleop (operands[0], operands[1],
23716 operands[2], operands[3],
23717 operands[5], operands[6]));
23718 DONE;
23719 }
23720
23721 operands[4] = gen_reg_rtx (GET_MODE (operands[1]));
23722 })
23723
;; Single-instruction form of strmov: one parallel containing the
;; memory-to-memory move (operand 1 from operand 3) and the two pointer
;; updates (operand 0 from operand 4, operand 2 from operand 5).  The
;; body does not end in DONE, so the template is emitted.  Under
;; TARGET_CLD it first sets ix86_optimize_mode_switching[X86_DIRFLAG]
;; to 1.
23724 (define_expand "strmov_singleop"
23725 [(parallel [(set (match_operand 1 "memory_operand")
23726 (match_operand 3 "memory_operand"))
23727 (set (match_operand 0 "register_operand")
23728 (match_operand 4))
23729 (set (match_operand 2 "register_operand")
23730 (match_operand 5))])]
23731 ""
23732 {
23733 if (TARGET_CLD)
23734 ix86_optimize_mode_switching[X86_DIRFLAG] = 1;
23735 })
23736
;; movsq: copies a DImode value between the memory addressed by operand
;; 3 (source) and operand 2 (destination) and adds 8 to both pointers.
;; The updated pointers are outputs 0 ("=D") and 1 ("=S"); inputs 2 and
;; 3 are tied to them by the "0" and "1" constraints.  Requires
;; TARGET_64BIT, that SI_REG and DI_REG are not fixed, and that
;; ix86_check_no_addr_space (insn) holds.
23737 (define_insn "*strmovdi_rex_1"
23738 [(set (mem:DI (match_operand:P 2 "register_operand" "0"))
23739 (mem:DI (match_operand:P 3 "register_operand" "1")))
23740 (set (match_operand:P 0 "register_operand" "=D")
23741 (plus:P (match_dup 2)
23742 (const_int 8)))
23743 (set (match_operand:P 1 "register_operand" "=S")
23744 (plus:P (match_dup 3)
23745 (const_int 8)))]
23746 "TARGET_64BIT
23747 && !(fixed_regs[SI_REG] || fixed_regs[DI_REG])
23748 && ix86_check_no_addr_space (insn)"
23749 "%^movsq"
23750 [(set_attr "type" "str")
23751 (set_attr "memory" "both")
23752 (set_attr "mode" "DI")])
23753
;; movs{l|d}: SImode variant of the single string move.  Same operand
;; layout as *strmovdi_rex_1, with both pointers advanced by 4 and no
;; TARGET_64BIT requirement.
23754 (define_insn "*strmovsi_1"
23755 [(set (mem:SI (match_operand:P 2 "register_operand" "0"))
23756 (mem:SI (match_operand:P 3 "register_operand" "1")))
23757 (set (match_operand:P 0 "register_operand" "=D")
23758 (plus:P (match_dup 2)
23759 (const_int 4)))
23760 (set (match_operand:P 1 "register_operand" "=S")
23761 (plus:P (match_dup 3)
23762 (const_int 4)))]
23763 "!(fixed_regs[SI_REG] || fixed_regs[DI_REG])
23764 && ix86_check_no_addr_space (insn)"
23765 "%^movs{l|d}"
23766 [(set_attr "type" "str")
23767 (set_attr "memory" "both")
23768 (set_attr "mode" "SI")])
23769
;; movsw: HImode variant of the single string move.  Same operand layout
;; as *strmovsi_1, with both pointers advanced by 2.
23770 (define_insn "*strmovhi_1"
23771 [(set (mem:HI (match_operand:P 2 "register_operand" "0"))
23772 (mem:HI (match_operand:P 3 "register_operand" "1")))
23773 (set (match_operand:P 0 "register_operand" "=D")
23774 (plus:P (match_dup 2)
23775 (const_int 2)))
23776 (set (match_operand:P 1 "register_operand" "=S")
23777 (plus:P (match_dup 3)
23778 (const_int 2)))]
23779 "!(fixed_regs[SI_REG] || fixed_regs[DI_REG])
23780 && ix86_check_no_addr_space (insn)"
23781 "%^movsw"
23782 [(set_attr "type" "str")
23783 (set_attr "memory" "both")
23784 (set_attr "mode" "HI")])
23785
;; Single-step QImode string move, emitted as "movsb".  Memory at operand
;; 3 is copied to memory at operand 2 and both pointer outputs (operands
;; 0 and 1) advance by 1.  Requires SI_REG and DI_REG not to be fixed and
;; ix86_check_no_addr_space (insn).  The "prefix_rex" attribute is "0"
;; when the P iterator mode is DImode and "*" otherwise.
23786 (define_insn "*strmovqi_1"
23787 [(set (mem:QI (match_operand:P 2 "register_operand" "0"))
23788 (mem:QI (match_operand:P 3 "register_operand" "1")))
23789 (set (match_operand:P 0 "register_operand" "=D")
23790 (plus:P (match_dup 2)
23791 (const_int 1)))
23792 (set (match_operand:P 1 "register_operand" "=S")
23793 (plus:P (match_dup 3)
23794 (const_int 1)))]
23795 "!(fixed_regs[SI_REG] || fixed_regs[DI_REG])
23796 && ix86_check_no_addr_space (insn)"
23797 "%^movsb"
23798 [(set_attr "type" "str")
23799 (set_attr "memory" "both")
23800 (set (attr "prefix_rex")
23801 (if_then_else
23802 (match_test "<P:MODE>mode == DImode")
23803 (const_string "0")
23804 (const_string "*")))
23805 (set_attr "mode" "QI")])
23806
;; Expander that builds the parallel for a repeated string move.
;; Operand 4 is set to zero, registers 0 and 2 are set to the
;; caller-supplied expressions in operands 5 and 6, memory operand 1 is
;; set from memory operand 3, and operand 4 is also recorded as used.
;; When TARGET_CLD is set, the preparation code marks X86_DIRFLAG in
;; ix86_optimize_mode_switching.
23807 (define_expand "rep_mov"
23808 [(parallel [(set (match_operand 4 "register_operand") (const_int 0))
23809 (set (match_operand 0 "register_operand")
23810 (match_operand 5))
23811 (set (match_operand 2 "register_operand")
23812 (match_operand 6))
23813 (set (match_operand 1 "memory_operand")
23814 (match_operand 3 "memory_operand"))
23815 (use (match_dup 4))])]
23816 ""
23817 {
23818 if (TARGET_CLD)
23819 ix86_optimize_mode_switching[X86_DIRFLAG] = 1;
23820 })
23821
;; Repeated DImode string move, emitted as "rep movsq".  Operand 5 is the
;; input count, tied to output operand 2 (constraint "c"), which ends up
;; zero.  The pointer outputs become the input pointers plus the count
;; shifted left by 3: operand 0 = operand 3 + (operand 5 << 3) and
;; operand 1 = operand 4 + (operand 5 << 3).  The BLK memory at operand 3
;; is set from the BLK memory at operand 4.  Requires TARGET_64BIT, that
;; CX_REG, SI_REG and DI_REG are not fixed, and
;; ix86_check_no_addr_space (insn).
23822 (define_insn "*rep_movdi_rex64"
23823 [(set (match_operand:P 2 "register_operand" "=c") (const_int 0))
23824 (set (match_operand:P 0 "register_operand" "=D")
23825 (plus:P (ashift:P (match_operand:P 5 "register_operand" "2")
23826 (const_int 3))
23827 (match_operand:P 3 "register_operand" "0")))
23828 (set (match_operand:P 1 "register_operand" "=S")
23829 (plus:P (ashift:P (match_dup 5) (const_int 3))
23830 (match_operand:P 4 "register_operand" "1")))
23831 (set (mem:BLK (match_dup 3))
23832 (mem:BLK (match_dup 4)))
23833 (use (match_dup 5))]
23834 "TARGET_64BIT
23835 && !(fixed_regs[CX_REG] || fixed_regs[SI_REG] || fixed_regs[DI_REG])
23836 && ix86_check_no_addr_space (insn)"
23837 "%^rep{%;} movsq"
23838 [(set_attr "type" "str")
23839 (set_attr "prefix_rep" "1")
23840 (set_attr "memory" "both")
23841 (set_attr "mode" "DI")])
23842
;; Repeated SImode string move, emitted as "rep movs{l|d}".  Same shape
;; as the DImode variant but the count (operand 5, tied to operand 2) is
;; shifted left by 2 when added to the pointers, and there is no
;; TARGET_64BIT requirement.  Operand 2 ends up zero.
23843 (define_insn "*rep_movsi"
23844 [(set (match_operand:P 2 "register_operand" "=c") (const_int 0))
23845 (set (match_operand:P 0 "register_operand" "=D")
23846 (plus:P (ashift:P (match_operand:P 5 "register_operand" "2")
23847 (const_int 2))
23848 (match_operand:P 3 "register_operand" "0")))
23849 (set (match_operand:P 1 "register_operand" "=S")
23850 (plus:P (ashift:P (match_dup 5) (const_int 2))
23851 (match_operand:P 4 "register_operand" "1")))
23852 (set (mem:BLK (match_dup 3))
23853 (mem:BLK (match_dup 4)))
23854 (use (match_dup 5))]
23855 "!(fixed_regs[CX_REG] || fixed_regs[SI_REG] || fixed_regs[DI_REG])
23856 && ix86_check_no_addr_space (insn)"
23857 "%^rep{%;} movs{l|d}"
23858 [(set_attr "type" "str")
23859 (set_attr "prefix_rep" "1")
23860 (set_attr "memory" "both")
23861 (set_attr "mode" "SI")])
23862
;; Repeated QImode string move, emitted as "rep movsb".  The count
;; (operand 5, tied to operand 2) is added unshifted to the input
;; pointers: operand 0 = operand 3 + operand 5 and operand 1 = operand 4
;; + operand 5.  Operand 2 ends up zero and the BLK memory at operand 3
;; is set from the BLK memory at operand 4.
23863 (define_insn "*rep_movqi"
23864 [(set (match_operand:P 2 "register_operand" "=c") (const_int 0))
23865 (set (match_operand:P 0 "register_operand" "=D")
23866 (plus:P (match_operand:P 3 "register_operand" "0")
23867 (match_operand:P 5 "register_operand" "2")))
23868 (set (match_operand:P 1 "register_operand" "=S")
23869 (plus:P (match_operand:P 4 "register_operand" "1") (match_dup 5)))
23870 (set (mem:BLK (match_dup 3))
23871 (mem:BLK (match_dup 4)))
23872 (use (match_dup 5))]
23873 "!(fixed_regs[CX_REG] || fixed_regs[SI_REG] || fixed_regs[DI_REG])
23874 && ix86_check_no_addr_space (insn)"
23875 "%^rep{%;} movsb"
23876 [(set_attr "type" "str")
23877 (set_attr "prefix_rep" "1")
23878 (set_attr "memory" "both")
23879 (set_attr "mode" "QI")])
23880
;; Expander for the setmem pattern, instantiated over the SWI48 modes of
;; operand 1.  All the work is delegated to ix86_expand_set_or_cpymem,
;; which receives operand 0, a NULL second argument, operands 1 through
;; 8, and true as its last argument.  The expansion is DONE when that
;; call returns nonzero and FAILs otherwise.
23881 (define_expand "setmem<mode>"
23882 [(use (match_operand:BLK 0 "memory_operand"))
23883 (use (match_operand:SWI48 1 "nonmemory_operand"))
23884 (use (match_operand:QI 2 "nonmemory_operand"))
23885 (use (match_operand 3 "const_int_operand"))
23886 (use (match_operand:SI 4 "const_int_operand"))
23887 (use (match_operand:SI 5 "const_int_operand"))
23888 (use (match_operand:SI 6 ""))
23889 (use (match_operand:SI 7 ""))
23890 (use (match_operand:SI 8 ""))]
23891 ""
23892 {
23893 if (ix86_expand_set_or_cpymem (operands[0], NULL,
23894 operands[1], operands[2],
23895 operands[3], operands[4],
23896 operands[5], operands[6],
23897 operands[7], operands[8], true))
23898 DONE;
23899 else
23900 FAIL;
23901 })
23902
23903 ;; Most CPUs don't like single string operations.
23904 ;; Handle this case here to simplify the previous expander.
23905
;; Expander for a single store of register operand 2 to memory operand 1
;; followed by an update of pointer register operand 0.  The default
;; expansion is two insns: the store, then a parallel that sets operand 0
;; to operand 3 and clobbers FLAGS_REG.  The preparation code
;;  - FAILs when operand 1 is not in the generic address space;
;;  - re-modes operand 1 to the mode of operand 2 when they differ;
;;  - computes operand 3 as operand 0 plus the byte size of operand 2's
;;    mode;
;;  - emits gen_strset_singleop instead (and is DONE) when
;;    TARGET_SINGLE_STRINGOP or optimize_insn_for_size_p () holds and
;;    neither AX_REG nor DI_REG is fixed.
23906 (define_expand "strset"
23907 [(set (match_operand 1 "memory_operand")
23908 (match_operand 2 "register_operand"))
23909 (parallel [(set (match_operand 0 "register_operand")
23910 (match_dup 3))
23911 (clobber (reg:CC FLAGS_REG))])]
23912 ""
23913 {
23914 /* Can't use this for non-default address spaces. */
23915 if (!ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (operands[1])))
23916 FAIL;
23917
23918 if (GET_MODE (operands[1]) != GET_MODE (operands[2]))
23919 operands[1] = adjust_address_nv (operands[1], GET_MODE (operands[2]), 0);
23920
23921 /* If .md ever supports :P for Pmode, this can be directly
23922 in the pattern above. */
23923 operands[3] = plus_constant (Pmode, operands[0],
23924 GET_MODE_SIZE (GET_MODE (operands[2])));
23925
23926 /* Can't use this if the user has appropriated eax or edi. */
23927 if ((TARGET_SINGLE_STRINGOP || optimize_insn_for_size_p ())
23928 && !(fixed_regs[AX_REG] || fixed_regs[DI_REG]))
23929 {
23930 emit_insn (gen_strset_singleop (operands[0], operands[1], operands[2],
23931 operands[3]));
23932 DONE;
23933 }
23934 })
23935
;; Expander that builds the single-insn form of a string store: one
;; parallel that stores register operand 2 to memory operand 1, sets
;; register operand 0 to operand 3, and carries an UNSPEC_STOS marker.
;; When TARGET_CLD is set, the preparation code marks X86_DIRFLAG in
;; ix86_optimize_mode_switching.
23936 (define_expand "strset_singleop"
23937 [(parallel [(set (match_operand 1 "memory_operand")
23938 (match_operand 2 "register_operand"))
23939 (set (match_operand 0 "register_operand")
23940 (match_operand 3))
23941 (unspec [(const_int 0)] UNSPEC_STOS)])]
23942 ""
23943 {
23944 if (TARGET_CLD)
23945 ix86_optimize_mode_switching[X86_DIRFLAG] = 1;
23946 })
23947
;; Single-step DImode string store, emitted as "stosq".  Register operand
;; 2 (constraint "a") is stored to the DImode memory at the address in
;; operand 1, and pointer output operand 0 (constraint "D", tied to
;; operand 1) becomes that address plus 8.  Requires TARGET_64BIT, that
;; neither AX_REG nor DI_REG is fixed, and
;; ix86_check_no_addr_space (insn).
23948 (define_insn "*strsetdi_rex_1"
23949 [(set (mem:DI (match_operand:P 1 "register_operand" "0"))
23950 (match_operand:DI 2 "register_operand" "a"))
23951 (set (match_operand:P 0 "register_operand" "=D")
23952 (plus:P (match_dup 1)
23953 (const_int 8)))
23954 (unspec [(const_int 0)] UNSPEC_STOS)]
23955 "TARGET_64BIT
23956 && !(fixed_regs[AX_REG] || fixed_regs[DI_REG])
23957 && ix86_check_no_addr_space (insn)"
23958 "%^stosq"
23959 [(set_attr "type" "str")
23960 (set_attr "memory" "store")
23961 (set_attr "mode" "DI")])
23962
;; Single-step SImode string store, emitted as "stos{l|d}".  Register
;; operand 2 (constraint "a") is stored at the address in operand 1 and
;; pointer output operand 0 advances by 4.  Requires AX_REG and DI_REG
;; not to be fixed and ix86_check_no_addr_space (insn).
23963 (define_insn "*strsetsi_1"
23964 [(set (mem:SI (match_operand:P 1 "register_operand" "0"))
23965 (match_operand:SI 2 "register_operand" "a"))
23966 (set (match_operand:P 0 "register_operand" "=D")
23967 (plus:P (match_dup 1)
23968 (const_int 4)))
23969 (unspec [(const_int 0)] UNSPEC_STOS)]
23970 "!(fixed_regs[AX_REG] || fixed_regs[DI_REG])
23971 && ix86_check_no_addr_space (insn)"
23972 "%^stos{l|d}"
23973 [(set_attr "type" "str")
23974 (set_attr "memory" "store")
23975 (set_attr "mode" "SI")])
23976
;; Single-step HImode string store, emitted as "stosw".  Register operand
;; 2 (constraint "a") is stored at the address in operand 1 and pointer
;; output operand 0 advances by 2.  Requires AX_REG and DI_REG not to be
;; fixed and ix86_check_no_addr_space (insn).
23977 (define_insn "*strsethi_1"
23978 [(set (mem:HI (match_operand:P 1 "register_operand" "0"))
23979 (match_operand:HI 2 "register_operand" "a"))
23980 (set (match_operand:P 0 "register_operand" "=D")
23981 (plus:P (match_dup 1)
23982 (const_int 2)))
23983 (unspec [(const_int 0)] UNSPEC_STOS)]
23984 "!(fixed_regs[AX_REG] || fixed_regs[DI_REG])
23985 && ix86_check_no_addr_space (insn)"
23986 "%^stosw"
23987 [(set_attr "type" "str")
23988 (set_attr "memory" "store")
23989 (set_attr "mode" "HI")])
23990
;; Single-step QImode string store, emitted as "stosb".  Register operand
;; 2 (constraint "a") is stored at the address in operand 1 and pointer
;; output operand 0 advances by 1.  Requires AX_REG and DI_REG not to be
;; fixed and ix86_check_no_addr_space (insn).  The "prefix_rex" attribute
;; is "0" when the P iterator mode is DImode and "*" otherwise.
23991 (define_insn "*strsetqi_1"
23992 [(set (mem:QI (match_operand:P 1 "register_operand" "0"))
23993 (match_operand:QI 2 "register_operand" "a"))
23994 (set (match_operand:P 0 "register_operand" "=D")
23995 (plus:P (match_dup 1)
23996 (const_int 1)))
23997 (unspec [(const_int 0)] UNSPEC_STOS)]
23998 "!(fixed_regs[AX_REG] || fixed_regs[DI_REG])
23999 && ix86_check_no_addr_space (insn)"
24000 "%^stosb"
24001 [(set_attr "type" "str")
24002 (set_attr "memory" "store")
24003 (set (attr "prefix_rex")
24004 (if_then_else
24005 (match_test "<P:MODE>mode == DImode")
24006 (const_string "0")
24007 (const_string "*")))
24008 (set_attr "mode" "QI")])
24009
;; Expander that builds the parallel for a repeated string store.
;; Register operand 1 is set to zero, register operand 0 is set to the
;; caller-supplied expression in operand 4, memory operand 2 is set to
;; (const_int 0), and operands 3 and 1 are recorded as used.  When
;; TARGET_CLD is set, the preparation code marks X86_DIRFLAG in
;; ix86_optimize_mode_switching.
24010 (define_expand "rep_stos"
24011 [(parallel [(set (match_operand 1 "register_operand") (const_int 0))
24012 (set (match_operand 0 "register_operand")
24013 (match_operand 4))
24014 (set (match_operand 2 "memory_operand") (const_int 0))
24015 (use (match_operand 3 "register_operand"))
24016 (use (match_dup 1))])]
24017 ""
24018 {
24019 if (TARGET_CLD)
24020 ix86_optimize_mode_switching[X86_DIRFLAG] = 1;
24021 })
24022
;; Repeated DImode string store, emitted as "rep stosq".  Operand 4 is
;; the input count, tied to output operand 1 (constraint "c"), which ends
;; up zero.  Pointer output operand 0 becomes operand 3 + (operand 4 <<
;; 3).  The BLK memory at operand 3 is written (modelled in the RTL as a
;; set to const_int 0) and DImode register operand 2 (constraint "a") is
;; recorded as used.  Requires TARGET_64BIT, that AX_REG, CX_REG and
;; DI_REG are not fixed, and ix86_check_no_addr_space (insn).
24023 (define_insn "*rep_stosdi_rex64"
24024 [(set (match_operand:P 1 "register_operand" "=c") (const_int 0))
24025 (set (match_operand:P 0 "register_operand" "=D")
24026 (plus:P (ashift:P (match_operand:P 4 "register_operand" "1")
24027 (const_int 3))
24028 (match_operand:P 3 "register_operand" "0")))
24029 (set (mem:BLK (match_dup 3))
24030 (const_int 0))
24031 (use (match_operand:DI 2 "register_operand" "a"))
24032 (use (match_dup 4))]
24033 "TARGET_64BIT
24034 && !(fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
24035 && ix86_check_no_addr_space (insn)"
24036 "%^rep{%;} stosq"
24037 [(set_attr "type" "str")
24038 (set_attr "prefix_rep" "1")
24039 (set_attr "memory" "store")
24040 (set_attr "mode" "DI")])
24041
;; Repeated SImode string store, emitted as "rep stos{l|d}".  Same shape
;; as the DImode variant but the count (operand 4, tied to operand 1) is
;; shifted left by 2, the used value register (operand 2) is SImode, and
;; there is no TARGET_64BIT requirement.
24042 (define_insn "*rep_stossi"
24043 [(set (match_operand:P 1 "register_operand" "=c") (const_int 0))
24044 (set (match_operand:P 0 "register_operand" "=D")
24045 (plus:P (ashift:P (match_operand:P 4 "register_operand" "1")
24046 (const_int 2))
24047 (match_operand:P 3 "register_operand" "0")))
24048 (set (mem:BLK (match_dup 3))
24049 (const_int 0))
24050 (use (match_operand:SI 2 "register_operand" "a"))
24051 (use (match_dup 4))]
24052 "!(fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
24053 && ix86_check_no_addr_space (insn)"
24054 "%^rep{%;} stos{l|d}"
24055 [(set_attr "type" "str")
24056 (set_attr "prefix_rep" "1")
24057 (set_attr "memory" "store")
24058 (set_attr "mode" "SI")])
24059
;; Repeated QImode string store, emitted as "rep stosb".  The count
;; (operand 4, tied to operand 1) is added unshifted: operand 0 = operand
;; 3 + operand 4.  Operand 1 ends up zero and QImode register operand 2
;; (constraint "a") is recorded as used.  The "prefix_rex" attribute is
;; "0" when the P iterator mode is DImode and "*" otherwise.
24060 (define_insn "*rep_stosqi"
24061 [(set (match_operand:P 1 "register_operand" "=c") (const_int 0))
24062 (set (match_operand:P 0 "register_operand" "=D")
24063 (plus:P (match_operand:P 3 "register_operand" "0")
24064 (match_operand:P 4 "register_operand" "1")))
24065 (set (mem:BLK (match_dup 3))
24066 (const_int 0))
24067 (use (match_operand:QI 2 "register_operand" "a"))
24068 (use (match_dup 4))]
24069 "!(fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
24070 && ix86_check_no_addr_space (insn)"
24071 "%^rep{%;} stosb"
24072 [(set_attr "type" "str")
24073 (set_attr "prefix_rep" "1")
24074 (set_attr "memory" "store")
24075 (set (attr "prefix_rex")
24076 (if_then_else
24077 (match_test "<P:MODE>mode == DImode")
24078 (const_string "0")
24079 (const_string "*")))
24080 (set_attr "mode" "QI")])
24081
;; Expander for cmpmemsi: SImode result in operand 0 from comparing the
;; BLK memory operands 1 and 2.  Delegates to
;; ix86_expand_cmpstrn_or_cmpmem with operands 0-4 and false as the last
;; argument; DONE when that returns nonzero, FAIL otherwise.
24082 (define_expand "cmpmemsi"
24083 [(set (match_operand:SI 0 "register_operand" "")
24084 (compare:SI (match_operand:BLK 1 "memory_operand" "")
24085 (match_operand:BLK 2 "memory_operand" "") ) )
24086 (use (match_operand 3 "general_operand"))
24087 (use (match_operand 4 "immediate_operand"))]
24088 ""
24089 {
24090 if (ix86_expand_cmpstrn_or_cmpmem (operands[0], operands[1],
24091 operands[2], operands[3],
24092 operands[4], false))
24093 DONE;
24094 else
24095 FAIL;
24096 })
24097
;; Expander for cmpstrnsi: SImode result in operand 0 from comparing the
;; BLK operands 1 and 2 (general_operand here, not memory_operand).
;; Delegates to ix86_expand_cmpstrn_or_cmpmem with operands 0-4 and true
;; as the last argument; DONE when that returns nonzero, FAIL otherwise.
24098 (define_expand "cmpstrnsi"
24099 [(set (match_operand:SI 0 "register_operand")
24100 (compare:SI (match_operand:BLK 1 "general_operand")
24101 (match_operand:BLK 2 "general_operand")))
24102 (use (match_operand 3 "general_operand"))
24103 (use (match_operand 4 "immediate_operand"))]
24104 ""
24105 {
24106 if (ix86_expand_cmpstrn_or_cmpmem (operands[0], operands[1],
24107 operands[2], operands[3],
24108 operands[4], true))
24109 DONE;
24110 else
24111 FAIL;
24112 })
24113
24114 ;; Produce a tri-state integer (-1, 0, 1) from condition codes.
24115
;; Expands to three insns: operand 1 = gtu of the flags, operand 2 = ltu
;; of the flags, then operand 0 = operand 1 - operand 2 (in a parallel
;; that clobbers FLAGS_REG).  Operands 1 and 2 are fresh QImode pseudos
;; created by the preparation code; only operand 0 comes from the caller.
24116 (define_expand "cmpintqi"
24117 [(set (match_dup 1)
24118 (gtu:QI (reg:CC FLAGS_REG) (const_int 0)))
24119 (set (match_dup 2)
24120 (ltu:QI (reg:CC FLAGS_REG) (const_int 0)))
24121 (parallel [(set (match_operand:QI 0 "register_operand")
24122 (minus:QI (match_dup 1)
24123 (match_dup 2)))
24124 (clobber (reg:CC FLAGS_REG))])]
24125 ""
24126 {
24127 operands[1] = gen_reg_rtx (QImode);
24128 operands[2] = gen_reg_rtx (QImode);
24129 })
24130
24131 ;; memcmp recognizers. The `cmpsb' opcode does nothing if the count is
24132 ;; zero. Emit extra code to make sure that a zero-length compare is EQ.
24133
;; Expander that builds the parallel for the unconditional compare form:
;; FLAGS_REG is set from comparing memory operands 4 and 5, operand 2 and
;; the SImode immediate operand 3 are used, and registers 0, 1 and 2 are
;; clobbered.  When TARGET_CLD is set, the preparation code marks
;; X86_DIRFLAG in ix86_optimize_mode_switching.
24134 (define_expand "cmpstrnqi_nz_1"
24135 [(parallel [(set (reg:CC FLAGS_REG)
24136 (compare:CC (match_operand 4 "memory_operand")
24137 (match_operand 5 "memory_operand")))
24138 (use (match_operand 2 "register_operand"))
24139 (use (match_operand:SI 3 "immediate_operand"))
24140 (clobber (match_operand 0 "register_operand"))
24141 (clobber (match_operand 1 "register_operand"))
24142 (clobber (match_operand 2 "register_operand"))])]
24143 ""
24144 {
24145 if (TARGET_CLD)
24146 ix86_optimize_mode_switching[X86_DIRFLAG] = 1;
24147 })
24148
;; Insn emitted as "repz cmpsb".  Sets FLAGS_REG from comparing the BLK
;; memory at the addresses in operands 4 and 5, which are tied to the
;; clobbered registers 0 (constraint "S") and 1 (constraint "D").
;; Operand 6 is the used count, tied to clobbered register 2 (constraint
;; "c"); operand 3 is a used SImode immediate.  Requires CX_REG, SI_REG
;; and DI_REG not to be fixed and ix86_check_no_addr_space (insn).
24149 (define_insn "*cmpstrnqi_nz_1"
24150 [(set (reg:CC FLAGS_REG)
24151 (compare:CC (mem:BLK (match_operand:P 4 "register_operand" "0"))
24152 (mem:BLK (match_operand:P 5 "register_operand" "1"))))
24153 (use (match_operand:P 6 "register_operand" "2"))
24154 (use (match_operand:SI 3 "immediate_operand" "i"))
24155 (clobber (match_operand:P 0 "register_operand" "=S"))
24156 (clobber (match_operand:P 1 "register_operand" "=D"))
24157 (clobber (match_operand:P 2 "register_operand" "=c"))]
24158 "!(fixed_regs[CX_REG] || fixed_regs[SI_REG] || fixed_regs[DI_REG])
24159 && ix86_check_no_addr_space (insn)"
24160 "%^repz{%;} cmpsb"
24161 [(set_attr "type" "str")
24162 (set_attr "mode" "QI")
24163 (set (attr "prefix_rex")
24164 (if_then_else
24165 (match_test "<P:MODE>mode == DImode")
24166 (const_string "0")
24167 (const_string "*")))
24168 (set_attr "prefix_rep" "1")])
24169
24170 ;; The same, but the count is not known to be nonzero.
24171
;; Expander that builds the parallel for the conditional compare form:
;; FLAGS_REG receives the comparison of memory operands 4 and 5 when
;; register operand 2 is nonzero, and keeps its previous value otherwise.
;; The SImode immediate operand 3 is used and registers 0, 1 and 2 are
;; clobbered.  When TARGET_CLD is set, the preparation code marks
;; X86_DIRFLAG in ix86_optimize_mode_switching.
24172 (define_expand "cmpstrnqi_1"
24173 [(parallel [(set (reg:CC FLAGS_REG)
24174 (if_then_else:CC (ne (match_operand 2 "register_operand")
24175 (const_int 0))
24176 (compare:CC (match_operand 4 "memory_operand")
24177 (match_operand 5 "memory_operand"))
24178 (reg:CC FLAGS_REG)))
24179 (use (match_operand:SI 3 "immediate_operand"))
24180 (clobber (match_operand 0 "register_operand"))
24181 (clobber (match_operand 1 "register_operand"))
24182 (clobber (match_dup 2))])]
24183 ""
24184 {
24185 if (TARGET_CLD)
24186 ix86_optimize_mode_switching[X86_DIRFLAG] = 1;
24187 })
24188
;; Insn emitted as "repz cmpsb" for the conditional compare form.
;; FLAGS_REG receives the comparison of the BLK memory at operands 4 and
;; 5 when the count (operand 6, tied to clobbered register 2, constraint
;; "c") is nonzero, and its previous value otherwise.  Operands 4 and 5
;; are tied to the clobbered registers 0 ("S") and 1 ("D").  Requires
;; CX_REG, SI_REG and DI_REG not to be fixed and
;; ix86_check_no_addr_space (insn).
24189 (define_insn "*cmpstrnqi_1"
24190 [(set (reg:CC FLAGS_REG)
24191 (if_then_else:CC (ne (match_operand:P 6 "register_operand" "2")
24192 (const_int 0))
24193 (compare:CC (mem:BLK (match_operand:P 4 "register_operand" "0"))
24194 (mem:BLK (match_operand:P 5 "register_operand" "1")))
24195 (reg:CC FLAGS_REG)))
24196 (use (match_operand:SI 3 "immediate_operand" "i"))
24197 (clobber (match_operand:P 0 "register_operand" "=S"))
24198 (clobber (match_operand:P 1 "register_operand" "=D"))
24199 (clobber (match_operand:P 2 "register_operand" "=c"))]
24200 "!(fixed_regs[CX_REG] || fixed_regs[SI_REG] || fixed_regs[DI_REG])
24201 && ix86_check_no_addr_space (insn)"
24202 "%^repz{%;} cmpsb"
24203 [(set_attr "type" "str")
24204 (set_attr "mode" "QI")
24205 (set (attr "prefix_rex")
24206 (if_then_else
24207 (match_test "<P:MODE>mode == DImode")
24208 (const_string "0")
24209 (const_string "*")))
24210 (set_attr "prefix_rep" "1")])
24211
;; Expander for strlen, instantiated over the P modes of operand 0.  The
;; RTL template is an UNSPEC_SCAS of BLK operand 1, QImode immediate
;; operand 2 and immediate operand 3.  All the work is delegated to
;; ix86_expand_strlen (operands 0-3); DONE when it returns nonzero, FAIL
;; otherwise.
24212 (define_expand "strlen<mode>"
24213 [(set (match_operand:P 0 "register_operand")
24214 (unspec:P [(match_operand:BLK 1 "general_operand")
24215 (match_operand:QI 2 "immediate_operand")
24216 (match_operand 3 "immediate_operand")]
24217 UNSPEC_SCAS))]
24218 ""
24219 {
24220 if (ix86_expand_strlen (operands[0], operands[1], operands[2], operands[3]))
24221 DONE;
24222 else
24223 FAIL;
24224 })
24225
;; Expander that builds a parallel setting register operand 0 to the
;; caller-supplied expression in operand 2 while clobbering register
;; operand 1 and FLAGS_REG.  When TARGET_CLD is set, the preparation code
;; marks X86_DIRFLAG in ix86_optimize_mode_switching.
24226 (define_expand "strlenqi_1"
24227 [(parallel [(set (match_operand 0 "register_operand")
24228 (match_operand 2))
24229 (clobber (match_operand 1 "register_operand"))
24230 (clobber (reg:CC FLAGS_REG))])]
24231 ""
24232 {
24233 if (TARGET_CLD)
24234 ix86_optimize_mode_switching[X86_DIRFLAG] = 1;
24235 })
24236
;; Insn emitted as "repnz scasb".  Output operand 0 (earlyclobber,
;; constraint "c") is an UNSPEC_SCAS of: the BLK memory at the address in
;; operand 5 (tied to clobbered register 1, constraint "D"), QImode
;; register operand 2 (constraint "a"), immediate operand 3, and register
;; operand 4 (tied to operand 0).  FLAGS_REG is clobbered.  Requires
;; AX_REG, CX_REG and DI_REG not to be fixed and
;; ix86_check_no_addr_space (insn).
24237 (define_insn "*strlenqi_1"
24238 [(set (match_operand:P 0 "register_operand" "=&c")
24239 (unspec:P [(mem:BLK (match_operand:P 5 "register_operand" "1"))
24240 (match_operand:QI 2 "register_operand" "a")
24241 (match_operand:P 3 "immediate_operand" "i")
24242 (match_operand:P 4 "register_operand" "0")] UNSPEC_SCAS))
24243 (clobber (match_operand:P 1 "register_operand" "=D"))
24244 (clobber (reg:CC FLAGS_REG))]
24245 "!(fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
24246 && ix86_check_no_addr_space (insn)"
24247 "%^repnz{%;} scasb"
24248 [(set_attr "type" "str")
24249 (set_attr "mode" "QI")
24250 (set (attr "prefix_rex")
24251 (if_then_else
24252 (match_test "<P:MODE>mode == DImode")
24253 (const_string "0")
24254 (const_string "*")))
24255 (set_attr "prefix_rep" "1")])
24256
24257 ;; Peephole optimizations to clean up after cmpstrn*. This should be
24258 ;; handled in combine, but it is not currently up to the task.
24259 ;; When used for their truth value, the cmpstrn* expanders generate
24260 ;; code like this:
24261 ;;
24262 ;; repz cmpsb
24263 ;; seta %al
24264 ;; setb %dl
24265 ;; cmpb %al, %dl
24266 ;; jcc label
24267 ;;
24268 ;; The intermediate three instructions are unnecessary.
24269
24270 ;; This one handles cmpstrn*_nz_1...
;; Matches four insns: the unconditional string-compare parallel, a gtu
;; set into QImode operand 7, an ltu set into QImode operand 8, and a
;; compare of operands 7 and 8 into FLAGS_REG.  The replacement keeps
;; only the string-compare parallel, rebuilt from the matched operands.
;; Applies only when peep2_reg_dead_p (4, ...) holds for both operand 7
;; and operand 8.
24271 (define_peephole2
24272 [(parallel[
24273 (set (reg:CC FLAGS_REG)
24274 (compare:CC (mem:BLK (match_operand 4 "register_operand"))
24275 (mem:BLK (match_operand 5 "register_operand"))))
24276 (use (match_operand 6 "register_operand"))
24277 (use (match_operand:SI 3 "immediate_operand"))
24278 (clobber (match_operand 0 "register_operand"))
24279 (clobber (match_operand 1 "register_operand"))
24280 (clobber (match_operand 2 "register_operand"))])
24281 (set (match_operand:QI 7 "register_operand")
24282 (gtu:QI (reg:CC FLAGS_REG) (const_int 0)))
24283 (set (match_operand:QI 8 "register_operand")
24284 (ltu:QI (reg:CC FLAGS_REG) (const_int 0)))
24285 (set (reg FLAGS_REG)
24286 (compare (match_dup 7) (match_dup 8)))
24287 ]
24288 "peep2_reg_dead_p (4, operands[7]) && peep2_reg_dead_p (4, operands[8])"
24289 [(parallel[
24290 (set (reg:CC FLAGS_REG)
24291 (compare:CC (mem:BLK (match_dup 4))
24292 (mem:BLK (match_dup 5))))
24293 (use (match_dup 6))
24294 (use (match_dup 3))
24295 (clobber (match_dup 0))
24296 (clobber (match_dup 1))
24297 (clobber (match_dup 2))])])
24298
24299 ;; ...and this one handles cmpstrn*_1.
;; Matches four insns: the conditional string-compare parallel (compare
;; only when operand 6 is nonzero, otherwise FLAGS_REG is kept), a gtu
;; set into QImode operand 7, an ltu set into QImode operand 8, and a
;; compare of operands 7 and 8 into FLAGS_REG.  The replacement keeps
;; only the string-compare parallel, rebuilt from the matched operands.
;; Applies only when peep2_reg_dead_p (4, ...) holds for both operand 7
;; and operand 8.
24300 (define_peephole2
24301 [(parallel[
24302 (set (reg:CC FLAGS_REG)
24303 (if_then_else:CC (ne (match_operand 6 "register_operand")
24304 (const_int 0))
24305 (compare:CC (mem:BLK (match_operand 4 "register_operand"))
24306 (mem:BLK (match_operand 5 "register_operand")))
24307 (reg:CC FLAGS_REG)))
24308 (use (match_operand:SI 3 "immediate_operand"))
24309 (clobber (match_operand 0 "register_operand"))
24310 (clobber (match_operand 1 "register_operand"))
24311 (clobber (match_operand 2 "register_operand"))])
24312 (set (match_operand:QI 7 "register_operand")
24313 (gtu:QI (reg:CC FLAGS_REG) (const_int 0)))
24314 (set (match_operand:QI 8 "register_operand")
24315 (ltu:QI (reg:CC FLAGS_REG) (const_int 0)))
24316 (set (reg FLAGS_REG)
24317 (compare (match_dup 7) (match_dup 8)))
24318 ]
24319 "peep2_reg_dead_p (4, operands[7]) && peep2_reg_dead_p (4, operands[8])"
24320 [(parallel[
24321 (set (reg:CC FLAGS_REG)
24322 (if_then_else:CC (ne (match_dup 6)
24323 (const_int 0))
24324 (compare:CC (mem:BLK (match_dup 4))
24325 (mem:BLK (match_dup 5)))
24326 (reg:CC FLAGS_REG)))
24327 (use (match_dup 3))
24328 (clobber (match_dup 0))
24329 (clobber (match_dup 1))
24330 (clobber (match_dup 2))])])
24331 \f
24332 ;; Conditional move instructions.
24333
;; Integer conditional-move expander, instantiated over the SWIM modes.
;; Operand 1 is the comparison; operands 2 and 3 are the two candidate
;; values.  All the work is delegated to ix86_expand_int_movcc
;; (operands); DONE when it returns nonzero, FAIL otherwise.
24334 (define_expand "mov<mode>cc"
24335 [(set (match_operand:SWIM 0 "register_operand")
24336 (if_then_else:SWIM (match_operand 1 "comparison_operator")
24337 (match_operand:SWIM 2 "<general_operand>")
24338 (match_operand:SWIM 3 "<general_operand>")))]
24339 ""
24340 "if (ix86_expand_int_movcc (operands)) DONE; else FAIL;")
24341
24342 ;; Data flow gets confused by our desire for `sbbl reg,reg', and clearing
24343 ;; the register first winds up with `sbbl $0,reg', which is also weird.
24344 ;; So just document what we're doing explicitly.
24345
;; Expander (SWI48 modes) that builds a parallel setting operand 0 to -1
;; when the carry-flag operator in operand 2, applied to flags operand 1
;; and zero, holds, and to 0 otherwise; FLAGS_REG is clobbered.  There is
;; no condition or preparation code.
24346 (define_expand "x86_mov<mode>cc_0_m1"
24347 [(parallel
24348 [(set (match_operand:SWI48 0 "register_operand")
24349 (if_then_else:SWI48
24350 (match_operator:SWI48 2 "ix86_carry_flag_operator"
24351 [(match_operand 1 "flags_reg_operand")
24352 (const_int 0)])
24353 (const_int -1)
24354 (const_int 0)))
24355 (clobber (reg:CC FLAGS_REG))])])
24356
;; Insn (SWI48 modes) emitted as "sbb %0, %0".  Operand 0 becomes -1 when
;; the carry-flag operator (operand 1) on FLAGS_REG holds and 0
;; otherwise; FLAGS_REG is clobbered.  The pattern has no insn condition.
24357 (define_insn "*x86_mov<mode>cc_0_m1"
24358 [(set (match_operand:SWI48 0 "register_operand" "=r")
24359 (if_then_else:SWI48 (match_operator 1 "ix86_carry_flag_operator"
24360 [(reg FLAGS_REG) (const_int 0)])
24361 (const_int -1)
24362 (const_int 0)))
24363 (clobber (reg:CC FLAGS_REG))]
24364 ""
24365 "sbb{<imodesuffix>}\t%0, %0"
24366 [(set_attr "type" "alu1")
24367 (set_attr "use_carry" "1")
24368 (set_attr "pent_pair" "pu")
24369 (set_attr "mode" "<MODE>")
24370 (set_attr "length_immediate" "0")])
24371
;; Variant of the 0/-1 carry pattern that matches the value written as a
;; one-bit sign_extract (width 1, position 0) of the carry-flag operator
;; result instead of an if_then_else.  Emits the same "sbb %0, %0" and
;; clobbers FLAGS_REG.
24372 (define_insn "*x86_mov<mode>cc_0_m1_se"
24373 [(set (match_operand:SWI48 0 "register_operand" "=r")
24374 (sign_extract:SWI48 (match_operator 1 "ix86_carry_flag_operator"
24375 [(reg FLAGS_REG) (const_int 0)])
24376 (const_int 1)
24377 (const_int 0)))
24378 (clobber (reg:CC FLAGS_REG))]
24379 ""
24380 "sbb{<imodesuffix>}\t%0, %0"
24381 [(set_attr "type" "alu1")
24382 (set_attr "use_carry" "1")
24383 (set_attr "pent_pair" "pu")
24384 (set_attr "mode" "<MODE>")
24385 (set_attr "length_immediate" "0")])
24386
;; Variant of the 0/-1 carry pattern that matches the value written as
;; the negation of the carry-flag operator result.  Unlike the other two
;; variants it is instantiated over the SWI modes and uses the "<r>"
;; constraint for operand 0.  Emits "sbb %0, %0" and clobbers FLAGS_REG.
24387 (define_insn "*x86_mov<mode>cc_0_m1_neg"
24388 [(set (match_operand:SWI 0 "register_operand" "=<r>")
24389 (neg:SWI (match_operator 1 "ix86_carry_flag_operator"
24390 [(reg FLAGS_REG) (const_int 0)])))
24391 (clobber (reg:CC FLAGS_REG))]
24392 ""
24393 "sbb{<imodesuffix>}\t%0, %0"
24394 [(set_attr "type" "alu1")
24395 (set_attr "use_carry" "1")
24396 (set_attr "pent_pair" "pu")
24397 (set_attr "mode" "<MODE>")
24398 (set_attr "length_immediate" "0")])
24399
;; Expander (SWI48 modes) that builds a parallel setting operand 0 to the
;; negation of (ltu (reg:CCC FLAGS_REG) 0) and clobbering FLAGS_REG.
;; There is no condition or preparation code.
24400 (define_expand "x86_mov<mode>cc_0_m1_neg"
24401 [(parallel
24402 [(set (match_operand:SWI48 0 "register_operand")
24403 (neg:SWI48 (ltu:SWI48 (reg:CCC FLAGS_REG) (const_int 0))))
24404 (clobber (reg:CC FLAGS_REG))])])
24405
;; Splits operand 0 = -(operand 1 <=u C), C a constant, into a CC compare
;; of operand 1 against C + 1 followed by operand 0 = -(ltu of the
;; flags).  The preparation statement replaces operand 2 with
;; INTVAL (operand 2) + 1.  The condition requires operand 2 to satisfy
;; x86_64_immediate_operand and excludes the values -1 and 2147483647.
24406 (define_split
24407 [(set (match_operand:SWI48 0 "register_operand")
24408 (neg:SWI48
24409 (leu:SWI48
24410 (match_operand 1 "int_nonimmediate_operand")
24411 (match_operand 2 "const_int_operand"))))]
24412 "x86_64_immediate_operand (operands[2], VOIDmode)
24413 && INTVAL (operands[2]) != -1
24414 && INTVAL (operands[2]) != 2147483647"
24415 [(set (reg:CC FLAGS_REG) (compare:CC (match_dup 1) (match_dup 2)))
24416 (set (match_dup 0)
24417 (neg:SWI48 (ltu:SWI48 (reg:CC FLAGS_REG) (const_int 0))))]
24418 "operands[2] = GEN_INT (INTVAL (operands[2]) + 1);")
24419
;; Splits operand 0 = -(operand 1 == 0) into a CC compare of operand 1
;; against the constant 1 followed by operand 0 = -(ltu of the flags).
;; Instantiated over the SWI modes; the split has no condition.
24420 (define_split
24421 [(set (match_operand:SWI 0 "register_operand")
24422 (neg:SWI
24423 (eq:SWI
24424 (match_operand 1 "int_nonimmediate_operand")
24425 (const_int 0))))]
24426 ""
24427 [(set (reg:CC FLAGS_REG) (compare:CC (match_dup 1) (const_int 1)))
24428 (set (match_dup 0)
24429 (neg:SWI (ltu:SWI (reg:CC FLAGS_REG) (const_int 0))))])
24430
;; Splits operand 0 = -(operand 1 != 0) into two insns: FLAGS_REG (in CCC
;; mode) is set to an UNSPEC_CC_NE of operand 1 and zero, then operand 0
;; = -(ltu of those flags).  Instantiated over the SWI modes; the split
;; has no condition.
24431 (define_split
24432 [(set (match_operand:SWI 0 "register_operand")
24433 (neg:SWI
24434 (ne:SWI
24435 (match_operand 1 "int_nonimmediate_operand")
24436 (const_int 0))))]
24437 ""
24438 [(set (reg:CCC FLAGS_REG)
24439 (unspec:CCC [(match_dup 1) (const_int 0)] UNSPEC_CC_NE))
24440 (set (match_dup 0)
24441 (neg:SWI (ltu:SWI (reg:CCC FLAGS_REG) (const_int 0))))])
24442
;; Conditional move (SWI248 modes) selecting operand 2 or operand 3
;; according to comparison operator 1 on FLAGS_REG.  Four alternatives:
;;   0: operand 3 tied to the destination; cmov of operand 2 (%C1).
;;   1: operand 2 tied to the destination; cmov of operand 3 with the
;;      reversed condition (%c1).
;;   2, 3: three-operand forms, enabled only for the "apx_ndd" isa.
;; Requires TARGET_CMOVE and rejects the case where operands 2 and 3 are
;; both MEM.
24443 (define_insn "*mov<mode>cc_noc"
24444 [(set (match_operand:SWI248 0 "register_operand" "=r,r,r,r")
24445 (if_then_else:SWI248 (match_operator 1 "ix86_comparison_operator"
24446 [(reg FLAGS_REG) (const_int 0)])
24447 (match_operand:SWI248 2 "nonimmediate_operand" "rm,0,rm,r")
24448 (match_operand:SWI248 3 "nonimmediate_operand" "0,rm,r,rm")))]
24449 "TARGET_CMOVE && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
24450 "@
24451 cmov%O2%C1\t{%2, %0|%0, %2}
24452 cmov%O2%c1\t{%3, %0|%0, %3}
24453 cmov%O2%C1\t{%2, %3, %0|%0, %3, %2}
24454 cmov%O2%c1\t{%3, %2, %0|%0, %2, %3}"
24455 [(set_attr "isa" "*,*,apx_ndd,apx_ndd")
24456 (set_attr "type" "icmov")
24457 (set_attr "mode" "<MODE>")])
24458
;; Conditional move whose DImode result is one of two zero-extended
;; SImode operands (the zero_extend is on each arm of the if_then_else).
;; Same four alternatives as the plain cmov pattern, but the destination
;; is printed with the %k modifier and the insn mode is SI.  Requires
;; TARGET_64BIT and TARGET_CMOVE and rejects the case where operands 2
;; and 3 are both MEM.
24459 (define_insn "*movsicc_noc_zext"
24460 [(set (match_operand:DI 0 "register_operand" "=r,r,r,r")
24461 (if_then_else:DI (match_operator 1 "ix86_comparison_operator"
24462 [(reg FLAGS_REG) (const_int 0)])
24463 (zero_extend:DI
24464 (match_operand:SI 2 "nonimmediate_operand" "rm,0,rm,r"))
24465 (zero_extend:DI
24466 (match_operand:SI 3 "nonimmediate_operand" "0,rm,r,rm"))))]
24467 "TARGET_64BIT
24468 && TARGET_CMOVE && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
24469 "@
24470 cmov%O2%C1\t{%2, %k0|%k0, %2}
24471 cmov%O2%c1\t{%3, %k0|%k0, %3}
24472 cmov%O2%C1\t{%2, %3, %k0|%k0, %3, %2}
24473 cmov%O2%c1\t{%3, %2, %k0|%k0, %2, %3}"
24474 [(set_attr "isa" "*,*,apx_ndd,apx_ndd")
24475 (set_attr "type" "icmov")
24476 (set_attr "mode" "SI")])
24477
;; Same operation as the zero-extending cmov pattern, but matching the
;; other RTL shape: a single zero_extend wrapped around an SImode
;; if_then_else.  Operand 0 uses the nonimmediate_operand predicate,
;; though every alternative's constraint is "r".  Requires TARGET_64BIT
;; and TARGET_CMOVE and rejects the case where operands 2 and 3 are both
;; MEM.
24478 (define_insn "*movsicc_noc_zext_1"
24479 [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r,r,r")
24480 (zero_extend:DI
24481 (if_then_else:SI (match_operator 1 "ix86_comparison_operator"
24482 [(reg FLAGS_REG) (const_int 0)])
24483 (match_operand:SI 2 "nonimmediate_operand" "rm,0,rm,r")
24484 (match_operand:SI 3 "nonimmediate_operand" "0,rm,r,rm"))))]
24485 "TARGET_64BIT
24486 && TARGET_CMOVE && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
24487 "@
24488 cmov%O2%C1\t{%2, %k0|%k0, %2}
24489 cmov%O2%c1\t{%3, %k0|%k0, %3}
24490 cmov%O2%C1\t{%2, %3, %k0|%k0, %3, %2}
24491 cmov%O2%c1\t{%3, %2, %k0|%k0, %2, %3}"
24492 [(set_attr "isa" "*,*,apx_ndd,apx_ndd")
24493 (set_attr "type" "icmov")
24494 (set_attr "mode" "SI")])
24495
24496
24497 ;; Don't do conditional moves with memory inputs. This splitter helps
24498 ;; register starved x86_32 by forcing inputs into registers before reload.
;; Rewrites a cmov with a memory input into the same if_then_else with
;; both inputs passed through force_reg.  Applies only when all of these
;; hold: !TARGET_64BIT, TARGET_CMOVE, TARGET_AVOID_MEM_OPND_FOR_CMOVE, at
;; least one of operands 2 and 3 is a MEM, can_create_pseudo_p (), and
;; optimize_insn_for_speed_p ().
24499 (define_split
24500 [(set (match_operand:SWI248 0 "register_operand")
24501 (if_then_else:SWI248 (match_operator 1 "ix86_comparison_operator"
24502 [(reg FLAGS_REG) (const_int 0)])
24503 (match_operand:SWI248 2 "nonimmediate_operand")
24504 (match_operand:SWI248 3 "nonimmediate_operand")))]
24505 "!TARGET_64BIT && TARGET_CMOVE
24506 && TARGET_AVOID_MEM_OPND_FOR_CMOVE
24507 && (MEM_P (operands[2]) || MEM_P (operands[3]))
24508 && can_create_pseudo_p ()
24509 && optimize_insn_for_speed_p ()"
24510 [(set (match_dup 0)
24511 (if_then_else:SWI248 (match_dup 1) (match_dup 2) (match_dup 3)))]
24512 {
24513 operands[2] = force_reg (<MODE>mode, operands[2]);
24514 operands[3] = force_reg (<MODE>mode, operands[3]);
24515 })
24516
;; QImode conditional move between registers.  The output template is
;; "#", so no assembly is printed directly from this pattern; the SWI12
;; define_split that follows in this file rewrites the operation in
;; SImode after reload.  The third alternative is enabled only for the
;; "apx_ndd" isa.  Requires TARGET_CMOVE and !TARGET_PARTIAL_REG_STALL.
24517 (define_insn "*movqicc_noc"
24518 [(set (match_operand:QI 0 "register_operand" "=r,r,r")
24519 (if_then_else:QI (match_operator 1 "ix86_comparison_operator"
24520 [(reg FLAGS_REG) (const_int 0)])
24521 (match_operand:QI 2 "register_operand" "r,0,r")
24522 (match_operand:QI 3 "register_operand" "0,r,r")))]
24523 "TARGET_CMOVE && !TARGET_PARTIAL_REG_STALL"
24524 "#"
24525 [(set_attr "isa" "*,*,apx_ndd")
24526 (set_attr "type" "icmov")
24527 (set_attr "mode" "QI")])
24528
;; Rewrites a register-only conditional move in an SWI12 mode as the same
;; if_then_else in SImode, replacing operands 0, 2 and 3 with their
;; SImode low parts (gen_lowpart).  Applies when TARGET_CMOVE and
;; !TARGET_PARTIAL_REG_STALL hold and reload has completed.
24529 (define_split
24530 [(set (match_operand:SWI12 0 "register_operand")
24531 (if_then_else:SWI12 (match_operator 1 "ix86_comparison_operator"
24532 [(reg FLAGS_REG) (const_int 0)])
24533 (match_operand:SWI12 2 "register_operand")
24534 (match_operand:SWI12 3 "register_operand")))]
24535 "TARGET_CMOVE && !TARGET_PARTIAL_REG_STALL
24536 && reload_completed"
24537 [(set (match_dup 0)
24538 (if_then_else:SI (match_dup 1) (match_dup 2) (match_dup 3)))]
24539 {
24540 operands[0] = gen_lowpart (SImode, operands[0]);
24541 operands[2] = gen_lowpart (SImode, operands[2]);
24542 operands[3] = gen_lowpart (SImode, operands[3]);
24543 })
24544
24545 ;; Don't do conditional moves with memory inputs
;; With a scratch register available (operand 4), replaces a cmov that
;; has a memory input by a load of that memory into the scratch followed
;; by the cmov using the scratch.  Only one input is rewritten: operand 2
;; when it is a MEM, otherwise operand 3; operand 5 holds the memory that
;; was moved out.  Applies when TARGET_CMOVE and
;; TARGET_AVOID_MEM_OPND_FOR_CMOVE hold, at least one of operands 2 and 3
;; is a MEM, and optimize_insn_for_speed_p () is true.
24546 (define_peephole2
24547 [(match_scratch:SWI248 4 "r")
24548 (set (match_operand:SWI248 0 "register_operand")
24549 (if_then_else:SWI248 (match_operator 1 "ix86_comparison_operator"
24550 [(reg FLAGS_REG) (const_int 0)])
24551 (match_operand:SWI248 2 "nonimmediate_operand")
24552 (match_operand:SWI248 3 "nonimmediate_operand")))]
24553 "TARGET_CMOVE && TARGET_AVOID_MEM_OPND_FOR_CMOVE
24554 && (MEM_P (operands[2]) || MEM_P (operands[3]))
24555 && optimize_insn_for_speed_p ()"
24556 [(set (match_dup 4) (match_dup 5))
24557 (set (match_dup 0)
24558 (if_then_else:SWI248 (match_dup 1) (match_dup 2) (match_dup 3)))]
24559 {
24560 if (MEM_P (operands[2]))
24561 {
24562 operands[5] = operands[2];
24563 operands[2] = operands[4];
24564 }
24565 else if (MEM_P (operands[3]))
24566 {
24567 operands[5] = operands[3];
24568 operands[3] = operands[4];
24569 }
24570 else
24571 gcc_unreachable ();
24572 })
24573
;; Zero-extending counterpart of the memory-operand cmov peephole: the
;; DImode result selects between two zero-extended SImode inputs.  With
;; an SImode scratch available (operand 4), the memory input (operand 2
;; when it is a MEM, otherwise operand 3) is first loaded into the
;; scratch and the cmov then uses the scratch.  Additionally requires
;; TARGET_64BIT.
24574 (define_peephole2
24575 [(match_scratch:SI 4 "r")
24576 (set (match_operand:DI 0 "register_operand")
24577 (if_then_else:DI (match_operator 1 "ix86_comparison_operator"
24578 [(reg FLAGS_REG) (const_int 0)])
24579 (zero_extend:DI
24580 (match_operand:SI 2 "nonimmediate_operand"))
24581 (zero_extend:DI
24582 (match_operand:SI 3 "nonimmediate_operand"))))]
24583 "TARGET_64BIT
24584 && TARGET_CMOVE && TARGET_AVOID_MEM_OPND_FOR_CMOVE
24585 && (MEM_P (operands[2]) || MEM_P (operands[3]))
24586 && optimize_insn_for_speed_p ()"
24587 [(set (match_dup 4) (match_dup 5))
24588 (set (match_dup 0)
24589 (if_then_else:DI (match_dup 1)
24590 (zero_extend:DI (match_dup 2))
24591 (zero_extend:DI (match_dup 3))))]
24592 {
24593 if (MEM_P (operands[2]))
24594 {
24595 operands[5] = operands[2];
24596 operands[2] = operands[4];
24597 }
24598 else if (MEM_P (operands[3]))
24599 {
24600 operands[5] = operands[3];
24601 operands[3] = operands[4];
24602 }
24603 else
24604 gcc_unreachable ();
24605 })
24606
24607 ;; Eliminate a reg-reg mov by inverting the condition of a cmov (#1).
24608 ;; mov r0,r1; dec r0; mov r2,r3; cmov r0,r2 -> dec r1; mov r0,r3; cmov r0, r1
;; Matched sequence: operand 0 = operand 1; a flag-setting parallel that
;; also writes operand 0; operand 2 = operand 3; cmov choosing between
;; operand 0 and operand 2.  The replacement performs the flag-setting
;; operation on operand 1 instead, loads operand 3 straight into operand
;; 0, and swaps the cmov arms so it selects operand 1 over operand 0.
;; The preparation code builds:
;;   operands[7] - destination of the first SET in the matched parallel
;;                 (peep2_next_insn (1));
;;   operands[8], operands[9] - operands 5 and 6 with operand 0 replaced
;;                 by operand 1 via ix86_replace_reg_with_reg.
;; The condition requires operand 2 to differ from operands 0 and 1, the
;; peep2_reg_dead_p checks on operands 1 and 2, and that operand 0 is not
;; mentioned in operand 3.
24609 (define_peephole2
24610 [(set (match_operand:SWI248 0 "general_reg_operand")
24611 (match_operand:SWI248 1 "general_reg_operand"))
24612 (parallel [(set (reg FLAGS_REG) (match_operand 5))
24613 (set (match_dup 0) (match_operand:SWI248 6))])
24614 (set (match_operand:SWI248 2 "general_reg_operand")
24615 (match_operand:SWI248 3 "general_gr_operand"))
24616 (set (match_dup 0)
24617 (if_then_else:SWI248 (match_operator 4 "ix86_comparison_operator"
24618 [(reg FLAGS_REG) (const_int 0)])
24619 (match_dup 0)
24620 (match_dup 2)))]
24621 "TARGET_CMOVE
24622 && REGNO (operands[2]) != REGNO (operands[0])
24623 && REGNO (operands[2]) != REGNO (operands[1])
24624 && peep2_reg_dead_p (1, operands[1])
24625 && peep2_reg_dead_p (4, operands[2])
24626 && !reg_overlap_mentioned_p (operands[0], operands[3])"
24627 [(parallel [(set (match_dup 7) (match_dup 8))
24628 (set (match_dup 1) (match_dup 9))])
24629 (set (match_dup 0) (match_dup 3))
24630 (set (match_dup 0) (if_then_else:SWI248 (match_dup 4)
24631 (match_dup 1)
24632 (match_dup 0)))]
24633 {
24634 operands[7] = SET_DEST (XVECEXP (PATTERN (peep2_next_insn (1)), 0, 0));
24635 operands[8]
24636 = ix86_replace_reg_with_reg (operands[5], operands[0], operands[1]);
24637 operands[9]
24638 = ix86_replace_reg_with_reg (operands[6], operands[0], operands[1]);
24639 })
24640
24641 ;; Eliminate a reg-reg mov by inverting the condition of a cmov (#2).
24642 ;; mov r2,r3; mov r0,r1; dec r0; cmov r0,r2 -> dec r1; mov r0,r3; cmov r0, r1
;; Same transformation as variant #1, but the matched sequence has the
;; load of operand 2 first: operand 2 = operand 3; operand 0 = operand 1;
;; flag-setting parallel writing operand 0; cmov choosing between operand
;; 0 and operand 2.  Differences from variant #1 visible here:
;;   - the parallel is at peep2_next_insn (2), which is where operands[7]
;;     is read from;
;;   - operand 1's deadness is tested at position 2 rather than 1;
;;   - operand 2 must additionally not be mentioned in operand 6.
24643 (define_peephole2
24644 [(set (match_operand:SWI248 2 "general_reg_operand")
24645 (match_operand:SWI248 3 "general_gr_operand"))
24646 (set (match_operand:SWI248 0 "general_reg_operand")
24647 (match_operand:SWI248 1 "general_reg_operand"))
24648 (parallel [(set (reg FLAGS_REG) (match_operand 5))
24649 (set (match_dup 0) (match_operand:SWI248 6))])
24650 (set (match_dup 0)
24651 (if_then_else:SWI248 (match_operator 4 "ix86_comparison_operator"
24652 [(reg FLAGS_REG) (const_int 0)])
24653 (match_dup 0)
24654 (match_dup 2)))]
24655 "TARGET_CMOVE
24656 && REGNO (operands[2]) != REGNO (operands[0])
24657 && REGNO (operands[2]) != REGNO (operands[1])
24658 && peep2_reg_dead_p (2, operands[1])
24659 && peep2_reg_dead_p (4, operands[2])
24660 && !reg_overlap_mentioned_p (operands[0], operands[3])
24661 && !reg_mentioned_p (operands[2], operands[6])"
24662 [(parallel [(set (match_dup 7) (match_dup 8))
24663 (set (match_dup 1) (match_dup 9))])
24664 (set (match_dup 0) (match_dup 3))
24665 (set (match_dup 0) (if_then_else:SWI248 (match_dup 4)
24666 (match_dup 1)
24667 (match_dup 0)))]
24668 {
24669 operands[7] = SET_DEST (XVECEXP (PATTERN (peep2_next_insn (2)), 0, 0));
24670 operands[8]
24671 = ix86_replace_reg_with_reg (operands[5], operands[0], operands[1]);
24672 operands[9]
24673 = ix86_replace_reg_with_reg (operands[6], operands[0], operands[1]);
24674 })
24675
;; Masked HFmode move, emitted as vmovsh with operand 3 as the mask register
;; (constraint "Yk") printed inside %{...%}.  Alternatives: load from memory,
;; store to memory, and register-to-register.  Operand 2 is either tied to
;; the destination or a zero constant (predicate nonimm_or_0_operand,
;; constraint "0C").  Available only with TARGET_AVX512FP16.
24676 (define_insn "movhf_mask"
24677 [(set (match_operand:HF 0 "nonimmediate_operand" "=v,m,v")
24678 (unspec:HF
24679 [(match_operand:HF 1 "nonimmediate_operand" "m,v,v")
24680 (match_operand:HF 2 "nonimm_or_0_operand" "0C,0C,0C")
24681 (match_operand:QI 3 "register_operand" "Yk,Yk,Yk")]
24682 UNSPEC_MOVCC_MASK))]
24683 "TARGET_AVX512FP16"
24684 "@
24685 vmovsh\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}
24686 vmovsh\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}
24687 vmovsh\t{%d1, %0%{%3%}%N2|%0%{%3%}%N2, %d1}"
24688 [(set_attr "type" "ssemov")
24689 (set_attr "prefix" "evex")
24690 (set_attr "mode" "HF")])
24691
;; HFmode conditional move expander (TARGET_AVX512FP16 only).  All work is
;; delegated to ix86_expand_fp_movcc; the expansion FAILs when that helper
;; returns false.
24692 (define_expand "movhfcc"
24693 [(set (match_operand:HF 0 "register_operand")
24694 (if_then_else:HF
24695 (match_operand 1 "comparison_operator")
24696 (match_operand:HF 2 "register_operand")
24697 (match_operand:HF 3 "register_operand")))]
24698 "TARGET_AVX512FP16"
24699 "if (ix86_expand_fp_movcc (operands)) DONE; else FAIL;")
24700
;; Floating-point conditional move expander for the X87MODEF modes.
;; Enabled when x87 with CMOVE is available, or when the mode is an SSE
;; float mode and SSE math is in use.  Delegates to ix86_expand_fp_movcc
;; and FAILs when it returns false.
24701 (define_expand "mov<mode>cc"
24702 [(set (match_operand:X87MODEF 0 "register_operand")
24703 (if_then_else:X87MODEF
24704 (match_operand 1 "comparison_operator")
24705 (match_operand:X87MODEF 2 "register_operand")
24706 (match_operand:X87MODEF 3 "register_operand")))]
24707 "(TARGET_80387 && TARGET_CMOVE)
24708 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
24709 "if (ix86_expand_fp_movcc (operands)) DONE; else FAIL;")
24710
;; XFmode conditional move on the x87 stack using fcmov.
;; Alternative 0: operand 3 is tied to the destination and operand 2 is
;; moved under the condition (%F1).  Alternative 1: operand 2 is tied and
;; operand 3 is moved under the reversed condition (%f1).
24711 (define_insn "*movxfcc_1"
24712 [(set (match_operand:XF 0 "register_operand" "=f,f")
24713 (if_then_else:XF (match_operator 1 "fcmov_comparison_operator"
24714 [(reg FLAGS_REG) (const_int 0)])
24715 (match_operand:XF 2 "register_operand" "f,0")
24716 (match_operand:XF 3 "register_operand" "0,f")))]
24717 "TARGET_80387 && TARGET_CMOVE"
24718 "@
24719 fcmov%F1\t{%2, %0|%0, %2}
24720 fcmov%f1\t{%3, %0|%0, %3}"
24721 [(set_attr "type" "fcmov")
24722 (set_attr "mode" "XF")])
24723
;; DFmode conditional move.
;; Alternatives 0-1 use x87 fcmov.  Alternatives 2-3 (isa "nox64") hold the
;; value in general registers and output "#", i.e. they must be split.
;; Alternatives 4-5 (isa "x64") use a single integer cmov.
;; The insn condition rejects the case where both inputs are memory.
24724 (define_insn "*movdfcc_1"
24725 [(set (match_operand:DF 0 "register_operand" "=f,f,&r,&r,r ,r")
24726 (if_then_else:DF (match_operator 1 "fcmov_comparison_operator"
24727 [(reg FLAGS_REG) (const_int 0)])
24728 (match_operand:DF 2 "nonimmediate_operand"
24729 "f ,0,rm,0 ,rm,0")
24730 (match_operand:DF 3 "nonimmediate_operand"
24731 "0 ,f,0 ,rm,0, rm")))]
24732 "TARGET_80387 && TARGET_CMOVE
24733 && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
24734 "@
24735 fcmov%F1\t{%2, %0|%0, %2}
24736 fcmov%f1\t{%3, %0|%0, %3}
24737 #
24738 #
24739 cmov%O2%C1\t{%2, %0|%0, %2}
24740 cmov%O2%c1\t{%3, %0|%0, %3}"
24741 [(set_attr "isa" "*,*,nox64,nox64,x64,x64")
24742 (set_attr "type" "fcmov,fcmov,multi,multi,icmov,icmov")
24743 (set_attr "mode" "DF,DF,DI,DI,DI,DI")])
24744
;; After reload on 32-bit targets, split a DFmode conditional move whose
;; destination is a general register into two SImode conditional moves
;; that share the same condition (operand 1).
24745 (define_split
24746 [(set (match_operand:DF 0 "general_reg_operand")
24747 (if_then_else:DF (match_operator 1 "fcmov_comparison_operator"
24748 [(reg FLAGS_REG) (const_int 0)])
24749 (match_operand:DF 2 "nonimmediate_operand")
24750 (match_operand:DF 3 "nonimmediate_operand")))]
24751 "!TARGET_64BIT && reload_completed"
24752 [(set (match_dup 2)
24753 (if_then_else:SI (match_dup 1) (match_dup 4) (match_dup 5)))
24754 (set (match_dup 3)
24755 (if_then_else:SI (match_dup 1) (match_dup 6) (match_dup 7)))]
24756 {
/* Order matters: the two inputs (operands 2 and 3) are split first, into
   operands 4/5 and 6/7, because the second call then overwrites
   operands 2 and 3 with the two halves of the destination.
   NOTE(review): which half is low and which is high depends on
   split_double_mode's argument order -- confirm against its definition.  */
24757 split_double_mode (DImode, &operands[2], 2, &operands[4], &operands[6]);
24758 split_double_mode (DImode, &operands[0], 1, &operands[2], &operands[3]);
24759 })
24760
;; SFmode conditional move.  Alternatives 0-1 use x87 fcmov; alternatives
;; 2-3 keep the value in a general register and use an integer cmov
;; (mode attribute SI).  In each pair the second alternative ties operand 2
;; to the destination and moves operand 3 under the reversed condition.
;; The insn condition rejects the case where both inputs are memory.
24761 (define_insn "*movsfcc_1_387"
24762 [(set (match_operand:SF 0 "register_operand" "=f,f,r,r")
24763 (if_then_else:SF (match_operator 1 "fcmov_comparison_operator"
24764 [(reg FLAGS_REG) (const_int 0)])
24765 (match_operand:SF 2 "nonimmediate_operand" "f,0,rm,0")
24766 (match_operand:SF 3 "nonimmediate_operand" "0,f,0,rm")))]
24767 "TARGET_80387 && TARGET_CMOVE
24768 && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
24769 "@
24770 fcmov%F1\t{%2, %0|%0, %2}
24771 fcmov%f1\t{%3, %0|%0, %3}
24772 cmov%O2%C1\t{%2, %0|%0, %2}
24773 cmov%O2%c1\t{%3, %0|%0, %3}"
24774 [(set_attr "type" "fcmov,fcmov,icmov,icmov")
24775 (set_attr "mode" "SF,SF,SI,SI")])
24776
24777 ;; Don't do conditional moves with memory inputs. This splitter helps
24778 ;; register starved x86_32 by forcing inputs into registers before reload.
;; It applies only on 32-bit x87+CMOVE targets with
;; TARGET_AVOID_MEM_OPND_FOR_CMOVE, when at least one input is memory,
;; while new pseudos can still be created, and when optimizing for speed.
24779 (define_split
24780 [(set (match_operand:MODEF 0 "register_operand")
24781 (if_then_else:MODEF (match_operator 1 "ix86_comparison_operator"
24782 [(reg FLAGS_REG) (const_int 0)])
24783 (match_operand:MODEF 2 "nonimmediate_operand")
24784 (match_operand:MODEF 3 "nonimmediate_operand")))]
24785 "!TARGET_64BIT && TARGET_80387 && TARGET_CMOVE
24786 && TARGET_AVOID_MEM_OPND_FOR_CMOVE
24787 && (MEM_P (operands[2]) || MEM_P (operands[3]))
24788 && can_create_pseudo_p ()
24789 && optimize_insn_for_speed_p ()"
24790 [(set (match_dup 0)
24791 (if_then_else:MODEF (match_dup 1) (match_dup 2) (match_dup 3)))]
24792 {
/* Both inputs are passed through force_reg, whether or not they are
   memory; the conditional move is then re-emitted unchanged in shape.  */
24793 operands[2] = force_reg (<MODE>mode, operands[2]);
24794 operands[3] = force_reg (<MODE>mode, operands[3]);
24795 })
24796
24797 ;; Don't do conditional moves with memory inputs
;; Peephole counterpart of the splitter above for conditional moves whose
;; destination is a general register: a scratch GPR (operand 4) is
;; requested, the memory input is loaded into it first, and the conditional
;; move then reads the scratch.  DFmode is handled only on 64-bit targets.
24798 (define_peephole2
24799 [(match_scratch:MODEF 4 "r")
24800 (set (match_operand:MODEF 0 "general_reg_operand")
24801 (if_then_else:MODEF (match_operator 1 "fcmov_comparison_operator"
24802 [(reg FLAGS_REG) (const_int 0)])
24803 (match_operand:MODEF 2 "nonimmediate_operand")
24804 (match_operand:MODEF 3 "nonimmediate_operand")))]
24805 "(<MODE>mode != DFmode || TARGET_64BIT)
24806 && TARGET_80387 && TARGET_CMOVE
24807 && TARGET_AVOID_MEM_OPND_FOR_CMOVE
24808 && (MEM_P (operands[2]) || MEM_P (operands[3]))
24809 && optimize_insn_for_speed_p ()"
24810 [(set (match_dup 4) (match_dup 5))
24811 (set (match_dup 0)
24812 (if_then_else:MODEF (match_dup 1) (match_dup 2) (match_dup 3)))]
24813 {
/* operands[5] becomes the memory input that is loaded into the scratch.
   Only one input is replaced; operand 2 is checked first.  The final
   branch cannot be reached because the insn condition guarantees that
   at least one of the two inputs is memory.  */
24814 if (MEM_P (operands[2]))
24815 {
24816 operands[5] = operands[2];
24817 operands[2] = operands[4];
24818 }
24819 else if (MEM_P (operands[3]))
24820 {
24821 operands[5] = operands[3];
24822 operands[3] = operands[4];
24823 }
24824 else
24825 gcc_unreachable ();
24826 })
24827
24828 ;; All moves in XOP pcmov instructions are 128 bits and hence we restrict
24829 ;; the scalar versions to have only XMM registers as operands.
24830
24831 ;; XOP conditional move
;; Scalar SF/DF select implemented with vpcmov.  Operand 1 is the selector
;; and, like every other operand, must be a register with constraint "x".
;; The "mode" attribute is TI.
24832 (define_insn "*xop_pcmov_<mode>"
24833 [(set (match_operand:MODEF 0 "register_operand" "=x")
24834 (if_then_else:MODEF
24835 (match_operand:MODEF 1 "register_operand" "x")
24836 (match_operand:MODEF 2 "register_operand" "x")
24837 (match_operand:MODEF 3 "register_operand" "x")))]
24838 "TARGET_XOP"
24839 "vpcmov\t{%1, %3, %2, %0|%0, %2, %3, %1}"
24840 [(set_attr "type" "sse4arg")
24841 (set_attr "mode" "TI")])
24842
24843 ;; These versions of the min/max patterns are intentionally ignorant of
24844 ;; their behavior wrt -0.0 and NaN (via the commutative operand mark).
24845 ;; Since both the tree-level MAX_EXPR and the rtl-level SMAX operator
24846 ;; are undefined in this condition, we're certain this is correct.
24847
;; Alternative 0 (isa "noavx") is the two-operand form with operand 1 tied
;; to the destination; alternative 1 (isa "avx") is the three-operand
;; v-prefixed form.  The "%" in operand 1's constraint is the commutative
;; mark referred to above.
24848 (define_insn "<code><mode>3"
24849 [(set (match_operand:MODEF 0 "register_operand" "=x,v")
24850 (smaxmin:MODEF
24851 (match_operand:MODEF 1 "nonimmediate_operand" "%0,v")
24852 (match_operand:MODEF 2 "nonimmediate_operand" "xm,vm")))]
24853 "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
24854 "@
24855 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
24856 v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
24857 [(set_attr "isa" "noavx,avx")
24858 (set_attr "prefix" "orig,vex")
24859 (set_attr "type" "sseadd")
24860 (set_attr "mode" "<MODE>")])
24861
;; HFmode signed min/max, available only with TARGET_AVX512FP16.  There is
;; a single three-operand alternative; operand 1 carries the commutative
;; mark "%" and operand 2 may be memory.
24862 (define_insn "<code>hf3"
24863 [(set (match_operand:HF 0 "register_operand" "=v")
24864 (smaxmin:HF
24865 (match_operand:HF 1 "nonimmediate_operand" "%v")
24866 (match_operand:HF 2 "nonimmediate_operand" "vm")))]
24867 "TARGET_AVX512FP16"
24868 "v<maxmin_float>sh\t{%2, %1, %0|%0, %1, %2}"
24869 [(set_attr "prefix" "evex")
24870 (set_attr "type" "sseadd")
24871 (set_attr "mode" "HF")])
24872
24873 ;; These versions of the min/max patterns implement exactly the operations
24874 ;; min = (op1 < op2 ? op1 : op2)
24875 ;; max = (!(op1 < op2) ? op1 : op2)
24876 ;; Their operands are not commutative, and thus they may be used in the
24877 ;; presence of -0.0 and NaN.
24878
;; HFmode variant: the operation is an IEEE_MAXMIN unspec, operand 1 must be
;; a register (no "%" mark) and operand 2 may be memory.  Available only
;; with TARGET_AVX512FP16.
24879 (define_insn "*ieee_s<ieee_maxmin>hf3"
24880 [(set (match_operand:HF 0 "register_operand" "=v")
24881 (unspec:HF
24882 [(match_operand:HF 1 "register_operand" "v")
24883 (match_operand:HF 2 "nonimmediate_operand" "vm")]
24884 IEEE_MAXMIN))]
24885 "TARGET_AVX512FP16"
24886 "v<ieee_maxmin>sh\t{%2, %1, %0|%0, %1, %2}"
24887 [(set_attr "prefix" "evex")
24888 (set_attr "type" "sseadd")
24889 (set_attr "mode" "HF")])
24890
;; SF/DF order-sensitive min/max as an IEEE_MAXMIN unspec.
;; Alternative 0 (isa "noavx") ties operand 1 to the destination;
;; alternative 1 (isa "avx") is the three-operand v-prefixed form.
;; Operand 1 has no commutative mark, so operand order is preserved.
24891 (define_insn "*ieee_s<ieee_maxmin><mode>3"
24892 [(set (match_operand:MODEF 0 "register_operand" "=x,v")
24893 (unspec:MODEF
24894 [(match_operand:MODEF 1 "register_operand" "0,v")
24895 (match_operand:MODEF 2 "nonimmediate_operand" "xm,vm")]
24896 IEEE_MAXMIN))]
24897 "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
24898 "@
24899 <ieee_maxmin><ssemodesuffix>\t{%2, %0|%0, %2}
24900 v<ieee_maxmin><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
24901 [(set_attr "isa" "noavx,avx")
24902 (set_attr "prefix" "orig,maybe_evex")
24903 (set_attr "type" "sseadd")
24904 (set_attr "mode" "<MODE>")])
24905
24906 ;; Operands order in min/max instruction matters for signed zero and NANs.
;; Recognizes, before reload, an UNSPEC_BLENDV of operands 1 and 2 whose
;; selector is (lt op3 op4) with op3 equal to op1 and op4 equal to op2,
;; and always splits it ("&& 1") into UNSPEC_IEEE_MAX applied to
;; (op2, op1), i.e. with the two operands exchanged.
24907 (define_insn_and_split "*ieee_max<mode>3_1"
24908 [(set (match_operand:MODEF 0 "register_operand")
24909 (unspec:MODEF
24910 [(match_operand:MODEF 1 "register_operand")
24911 (match_operand:MODEF 2 "register_operand")
24912 (lt:MODEF
24913 (match_operand:MODEF 3 "register_operand")
24914 (match_operand:MODEF 4 "register_operand"))]
24915 UNSPEC_BLENDV))]
24916 "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
24917 && (rtx_equal_p (operands[1], operands[3])
24918 && rtx_equal_p (operands[2], operands[4]))
24919 && ix86_pre_reload_split ()"
24920 "#"
24921 "&& 1"
24922 [(set (match_dup 0)
24923 (unspec:MODEF
24924 [(match_dup 2)
24925 (match_dup 1)]
24926 UNSPEC_IEEE_MAX))])
24927
;; Min counterpart of *ieee_max<mode>3_1.  The comparison operands are
;; crossed here: the selector is (lt op3 op4) with op4 equal to op1 and
;; op3 equal to op2.  The split emits UNSPEC_IEEE_MIN applied to (op2, op1).
24928 (define_insn_and_split "*ieee_min<mode>3_1"
24929 [(set (match_operand:MODEF 0 "register_operand")
24930 (unspec:MODEF
24931 [(match_operand:MODEF 1 "register_operand")
24932 (match_operand:MODEF 2 "register_operand")
24933 (lt:MODEF
24934 (match_operand:MODEF 3 "register_operand")
24935 (match_operand:MODEF 4 "register_operand"))]
24936 UNSPEC_BLENDV))]
24937 "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
24938 && (rtx_equal_p (operands[1], operands[4])
24939 && rtx_equal_p (operands[2], operands[3]))
24940 && ix86_pre_reload_split ()"
24941 "#"
24942 "&& 1"
24943 [(set (match_dup 0)
24944 (unspec:MODEF
24945 [(match_dup 2)
24946 (match_dup 1)]
24947 UNSPEC_IEEE_MIN))])
24948
24949 ;; Make two stack loads independent:
24950 ;; fld aa fld aa
24951 ;; fld %st(0) -> fld bb
24952 ;; fmul bb fmul %st(1), %st
24953 ;;
24954 ;; Actually we only match the last two instructions for simplicity.
24955
;; This variant matches the memory operand in the second position of the
;; binary operation.  The register copy r0 = r1 is replaced by a load of
;; the memory operand into r0, and the operation is rebuilt from r1 and r0.
24956 (define_peephole2
24957 [(set (match_operand 0 "fp_register_operand")
24958 (match_operand 1 "fp_register_operand"))
24959 (set (match_dup 0)
24960 (match_operator 2 "binary_fp_operator"
24961 [(match_dup 0)
24962 (match_operand 3 "memory_operand")]))]
24963 "REGNO (operands[0]) != REGNO (operands[1])"
24964 [(set (match_dup 0) (match_dup 3))
24965 (set (match_dup 0)
24966 (match_op_dup 2
24967 [(match_dup 5) (match_dup 4)]))]
24968 {
/* By default the new operation is op (r1, r0): r1 holds the value the
   copy would have produced, and r0 now holds the loaded memory value.  */
24969 operands[4] = operands[0];
24970 operands[5] = operands[1];
24971
24972 /* The % modifier is not operational anymore in peephole2's, so we have to
24973 swap the operands manually in the case of addition and multiplication. */
24974 if (COMMUTATIVE_ARITH_P (operands[2]))
24975 std::swap (operands[4], operands[5]);
24976 })
24977
;; Second variant of the "independent stack loads" peephole: here the
;; memory operand is the first operand of the binary operation.  The copy
;; r0 = r1 is replaced by a load of the memory operand into r0, and the
;; operation is rebuilt as op (r0, r1), or with the operands exchanged when
;; the operator is commutative.
24978 (define_peephole2
24979 [(set (match_operand 0 "fp_register_operand")
24980 (match_operand 1 "fp_register_operand"))
24981 (set (match_dup 0)
24982 (match_operator 2 "binary_fp_operator"
24983 [(match_operand 3 "memory_operand")
24984 (match_dup 0)]))]
24985 "REGNO (operands[0]) != REGNO (operands[1])"
24986 [(set (match_dup 0) (match_dup 3))
24987 (set (match_dup 0)
24988 (match_op_dup 2
24989 [(match_dup 4) (match_dup 5)]))]
24990 {
24991 operands[4] = operands[0];
24992 operands[5] = operands[1];
24993
24994 /* The % modifier is not operational anymore in peephole2's, so we have to
24995 swap the operands manually in the case of addition and multiplication. */
24996 if (COMMUTATIVE_ARITH_P (operands[2]))
24997 std::swap (operands[4], operands[5]);
24998 })
24999
25000 ;; Conditional addition patterns
;; Operand 0 is the destination, operand 1 the comparison, operand 2 a
;; register and operand 3 a constant integer.  The pattern has no insn
;; condition; all work is delegated to ix86_expand_int_addcc, and the
;; expansion FAILs when that helper returns false.
25001 (define_expand "add<mode>cc"
25002 [(match_operand:SWI 0 "register_operand")
25003 (match_operand 1 "ordered_comparison_operator")
25004 (match_operand:SWI 2 "register_operand")
25005 (match_operand:SWI 3 "const_int_operand")]
25006 ""
25007 "if (ix86_expand_int_addcc (operands)) DONE; else FAIL;")
25008
25009 ;; min/max patterns
25010
;; For each integer min/max code, the comparison code under which the
;; splitters below select operand 1 (otherwise operand 2 is selected).
25011 (define_code_attr maxmin_rel
25012 [(smax "GE") (smin "LE") (umax "GEU") (umin "LEU")])
25013
;; Integer min/max expander for the SDWIM modes.  It has no preparation
;; code: the min/max rtx is emitted as written, together with a FLAGS
;; clobber.  Requires CMOVE; QImode is excluded under
;; TARGET_PARTIAL_REG_STALL.
25014 (define_expand "<code><mode>3"
25015 [(parallel
25016 [(set (match_operand:SDWIM 0 "register_operand")
25017 (maxmin:SDWIM
25018 (match_operand:SDWIM 1 "register_operand")
25019 (match_operand:SDWIM 2 "general_operand")))
25020 (clobber (reg:CC FLAGS_REG))])]
25021 "TARGET_CMOVE
25022 && (<MODE>mode != QImode || !TARGET_PARTIAL_REG_STALL)")
25023
;; Double-word integer min/max.  Before reload it is always split ("&& 1")
;; into a compare / subtract-with-borrow pair emitted from the preparation
;; code, followed by two word-sized conditional moves, one per half, that
;; share the same condition (operand 6).
25024 (define_insn_and_split "*<code><dwi>3_doubleword"
25025 [(set (match_operand:<DWI> 0 "register_operand")
25026 (maxmin:<DWI>
25027 (match_operand:<DWI> 1 "register_operand")
25028 (match_operand:<DWI> 2 "general_operand")))
25029 (clobber (reg:CC FLAGS_REG))]
25030 "TARGET_CMOVE
25031 && ix86_pre_reload_split ()"
25032 "#"
25033 "&& 1"
25034 [(set (match_dup 0)
25035 (if_then_else:DWIH (match_dup 6)
25036 (match_dup 1)
25037 (match_dup 2)))
25038 (set (match_dup 3)
25039 (if_then_else:DWIH (match_dup 6)
25040 (match_dup 4)
25041 (match_dup 5)))]
25042 {
/* The second input must be a register before it is split.  */
25043 operands[2] = force_reg (<DWI>mode, operands[2]);
25044
/* Split destination and both inputs in one call: one set of halves is
   written back to operands[0..2], the other to operands[3..5].  */
25045 split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]);
25046
/* cmplo/cmphi hold the comparands taken from operands[1..2] and
   operands[4..5] respectively.  */
25047 rtx cmplo[2] = { operands[1], operands[2] };
25048 rtx cmphi[2] = { operands[4], operands[5] };
25049
25050 enum rtx_code code = <maxmin_rel>;
25051
25052 switch (code)
25053 {
/* LE/LEU are handled by exchanging the comparands and swapping the
   condition, then falling through to the GE/GEU code.  */
25054 case LE: case LEU:
25055 std::swap (cmplo[0], cmplo[1]);
25056 std::swap (cmphi[0], cmphi[1]);
25057 code = swap_condition (code);
25058 /* FALLTHRU */
25059
25060 case GE: case GEU:
25061 {
25062 bool uns = (code == GEU);
25063 rtx (*sbb_insn) (machine_mode, rtx, rtx, rtx)
25064 = uns ? gen_sub3_carry_ccc : gen_sub3_carry_ccgz;
25065
/* Compare the cmplo pair, then subtract the cmphi pair through the
   selected carry-using insn.  Only the flags are wanted, so the
   subtraction result goes to a SCRATCH.  */
25066 emit_insn (gen_cmp_1 (<MODE>mode, cmplo[0], cmplo[1]));
25067
25068 rtx tmp = gen_rtx_SCRATCH (<MODE>mode);
25069 emit_insn (sbb_insn (<MODE>mode, tmp, cmphi[0], cmphi[1]));
25070
/* The cmov condition tests FLAGS in CCCmode (unsigned) or CCGZmode
   (signed) against zero.  */
25071 rtx flags = gen_rtx_REG (uns ? CCCmode : CCGZmode, FLAGS_REG);
25072 operands[6] = gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
25073
25074 break;
25075 }
25076
25077 default:
25078 gcc_unreachable ();
25079 }
25080 })
25081
;; Word-or-smaller integer min/max.  Before reload it is always split
;; ("&& 1") into a flags-setting compare, emitted from the preparation
;; code, and a conditional move selecting operand 1 or operand 2 under the
;; condition built in operand 3.
25082 (define_insn_and_split "*<code><mode>3_1"
25083 [(set (match_operand:SWI 0 "register_operand")
25084 (maxmin:SWI
25085 (match_operand:SWI 1 "register_operand")
25086 (match_operand:SWI 2 "general_operand")))
25087 (clobber (reg:CC FLAGS_REG))]
25088 "TARGET_CMOVE
25089 && (<MODE>mode != QImode || !TARGET_PARTIAL_REG_STALL)
25090 && ix86_pre_reload_split ()"
25091 "#"
25092 "&& 1"
25093 [(set (match_dup 0)
25094 (if_then_else:SWI (match_dup 3)
25095 (match_dup 1)
25096 (match_dup 2)))]
25097 {
25098 machine_mode mode = <MODE>mode;
/* cmp_op keeps the original second operand so the constant special
   cases below can still be recognised; operands[2] itself is forced
   into a register for use by the conditional move.  */
25099 rtx cmp_op = operands[2];
25100
25101 operands[2] = force_reg (mode, cmp_op);
25102
25103 enum rtx_code code = <maxmin_rel>;
25104
25105 if (cmp_op == const1_rtx)
25106 {
25107 /* Convert smax (x, 1) into (x > 0 ? x : 1).
25108 Convert umax (x, 1) into (x != 0 ? x : 1).
25109 Convert ?min (x, 1) into (x <= 0 ? x : 1). */
25110 cmp_op = const0_rtx;
25111 if (code == GE)
25112 code = GT;
25113 else if (code == GEU)
25114 code = NE;
25115 }
25116 /* Convert smin (x, -1) into (x < 0 ? x : -1). */
25117 else if (cmp_op == constm1_rtx && code == LE)
25118 {
25119 cmp_op = const0_rtx;
25120 code = LT;
25121 }
25122 /* Convert smax (x, -1) into (x >= 0 ? x : -1). */
25123 else if (cmp_op == constm1_rtx && code == GE)
25124 cmp_op = const0_rtx;
/* Any other operand except literal zero is compared through the
   register copy made above.  */
25125 else if (cmp_op != const0_rtx)
25126 cmp_op = operands[2];
25127
25128 machine_mode cmpmode = SELECT_CC_MODE (code, operands[1], cmp_op);
25129 rtx flags = gen_rtx_REG (cmpmode, FLAGS_REG);
25130
/* Emit the compare now; the template then emits the conditional move.  */
25131 rtx tmp = gen_rtx_COMPARE (cmpmode, operands[1], cmp_op);
25132 emit_insn (gen_rtx_SET (flags, tmp));
25133
25134 operands[3] = gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
25135 })
25136
25137 ;; Avoid clearing a register between a flags setting comparison and its use,
25138 ;; i.e. prefer "xorl %eax,%eax; test/cmp" over "test/cmp; movl $0, %eax".
;; The condition checks peep2_regno_dead_p (0, FLAGS_REG) and that the
;; cleared register (operand 1) is not mentioned in the flags source
;; (operand 0).
25139 (define_peephole2
25140 [(set (reg FLAGS_REG) (match_operand 0))
25141 (set (match_operand:SWI 1 "general_reg_operand") (const_int 0))]
25142 "peep2_regno_dead_p (0, FLAGS_REG)
25143 && !reg_overlap_mentioned_p (operands[1], operands[0])"
25144 [(set (match_dup 2) (match_dup 0))]
25145 {
/* operands[2] is FLAGS_REG in the mode of the flags source.  The clear
   is emitted here, so it precedes the flags set from the template.  */
25146 operands[2] = gen_rtx_REG (GET_MODE (operands[0]), FLAGS_REG);
25147 ix86_expand_clear (operands[1]);
25148 })
25149
25150 ;; When optimizing for size, zeroing memory should use a register.
;; Four consecutive stores of constant zero: a scratch register (operand 0)
;; is cleared once and then stored to each location.  The UID of the last
;; emitted store is recorded in ix86_last_zero_store_uid, which the
;; "continue a run" peepholes further down test.  The condition also checks
;; peep2_regno_dead_p (0, FLAGS_REG).
25151 (define_peephole2
25152 [(match_scratch:SWI48 0 "r")
25153 (set (match_operand:SWI48 1 "memory_operand") (const_int 0))
25154 (set (match_operand:SWI48 2 "memory_operand") (const_int 0))
25155 (set (match_operand:SWI48 3 "memory_operand") (const_int 0))
25156 (set (match_operand:SWI48 4 "memory_operand") (const_int 0))]
25157 "optimize_insn_for_size_p () && peep2_regno_dead_p (0, FLAGS_REG)"
25158 [(const_int 0)]
25159 {
25160 ix86_expand_clear (operands[0]);
25161 emit_move_insn (operands[1], operands[0]);
25162 emit_move_insn (operands[2], operands[0]);
25163 emit_move_insn (operands[3], operands[0]);
25164 ix86_last_zero_store_uid
25165 = INSN_UID (emit_move_insn (operands[4], operands[0]));
25166 DONE;
25167 })
25168
;; Two-store variant of the size-optimizing zero-store peephole: clear a
;; scratch register once, store it to both locations, and record the UID
;; of the last store in ix86_last_zero_store_uid.
25169 (define_peephole2
25170 [(match_scratch:SWI48 0 "r")
25171 (set (match_operand:SWI48 1 "memory_operand") (const_int 0))
25172 (set (match_operand:SWI48 2 "memory_operand") (const_int 0))]
25173 "optimize_insn_for_size_p () && peep2_regno_dead_p (0, FLAGS_REG)"
25174 [(const_int 0)]
25175 {
25176 ix86_expand_clear (operands[0]);
25177 emit_move_insn (operands[1], operands[0]);
25178 ix86_last_zero_store_uid
25179 = INSN_UID (emit_move_insn (operands[2], operands[0]));
25180 DONE;
25181 })
25182
;; Single-store variant of the size-optimizing zero-store peephole: clear a
;; scratch register, store it, and record the store's UID in
;; ix86_last_zero_store_uid.
25183 (define_peephole2
25184 [(match_scratch:SWI48 0 "r")
25185 (set (match_operand:SWI48 1 "memory_operand") (const_int 0))]
25186 "optimize_insn_for_size_p () && peep2_regno_dead_p (0, FLAGS_REG)"
25187 [(const_int 0)]
25188 {
25189 ix86_expand_clear (operands[0]);
25190 ix86_last_zero_store_uid
25191 = INSN_UID (emit_move_insn (operands[1], operands[0]));
25192 DONE;
25193 })
25194
;; Continue a run of zero stores.  The first matched insn is a register
;; store whose UID equals ix86_last_zero_store_uid, i.e. the last store
;; emitted by one of the zero-store peepholes; its source register
;; (operand 0) is reused for the four constant-zero stores that follow.
;; The first store is re-emitted and the recorded UID is updated to the
;; new last store.
25195 (define_peephole2
25196 [(set (match_operand:SWI48 5 "memory_operand")
25197 (match_operand:SWI48 0 "general_reg_operand"))
25198 (set (match_operand:SWI48 1 "memory_operand") (const_int 0))
25199 (set (match_operand:SWI48 2 "memory_operand") (const_int 0))
25200 (set (match_operand:SWI48 3 "memory_operand") (const_int 0))
25201 (set (match_operand:SWI48 4 "memory_operand") (const_int 0))]
25202 "optimize_insn_for_size_p ()
25203 && INSN_UID (peep2_next_insn (0)) == ix86_last_zero_store_uid"
25204 [(const_int 0)]
25205 {
25206 emit_move_insn (operands[5], operands[0]);
25207 emit_move_insn (operands[1], operands[0]);
25208 emit_move_insn (operands[2], operands[0]);
25209 emit_move_insn (operands[3], operands[0]);
25210 ix86_last_zero_store_uid
25211 = INSN_UID (emit_move_insn (operands[4], operands[0]));
25212 DONE;
25213 })
25214
;; Continue a run of zero stores, two-store variant: the register stored by
;; the insn recorded in ix86_last_zero_store_uid (operand 0) is reused for
;; the two constant-zero stores that follow, and the recorded UID is
;; updated to the new last store.
25215 (define_peephole2
25216 [(set (match_operand:SWI48 3 "memory_operand")
25217 (match_operand:SWI48 0 "general_reg_operand"))
25218 (set (match_operand:SWI48 1 "memory_operand") (const_int 0))
25219 (set (match_operand:SWI48 2 "memory_operand") (const_int 0))]
25220 "optimize_insn_for_size_p ()
25221 && INSN_UID (peep2_next_insn (0)) == ix86_last_zero_store_uid"
25222 [(const_int 0)]
25223 {
25224 emit_move_insn (operands[3], operands[0]);
25225 emit_move_insn (operands[1], operands[0]);
25226 ix86_last_zero_store_uid
25227 = INSN_UID (emit_move_insn (operands[2], operands[0]));
25228 DONE;
25229 })
25230
;; Continue a run of zero stores, single-store variant: the register stored
;; by the insn recorded in ix86_last_zero_store_uid (operand 0) is reused
;; for the one constant-zero store that follows, and the recorded UID is
;; updated to that store.
25231 (define_peephole2
25232 [(set (match_operand:SWI48 2 "memory_operand")
25233 (match_operand:SWI48 0 "general_reg_operand"))
25234 (set (match_operand:SWI48 1 "memory_operand") (const_int 0))]
25235 "optimize_insn_for_size_p ()
25236 && INSN_UID (peep2_next_insn (0)) == ix86_last_zero_store_uid"
25237 [(const_int 0)]
25238 {
25239 emit_move_insn (operands[2], operands[0]);
25240 ix86_last_zero_store_uid
25241 = INSN_UID (emit_move_insn (operands[1], operands[0]));
25242 DONE;
25243 })
25244
25245 ;; Reload dislikes loading constants directly into class_likely_spilled
25246 ;; hard registers. Try to tidy things up here.
;; Folds "r0 = operand 1; r2 = r0" into "r2 = operand 1", guarded by
;; peep2_reg_dead_p (2, r0).
25247 (define_peephole2
25248 [(set (match_operand:SWI 0 "general_reg_operand")
25249 (match_operand:SWI 1 "x86_64_general_operand"))
25250 (set (match_operand:SWI 2 "general_reg_operand")
25251 (match_dup 0))]
25252 "peep2_reg_dead_p (2, operands[0])"
25253 [(set (match_dup 2) (match_dup 1))])
25254 \f
25255 ;; Misc patterns (?)
25256
25257 ;; This pattern exists to put a dependency on all ebp-based memory accesses.
25258 ;; Otherwise there will be nothing to keep
25259 ;;
25260 ;; [(set (reg ebp) (reg esp))]
25261 ;; [(set (reg esp) (plus (reg esp) (const_int -160000)))
25262 ;; (clobber (eflags))]
25263 ;; [(set (mem (plus (reg ebp) (const_int -160000))) (const_int 0))]
25264 ;;
25265 ;; in proper program order.
25266
;; The dependency is expressed by the (clobber (mem:BLK (scratch))).
;; The instruction emitted depends on the "type" attribute computed below:
;; "alu" for alternative 0 unless TARGET_OPT_AGU, "imov" when operand 2 is
;; zero, and "lea" otherwise.
25267 (define_insn "@pro_epilogue_adjust_stack_add_<mode>"
25268 [(set (match_operand:P 0 "register_operand" "=r,r")
25269 (plus:P (match_operand:P 1 "register_operand" "0,r")
25270 (match_operand:P 2 "<nonmemory_operand>" "r<i>,l<i>")))
25271 (clobber (reg:CC FLAGS_REG))
25272 (clobber (mem:BLK (scratch)))]
25273 ""
25274 {
25275 switch (get_attr_type (insn))
25276 {
/* Adding zero: a plain register move.  */
25277 case TYPE_IMOV:
25278 return "mov{<imodesuffix>}\t{%1, %0|%0, %1}";
25279
/* Destination equals operand 1 (asserted).  A "sub" is emitted when
   x86_maybe_negate_const_int succeeds on operand 2, an "add" otherwise.  */
25280 case TYPE_ALU:
25281 gcc_assert (rtx_equal_p (operands[0], operands[1]));
25282 if (x86_maybe_negate_const_int (&operands[2], <MODE>mode))
25283 return "sub{<imodesuffix>}\t{%2, %0|%0, %2}";
25284
25285 return "add{<imodesuffix>}\t{%2, %0|%0, %2}";
25286
/* Otherwise use lea: operand 2 is replaced by the whole source of the
   first SET in the pattern and printed as an address.  */
25287 default:
25288 operands[2] = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
25289 return "lea{<imodesuffix>}\t{%E2, %0|%0, %E2}";
25290 }
25291 }
25292 [(set (attr "type")
25293 (cond [(and (eq_attr "alternative" "0")
25294 (not (match_test "TARGET_OPT_AGU")))
25295 (const_string "alu")
25296 (match_operand:<MODE> 2 "const0_operand")
25297 (const_string "imov")
25298 ]
25299 (const_string "lea")))
25300 (set (attr "length_immediate")
25301 (cond [(eq_attr "type" "imov")
25302 (const_string "0")
25303 (and (eq_attr "type" "alu")
25304 (match_operand 2 "const128_operand"))
25305 (const_string "1")
25306 ]
25307 (const_string "*")))
25308 (set_attr "mode" "<MODE>")])
25309
;; Subtraction counterpart of the pattern above: register minus register,
;; with the destination tied to operand 1.  It carries the same FLAGS and
;; (mem:BLK (scratch)) clobbers and is always output as "sub".
25310 (define_insn "@pro_epilogue_adjust_stack_sub_<mode>"
25311 [(set (match_operand:P 0 "register_operand" "=r")
25312 (minus:P (match_operand:P 1 "register_operand" "0")
25313 (match_operand:P 2 "register_operand" "r")))
25314 (clobber (reg:CC FLAGS_REG))
25315 (clobber (mem:BLK (scratch)))]
25316 ""
25317 "sub{<imodesuffix>}\t{%2, %0|%0, %2}"
25318 [(set_attr "type" "alu")
25319 (set_attr "mode" "<MODE>")])
25320
;; Stack probe via a call to ___chkstk_ms.  Input and output are the same
;; register (constraints "=a" and "0"); the insn is an unspec_volatile,
;; clobbers FLAGS and has a fixed length attribute of 5.  Enabled when
;; ix86_target_stack_probe () is true.
25321 (define_insn "@allocate_stack_worker_probe_<mode>"
25322 [(set (match_operand:P 0 "register_operand" "=a")
25323 (unspec_volatile:P [(match_operand:P 1 "register_operand" "0")]
25324 UNSPECV_STACK_PROBE))
25325 (clobber (reg:CC FLAGS_REG))]
25326 "ix86_target_stack_probe ()"
25327 "call\t___chkstk_ms"
25328 [(set_attr "type" "multi")
25329 (set_attr "length" "5")])
25330
;; Dynamic stack allocation for targets that need stack probing.
;; Operand 0 receives the address of the new area, operand 1 is the size.
25331 (define_expand "allocate_stack"
25332 [(match_operand 0 "register_operand")
25333 (match_operand 1 "general_operand")]
25334 "ix86_target_stack_probe ()"
25335 {
25336 rtx x;
25337
/* CHECK_STACK_LIMIT defaults to 0 here, which makes the constant-size
   shortcut below unreachable unless the macro is defined elsewhere.  */
25338 #ifndef CHECK_STACK_LIMIT
25339 #define CHECK_STACK_LIMIT 0
25340 #endif
25341
/* Constant sizes below the limit skip the probe; every other size is
   copied into a Pmode register and passed through the probe worker.  */
25342 if (CHECK_STACK_LIMIT && CONST_INT_P (operands[1])
25343 && INTVAL (operands[1]) < CHECK_STACK_LIMIT)
25344 x = operands[1];
25345 else
25346 {
25347 x = copy_to_mode_reg (Pmode, operands[1]);
25348
25349 emit_insn (gen_allocate_stack_worker_probe (Pmode, x, x));
25350 }
25351
/* sp = sp - size, targeting the stack pointer directly; an explicit move
   is emitted only when the result landed somewhere else.  */
25352 x = expand_simple_binop (Pmode, MINUS, stack_pointer_rtx, x,
25353 stack_pointer_rtx, 0, OPTAB_DIRECT);
25354
25355 if (x != stack_pointer_rtx)
25356 emit_move_insn (stack_pointer_rtx, x);
25357
25358 emit_move_insn (operands[0], virtual_stack_dynamic_rtx);
25359 DONE;
25360 })
25361
;; Probe the stack at memory operand 0 by emitting probe_stack_1 in
;; word_mode with a zero constant.
25362 (define_expand "probe_stack"
25363 [(match_operand 0 "memory_operand")]
25364 ""
25365 {
25366 emit_insn (gen_probe_stack_1
25367 (word_mode, operands[0], const0_rtx));
25368 DONE;
25369 })
25370
25371 ;; Use OR for stack probes, this is shorter.
;; Operand 1 must be a zero constant (const0_operand), so the emitted "or"
;; is an OR of zero into memory operand 0.  The insn clobbers FLAGS and has
;; length_immediate 1.
25372 (define_insn "@probe_stack_1_<mode>"
25373 [(set (match_operand:W 0 "memory_operand" "=m")
25374 (unspec:W [(match_operand:W 1 "const0_operand")]
25375 UNSPEC_PROBE_STACK))
25376 (clobber (reg:CC FLAGS_REG))]
25377 ""
25378 "or{<imodesuffix>}\t{%1, %0|%0, %1}"
25379 [(set_attr "type" "alu1")
25380 (set_attr "mode" "<MODE>")
25381 (set_attr "length_immediate" "1")])
25382
;; Combined stack adjustment and probing.  The RTL sets operand 0 through a
;; volatile unspec of operand 1 (tied to operand 0), decrements SP by the
;; constant operand 2, and clobbers FLAGS and (mem:BLK (scratch)).  The
;; assembly text comes from output_adjust_stack_and_probe.
25383 (define_insn "@adjust_stack_and_probe_<mode>"
25384 [(set (match_operand:P 0 "register_operand" "=r")
25385 (unspec_volatile:P [(match_operand:P 1 "register_operand" "0")]
25386 UNSPECV_PROBE_STACK_RANGE))
25387 (set (reg:P SP_REG)
25388 (minus:P (reg:P SP_REG) (match_operand:P 2 "const_int_operand")))
25389 (clobber (reg:CC FLAGS_REG))
25390 (clobber (mem:BLK (scratch)))]
25391 ""
25392 "* return output_adjust_stack_and_probe (operands[0]);"
25393 [(set_attr "type" "multi")])
25394
;; Probe a range of stack.  Operand 0 is tied to operand 1 and operand 2 is
;; a constant integer; the assembly text comes from
;; output_probe_stack_range (operands[0], operands[2]).  Clobbers FLAGS.
25395 (define_insn "@probe_stack_range_<mode>"
25396 [(set (match_operand:P 0 "register_operand" "=r")
25397 (unspec_volatile:P [(match_operand:P 1 "register_operand" "0")
25398 (match_operand:P 2 "const_int_operand")]
25399 UNSPECV_PROBE_STACK_RANGE))
25400 (clobber (reg:CC FLAGS_REG))]
25401 ""
25402 "* return output_probe_stack_range (operands[0], operands[2]);"
25403 [(set_attr "type" "multi")])
25404
;; Re-establish the PIC register at a __builtin_setjmp receiver.  Only
;; enabled for 32-bit PIC code.
25405 (define_expand "builtin_setjmp_receiver"
25406 [(label_ref (match_operand 0))]
25407 "!TARGET_64BIT && flag_pic"
25408 {
25409 #if TARGET_MACHO
/* Mach-O: set the PIC register relative to a fresh local label, then
   subtract that label's machopic offset from it.  */
25410 if (TARGET_MACHO)
25411 {
25412 rtx xops[3];
25413 rtx_code_label *label_rtx = gen_label_rtx ();
25414 emit_insn (gen_set_got_labelled (pic_offset_table_rtx, label_rtx));
25415 xops[0] = xops[1] = pic_offset_table_rtx;
25416 xops[2] = machopic_gen_offset (gen_rtx_LABEL_REF (SImode, label_rtx));
25417 ix86_expand_binary_operator (MINUS, SImode, xops);
25418 }
25419 else
25420 #endif
25421 emit_insn (gen_set_got (pic_offset_table_rtx));
25422 DONE;
25423 })
25424
;; Save the stack pointer (operand 1) into the save area (operand 0) for a
;; nonlocal goto.  With -fcf-protection return protection enabled
;; (flag_cf_protection & CF_RETURN) the save area has two slots.
25425 (define_expand "save_stack_nonlocal"
25426 [(set (match_operand 0 "memory_operand")
25427 (match_operand 1 "register_operand"))]
25428 ""
25429 {
25430 rtx stack_slot;
25431
25432 if (flag_cf_protection & CF_RETURN)
25433 {
25434 /* Copy shadow stack pointer to the first slot
25435 and stack pointer to the second slot. */
25436 rtx ssp_slot = adjust_address (operands[0], word_mode, 0);
25437 stack_slot = adjust_address (operands[0], Pmode, UNITS_PER_WORD);
25438
/* The register is initialised to zero before rdssp and is passed as both
   source and destination of that insn.  */
25439 rtx reg_ssp = force_reg (word_mode, const0_rtx);
25440 emit_insn (gen_rdssp (word_mode, reg_ssp, reg_ssp));
25441 emit_move_insn (ssp_slot, reg_ssp);
25442 }
25443 else
25444 stack_slot = adjust_address (operands[0], Pmode, 0);
25445 emit_move_insn (stack_slot, operands[1]);
25446 DONE;
25447 })
25448
;; Restore the stack pointer (operand 0) from the save area (operand 1)
;; written by save_stack_nonlocal.  With return protection enabled
;; (flag_cf_protection & CF_RETURN) the shadow stack pointer is first
;; advanced to the saved value, in steps of at most 255 via incssp.
25449 (define_expand "restore_stack_nonlocal"
25450 [(set (match_operand 0 "register_operand" "")
25451 (match_operand 1 "memory_operand" ""))]
25452 ""
25453 {
25454 rtx stack_slot;
25455
25456 if (flag_cf_protection & CF_RETURN)
25457 {
25458 /* Restore shadow stack pointer from the first slot
25459 and stack pointer from the second slot. */
25460 rtx ssp_slot = adjust_address (operands[1], word_mode, 0);
25461 stack_slot = adjust_address (operands[1], Pmode, UNITS_PER_WORD);
25462
25463 /* Get the current shadow stack pointer. The code below will check if
25464 SHSTK feature is enabled. If it is not enabled the RDSSP instruction
25465 is a NOP. */
25466 rtx reg_ssp = force_reg (word_mode, const0_rtx);
25467 emit_insn (gen_rdssp (word_mode, reg_ssp, reg_ssp));
25468
25469 /* Compare through subtraction the saved and the current ssp
25470 to decide if ssp has to be adjusted. */
25471 reg_ssp = expand_simple_binop (word_mode, MINUS,
25472 reg_ssp, ssp_slot,
25473 reg_ssp, 1, OPTAB_DIRECT);
25474
25475 /* Compare and jump over adjustment code. */
25476 rtx noadj_label = gen_label_rtx ();
25477 emit_cmp_and_jump_insns (reg_ssp, const0_rtx, EQ, NULL_RTX,
25478 word_mode, 1, noadj_label);
25479
25480 /* Compute the number of frames to adjust. */
/* This is the negated difference (saved minus current) shifted right
   by log2 (UNITS_PER_WORD), i.e. converted from bytes to words.  */
25481 rtx reg_adj = gen_lowpart (ptr_mode, reg_ssp);
25482 rtx reg_adj_neg = expand_simple_unop (ptr_mode, NEG, reg_adj,
25483 NULL_RTX, 1);
25484
25485 reg_adj = expand_simple_binop (ptr_mode, LSHIFTRT, reg_adj_neg,
25486 GEN_INT (exact_log2 (UNITS_PER_WORD)),
25487 reg_adj, 1, OPTAB_DIRECT);
25488
25489 /* Check if number of frames <= 255 so no loop is needed. */
25490 rtx inc_label = gen_label_rtx ();
25491 emit_cmp_and_jump_insns (reg_adj, GEN_INT (255), LEU, NULL_RTX,
25492 ptr_mode, 1, inc_label);
25493
25494 /* Adjust the ssp in a loop. */
25495 rtx loop_label = gen_label_rtx ();
25496 emit_label (loop_label);
25497 LABEL_NUSES (loop_label) = 1;
25498
25499 rtx reg_255 = force_reg (word_mode, GEN_INT (255));
25500 emit_insn (gen_incssp (word_mode, reg_255));
25501
25502 reg_adj = expand_simple_binop (ptr_mode, MINUS,
25503 reg_adj, GEN_INT (255),
25504 reg_adj, 1, OPTAB_DIRECT);
25505
25506 /* Compare and jump to the loop label. */
25507 emit_cmp_and_jump_insns (reg_adj, GEN_INT (255), GTU, NULL_RTX,
25508 ptr_mode, 1, loop_label);
25509
25510 emit_label (inc_label);
25511 LABEL_NUSES (inc_label) = 1;
25512
/* Final adjustment by the remaining count.
   NOTE(review): this passes reg_ssp while the count was computed in
   reg_adj, which was created as the ptr_mode lowpart of reg_ssp and is
   requested as the target of the operations above -- confirm that both
   always refer to the same register.  */
25513 emit_insn (gen_incssp (word_mode, reg_ssp));
25514
25515 emit_label (noadj_label);
25516 LABEL_NUSES (noadj_label) = 1;
25517 }
25518 else
25519 stack_slot = adjust_address (operands[1], Pmode, 0);
25520 emit_move_insn (operands[0], stack_slot);
25521 DONE;
25522 })
25523
;; Stack protector setup: memory operand 1 is copied to memory operand 0
;; by emitting stack_protect_set_1 with a freshly allocated word_mode
;; scratch register.
25524 (define_expand "stack_protect_set"
25525 [(match_operand 0 "memory_operand")
25526 (match_operand 1 "memory_operand")]
25527 ""
25528 {
25529 rtx scratch = gen_reg_rtx (word_mode);
25530
25531 emit_insn (gen_stack_protect_set_1
25532 (ptr_mode, word_mode, operands[0], operands[1], scratch));
25533 DONE;
25534 })
25535
;; Memory-to-memory copy through scratch register operand 2, emitted as a
;; single insn.  The scratch is early-clobbered ("=&r") and the RTL records
;; that it is set to zero, which the third emitted instruction does.
25536 (define_insn "@stack_protect_set_1_<PTR:mode>_<W:mode>"
25537 [(set (match_operand:PTR 0 "memory_operand" "=m")
25538 (unspec:PTR [(match_operand:PTR 1 "memory_operand" "m")]
25539 UNSPEC_SP_SET))
25540 (set (match_operand:W 2 "register_operand" "=&r") (const_int 0))
25541 (clobber (reg:CC FLAGS_REG))]
25542 ""
25543 {
/* Load operand 1 into the scratch, then store the scratch to operand 0.  */
25544 output_asm_insn ("mov{<PTR:imodesuffix>}\t{%1, %<PTR:k>2|%<PTR:k>2, %1}",
25545 operands);
25546 output_asm_insn ("mov{<PTR:imodesuffix>}\t{%<PTR:k>2, %0|%0, %<PTR:k>2}",
25547 operands);
/* Zero the scratch: xor unless TARGET_USE_MOV0 is set and the insn is
   not being optimized for size, in which case mov $0 is used.  */
25548 if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ())
25549 return "xor{l}\t%k2, %k2";
25550 else
25551 return "mov{l}\t{$0, %k2|%k2, 0}";
25552 }
25553 [(set_attr "type" "multi")])
25554
25555 ;; Patterns and peephole2s to optimize stack_protect_set_1_<mode>
25556 ;; immediately followed by *mov{s,d}i_internal, where we can avoid
25557 ;; the xor{l} above. We don't split this, so that scheduling or
25558 ;; anything else doesn't separate the *stack_protect_set* pattern from
25559 ;; the set of the register that overwrites the register with a new value.
25560
;; Matches the stack_protect_set_1 parallel immediately followed by a set
;; of another general register (operand 3) to a const0_operand, and
;; replaces both with one parallel of the same shape whose zeroed register
;; is operand 3 instead of operand 2.
;; Conditions: operand 2 is word_mode, operand 3 is no wider than
;; UNITS_PER_WORD, and the two peep2_reg_dead_p checks hold.
;; The preparation statement rewrites operand 3 as its word_mode lowpart
;; before it is used in the replacement.
25561 (define_peephole2
25562 [(parallel [(set (match_operand:PTR 0 "memory_operand")
25563 (unspec:PTR [(match_operand:PTR 1 "memory_operand")]
25564 UNSPEC_SP_SET))
25565 (set (match_operand 2 "general_reg_operand") (const_int 0))
25566 (clobber (reg:CC FLAGS_REG))])
25567 (set (match_operand 3 "general_reg_operand")
25568 (match_operand 4 "const0_operand"))]
25569 "GET_MODE (operands[2]) == word_mode
25570 && GET_MODE_SIZE (GET_MODE (operands[3])) <= UNITS_PER_WORD
25571 && peep2_reg_dead_p (0, operands[3])
25572 && peep2_reg_dead_p (1, operands[2])"
25573 [(parallel [(set (match_dup 0)
25574 (unspec:PTR [(match_dup 1)] UNSPEC_SP_SET))
25575 (set (match_dup 3) (const_int 0))
25576 (clobber (reg:CC FLAGS_REG))])]
25577 "operands[3] = gen_lowpart (word_mode, operands[3]);")
25578
;; Post-reload insn combining the UNSPEC_SP_SET memory copy
;; (operand 3 -> operand 0) with an SImode set of register operand 1 from
;; general operand 2.  Unlike stack_protect_set_1 there is no flags
;; clobber in this pattern.
;; Operand 1 is early-clobbered: the two moves emitted first use it as the
;; temporary for the copy.  The returned final instruction loads
;; operand 2 into it, using lea when pic_32bit_operand or
;; ix86_use_lea_for_mov accepts the operands, and a plain mov otherwise.
25579 (define_insn "*stack_protect_set_2_<mode>_si"
25580 [(set (match_operand:PTR 0 "memory_operand" "=m")
25581 (unspec:PTR [(match_operand:PTR 3 "memory_operand" "m")]
25582 UNSPEC_SP_SET))
25583 (set (match_operand:SI 1 "register_operand" "=&r")
25584 (match_operand:SI 2 "general_operand" "g"))]
25585 "reload_completed"
25586 {
25587 output_asm_insn ("mov{<imodesuffix>}\t{%3, %<k>1|%<k>1, %3}", operands);
25588 output_asm_insn ("mov{<imodesuffix>}\t{%<k>1, %0|%0, %<k>1}", operands);
25589 if (pic_32bit_operand (operands[2], SImode)
25590 || ix86_use_lea_for_mov (insn, operands + 1))
25591 return "lea{l}\t{%E2, %1|%1, %E2}";
25592 else
25593 return "mov{l}\t{%2, %1|%1, %2}";
25594 }
25595 [(set_attr "type" "multi")
25596 (set_attr "length" "24")])
25597
;; DImode counterpart of *stack_protect_set_2_<mode>_si, enabled only for
;; TARGET_64BIT after reload.  Operand 2 has three alternatives with
;; constraints "Z", "rem" and "i".
;; After the two copy moves through early-clobbered operand 1, the final
;; instruction is selected in this order:
;;   1. lea{q} if operand 2 is a pic_32bit_operand;
;;   2. mov{l} on the 32-bit views (%k) for alternative 0;
;;   3. movabs{q} for alternative 2;
;;   4. lea{q} if ix86_use_lea_for_mov accepts the operands;
;;   5. mov{q} otherwise.
25598 (define_insn "*stack_protect_set_2_<mode>_di"
25599 [(set (match_operand:PTR 0 "memory_operand" "=m,m,m")
25600 (unspec:PTR [(match_operand:PTR 3 "memory_operand" "m,m,m")]
25601 UNSPEC_SP_SET))
25602 (set (match_operand:DI 1 "register_operand" "=&r,&r,&r")
25603 (match_operand:DI 2 "general_operand" "Z,rem,i"))]
25604 "TARGET_64BIT && reload_completed"
25605 {
25606 output_asm_insn ("mov{<imodesuffix>}\t{%3, %<k>1|%<k>1, %3}", operands);
25607 output_asm_insn ("mov{<imodesuffix>}\t{%<k>1, %0|%0, %<k>1}", operands);
25608 if (pic_32bit_operand (operands[2], DImode))
25609 return "lea{q}\t{%E2, %1|%1, %E2}";
25610 else if (which_alternative == 0)
25611 return "mov{l}\t{%k2, %k1|%k1, %k2}";
25612 else if (which_alternative == 2)
25613 return "movabs{q}\t{%2, %1|%1, %2}";
25614 else if (ix86_use_lea_for_mov (insn, operands + 1))
25615 return "lea{q}\t{%E2, %1|%1, %E2}";
25616 else
25617 return "mov{q}\t{%2, %1|%1, %2}";
25618 }
25619 [(set_attr "type" "multi")
25620 (set_attr "length" "24")])
25621
;; Matches the stack_protect_set_1 parallel immediately followed by an
;; SWI48-mode set of general register operand 3 from a general_gr_operand.
;; The replacement is a single parallel containing the UNSPEC_SP_SET copy
;; and the set of operand 3; the zeroing of operand 2 and the flags
;; clobber are both dropped.
;; Conditions: operand 2 is word_mode and the two peep2_reg_dead_p checks
;; on operands 3 and 2 hold.
25622 (define_peephole2
25623 [(parallel [(set (match_operand:PTR 0 "memory_operand")
25624 (unspec:PTR [(match_operand:PTR 1 "memory_operand")]
25625 UNSPEC_SP_SET))
25626 (set (match_operand 2 "general_reg_operand") (const_int 0))
25627 (clobber (reg:CC FLAGS_REG))])
25628 (set (match_operand:SWI48 3 "general_reg_operand")
25629 (match_operand:SWI48 4 "general_gr_operand"))]
25630 "GET_MODE (operands[2]) == word_mode
25631 && peep2_reg_dead_p (0, operands[3])
25632 && peep2_reg_dead_p (1, operands[2])"
25633 [(parallel [(set (match_dup 0)
25634 (unspec:PTR [(match_dup 1)] UNSPEC_SP_SET))
25635 (set (match_dup 3) (match_dup 4))])])
25636
;; Variant of the stack_protect_set_1 + SWI48 load fusion for the opposite
;; insn order: here the register load (operand 3 <- operand 4) comes first
;; and the stack_protect_set_1 parallel second.  The replacement parallel
;; is the same: the UNSPEC_SP_SET copy plus the set of operand 3.
;; In addition to the word_mode and peep2_reg_dead_p checks, operand 3
;; must not be mentioned in either memory operand (0 or 1).
;; NOTE(review): presumably needed because the fused insn uses operand 3
;; as the temporary for the copy (see the mov sequence emitted by
;; *stack_protect_set_2_<mode>_si/_di) -- confirm.
25637 (define_peephole2
25638 [(set (match_operand:SWI48 3 "general_reg_operand")
25639 (match_operand:SWI48 4 "general_gr_operand"))
25640 (parallel [(set (match_operand:PTR 0 "memory_operand")
25641 (unspec:PTR [(match_operand:PTR 1 "memory_operand")]
25642 UNSPEC_SP_SET))
25643 (set (match_operand 2 "general_reg_operand") (const_int 0))
25644 (clobber (reg:CC FLAGS_REG))])]
25645 "GET_MODE (operands[2]) == word_mode
25646 && peep2_reg_dead_p (0, operands[3])
25647 && peep2_reg_dead_p (2, operands[2])
25648 && !reg_mentioned_p (operands[3], operands[0])
25649 && !reg_mentioned_p (operands[3], operands[1])"
25650 [(parallel [(set (match_dup 0)
25651 (unspec:PTR [(match_dup 1)] UNSPEC_SP_SET))
25652 (set (match_dup 3) (match_dup 4))])])
25653
;; Insn combining the UNSPEC_SP_SET memory copy (operand 3 -> operand 0)
;; with loading an address (address_no_seg_operand, constraint "Ts") into
;; early-clobbered register operand 1.
;; After the two copy moves through operand 1, the address is loaded with
;; lea.  If operand 2 is a SImode_address_operand the code asserts
;; TARGET_64BIT and emits lea{l} into the 32-bit view %k1; otherwise it
;; emits lea with the SWI48 mode suffix into %1.
25654 (define_insn "*stack_protect_set_3_<PTR:mode>_<SWI48:mode>"
25655 [(set (match_operand:PTR 0 "memory_operand" "=m")
25656 (unspec:PTR [(match_operand:PTR 3 "memory_operand" "m")]
25657 UNSPEC_SP_SET))
25658 (set (match_operand:SWI48 1 "register_operand" "=&r")
25659 (match_operand:SWI48 2 "address_no_seg_operand" "Ts"))]
25660 ""
25661 {
25662 output_asm_insn ("mov{<PTR:imodesuffix>}\t{%3, %<PTR:k>1|%<PTR:k>1, %3}",
25663 operands);
25664 output_asm_insn ("mov{<PTR:imodesuffix>}\t{%<PTR:k>1, %0|%0, %<PTR:k>1}",
25665 operands);
25666 if (SImode_address_operand (operands[2], VOIDmode))
25667 {
25668 gcc_assert (TARGET_64BIT);
25669 return "lea{l}\t{%E2, %k1|%k1, %E2}";
25670 }
25671 else
25672 return "lea{<SWI48:imodesuffix>}\t{%E2, %1|%1, %E2}";
25673 }
25674 [(set_attr "type" "multi")
25675 (set_attr "length" "24")])
25676
;; Matches the stack_protect_set_1 parallel immediately followed by an
;; SWI48-mode set of general register operand 3 from an
;; address_no_seg_operand.  The replacement is a single parallel with the
;; UNSPEC_SP_SET copy and the set of operand 3, without the zeroing of
;; operand 2 or the flags clobber.
;; Conditions: operand 2 is word_mode and the two peep2_reg_dead_p checks
;; on operands 3 and 2 hold.
25677 (define_peephole2
25678 [(parallel [(set (match_operand:PTR 0 "memory_operand")
25679 (unspec:PTR [(match_operand:PTR 1 "memory_operand")]
25680 UNSPEC_SP_SET))
25681 (set (match_operand 2 "general_reg_operand") (const_int 0))
25682 (clobber (reg:CC FLAGS_REG))])
25683 (set (match_operand:SWI48 3 "general_reg_operand")
25684 (match_operand:SWI48 4 "address_no_seg_operand"))]
25685 "GET_MODE (operands[2]) == word_mode
25686 && peep2_reg_dead_p (0, operands[3])
25687 && peep2_reg_dead_p (1, operands[2])"
25688 [(parallel [(set (match_dup 0)
25689 (unspec:PTR [(match_dup 1)] UNSPEC_SP_SET))
25690 (set (match_dup 3) (match_dup 4))])])
25691
;; 64-bit, post-reload insn combining the UNSPEC_SP_SET memory copy
;; (operand 3 -> operand 0) with a zero-extending SImode -> DImode set of
;; early-clobbered register operand 1 from operand 2 (register or memory).
;; After the two copy moves through operand 1, the final instruction
;; writes the 32-bit view %k1: lea{l} when ix86_use_lea_for_mov accepts
;; the operands, mov{l} otherwise.
25692 (define_insn "*stack_protect_set_4z_<mode>_di"
25693 [(set (match_operand:PTR 0 "memory_operand" "=m")
25694 (unspec:PTR [(match_operand:PTR 3 "memory_operand" "m")]
25695 UNSPEC_SP_SET))
25696 (set (match_operand:DI 1 "register_operand" "=&r")
25697 (zero_extend:DI (match_operand:SI 2 "nonimmediate_operand" "rm")))]
25698 "TARGET_64BIT && reload_completed"
25699 {
25700 output_asm_insn ("mov{<imodesuffix>}\t{%3, %<k>1|%<k>1, %3}", operands);
25701 output_asm_insn ("mov{<imodesuffix>}\t{%<k>1, %0|%0, %<k>1}", operands);
25702 if (ix86_use_lea_for_mov (insn, operands + 1))
25703 return "lea{l}\t{%E2, %k1|%k1, %E2}";
25704 else
25705 return "mov{l}\t{%2, %k1|%k1, %2}";
25706 }
25707 [(set_attr "type" "multi")
25708 (set_attr "length" "24")])
25709
;; 64-bit, post-reload insn combining the UNSPEC_SP_SET memory copy
;; (operand 3 -> operand 0) with a sign-extending SImode -> DImode set of
;; early-clobbered register operand 1 from operand 2 (register or memory).
;; After the two copy moves through operand 1, the extension is always
;; emitted with the movs{lq|x} template.
25710 (define_insn "*stack_protect_set_4s_<mode>_di"
25711 [(set (match_operand:PTR 0 "memory_operand" "=m")
25712 (unspec:PTR [(match_operand:PTR 3 "memory_operand" "m")]
25713 UNSPEC_SP_SET))
25714 (set (match_operand:DI 1 "register_operand" "=&r")
25715 (sign_extend:DI (match_operand:SI 2 "nonimmediate_operand" "rm")))]
25716 "TARGET_64BIT && reload_completed"
25717 {
25718 output_asm_insn ("mov{<imodesuffix>}\t{%3, %<k>1|%<k>1, %3}", operands);
25719 output_asm_insn ("mov{<imodesuffix>}\t{%<k>1, %0|%0, %<k>1}", operands);
25720 return "movs{lq|x}\t{%2, %1|%1, %2}";
25721 }
25722 [(set_attr "type" "multi")
25723 (set_attr "length" "24")])
25724
;; Matches the stack_protect_set_1 parallel immediately followed by a
;; DImode set of general register operand 3 from an any_extend of SImode
;; operand 4 (nonimmediate_gr_operand).  The replacement is a single
;; parallel with the UNSPEC_SP_SET copy and the same extending set,
;; without the zeroing of operand 2 or the flags clobber.
;; Conditions: TARGET_64BIT, operand 2 is word_mode, and the two
;; peep2_reg_dead_p checks on operands 3 and 2 hold.
25725 (define_peephole2
25726 [(parallel [(set (match_operand:PTR 0 "memory_operand")
25727 (unspec:PTR [(match_operand:PTR 1 "memory_operand")]
25728 UNSPEC_SP_SET))
25729 (set (match_operand 2 "general_reg_operand") (const_int 0))
25730 (clobber (reg:CC FLAGS_REG))])
25731 (set (match_operand:DI 3 "general_reg_operand")
25732 (any_extend:DI
25733 (match_operand:SI 4 "nonimmediate_gr_operand")))]
25734 "TARGET_64BIT
25735 && GET_MODE (operands[2]) == word_mode
25736 && peep2_reg_dead_p (0, operands[3])
25737 && peep2_reg_dead_p (1, operands[2])"
25738 [(parallel [(set (match_dup 0)
25739 (unspec:PTR [(match_dup 1)] UNSPEC_SP_SET))
25740 (set (match_dup 3)
25741 (any_extend:DI (match_dup 4)))])])
25742
;; Expander for the "stack_protect_test" pattern.  Operands 0 and 1 are
;; memory operands; operand 2 has no predicate and is passed as the last
;; argument of gen_cbranchcc4 (presumably the branch target label).
;; The preparation code builds a CCZmode FLAGS_REG rtx, emits the ptr_mode
;; variant of stack_protect_test_1 to set it from the two memory operands,
;; and then emits a conditional jump built from an EQ comparison of those
;; flags against const0_rtx.
25743 (define_expand "stack_protect_test"
25744 [(match_operand 0 "memory_operand")
25745 (match_operand 1 "memory_operand")
25746 (match_operand 2)]
25747 ""
25748 {
25749 rtx flags = gen_rtx_REG (CCZmode, FLAGS_REG);
25750
25751 emit_insn (gen_stack_protect_test_1
25752 (ptr_mode, flags, operands[0], operands[1]));
25753
25754 emit_jump_insn (gen_cbranchcc4 (gen_rtx_EQ (VOIDmode, flags, const0_rtx),
25755 flags, const0_rtx, operands[2]));
25756 DONE;
25757 })
25758
;; Sets the CCZ flags register (operand 0) from UNSPEC_SP_TEST of the two
;; PTR-mode memory operands 1 and 2, clobbering an early-clobbered PTR-mode
;; scratch register (operand 3).
;; Emitted as a mov of %1 into the scratch followed by a sub of %2 from
;; the scratch.
25759 (define_insn "@stack_protect_test_1_<mode>"
25760 [(set (match_operand:CCZ 0 "flags_reg_operand")
25761 (unspec:CCZ [(match_operand:PTR 1 "memory_operand" "m")
25762 (match_operand:PTR 2 "memory_operand" "m")]
25763 UNSPEC_SP_TEST))
25764 (clobber (match_scratch:PTR 3 "=&r"))]
25765 ""
25766 {
25767 output_asm_insn ("mov{<imodesuffix>}\t{%1, %3|%3, %1}", operands);
25768 return "sub{<imodesuffix>}\t{%2, %3|%3, %2}";
25769 }
25770 [(set_attr "type" "multi")])
25771
25772 ;; Avoid redundant prefixes by splitting HImode arithmetic to SImode.
25773 ;; Do not split instructions with mask registers.
;; Post-reload split that re-expresses a promotable binary operation (with
;; a flags clobber) on general registers in SImode.
;; Applies when !TARGET_PARTIAL_REG_STALL and either
;;   - the destination is HImode and (the function is optimized for speed
;;     without TARGET_FAST_PREFIX, or operand 2 is not a CONST_INT, or
;;     operand 2 satisfies constraint K), or
;;   - the destination is QImode and (TARGET_PROMOTE_QImode or the
;;     function is optimized for size).
;; Operands 0 and 1 are replaced by their SImode lowparts; operand 2 is
;; converted as well unless the operator is ASHIFT.  The operator rtx is
;; shallow-copied before its mode is changed to SImode.
25774 (define_split
25775 [(set (match_operand 0 "general_reg_operand")
25776 (match_operator 3 "promotable_binary_operator"
25777 [(match_operand 1 "general_reg_operand")
25778 (match_operand 2 "aligned_operand")]))
25779 (clobber (reg:CC FLAGS_REG))]
25780 "! TARGET_PARTIAL_REG_STALL && reload_completed
25781 && ((GET_MODE (operands[0]) == HImode
25782 && ((optimize_function_for_speed_p (cfun) && !TARGET_FAST_PREFIX)
25783 /* ??? next two lines just !satisfies_constraint_K (...) */
25784 || !CONST_INT_P (operands[2])
25785 || satisfies_constraint_K (operands[2])))
25786 || (GET_MODE (operands[0]) == QImode
25787 && (TARGET_PROMOTE_QImode || optimize_function_for_size_p (cfun))))"
25788 [(parallel [(set (match_dup 0)
25789 (match_op_dup 3 [(match_dup 1) (match_dup 2)]))
25790 (clobber (reg:CC FLAGS_REG))])]
25791 {
25792 operands[0] = gen_lowpart (SImode, operands[0]);
25793 operands[1] = gen_lowpart (SImode, operands[1]);
25794 if (GET_CODE (operands[3]) != ASHIFT)
25795 operands[2] = gen_lowpart (SImode, operands[2]);
25796 operands[3] = shallow_copy_rtx (operands[3]);
25797 PUT_MODE (operands[3], SImode);
25798 })
25799
25800 ; Promote the QImode tests, as i386 has encoding of the AND
25801 ; instruction with 32-bit sign-extended immediate and thus the
25802 ; instruction size is unchanged, except in the %eax case for
25803 ; which it is increased by one byte, hence the ! optimize_size.
;; Post-reload split of a flag-setting AND into its SImode form.  The
;; matched insn is a compare of (and operand 3, const_int operand 4)
;; against zero, together with a set of register operand 1 to the same
;; AND.
;; Applies when !TARGET_PARTIAL_REG_STALL, the insn is optimized for
;; speed, and operand 1 is HImode without TARGET_FAST_PREFIX or QImode
;; with TARGET_PROMOTE_QImode.  The ix86_match_ccmode check uses CCNOmode
;; for a non-negative immediate and CCZmode otherwise.
;; The immediate is masked with GET_MODE_MASK of operand 1's original mode
;; and rebuilt as an SImode constant before operands 1 and 3 are replaced
;; by their SImode lowparts.
25804 (define_split
25805 [(set (match_operand 0 "flags_reg_operand")
25806 (match_operator 2 "compare_operator"
25807 [(and (match_operand 3 "aligned_operand")
25808 (match_operand 4 "const_int_operand"))
25809 (const_int 0)]))
25810 (set (match_operand 1 "register_operand")
25811 (and (match_dup 3) (match_dup 4)))]
25812 "! TARGET_PARTIAL_REG_STALL && reload_completed
25813 && optimize_insn_for_speed_p ()
25814 && ((GET_MODE (operands[1]) == HImode && ! TARGET_FAST_PREFIX)
25815 || (GET_MODE (operands[1]) == QImode && TARGET_PROMOTE_QImode))
25816 /* Ensure that the operand will remain sign-extended immediate. */
25817 && ix86_match_ccmode (insn, INTVAL (operands[4]) >= 0 ? CCNOmode : CCZmode)"
25818 [(parallel [(set (match_dup 0)
25819 (match_op_dup 2 [(and:SI (match_dup 3) (match_dup 4))
25820 (const_int 0)]))
25821 (set (match_dup 1)
25822 (and:SI (match_dup 3) (match_dup 4)))])]
25823 {
25824 operands[4]
25825 = gen_int_mode (INTVAL (operands[4])
25826 & GET_MODE_MASK (GET_MODE (operands[1])), SImode);
25827 operands[1] = gen_lowpart (SImode, operands[1]);
25828 operands[3] = gen_lowpart (SImode, operands[3]);
25829 })
25830
25831 ; Don't promote the QImode tests, as i386 doesn't have encoding of
25832 ; the TEST instruction with 32-bit sign-extended immediate and thus
25833 ; the instruction size would at least double, which is not what we
25834 ; want even with ! optimize_size.
;; Post-reload split of an HImode test (compare of (and operand 2,
;; const_int operand 3) against zero, with no second set) into its SImode
;; form.
;; Applies when !TARGET_PARTIAL_REG_STALL, !TARGET_FAST_PREFIX and the
;; insn is optimized for speed.  The ix86_match_ccmode check uses CCNOmode
;; for a non-negative immediate and CCZmode otherwise.
;; The immediate is masked with GET_MODE_MASK of operand 2's original mode
;; and rebuilt as an SImode constant before operand 2 is replaced by its
;; SImode lowpart.
25835 (define_split
25836 [(set (match_operand 0 "flags_reg_operand")
25837 (match_operator 1 "compare_operator"
25838 [(and (match_operand:HI 2 "aligned_operand")
25839 (match_operand:HI 3 "const_int_operand"))
25840 (const_int 0)]))]
25841 "! TARGET_PARTIAL_REG_STALL && reload_completed
25842 && ! TARGET_FAST_PREFIX
25843 && optimize_insn_for_speed_p ()
25844 /* Ensure that the operand will remain sign-extended immediate. */
25845 && ix86_match_ccmode (insn, INTVAL (operands[3]) >= 0 ? CCNOmode : CCZmode)"
25846 [(set (match_dup 0)
25847 (match_op_dup 1 [(and:SI (match_dup 2) (match_dup 3))
25848 (const_int 0)]))]
25849 {
25850 operands[3]
25851 = gen_int_mode (INTVAL (operands[3])
25852 & GET_MODE_MASK (GET_MODE (operands[2])), SImode);
25853 operands[2] = gen_lowpart (SImode, operands[2]);
25854 })
25855
;; Post-reload split that re-expresses a register-to-register NEG (with a
;; flags clobber) as an SImode NEG on the SImode lowparts of both
;; operands.
;; Applies when !TARGET_PARTIAL_REG_STALL and the destination is HImode,
;; or is QImode with TARGET_PROMOTE_QImode or with the insn optimized for
;; size.
25856 (define_split
25857 [(set (match_operand 0 "register_operand")
25858 (neg (match_operand 1 "register_operand")))
25859 (clobber (reg:CC FLAGS_REG))]
25860 "! TARGET_PARTIAL_REG_STALL && reload_completed
25861 && (GET_MODE (operands[0]) == HImode
25862 || (GET_MODE (operands[0]) == QImode
25863 && (TARGET_PROMOTE_QImode
25864 || optimize_insn_for_size_p ())))"
25865 [(parallel [(set (match_dup 0)
25866 (neg:SI (match_dup 1)))
25867 (clobber (reg:CC FLAGS_REG))])]
25868 {
25869 operands[0] = gen_lowpart (SImode, operands[0]);
25870 operands[1] = gen_lowpart (SImode, operands[1]);
25871 })
25872
25873 ;; Do not split instructions with mask regs.
;; Post-reload split that re-expresses a NOT between general registers as
;; an SImode NOT on the SImode lowparts of both operands.  Both operands
;; use general_reg_operand, and neither the matched nor the replacement
;; pattern has a flags clobber.
;; Applies when !TARGET_PARTIAL_REG_STALL and the destination is HImode,
;; or is QImode with TARGET_PROMOTE_QImode or with the insn optimized for
;; size.
25874 (define_split
25875 [(set (match_operand 0 "general_reg_operand")
25876 (not (match_operand 1 "general_reg_operand")))]
25877 "! TARGET_PARTIAL_REG_STALL && reload_completed
25878 && (GET_MODE (operands[0]) == HImode
25879 || (GET_MODE (operands[0]) == QImode
25880 && (TARGET_PROMOTE_QImode
25881 || optimize_insn_for_size_p ())))"
25882 [(set (match_dup 0)
25883 (not:SI (match_dup 1)))]
25884 {
25885 operands[0] = gen_lowpart (SImode, operands[0]);
25886 operands[1] = gen_lowpart (SImode, operands[1]);
25887 })
25888 \f
25889 ;; RTL Peephole optimizations, run before sched2. These primarily look to
25890 ;; transform a complex memory operation into two memory to register operations.
25891
25892 ;; Don't push memory operands
;; Replaces a push of an SWI-mode memory operand with a load into a
;; scratch register (operand 2) followed by a push of that register.
;; Not applied when TARGET_PUSH_MEMORY is set, when the insn is optimized
;; for size, or when the push insn is marked RTX_FRAME_RELATED_P.
25893 (define_peephole2
25894 [(set (match_operand:SWI 0 "push_operand")
25895 (match_operand:SWI 1 "memory_operand"))
25896 (match_scratch:SWI 2 "<r>")]
25897 "!(TARGET_PUSH_MEMORY || optimize_insn_for_size_p ())
25898 && !RTX_FRAME_RELATED_P (peep2_next_insn (0))"
25899 [(set (match_dup 2) (match_dup 1))
25900 (set (match_dup 0) (match_dup 2))])
25901
25902 ;; We need to handle SFmode only, because DFmode and XFmode are split to
25903 ;; SImode pushes.
;; SFmode version of the push-from-memory peephole: the value is loaded
;; into an SFmode scratch with constraint "r" (operand 2) and that
;; register is pushed instead.
;; Not applied when TARGET_PUSH_MEMORY is set, when the insn is optimized
;; for size, or when the push insn is marked RTX_FRAME_RELATED_P.
25904 (define_peephole2
25905 [(set (match_operand:SF 0 "push_operand")
25906 (match_operand:SF 1 "memory_operand"))
25907 (match_scratch:SF 2 "r")]
25908 "!(TARGET_PUSH_MEMORY || optimize_insn_for_size_p ())
25909 && !RTX_FRAME_RELATED_P (peep2_next_insn (0))"
25910 [(set (match_dup 2) (match_dup 1))
25911 (set (match_dup 0) (match_dup 2))])
25912
25913 ;; Don't move an immediate directly to memory when the instruction
25914 ;; gets too big, or if LCP stalls are a problem for 16-bit moves.
;; Replaces a store of (const_int 0) to an SWI124-mode memory operand by
;; zeroing a scratch register and storing that register.
;; The zeroing set is emitted on operands[2], the SImode lowpart of the
;; scratch (operand 1), inside a parallel that clobbers the flags; hence
;; the condition requires peep2_regno_dead_p (0, FLAGS_REG).
;; Applies only when the insn is optimized for speed and either the mode
;; is HImode with TARGET_LCP_STALL, or !TARGET_USE_MOV0 with
;; TARGET_SPLIT_LONG_MOVES and an insn length of at least the current
;; cost table's large_insn value.
25915 (define_peephole2
25916 [(match_scratch:SWI124 1 "<r>")
25917 (set (match_operand:SWI124 0 "memory_operand")
25918 (const_int 0))]
25919 "optimize_insn_for_speed_p ()
25920 && ((<MODE>mode == HImode
25921 && TARGET_LCP_STALL)
25922 || (!TARGET_USE_MOV0
25923 && TARGET_SPLIT_LONG_MOVES
25924 && get_attr_length (insn) >= ix86_cur_cost ()->large_insn))
25925 && peep2_regno_dead_p (0, FLAGS_REG)"
25926 [(parallel [(set (match_dup 2) (const_int 0))
25927 (clobber (reg:CC FLAGS_REG))])
25928 (set (match_dup 0) (match_dup 1))]
25929 "operands[2] = gen_lowpart (SImode, operands[1]);")
25930
;; Replaces a store of an immediate to an SWI124-mode memory operand by a
;; load of the immediate into a scratch register (operand 2) followed by a
;; store of that register.
;; Applies only when the insn is optimized for speed and either the mode
;; is HImode with TARGET_LCP_STALL, or TARGET_SPLIT_LONG_MOVES is set and
;; the insn length is at least the current cost table's large_insn value.
25931 (define_peephole2
25932 [(match_scratch:SWI124 2 "<r>")
25933 (set (match_operand:SWI124 0 "memory_operand")
25934 (match_operand:SWI124 1 "immediate_operand"))]
25935 "optimize_insn_for_speed_p ()
25936 && ((<MODE>mode == HImode
25937 && TARGET_LCP_STALL)
25938 || (TARGET_SPLIT_LONG_MOVES
25939 && get_attr_length (insn) >= ix86_cur_cost ()->large_insn))"
25940 [(set (match_dup 2) (match_dup 1))
25941 (set (match_dup 0) (match_dup 2))])
25942
25943 ;; Don't compare memory with zero, load and use a test instead.
;; Replaces a flag-setting compare of an SImode memory operand against
;; zero with a load of the memory into an SImode scratch register
;; (operand 3) followed by the same compare operator applied to that
;; register and zero.
;; Applies when the insn is optimized for speed and
;; ix86_match_ccmode (insn, CCNOmode) holds.
25944 (define_peephole2
25945 [(set (match_operand 0 "flags_reg_operand")
25946 (match_operator 1 "compare_operator"
25947 [(match_operand:SI 2 "memory_operand")
25948 (const_int 0)]))
25949 (match_scratch:SI 3 "r")]
25950 "optimize_insn_for_speed_p () && ix86_match_ccmode (insn, CCNOmode)"
25951 [(set (match_dup 3) (match_dup 2))
25952 (set (match_dup 0) (match_op_dup 1 [(match_dup 3) (const_int 0)]))])
25953
25954 ;; NOT is not pairable on Pentium, while XOR is, but one byte longer.
25955 ;; Don't split NOTs with a displacement operand, because resulting XOR
25956 ;; will not be pairable anyway.
25957 ;;
25958 ;; On AMD K6, NOT is vector decoded with memory operand that cannot be
25959 ;; represented using a modRM byte. The XOR replacement is long decoded,
25960 ;; so this split helps here as well.
25961 ;;
25962 ;; Note: Can't do this as a regular split because we can't get proper
25963 ;; lifetime information then.
25964
;; Replaces an SWI124-mode NOT with an XOR against (const_int -1).  The
;; replacement clobbers the flags, so the condition requires
;; peep2_regno_dead_p (0, FLAGS_REG).
;; Applies when the insn is optimized for speed and either
;;   - TARGET_NOT_UNPAIRABLE and the destination is not a memory operand
;;     with a displacement, or
;;   - TARGET_NOT_VECTORMODE and the destination is a long_memory_operand.
25965 (define_peephole2
25966 [(set (match_operand:SWI124 0 "nonimmediate_gr_operand")
25967 (not:SWI124 (match_operand:SWI124 1 "nonimmediate_gr_operand")))]
25968 "optimize_insn_for_speed_p ()
25969 && ((TARGET_NOT_UNPAIRABLE
25970 && (!MEM_P (operands[0])
25971 || !memory_displacement_operand (operands[0], <MODE>mode)))
25972 || (TARGET_NOT_VECTORMODE
25973 && long_memory_operand (operands[0], <MODE>mode)))
25974 && peep2_regno_dead_p (0, FLAGS_REG)"
25975 [(parallel [(set (match_dup 0)
25976 (xor:SWI124 (match_dup 1) (const_int -1)))
25977 (clobber (reg:CC FLAGS_REG))])])
25978
25979 ;; Non pairable "test imm, reg" instructions can be translated to
25980 ;; "and imm, reg" if reg dies. The "and" form is also shorter (one
25981 ;; byte opcode instead of two, have a short form for byte operands),
25982 ;; so do it for other CPUs as well. Given that the value was dead,
25983 ;; this should not create any new dependencies. Pass on the sub-word
25984 ;; versions if we're concerned about partial register stalls.
25985
;; SImode "test imm, reg" -> "and imm, reg": the matched insn only sets
;; the flags from (and operand 2, operand 3) compared with zero; the
;; replacement is a parallel that sets the same flags and also writes the
;; AND result back into register operand 2.
;; Conditions: ix86_match_ccmode (insn, CCNOmode); operand 2 is not
;; AX_REG unless the immediate satisfies constraint K; and
;; peep2_reg_dead_p (1, operands[2]) holds.
25986 (define_peephole2
25987 [(set (match_operand 0 "flags_reg_operand")
25988 (match_operator 1 "compare_operator"
25989 [(and:SI (match_operand:SI 2 "register_operand")
25990 (match_operand:SI 3 "immediate_operand"))
25991 (const_int 0)]))]
25992 "ix86_match_ccmode (insn, CCNOmode)
25993 && (REGNO (operands[2]) != AX_REG
25994 || satisfies_constraint_K (operands[3]))
25995 && peep2_reg_dead_p (1, operands[2])"
25996 [(parallel
25997 [(set (match_dup 0)
25998 (match_op_dup 1 [(and:SI (match_dup 2) (match_dup 3))
25999 (const_int 0)]))
26000 (set (match_dup 2)
26001 (and:SI (match_dup 2) (match_dup 3)))])])
26002
26003 ;; We don't need to handle HImode case, because it will be promoted to SImode
26004 ;; on ! TARGET_PARTIAL_REG_STALL
26005
;; QImode version of the "test imm, reg" -> "and imm, reg" peephole: the
;; replacement parallel sets the same flags and also writes the AND result
;; back into register operand 2.
;; Conditions: !TARGET_PARTIAL_REG_STALL;
;; ix86_match_ccmode (insn, CCNOmode); operand 2 is not AX_REG; and
;; peep2_reg_dead_p (1, operands[2]) holds.
26006 (define_peephole2
26007 [(set (match_operand 0 "flags_reg_operand")
26008 (match_operator 1 "compare_operator"
26009 [(and:QI (match_operand:QI 2 "register_operand")
26010 (match_operand:QI 3 "immediate_operand"))
26011 (const_int 0)]))]
26012 "! TARGET_PARTIAL_REG_STALL
26013 && ix86_match_ccmode (insn, CCNOmode)
26014 && REGNO (operands[2]) != AX_REG
26015 && peep2_reg_dead_p (1, operands[2])"
26016 [(parallel
26017 [(set (match_dup 0)
26018 (match_op_dup 1 [(and:QI (match_dup 2) (match_dup 3))
26019 (const_int 0)]))
26020 (set (match_dup 2)
26021 (and:QI (match_dup 2) (match_dup 3)))])])
26022
;; "test" -> "and" peephole for a byte extracted from a wider register:
;; the tested value is the QImode subreg of an extract_operator applied to
;; register operand 2 with (const_int 8) (const_int 8), ANDed with
;; const_int operand 3 and compared against zero.
;; The replacement parallel keeps that compare and additionally stores the
;; AND result into the zero_extract (const_int 8) (const_int 8) field of
;; operand 2.
;; Conditions: !TARGET_PARTIAL_REG_STALL;
;; ix86_match_ccmode (insn, CCNOmode); operand 2 is not AX_REG; and
;; peep2_reg_dead_p (1, operands[2]) holds.
26023 (define_peephole2
26024 [(set (match_operand 0 "flags_reg_operand")
26025 (match_operator 1 "compare_operator"
26026 [(and:QI
26027 (subreg:QI
26028 (match_operator:SWI248 4 "extract_operator"
26029 [(match_operand 2 "int248_register_operand")
26030 (const_int 8)
26031 (const_int 8)]) 0)
26032 (match_operand 3 "const_int_operand"))
26033 (const_int 0)]))]
26034 "! TARGET_PARTIAL_REG_STALL
26035 && ix86_match_ccmode (insn, CCNOmode)
26036 && REGNO (operands[2]) != AX_REG
26037 && peep2_reg_dead_p (1, operands[2])"
26038 [(parallel
26039 [(set (match_dup 0)
26040 (match_op_dup 1
26041 [(and:QI
26042 (subreg:QI
26043 (match_op_dup 4 [(match_dup 2)
26044 (const_int 8)
26045 (const_int 8)]) 0)
26046 (match_dup 3))
26047 (const_int 0)]))
26048 (set (zero_extract:SWI248 (match_dup 2)
26049 (const_int 8)
26050 (const_int 8))
26051 (subreg:SWI248
26052 (and:QI
26053 (subreg:QI
26054 (match_op_dup 4 [(match_dup 2)
26055 (const_int 8)
26056 (const_int 8)]) 0)
26057 (match_dup 3)) 0))])])
26058
26059 ;; Don't do logical operations with memory inputs.
;; Matches "reg0 = reg0 OP mem" (arith_or_logical_operator, flags
;; clobbered) with the memory operand as the second input, and replaces it
;; with a load of the memory into a scratch register (operand 2) followed
;; by "reg0 = reg0 OP scratch".
;; Not applied when TARGET_READ_MODIFY is set or the insn is optimized for
;; size.
26060 (define_peephole2
26061 [(match_scratch:SWI 2 "<r>")
26062 (parallel [(set (match_operand:SWI 0 "register_operand")
26063 (match_operator:SWI 3 "arith_or_logical_operator"
26064 [(match_dup 0)
26065 (match_operand:SWI 1 "memory_operand")]))
26066 (clobber (reg:CC FLAGS_REG))])]
26067 "!(TARGET_READ_MODIFY || optimize_insn_for_size_p ())"
26068 [(set (match_dup 2) (match_dup 1))
26069 (parallel [(set (match_dup 0)
26070 (match_op_dup 3 [(match_dup 0) (match_dup 2)]))
26071 (clobber (reg:CC FLAGS_REG))])])
26072
;; Matches "reg0 = mem OP reg0" (arith_or_logical_operator, flags
;; clobbered) with the memory operand as the first input, and replaces it
;; with a load of the memory into a scratch register (operand 2) followed
;; by "reg0 = scratch OP reg0"; the operand order of the operation is
;; preserved.
;; Not applied when TARGET_READ_MODIFY is set or the insn is optimized for
;; size.
26073 (define_peephole2
26074 [(match_scratch:SWI 2 "<r>")
26075 (parallel [(set (match_operand:SWI 0 "register_operand")
26076 (match_operator:SWI 3 "arith_or_logical_operator"
26077 [(match_operand:SWI 1 "memory_operand")
26078 (match_dup 0)]))
26079 (clobber (reg:CC FLAGS_REG))])]
26080 "!(TARGET_READ_MODIFY || optimize_insn_for_size_p ())"
26081 [(set (match_dup 2) (match_dup 1))
26082 (parallel [(set (match_dup 0)
26083 (match_op_dup 3 [(match_dup 2) (match_dup 0)]))
26084 (clobber (reg:CC FLAGS_REG))])])
26085
26086 ;; Prefer Load+RegOp to Mov+MemOp. Watch out for cases when
26087 ;; the memory address refers to the destination of the load!
26088
;; Matches "reg0 = reg1" followed by "reg0 = reg0 OP mem" (commutative
;; operator, flags clobbered) on general registers, and replaces the pair
;; with "reg0 = operands[4]" followed by "reg0 = reg0 OP reg1".
;; operands[4] is built from the memory operand by
;; ix86_replace_reg_with_reg (operands[2], operands[0], operands[1]).
;; NOTE(review): presumably this substitutes reg1 for reg0 inside the
;; memory address, since reg0 no longer holds reg1's value when the load
;; executes -- confirm against the helper's definition.
;; Conditions: the two registers differ, and in QImode operand 1 must
;; satisfy any_QIreg_operand.
26089 (define_peephole2
26090 [(set (match_operand:SWI 0 "general_reg_operand")
26091 (match_operand:SWI 1 "general_reg_operand"))
26092 (parallel [(set (match_dup 0)
26093 (match_operator:SWI 3 "commutative_operator"
26094 [(match_dup 0)
26095 (match_operand:SWI 2 "memory_operand")]))
26096 (clobber (reg:CC FLAGS_REG))])]
26097 "REGNO (operands[0]) != REGNO (operands[1])
26098 && (<MODE>mode != QImode
26099 || any_QIreg_operand (operands[1], QImode))"
26100 [(set (match_dup 0) (match_dup 4))
26101 (parallel [(set (match_dup 0)
26102 (match_op_dup 3 [(match_dup 0) (match_dup 1)]))
26103 (clobber (reg:CC FLAGS_REG))])]
26104 {
26105 operands[4]
26106 = ix86_replace_reg_with_reg (operands[2], operands[0], operands[1]);
26107 })
26108
;; MMX-register version of the "move then commutative op with memory"
;; peephole: "reg0 = reg1; reg0 = reg0 OP mem" becomes
;; "reg0 = mem; reg0 = reg0 OP reg1".  Neither the matched nor the
;; replacement pattern has a flags clobber, and the memory operand is
;; reused unchanged as the load source.
;; Condition: the two registers differ.
26109 (define_peephole2
26110 [(set (match_operand 0 "mmx_reg_operand")
26111 (match_operand 1 "mmx_reg_operand"))
26112 (set (match_dup 0)
26113 (match_operator 3 "commutative_operator"
26114 [(match_dup 0)
26115 (match_operand 2 "memory_operand")]))]
26116 "REGNO (operands[0]) != REGNO (operands[1])"
26117 [(set (match_dup 0) (match_dup 2))
26118 (set (match_dup 0)
26119 (match_op_dup 3 [(match_dup 0) (match_dup 1)]))])
26120
;; SSE-register version of the "move then commutative op with memory"
;; peephole: "reg0 = reg1; reg0 = reg0 OP mem" becomes
;; "reg0 = mem; reg0 = reg0 OP reg1".
;; Conditions: the two registers differ, and at least one of the
;; following holds: operand 1 is not an EXT_REX_SSE register;
;; TARGET_AVX512BW is enabled; the element size of operand 0's mode is
;; greater than 2 bytes; or the operator is a logic_operator.  The
;; in-condition comment gives the reason for this restriction.
26121 (define_peephole2
26122 [(set (match_operand 0 "sse_reg_operand")
26123 (match_operand 1 "sse_reg_operand"))
26124 (set (match_dup 0)
26125 (match_operator 3 "commutative_operator"
26126 [(match_dup 0)
26127 (match_operand 2 "memory_operand")]))]
26128 "REGNO (operands[0]) != REGNO (operands[1])
26129 /* Punt if operands[1] is %[xy]mm16+ and AVX512BW is not enabled,
26130 as EVEX encoded vpadd[bw], vpmullw, vpmin[su][bw] and vpmax[su][bw]
26131 instructions require AVX512BW and AVX512VL, but with the original
26132 instructions it might require just AVX512VL.
26133 AVX512VL is implied from TARGET_HARD_REGNO_MODE_OK. */
26134 && (!EXT_REX_SSE_REGNO_P (REGNO (operands[1]))
26135 || TARGET_AVX512BW
26136 || GET_MODE_SIZE (GET_MODE_INNER (GET_MODE (operands[0]))) > 2
26137 || logic_operator (operands[3], VOIDmode))"
26138 [(set (match_dup 0) (match_dup 2))
26139 (set (match_dup 0)
26140 (match_op_dup 3 [(match_dup 0) (match_dup 1)]))])
26141
26142 ; Don't do logical operations with memory outputs
26143 ;
26144 ; These two don't make sense for PPro/PII -- we're expanding a 4-uop
26145 ; instruction into two 1-uop insns plus a 2-uop insn. That last has
26146 ; the same decoder scheduling characteristics as the original.
26147
;; Matches "mem = mem OP x" (arith_or_logical_operator, flags clobbered,
;; x a <nonmemory_operand>) and replaces it with three insns using a
;; scratch register (operand 2): load mem into the scratch, compute
;; "scratch = scratch OP x", and store the scratch back to mem.
;; Not applied when TARGET_READ_MODIFY_WRITE is set or the insn is
;; optimized for size.
26148 (define_peephole2
26149 [(match_scratch:SWI 2 "<r>")
26150 (parallel [(set (match_operand:SWI 0 "memory_operand")
26151 (match_operator:SWI 3 "arith_or_logical_operator"
26152 [(match_dup 0)
26153 (match_operand:SWI 1 "<nonmemory_operand>")]))
26154 (clobber (reg:CC FLAGS_REG))])]
26155 "!(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())"
26156 [(set (match_dup 2) (match_dup 0))
26157 (parallel [(set (match_dup 2)
26158 (match_op_dup 3 [(match_dup 2) (match_dup 1)]))
26159 (clobber (reg:CC FLAGS_REG))])
26160 (set (match_dup 0) (match_dup 2))])
26161
;; Matches "mem = x OP mem" (arith_or_logical_operator, flags clobbered,
;; x a <nonmemory_operand>) with the memory operand as the second input,
;; and replaces it with three insns using a scratch register (operand 2):
;; load mem into the scratch, compute "scratch = x OP scratch" (operand
;; order preserved), and store the scratch back to mem.
;; Not applied when TARGET_READ_MODIFY_WRITE is set or the insn is
;; optimized for size.
26162 (define_peephole2
26163 [(match_scratch:SWI 2 "<r>")
26164 (parallel [(set (match_operand:SWI 0 "memory_operand")
26165 (match_operator:SWI 3 "arith_or_logical_operator"
26166 [(match_operand:SWI 1 "<nonmemory_operand>")
26167 (match_dup 0)]))
26168 (clobber (reg:CC FLAGS_REG))])]
26169 "!(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())"
26170 [(set (match_dup 2) (match_dup 0))
26171 (parallel [(set (match_dup 2)
26172 (match_op_dup 3 [(match_dup 1) (match_dup 2)]))
26173 (clobber (reg:CC FLAGS_REG))])
26174 (set (match_dup 0) (match_dup 2))])
26175
26176 ;; Attempt to use arith or logical operations with memory outputs with
26177 ;; setting of flags.
;; Matches a four-insn sequence:
;;   reg0 = mem1;  reg0 = reg0 OP x (flags clobbered);  mem1 = reg0;
;;   flags = compare (reg0, 0)
;; and replaces it with one parallel that sets the flags from
;; compare (mem1 OP x, 0) and stores mem1 OP x into mem1.
;; Conditions: TARGET_READ_MODIFY_WRITE or the insn is optimized for
;; size; peep2_reg_dead_p (4, operands[0]); reg0 overlaps neither the
;; memory operand nor x; in QImode x must be an immediate or satisfy
;; any_QIreg_operand; and the compare insn (peep2_next_insn (3)) matches
;; CCGOCmode for PLUS/MINUS or CCNOmode for other operators.
;; Preparation: operands[4] is the destination of the original compare
;; insn, operands[5] the rebuilt operation on a copy of the memory
;; operand, and operands[6] the COMPARE of a copy of operands[5] with
;; zero in the mode of operands[4].
26178 (define_peephole2
26179 [(set (match_operand:SWI 0 "register_operand")
26180 (match_operand:SWI 1 "memory_operand"))
26181 (parallel [(set (match_dup 0)
26182 (match_operator:SWI 3 "plusminuslogic_operator"
26183 [(match_dup 0)
26184 (match_operand:SWI 2 "<nonmemory_operand>")]))
26185 (clobber (reg:CC FLAGS_REG))])
26186 (set (match_dup 1) (match_dup 0))
26187 (set (reg FLAGS_REG) (compare (match_dup 0) (const_int 0)))]
26188 "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
26189 && peep2_reg_dead_p (4, operands[0])
26190 && !reg_overlap_mentioned_p (operands[0], operands[1])
26191 && !reg_overlap_mentioned_p (operands[0], operands[2])
26192 && (<MODE>mode != QImode
26193 || immediate_operand (operands[2], QImode)
26194 || any_QIreg_operand (operands[2], QImode))
26195 && ix86_match_ccmode (peep2_next_insn (3),
26196 (GET_CODE (operands[3]) == PLUS
26197 || GET_CODE (operands[3]) == MINUS)
26198 ? CCGOCmode : CCNOmode)"
26199 [(parallel [(set (match_dup 4) (match_dup 6))
26200 (set (match_dup 1) (match_dup 5))])]
26201 {
26202 operands[4] = SET_DEST (PATTERN (peep2_next_insn (3)));
26203 operands[5]
26204 = gen_rtx_fmt_ee (GET_CODE (operands[3]), GET_MODE (operands[3]),
26205 copy_rtx (operands[1]),
26206 operands[2]);
26207 operands[6]
26208 = gen_rtx_COMPARE (GET_MODE (operands[4]),
26209 copy_rtx (operands[5]),
26210 const0_rtx);
26211 })
26212
26213 ;; Likewise for cmpelim optimized pattern.
;; Three-insn form of the read-modify-write-with-flags peephole, in which
;; the operation and the compare are already one parallel:
;;   reg0 = mem1;
;;   { flags = compare (reg0 OP x, 0);  reg0 = reg0 OP x };
;;   mem1 = reg0
;; The replacement is one parallel that sets the flags from
;; compare (mem1 OP x, 0) and stores mem1 OP x into mem1.
;; Conditions: TARGET_READ_MODIFY_WRITE or the insn is optimized for
;; size; peep2_reg_dead_p (3, operands[0]); reg0 overlaps neither the
;; memory operand nor x; and the second insn (peep2_next_insn (1)) matches
;; CCGOCmode for PLUS/MINUS or CCNOmode for other operators.
;; Preparation: operands[4] is the destination of the first element of
;; the second insn's parallel (the flags set); operands[5] and operands[6]
;; are the rebuilt operation on a copy of the memory operand and its
;; COMPARE with zero.
26214 (define_peephole2
26215 [(set (match_operand:SWI 0 "register_operand")
26216 (match_operand:SWI 1 "memory_operand"))
26217 (parallel [(set (reg FLAGS_REG)
26218 (compare (match_operator:SWI 3 "plusminuslogic_operator"
26219 [(match_dup 0)
26220 (match_operand:SWI 2 "<nonmemory_operand>")])
26221 (const_int 0)))
26222 (set (match_dup 0) (match_dup 3))])
26223 (set (match_dup 1) (match_dup 0))]
26224 "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
26225 && peep2_reg_dead_p (3, operands[0])
26226 && !reg_overlap_mentioned_p (operands[0], operands[1])
26227 && !reg_overlap_mentioned_p (operands[0], operands[2])
26228 && ix86_match_ccmode (peep2_next_insn (1),
26229 (GET_CODE (operands[3]) == PLUS
26230 || GET_CODE (operands[3]) == MINUS)
26231 ? CCGOCmode : CCNOmode)"
26232 [(parallel [(set (match_dup 4) (match_dup 6))
26233 (set (match_dup 1) (match_dup 5))])]
26234 {
26235 operands[4] = SET_DEST (XVECEXP (PATTERN (peep2_next_insn (1)), 0, 0));
26236 operands[5]
26237 = gen_rtx_fmt_ee (GET_CODE (operands[3]), GET_MODE (operands[3]),
26238 copy_rtx (operands[1]), operands[2]);
26239 operands[6]
26240 = gen_rtx_COMPARE (GET_MODE (operands[4]), copy_rtx (operands[5]),
26241 const0_rtx);
26242 })
26243
26244 ;; Likewise for instances where we have a lea pattern.
;; Read-modify-write-with-flags peephole for the case where the addition
;; is a <LEAMODE> plus with no flags clobber:
;;   reg0 = mem1;  reg3 = reg4 + x;  mem1 = reg5;
;;   flags = compare (reg5, 0)
;; The condition ties the registers together by number: reg4 must be reg0
;; and reg5 must be reg3.
;; The replacement is one parallel that sets the flags from
;; compare (mem1 + x, 0) and stores mem1 + x into mem1, with x taken as
;; its <MODE>mode lowpart.
;; Further conditions: TARGET_READ_MODIFY_WRITE or the insn is optimized
;; for size; peep2_reg_dead_p (4, operands[3]); reg0 is the same register
;; as reg3 or peep2_reg_dead_p (2, operands[0]) holds; neither reg0 nor
;; reg3 overlaps the memory operand; reg0 does not overlap x; in QImode x
;; must be an immediate or satisfy any_QIreg_operand; and the compare insn
;; (peep2_next_insn (3)) matches CCGOCmode.
26245 (define_peephole2
26246 [(set (match_operand:SWI 0 "register_operand")
26247 (match_operand:SWI 1 "memory_operand"))
26248 (set (match_operand:<LEAMODE> 3 "register_operand")
26249 (plus:<LEAMODE> (match_operand:<LEAMODE> 4 "register_operand")
26250 (match_operand:<LEAMODE> 2 "<nonmemory_operand>")))
26251 (set (match_dup 1) (match_operand:SWI 5 "register_operand"))
26252 (set (reg FLAGS_REG) (compare (match_dup 5) (const_int 0)))]
26253 "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
26254 && REGNO (operands[4]) == REGNO (operands[0])
26255 && REGNO (operands[5]) == REGNO (operands[3])
26256 && peep2_reg_dead_p (4, operands[3])
26257 && ((REGNO (operands[0]) == REGNO (operands[3]))
26258 || peep2_reg_dead_p (2, operands[0]))
26259 && !reg_overlap_mentioned_p (operands[0], operands[1])
26260 && !reg_overlap_mentioned_p (operands[3], operands[1])
26261 && !reg_overlap_mentioned_p (operands[0], operands[2])
26262 && (<MODE>mode != QImode
26263 || immediate_operand (operands[2], QImode)
26264 || any_QIreg_operand (operands[2], QImode))
26265 && ix86_match_ccmode (peep2_next_insn (3), CCGOCmode)"
26266 [(parallel [(set (match_dup 6) (match_dup 8))
26267 (set (match_dup 1) (match_dup 7))])]
26268 {
26269 operands[6] = SET_DEST (PATTERN (peep2_next_insn (3)));
26270 operands[7]
26271 = gen_rtx_PLUS (<MODE>mode,
26272 copy_rtx (operands[1]),
26273 gen_lowpart (<MODE>mode, operands[2]));
26274 operands[8]
26275 = gen_rtx_COMPARE (GET_MODE (operands[6]),
26276 copy_rtx (operands[7]),
26277 const0_rtx);
26278 })
26279
;; Matches a three-insn sequence in which the memory operand is the second
;; input of the operation:
;;   reg0 = reg0 OP mem1 (flags clobbered);  mem1 = reg0;
;;   flags = compare (reg0, 0)
;; and replaces it with one parallel that sets the flags from
;; compare (mem1 OP reg0, 0) and stores mem1 OP reg0 into mem1.  The
;; replacement swaps the operand order, and the condition requires the
;; operator to be COMMUTATIVE_ARITH_P.
;; Further conditions: TARGET_READ_MODIFY_WRITE or the insn is optimized
;; for size; peep2_reg_dead_p (3, operands[0]); reg0 does not overlap the
;; memory operand; and the compare insn (peep2_next_insn (2)) matches
;; CCGOCmode for PLUS or CCNOmode otherwise.
26280 (define_peephole2
26281 [(parallel [(set (match_operand:SWI 0 "register_operand")
26282 (match_operator:SWI 2 "plusminuslogic_operator"
26283 [(match_dup 0)
26284 (match_operand:SWI 1 "memory_operand")]))
26285 (clobber (reg:CC FLAGS_REG))])
26286 (set (match_dup 1) (match_dup 0))
26287 (set (reg FLAGS_REG) (compare (match_dup 0) (const_int 0)))]
26288 "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
26289 && COMMUTATIVE_ARITH_P (operands[2])
26290 && peep2_reg_dead_p (3, operands[0])
26291 && !reg_overlap_mentioned_p (operands[0], operands[1])
26292 && ix86_match_ccmode (peep2_next_insn (2),
26293 GET_CODE (operands[2]) == PLUS
26294 ? CCGOCmode : CCNOmode)"
26295 [(parallel [(set (match_dup 3) (match_dup 5))
26296 (set (match_dup 1) (match_dup 4))])]
26297 {
26298 operands[3] = SET_DEST (PATTERN (peep2_next_insn (2)));
26299 operands[4]
26300 = gen_rtx_fmt_ee (GET_CODE (operands[2]), GET_MODE (operands[2]),
26301 copy_rtx (operands[1]),
26302 operands[0]);
26303 operands[5]
26304 = gen_rtx_COMPARE (GET_MODE (operands[3]),
26305 copy_rtx (operands[4]),
26306 const0_rtx);
26307 })
26308
26309 ;; Likewise for cmpelim optimized pattern.
;; Two-insn form of the "op with memory source, then store" peephole, in
;; which the operation and the compare are already one parallel:
;;   { flags = compare (reg0 OP mem1, 0);  reg0 = reg0 OP mem1 };
;;   mem1 = reg0
;; The replacement is one parallel that sets the flags from
;; compare (mem1 OP reg0, 0) and stores mem1 OP reg0 into mem1.  The
;; replacement swaps the operand order, and the condition requires the
;; operator to be COMMUTATIVE_ARITH_P.
;; Further conditions: TARGET_READ_MODIFY_WRITE or the insn is optimized
;; for size; peep2_reg_dead_p (2, operands[0]); reg0 does not overlap the
;; memory operand; and the first insn (peep2_next_insn (0)) matches
;; CCGOCmode for PLUS or CCNOmode otherwise.
;; operands[3] is the destination of the first element of the first
;; insn's parallel (the flags set).
26310 (define_peephole2
26311 [(parallel [(set (reg FLAGS_REG)
26312 (compare (match_operator:SWI 2 "plusminuslogic_operator"
26313 [(match_operand:SWI 0 "register_operand")
26314 (match_operand:SWI 1 "memory_operand")])
26315 (const_int 0)))
26316 (set (match_dup 0) (match_dup 2))])
26317 (set (match_dup 1) (match_dup 0))]
26318 "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
26319 && COMMUTATIVE_ARITH_P (operands[2])
26320 && peep2_reg_dead_p (2, operands[0])
26321 && !reg_overlap_mentioned_p (operands[0], operands[1])
26322 && ix86_match_ccmode (peep2_next_insn (0),
26323 GET_CODE (operands[2]) == PLUS
26324 ? CCGOCmode : CCNOmode)"
26325 [(parallel [(set (match_dup 3) (match_dup 5))
26326 (set (match_dup 1) (match_dup 4))])]
26327 {
26328 operands[3] = SET_DEST (XVECEXP (PATTERN (peep2_next_insn (0)), 0, 0));
26329 operands[4]
26330 = gen_rtx_fmt_ee (GET_CODE (operands[2]), GET_MODE (operands[2]),
26331 copy_rtx (operands[1]), operands[0]);
26332 operands[5]
26333 = gen_rtx_COMPARE (GET_MODE (operands[3]), copy_rtx (operands[4]),
26334 const0_rtx);
26335 })
26336
;; Peephole for SWI12 memory operands where the arithmetic/logic operation
;; was performed in SImode: "reg = mem; reg:SI = reg:SI OP op2; mem = reg;
;; flags = compare (reg, 0)" becomes one parallel that sets the flags from
;; (compare (mem OP lowpart (op2)) 0) and stores (mem OP lowpart (op2)) into
;; mem, with the operation rebuilt in <MODE>mode.
;; Conditions: TARGET_READ_MODIFY_WRITE or optimizing for size; operands 0
;; and 4 must have the same REGNO; operand 0 must pass peep2_reg_dead_p at
;; offset 4; in QImode operand 2 must be an immediate or satisfy
;; any_QIreg_operand; operand 0 must not overlap operands 1 or 2; the
;; compare insn (peep2_next_insn (3)) must satisfy ix86_match_ccmode with
;; CCGOCmode for PLUS/MINUS and CCNOmode otherwise.
26337 (define_peephole2
26338 [(set (match_operand:SWI12 0 "register_operand")
26339 (match_operand:SWI12 1 "memory_operand"))
26340 (parallel [(set (match_operand:SI 4 "register_operand")
26341 (match_operator:SI 3 "plusminuslogic_operator"
26342 [(match_dup 4)
26343 (match_operand:SI 2 "nonmemory_operand")]))
26344 (clobber (reg:CC FLAGS_REG))])
26345 (set (match_dup 1) (match_dup 0))
26346 (set (reg FLAGS_REG) (compare (match_dup 0) (const_int 0)))]
26347 "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
26348 && REGNO (operands[0]) == REGNO (operands[4])
26349 && peep2_reg_dead_p (4, operands[0])
26350 && (<MODE>mode != QImode
26351 || immediate_operand (operands[2], SImode)
26352 || any_QIreg_operand (operands[2], SImode))
26353 && !reg_overlap_mentioned_p (operands[0], operands[1])
26354 && !reg_overlap_mentioned_p (operands[0], operands[2])
26355 && ix86_match_ccmode (peep2_next_insn (3),
26356 (GET_CODE (operands[3]) == PLUS
26357 || GET_CODE (operands[3]) == MINUS)
26358 ? CCGOCmode : CCNOmode)"
26359 [(parallel [(set (match_dup 5) (match_dup 7))
26360 (set (match_dup 1) (match_dup 6))])]
26361 {
26362 operands[5] = SET_DEST (PATTERN (peep2_next_insn (3)));
26363 operands[6]
26364 = gen_rtx_fmt_ee (GET_CODE (operands[3]), <MODE>mode,
26365 copy_rtx (operands[1]),
26366 gen_lowpart (<MODE>mode, operands[2]));
26367 operands[7]
26368 = gen_rtx_COMPARE (GET_MODE (operands[5]),
26369 copy_rtx (operands[6]),
26370 const0_rtx);
26371 })
26372
26373 ;; peephole2 comes before regcprop, so deal also with a case that
26374 ;; would be cleaned up by regcprop.
;; Peephole: "r0 = mem; r0 = r0 OP op2; r4 = r0; mem = r4;
;; flags = compare (r4, 0)" becomes one parallel that sets the flags from
;; (compare (mem OP op2) 0) and stores (mem OP op2) into mem.
;; Conditions: TARGET_READ_MODIFY_WRITE or optimizing for size; operand 0
;; must pass peep2_reg_dead_p at offset 3 and operand 4 at offset 5;
;; operand 0 must not overlap operands 1 or 2 and operand 4 must not
;; overlap operand 1; in QImode operand 2 must be an immediate or satisfy
;; any_QIreg_operand; the compare insn (peep2_next_insn (4)) must satisfy
;; ix86_match_ccmode with CCGOCmode for PLUS/MINUS and CCNOmode otherwise.
26375 (define_peephole2
26376 [(set (match_operand:SWI 0 "register_operand")
26377 (match_operand:SWI 1 "memory_operand"))
26378 (parallel [(set (match_dup 0)
26379 (match_operator:SWI 3 "plusminuslogic_operator"
26380 [(match_dup 0)
26381 (match_operand:SWI 2 "<nonmemory_operand>")]))
26382 (clobber (reg:CC FLAGS_REG))])
26383 (set (match_operand:SWI 4 "register_operand") (match_dup 0))
26384 (set (match_dup 1) (match_dup 4))
26385 (set (reg FLAGS_REG) (compare (match_dup 4) (const_int 0)))]
26386 "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
26387 && peep2_reg_dead_p (3, operands[0])
26388 && peep2_reg_dead_p (5, operands[4])
26389 && !reg_overlap_mentioned_p (operands[0], operands[1])
26390 && !reg_overlap_mentioned_p (operands[0], operands[2])
26391 && !reg_overlap_mentioned_p (operands[4], operands[1])
26392 && (<MODE>mode != QImode
26393 || immediate_operand (operands[2], QImode)
26394 || any_QIreg_operand (operands[2], QImode))
26395 && ix86_match_ccmode (peep2_next_insn (4),
26396 (GET_CODE (operands[3]) == PLUS
26397 || GET_CODE (operands[3]) == MINUS)
26398 ? CCGOCmode : CCNOmode)"
26399 [(parallel [(set (match_dup 5) (match_dup 7))
26400 (set (match_dup 1) (match_dup 6))])]
26401 {
26402 operands[5] = SET_DEST (PATTERN (peep2_next_insn (4)));
26403 operands[6]
26404 = gen_rtx_fmt_ee (GET_CODE (operands[3]), GET_MODE (operands[3]),
26405 copy_rtx (operands[1]),
26406 operands[2]);
26407 operands[7]
26408 = gen_rtx_COMPARE (GET_MODE (operands[5]),
26409 copy_rtx (operands[6]),
26410 const0_rtx);
26411 })
26412
;; SWI12 counterpart of the load/op/copy/store/compare peephole, for the
;; case where the operation itself was done in SImode on register operand 4:
;; "r0 = mem; r4:SI = r4:SI OP op2; r5 = r0; mem = r5;
;; flags = compare (r5, 0)" becomes one parallel that sets the flags from
;; (compare (mem OP lowpart (op2)) 0) and stores (mem OP lowpart (op2)) into
;; mem, with the operation rebuilt in <MODE>mode.
;; Conditions: TARGET_READ_MODIFY_WRITE or optimizing for size; operands 0
;; and 4 must have the same REGNO; operand 0 must pass peep2_reg_dead_p at
;; offset 3 and operand 5 at offset 5; in QImode operand 2 must be an
;; immediate or satisfy any_QIreg_operand; operand 0 must not overlap
;; operands 1 or 2 and operand 5 must not overlap operand 1; the compare
;; insn (peep2_next_insn (4)) must satisfy ix86_match_ccmode with CCGOCmode
;; for PLUS/MINUS and CCNOmode otherwise.
26413 (define_peephole2
26414 [(set (match_operand:SWI12 0 "register_operand")
26415 (match_operand:SWI12 1 "memory_operand"))
26416 (parallel [(set (match_operand:SI 4 "register_operand")
26417 (match_operator:SI 3 "plusminuslogic_operator"
26418 [(match_dup 4)
26419 (match_operand:SI 2 "nonmemory_operand")]))
26420 (clobber (reg:CC FLAGS_REG))])
26421 (set (match_operand:SWI12 5 "register_operand") (match_dup 0))
26422 (set (match_dup 1) (match_dup 5))
26423 (set (reg FLAGS_REG) (compare (match_dup 5) (const_int 0)))]
26424 "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
26425 && REGNO (operands[0]) == REGNO (operands[4])
26426 && peep2_reg_dead_p (3, operands[0])
26427 && peep2_reg_dead_p (5, operands[5])
26428 && (<MODE>mode != QImode
26429 || immediate_operand (operands[2], SImode)
26430 || any_QIreg_operand (operands[2], SImode))
26431 && !reg_overlap_mentioned_p (operands[0], operands[1])
26432 && !reg_overlap_mentioned_p (operands[0], operands[2])
26433 && !reg_overlap_mentioned_p (operands[5], operands[1])
26434 && ix86_match_ccmode (peep2_next_insn (4),
26435 (GET_CODE (operands[3]) == PLUS
26436 || GET_CODE (operands[3]) == MINUS)
26437 ? CCGOCmode : CCNOmode)"
26438 [(parallel [(set (match_dup 6) (match_dup 8))
26439 (set (match_dup 1) (match_dup 7))])]
26440 {
26441 operands[6] = SET_DEST (PATTERN (peep2_next_insn (4)));
26442 operands[7]
26443 = gen_rtx_fmt_ee (GET_CODE (operands[3]), <MODE>mode,
26444 copy_rtx (operands[1]),
26445 gen_lowpart (<MODE>mode, operands[2]));
26446 operands[8]
26447 = gen_rtx_COMPARE (GET_MODE (operands[6]),
26448 copy_rtx (operands[7]),
26449 const0_rtx);
26450 })
26451
26452 ;; Likewise for cmpelim optimized pattern.
;; Peephole: "r0 = mem", then a parallel that sets the flags from
;; (compare (r0 OP op2) 0) and sets r0 to (r0 OP op2), then "r4 = r0" and
;; "mem = r4".  All four insns are replaced by one parallel that sets the
;; flags from (compare (mem OP op2) 0) and stores (mem OP op2) into mem.
;; Conditions: TARGET_READ_MODIFY_WRITE or optimizing for size; operand 0
;; must pass peep2_reg_dead_p at offset 3 and operand 4 at offset 4;
;; operand 0 must not overlap operands 1 or 2 and operand 4 must not
;; overlap operand 1; the flag-setting insn (peep2_next_insn (1)) must
;; satisfy ix86_match_ccmode with CCGOCmode for PLUS/MINUS and CCNOmode
;; otherwise.  Operand 5 (flags destination) is read from element 0 of that
;; insn's parallel.
26453 (define_peephole2
26454 [(set (match_operand:SWI 0 "register_operand")
26455 (match_operand:SWI 1 "memory_operand"))
26456 (parallel [(set (reg FLAGS_REG)
26457 (compare (match_operator:SWI 3 "plusminuslogic_operator"
26458 [(match_dup 0)
26459 (match_operand:SWI 2 "<nonmemory_operand>")])
26460 (const_int 0)))
26461 (set (match_dup 0) (match_dup 3))])
26462 (set (match_operand:SWI 4 "register_operand") (match_dup 0))
26463 (set (match_dup 1) (match_dup 4))]
26464 "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
26465 && peep2_reg_dead_p (3, operands[0])
26466 && peep2_reg_dead_p (4, operands[4])
26467 && !reg_overlap_mentioned_p (operands[0], operands[1])
26468 && !reg_overlap_mentioned_p (operands[0], operands[2])
26469 && !reg_overlap_mentioned_p (operands[4], operands[1])
26470 && ix86_match_ccmode (peep2_next_insn (1),
26471 (GET_CODE (operands[3]) == PLUS
26472 || GET_CODE (operands[3]) == MINUS)
26473 ? CCGOCmode : CCNOmode)"
26474 [(parallel [(set (match_dup 5) (match_dup 7))
26475 (set (match_dup 1) (match_dup 6))])]
26476 {
26477 operands[5] = SET_DEST (XVECEXP (PATTERN (peep2_next_insn (1)), 0, 0));
26478 operands[6]
26479 = gen_rtx_fmt_ee (GET_CODE (operands[3]), GET_MODE (operands[3]),
26480 copy_rtx (operands[1]), operands[2]);
26481 operands[7]
26482 = gen_rtx_COMPARE (GET_MODE (operands[5]), copy_rtx (operands[6]),
26483 const0_rtx);
26484 })
26485
26486 ;; Special cases for xor, where (x ^= y) != 0 is (misoptimized)
26487 ;; into x = z; x ^= y; x != z
;; Peephole: "r0 = mem; r3 = r0; r4 ^= op2; mem = r4;
;; flags:CCZ = compare (op5, op6)" becomes one parallel that sets the flags
;; from (compare (mem ^ op2) 0) and stores (mem ^ op2) into mem.
;; Operand 4 must have the REGNO of operand 0 or operand 3; call the other
;; of those two registers the "saved copy".  The CCZ compare must be between
;; the saved copy and operand 2, in either operand order.
;; Further conditions: TARGET_READ_MODIFY_WRITE or optimizing for size;
;; operand 4 must pass peep2_reg_dead_p at offset 4 and the saved copy at
;; offset 5; operand 0 must not overlap operands 1 or 2; operand 3 must not
;; overlap operands 0, 1 or 2; in QImode operand 2 must be an immediate or
;; satisfy any_QIreg_operand.
26488 (define_peephole2
26489 [(set (match_operand:SWI 0 "register_operand")
26490 (match_operand:SWI 1 "memory_operand"))
26491 (set (match_operand:SWI 3 "register_operand") (match_dup 0))
26492 (parallel [(set (match_operand:SWI 4 "register_operand")
26493 (xor:SWI (match_dup 4)
26494 (match_operand:SWI 2 "<nonmemory_operand>")))
26495 (clobber (reg:CC FLAGS_REG))])
26496 (set (match_dup 1) (match_dup 4))
26497 (set (reg:CCZ FLAGS_REG)
26498 (compare:CCZ (match_operand:SWI 5 "register_operand")
26499 (match_operand:SWI 6 "<nonmemory_operand>")))]
26500 "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
26501 && (REGNO (operands[4]) == REGNO (operands[0])
26502 || REGNO (operands[4]) == REGNO (operands[3]))
26503 && (rtx_equal_p (operands[REGNO (operands[4]) == REGNO (operands[0])
26504 ? 3 : 0], operands[5])
26505 ? rtx_equal_p (operands[2], operands[6])
26506 : rtx_equal_p (operands[2], operands[5])
26507 && rtx_equal_p (operands[REGNO (operands[4]) == REGNO (operands[0])
26508 ? 3 : 0], operands[6]))
26509 && peep2_reg_dead_p (4, operands[4])
26510 && peep2_reg_dead_p (5, operands[REGNO (operands[4]) == REGNO (operands[0])
26511 ? 3 : 0])
26512 && !reg_overlap_mentioned_p (operands[0], operands[1])
26513 && !reg_overlap_mentioned_p (operands[0], operands[2])
26514 && !reg_overlap_mentioned_p (operands[3], operands[0])
26515 && !reg_overlap_mentioned_p (operands[3], operands[1])
26516 && !reg_overlap_mentioned_p (operands[3], operands[2])
26517 && (<MODE>mode != QImode
26518 || immediate_operand (operands[2], QImode)
26519 || any_QIreg_operand (operands[2], QImode))"
26520 [(parallel [(set (match_dup 7) (match_dup 9))
26521 (set (match_dup 1) (match_dup 8))])]
26522 {
26523 operands[7] = SET_DEST (PATTERN (peep2_next_insn (4)));
26524 operands[8] = gen_rtx_XOR (<MODE>mode, copy_rtx (operands[1]),
26525 operands[2]);
26526 operands[9]
26527 = gen_rtx_COMPARE (GET_MODE (operands[7]),
26528 copy_rtx (operands[8]),
26529 const0_rtx);
26530 })
26531
;; SWI12 counterpart of the xor special case, for an xor performed in SImode:
;; "r0 = mem; r3 = r0; r4:SI ^= op2:SI; mem = r5;
;; flags:CCZ = compare (op6, op7)" becomes one parallel that sets the flags
;; from (compare (mem ^ lowpart (op2)) 0) and stores (mem ^ lowpart (op2))
;; into mem, with the xor rebuilt in <MODE>mode.
;; Operand 5 must have the REGNO of operand 4 and of either operand 0 or
;; operand 3; call the other of operands 0/3 the "saved copy".  The CCZ
;; compare must be between the saved copy and operand 2 (compared by REGNO
;; when operand 2 is a register), in either operand order.
;; Further conditions: TARGET_READ_MODIFY_WRITE or optimizing for size;
;; operand 5 must pass peep2_reg_dead_p at offset 4 and the saved copy at
;; offset 5; operand 0 must not overlap operands 1 or 2; operand 3 must not
;; overlap operands 0, 1 or 2; in QImode operand 2 must be an immediate or
;; satisfy any_QIreg_operand.
26532 (define_peephole2
26533 [(set (match_operand:SWI12 0 "register_operand")
26534 (match_operand:SWI12 1 "memory_operand"))
26535 (set (match_operand:SWI12 3 "register_operand") (match_dup 0))
26536 (parallel [(set (match_operand:SI 4 "register_operand")
26537 (xor:SI (match_dup 4)
26538 (match_operand:SI 2 "<nonmemory_operand>")))
26539 (clobber (reg:CC FLAGS_REG))])
26540 (set (match_dup 1) (match_operand:SWI12 5 "register_operand"))
26541 (set (reg:CCZ FLAGS_REG)
26542 (compare:CCZ (match_operand:SWI12 6 "register_operand")
26543 (match_operand:SWI12 7 "<nonmemory_operand>")))]
26544 "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
26545 && (REGNO (operands[5]) == REGNO (operands[0])
26546 || REGNO (operands[5]) == REGNO (operands[3]))
26547 && REGNO (operands[5]) == REGNO (operands[4])
26548 && (rtx_equal_p (operands[REGNO (operands[5]) == REGNO (operands[0])
26549 ? 3 : 0], operands[6])
26550 ? (REG_P (operands[2])
26551 ? REG_P (operands[7]) && REGNO (operands[2]) == REGNO (operands[7])
26552 : rtx_equal_p (operands[2], operands[7]))
26553 : (rtx_equal_p (operands[REGNO (operands[5]) == REGNO (operands[0])
26554 ? 3 : 0], operands[7])
26555 && REG_P (operands[2])
26556 && REGNO (operands[2]) == REGNO (operands[6])))
26557 && peep2_reg_dead_p (4, operands[5])
26558 && peep2_reg_dead_p (5, operands[REGNO (operands[5]) == REGNO (operands[0])
26559 ? 3 : 0])
26560 && !reg_overlap_mentioned_p (operands[0], operands[1])
26561 && !reg_overlap_mentioned_p (operands[0], operands[2])
26562 && !reg_overlap_mentioned_p (operands[3], operands[0])
26563 && !reg_overlap_mentioned_p (operands[3], operands[1])
26564 && !reg_overlap_mentioned_p (operands[3], operands[2])
26565 && (<MODE>mode != QImode
26566 || immediate_operand (operands[2], SImode)
26567 || any_QIreg_operand (operands[2], SImode))"
26568 [(parallel [(set (match_dup 8) (match_dup 10))
26569 (set (match_dup 1) (match_dup 9))])]
26570 {
26571 operands[8] = SET_DEST (PATTERN (peep2_next_insn (4)));
26572 operands[9] = gen_rtx_XOR (<MODE>mode, copy_rtx (operands[1]),
26573 gen_lowpart (<MODE>mode, operands[2]));
26574 operands[10]
26575 = gen_rtx_COMPARE (GET_MODE (operands[8]),
26576 copy_rtx (operands[9]),
26577 const0_rtx);
26578 })
26579
26580 ;; Attempt to optimize away memory stores of values the memory already
26581 ;; has. See PR79593.
;; Peephole: "reg = mem1; mem2 = reg" where mem1 and mem2 are rtx_equal_p,
;; neither is volatile, and reg is not mentioned in mem2.  The store is
;; dropped and only the load is kept.
26582 (define_peephole2
26583 [(set (match_operand 0 "register_operand")
26584 (match_operand 1 "memory_operand"))
26585 (set (match_operand 2 "memory_operand") (match_dup 0))]
26586 "!MEM_VOLATILE_P (operands[1])
26587 && !MEM_VOLATILE_P (operands[2])
26588 && rtx_equal_p (operands[1], operands[2])
26589 && !reg_overlap_mentioned_p (operands[0], operands[2])"
26590 [(set (match_dup 0) (match_dup 1))])
26591
26592 ;; Attempt to always use XOR for zeroing registers (including FP modes).
;; Peephole: a plain "general reg = 0" move (any mode no wider than
;; UNITS_PER_WORD) becomes a parallel of "reg = 0" with a FLAGS_REG clobber,
;; with operand 0 rewritten to its word_mode lowpart.
;; Conditions: !TARGET_USE_MOV0 or optimizing for size, and FLAGS_REG must
;; pass peep2_regno_dead_p at offset 0.
26593 (define_peephole2
26594 [(set (match_operand 0 "general_reg_operand")
26595 (match_operand 1 "const0_operand"))]
26596 "GET_MODE_SIZE (GET_MODE (operands[0])) <= UNITS_PER_WORD
26597 && (! TARGET_USE_MOV0 || optimize_insn_for_size_p ())
26598 && peep2_regno_dead_p (0, FLAGS_REG)"
26599 [(parallel [(set (match_dup 0) (const_int 0))
26600 (clobber (reg:CC FLAGS_REG))])]
26601 "operands[0] = gen_lowpart (word_mode, operands[0]);")
26602
;; Peephole: "strict_low_part (reg) = 0" for SWI12 general registers becomes
;; the same set in a parallel with a FLAGS_REG clobber.
;; Conditions: !TARGET_USE_MOV0 or optimizing for size, and FLAGS_REG must
;; pass peep2_regno_dead_p at offset 0.
26603 (define_peephole2
26604 [(set (strict_low_part (match_operand:SWI12 0 "general_reg_operand"))
26605 (const_int 0))]
26606 "(! TARGET_USE_MOV0 || optimize_insn_for_size_p ())
26607 && peep2_regno_dead_p (0, FLAGS_REG)"
26608 [(parallel [(set (strict_low_part (match_dup 0)) (const_int 0))
26609 (clobber (reg:CC FLAGS_REG))])])
26610
26611 ;; For HI, SI and DI modes, or $-1,reg is smaller than mov $-1,reg.
;; Peephole: "general reg = -1" (SWI248) becomes a parallel of "reg = -1"
;; with a FLAGS_REG clobber.  When the mode is narrower than SImode,
;; operand 0 is rewritten to its SImode lowpart first.
;; Conditions: TARGET_MOVE_M1_VIA_OR or optimizing for size, and FLAGS_REG
;; must pass peep2_regno_dead_p at offset 0.
26612 (define_peephole2
26613 [(set (match_operand:SWI248 0 "general_reg_operand")
26614 (const_int -1))]
26615 "(TARGET_MOVE_M1_VIA_OR || optimize_insn_for_size_p ())
26616 && peep2_regno_dead_p (0, FLAGS_REG)"
26617 [(parallel [(set (match_dup 0) (const_int -1))
26618 (clobber (reg:CC FLAGS_REG))])]
26619 {
26620 if (<MODE_SIZE> < GET_MODE_SIZE (SImode))
26621 operands[0] = gen_lowpart (SImode, operands[0]);
26622 })
26623
26624 ;; Attempt to convert simple lea to add/shift.
26625 ;; These can be created by move expanders.
26626 ;; Disable PLUS peepholes on TARGET_OPT_AGU, since all
26627 ;; relevant lea instructions were already split.
26628
;; Peephole: a flags-preserving "reg = reg + op1" (no clobber in the
;; pattern) becomes the same addition in a parallel with a FLAGS_REG
;; clobber.  Conditions: !TARGET_OPT_AGU and FLAGS_REG must pass
;; peep2_regno_dead_p at offset 0.
26629 (define_peephole2
26630 [(set (match_operand:SWI48 0 "register_operand")
26631 (plus:SWI48 (match_dup 0)
26632 (match_operand:SWI48 1 "<nonmemory_operand>")))]
26633 "!TARGET_OPT_AGU
26634 && peep2_regno_dead_p (0, FLAGS_REG)"
26635 [(parallel [(set (match_dup 0) (plus:SWI48 (match_dup 0) (match_dup 1)))
26636 (clobber (reg:CC FLAGS_REG))])])
26637
;; Peephole: "reg = op1 + reg" (destination register is the second addend,
;; no clobber in the pattern) becomes "reg = reg + op1" in a parallel with a
;; FLAGS_REG clobber.  Conditions: !TARGET_OPT_AGU and FLAGS_REG must pass
;; peep2_regno_dead_p at offset 0.
26638 (define_peephole2
26639 [(set (match_operand:SWI48 0 "register_operand")
26640 (plus:SWI48 (match_operand:SWI48 1 "<nonmemory_operand>")
26641 (match_dup 0)))]
26642 "!TARGET_OPT_AGU
26643 && peep2_regno_dead_p (0, FLAGS_REG)"
26644 [(parallel [(set (match_dup 0) (plus:SWI48 (match_dup 0) (match_dup 1)))
26645 (clobber (reg:CC FLAGS_REG))])])
26646
;; Peephole: "reg:DI = zero_extend (reg:SI + op2)" without a clobber, where
;; the DImode destination and SImode operand 1 have the same REGNO, becomes
;; the same expression in a parallel with a FLAGS_REG clobber.
;; Conditions: TARGET_64BIT, !TARGET_OPT_AGU, and FLAGS_REG must pass
;; peep2_regno_dead_p at offset 0.
26647 (define_peephole2
26648 [(set (match_operand:DI 0 "register_operand")
26649 (zero_extend:DI
26650 (plus:SI (match_operand:SI 1 "register_operand")
26651 (match_operand:SI 2 "nonmemory_operand"))))]
26652 "TARGET_64BIT && !TARGET_OPT_AGU
26653 && REGNO (operands[0]) == REGNO (operands[1])
26654 && peep2_regno_dead_p (0, FLAGS_REG)"
26655 [(parallel [(set (match_dup 0)
26656 (zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))
26657 (clobber (reg:CC FLAGS_REG))])])
26658
;; Peephole: "reg:DI = zero_extend (op1 + reg:SI)" without a clobber, where
;; the DImode destination and SImode operand 2 (the second addend) have the
;; same REGNO, becomes "zero_extend (reg:SI + op1)" in a parallel with a
;; FLAGS_REG clobber (addends swapped so the register comes first).
;; Conditions: TARGET_64BIT, !TARGET_OPT_AGU, and FLAGS_REG must pass
;; peep2_regno_dead_p at offset 0.
26659 (define_peephole2
26660 [(set (match_operand:DI 0 "register_operand")
26661 (zero_extend:DI
26662 (plus:SI (match_operand:SI 1 "nonmemory_operand")
26663 (match_operand:SI 2 "register_operand"))))]
26664 "TARGET_64BIT && !TARGET_OPT_AGU
26665 && REGNO (operands[0]) == REGNO (operands[2])
26666 && peep2_regno_dead_p (0, FLAGS_REG)"
26667 [(parallel [(set (match_dup 0)
26668 (zero_extend:DI (plus:SI (match_dup 2) (match_dup 1))))
26669 (clobber (reg:CC FLAGS_REG))])])
26670
;; Peephole: "reg = reg * C" without a clobber, where C is a constant for
;; which pow2p_hwi holds, becomes "reg = reg << exact_log2 (C)" in a
;; parallel with a FLAGS_REG clobber.  FLAGS_REG must pass
;; peep2_regno_dead_p at offset 0.
26671 (define_peephole2
26672 [(set (match_operand:SWI48 0 "register_operand")
26673 (mult:SWI48 (match_dup 0)
26674 (match_operand:SWI48 1 "const_int_operand")))]
26675 "pow2p_hwi (INTVAL (operands[1]))
26676 && peep2_regno_dead_p (0, FLAGS_REG)"
26677 [(parallel [(set (match_dup 0) (ashift:SWI48 (match_dup 0) (match_dup 1)))
26678 (clobber (reg:CC FLAGS_REG))])]
26679 "operands[1] = GEN_INT (exact_log2 (INTVAL (operands[1])));")
26680
;; Peephole: "reg:DI = zero_extend (reg:SI * C)" without a clobber, where
;; the destination and operand 1 have the same REGNO and pow2p_hwi holds for
;; C, becomes "zero_extend (reg:SI << exact_log2 (C))" in a parallel with a
;; FLAGS_REG clobber.  Conditions: TARGET_64BIT and FLAGS_REG must pass
;; peep2_regno_dead_p at offset 0.
26681 (define_peephole2
26682 [(set (match_operand:DI 0 "register_operand")
26683 (zero_extend:DI
26684 (mult:SI (match_operand:SI 1 "register_operand")
26685 (match_operand:SI 2 "const_int_operand"))))]
26686 "TARGET_64BIT
26687 && pow2p_hwi (INTVAL (operands[2]))
26688 && REGNO (operands[0]) == REGNO (operands[1])
26689 && peep2_regno_dead_p (0, FLAGS_REG)"
26690 [(parallel [(set (match_dup 0)
26691 (zero_extend:DI (ashift:SI (match_dup 1) (match_dup 2))))
26692 (clobber (reg:CC FLAGS_REG))])]
26693 "operands[2] = GEN_INT (exact_log2 (INTVAL (operands[2])));")
26694
26695 ;; The ESP adjustments can be done by the push and pop instructions. Resulting
26696 ;; code is shorter, since push is only 1 byte, while add imm, %esp is 3 bytes.
26697 ;; On many CPUs it is also faster, since special hardware to avoid esp
26698 ;; dependencies is present.
26699
26700 ;; While some of these conversions may be done using splitters, we use
26701 ;; peepholes in order to allow combine_stack_adjustments pass to see
26702 ;; nonobfuscated RTL.
26703
26704 ;; Convert prologue esp subtractions to push.
26705 ;; We need register to push. In order to keep verify_flow_info happy we have
26706 ;; two choices
26707 ;; - use scratch and clobber it in order to avoid dependencies
26708 ;; - use already live register
26709 ;; We can't use the second way right now, since there is no reliable way to
26710 ;; verify that a given register is live. The first choice will also most likely
26711 ;; result in fewer dependencies. At the place of esp adjustments it is very likely
26712 ;; that call-clobbered registers are dead. We may want to use the base pointer as
26713 ;; an alternative when no register is available later.
26714
;; Peephole: a stack-pointer adjustment by minus one word (with FLAGS_REG
;; and mem:BLK scratch clobbers) becomes a clobber of a scratch register
;; followed by a push of that register (keeping the mem:BLK clobber).
;; Conditions: TARGET_SINGLE_PUSH or optimizing for size, and
;; !ix86_red_zone_used.
26715 (define_peephole2
26716 [(match_scratch:W 1 "r")
26717 (parallel [(set (reg:P SP_REG)
26718 (plus:P (reg:P SP_REG)
26719 (match_operand:P 0 "const_int_operand")))
26720 (clobber (reg:CC FLAGS_REG))
26721 (clobber (mem:BLK (scratch)))])]
26722 "(TARGET_SINGLE_PUSH || optimize_insn_for_size_p ())
26723 && INTVAL (operands[0]) == -GET_MODE_SIZE (word_mode)
26724 && !ix86_red_zone_used"
26725 [(clobber (match_dup 1))
26726 (parallel [(set (mem:W (pre_dec:P (reg:P SP_REG))) (match_dup 1))
26727 (clobber (mem:BLK (scratch)))])])
26728
;; Peephole: a stack-pointer adjustment by minus two words (with FLAGS_REG
;; and mem:BLK scratch clobbers) becomes a clobber of a scratch register
;; followed by two pushes of that register; the second push keeps the
;; mem:BLK clobber.  Conditions: TARGET_DOUBLE_PUSH or optimizing for size,
;; and !ix86_red_zone_used.
26729 (define_peephole2
26730 [(match_scratch:W 1 "r")
26731 (parallel [(set (reg:P SP_REG)
26732 (plus:P (reg:P SP_REG)
26733 (match_operand:P 0 "const_int_operand")))
26734 (clobber (reg:CC FLAGS_REG))
26735 (clobber (mem:BLK (scratch)))])]
26736 "(TARGET_DOUBLE_PUSH || optimize_insn_for_size_p ())
26737 && INTVAL (operands[0]) == -2*GET_MODE_SIZE (word_mode)
26738 && !ix86_red_zone_used"
26739 [(clobber (match_dup 1))
26740 (set (mem:W (pre_dec:P (reg:P SP_REG))) (match_dup 1))
26741 (parallel [(set (mem:W (pre_dec:P (reg:P SP_REG))) (match_dup 1))
26742 (clobber (mem:BLK (scratch)))])])
26743
26744 ;; Convert esp subtractions to push.
;; Peephole: a stack-pointer adjustment by minus one word (FLAGS_REG clobber
;; only, no mem:BLK clobber) becomes a clobber of a scratch register
;; followed by a push of that register.  Conditions: TARGET_SINGLE_PUSH or
;; optimizing for size, and !ix86_red_zone_used.
26745 (define_peephole2
26746 [(match_scratch:W 1 "r")
26747 (parallel [(set (reg:P SP_REG)
26748 (plus:P (reg:P SP_REG)
26749 (match_operand:P 0 "const_int_operand")))
26750 (clobber (reg:CC FLAGS_REG))])]
26751 "(TARGET_SINGLE_PUSH || optimize_insn_for_size_p ())
26752 && INTVAL (operands[0]) == -GET_MODE_SIZE (word_mode)
26753 && !ix86_red_zone_used"
26754 [(clobber (match_dup 1))
26755 (set (mem:W (pre_dec:P (reg:P SP_REG))) (match_dup 1))])
26756
;; Peephole: a stack-pointer adjustment by minus two words (FLAGS_REG
;; clobber only) becomes a clobber of a scratch register followed by two
;; pushes of that register.  Conditions: TARGET_DOUBLE_PUSH or optimizing
;; for size, and !ix86_red_zone_used.
26757 (define_peephole2
26758 [(match_scratch:W 1 "r")
26759 (parallel [(set (reg:P SP_REG)
26760 (plus:P (reg:P SP_REG)
26761 (match_operand:P 0 "const_int_operand")))
26762 (clobber (reg:CC FLAGS_REG))])]
26763 "(TARGET_DOUBLE_PUSH || optimize_insn_for_size_p ())
26764 && INTVAL (operands[0]) == -2*GET_MODE_SIZE (word_mode)
26765 && !ix86_red_zone_used"
26766 [(clobber (match_dup 1))
26767 (set (mem:W (pre_dec:P (reg:P SP_REG))) (match_dup 1))
26768 (set (mem:W (pre_dec:P (reg:P SP_REG))) (match_dup 1))])
26769
26770 ;; Convert epilogue deallocator to pop.
;; Peephole: a stack-pointer adjustment by plus one word (with FLAGS_REG and
;; mem:BLK scratch clobbers) becomes a pop into a scratch register, keeping
;; the mem:BLK clobber.  Conditions: TARGET_SINGLE_POP or optimizing for
;; size.
26771 (define_peephole2
26772 [(match_scratch:W 1 "r")
26773 (parallel [(set (reg:P SP_REG)
26774 (plus:P (reg:P SP_REG)
26775 (match_operand:P 0 "const_int_operand")))
26776 (clobber (reg:CC FLAGS_REG))
26777 (clobber (mem:BLK (scratch)))])]
26778 "(TARGET_SINGLE_POP || optimize_insn_for_size_p ())
26779 && INTVAL (operands[0]) == GET_MODE_SIZE (word_mode)"
26780 [(parallel [(set (match_dup 1) (mem:W (post_inc:P (reg:P SP_REG))))
26781 (clobber (mem:BLK (scratch)))])])
26782
26783 ;; Two pops case is tricky, since pop causes dependency
26784 ;; on destination register. We use two registers if available.
;; Peephole: a stack-pointer adjustment by plus two words (with FLAGS_REG
;; and mem:BLK scratch clobbers) becomes two pops into two distinct scratch
;; registers; the first pop keeps the mem:BLK clobber.
;; Conditions: TARGET_DOUBLE_POP or optimizing for size.
26785 (define_peephole2
26786 [(match_scratch:W 1 "r")
26787 (match_scratch:W 2 "r")
26788 (parallel [(set (reg:P SP_REG)
26789 (plus:P (reg:P SP_REG)
26790 (match_operand:P 0 "const_int_operand")))
26791 (clobber (reg:CC FLAGS_REG))
26792 (clobber (mem:BLK (scratch)))])]
26793 "(TARGET_DOUBLE_POP || optimize_insn_for_size_p ())
26794 && INTVAL (operands[0]) == 2*GET_MODE_SIZE (word_mode)"
26795 [(parallel [(set (match_dup 1) (mem:W (post_inc:P (reg:P SP_REG))))
26796 (clobber (mem:BLK (scratch)))])
26797 (set (match_dup 2) (mem:W (post_inc:P (reg:P SP_REG))))])
26798
;; Peephole: single-scratch form of the two-word epilogue deallocation.  A
;; stack-pointer adjustment by plus two words (with FLAGS_REG and mem:BLK
;; scratch clobbers) becomes two pops into the same scratch register; the
;; first pop keeps the mem:BLK clobber.  Only applied when optimizing for
;; size.
26799 (define_peephole2
26800 [(match_scratch:W 1 "r")
26801 (parallel [(set (reg:P SP_REG)
26802 (plus:P (reg:P SP_REG)
26803 (match_operand:P 0 "const_int_operand")))
26804 (clobber (reg:CC FLAGS_REG))
26805 (clobber (mem:BLK (scratch)))])]
26806 "optimize_insn_for_size_p ()
26807 && INTVAL (operands[0]) == 2*GET_MODE_SIZE (word_mode)"
26808 [(parallel [(set (match_dup 1) (mem:W (post_inc:P (reg:P SP_REG))))
26809 (clobber (mem:BLK (scratch)))])
26810 (set (match_dup 1) (mem:W (post_inc:P (reg:P SP_REG))))])
26811
26812 ;; Convert esp additions to pop.
;; Peephole: a stack-pointer adjustment by plus one word (FLAGS_REG clobber
;; only) becomes a pop into a scratch register.  The only condition is the
;; adjustment size; no tuning flag is tested here.
26813 (define_peephole2
26814 [(match_scratch:W 1 "r")
26815 (parallel [(set (reg:P SP_REG)
26816 (plus:P (reg:P SP_REG)
26817 (match_operand:P 0 "const_int_operand")))
26818 (clobber (reg:CC FLAGS_REG))])]
26819 "INTVAL (operands[0]) == GET_MODE_SIZE (word_mode)"
26820 [(set (match_dup 1) (mem:W (post_inc:P (reg:P SP_REG))))])
26821
26822 ;; Two pops case is tricky, since pop causes dependency
26823 ;; on destination register. We use two registers if available.
;; Peephole: a stack-pointer adjustment by plus two words (FLAGS_REG clobber
;; only) becomes two pops into two distinct scratch registers.  The only
;; condition is the adjustment size.
26824 (define_peephole2
26825 [(match_scratch:W 1 "r")
26826 (match_scratch:W 2 "r")
26827 (parallel [(set (reg:P SP_REG)
26828 (plus:P (reg:P SP_REG)
26829 (match_operand:P 0 "const_int_operand")))
26830 (clobber (reg:CC FLAGS_REG))])]
26831 "INTVAL (operands[0]) == 2*GET_MODE_SIZE (word_mode)"
26832 [(set (match_dup 1) (mem:W (post_inc:P (reg:P SP_REG))))
26833 (set (match_dup 2) (mem:W (post_inc:P (reg:P SP_REG))))])
26834
;; Peephole: single-scratch form of the two-word stack deallocation.  A
;; stack-pointer adjustment by plus two words (FLAGS_REG clobber only)
;; becomes two pops into the same scratch register.  Only applied when
;; optimizing for size.
26835 (define_peephole2
26836 [(match_scratch:W 1 "r")
26837 (parallel [(set (reg:P SP_REG)
26838 (plus:P (reg:P SP_REG)
26839 (match_operand:P 0 "const_int_operand")))
26840 (clobber (reg:CC FLAGS_REG))])]
26841 "optimize_insn_for_size_p ()
26842 && INTVAL (operands[0]) == 2*GET_MODE_SIZE (word_mode)"
26843 [(set (match_dup 1) (mem:W (post_inc:P (reg:P SP_REG))))
26844 (set (match_dup 1) (mem:W (post_inc:P (reg:P SP_REG))))])
26845 \f
26846 ;; Convert compares with 1 to shorter inc/dec operations when CF is not
26847 ;; required and register dies. Similarly for 128 to -128.
;; Peephole: a flags-setting compare of a register against a constant is
;; rewritten as a parallel of the same compare plus a clobber of the
;; compared register (operand 2).
;; The constant must either satisfy incdec_operand (with
;; !TARGET_FUSE_CMP_AND_BRANCH or optimizing for size) or be 128 (with
;; !TARGET_FUSE_CMP_AND_BRANCH).  In addition the insn must satisfy
;; ix86_match_ccmode for CCGCmode and operand 2 must pass peep2_reg_dead_p
;; at offset 1.
26848 (define_peephole2
26849 [(set (match_operand 0 "flags_reg_operand")
26850 (match_operator 1 "compare_operator"
26851 [(match_operand 2 "register_operand")
26852 (match_operand 3 "const_int_operand")]))]
26853 "(((!TARGET_FUSE_CMP_AND_BRANCH || optimize_insn_for_size_p ())
26854 && incdec_operand (operands[3], GET_MODE (operands[3])))
26855 || (!TARGET_FUSE_CMP_AND_BRANCH
26856 && INTVAL (operands[3]) == 128))
26857 && ix86_match_ccmode (insn, CCGCmode)
26858 && peep2_reg_dead_p (1, operands[2])"
26859 [(parallel [(set (match_dup 0)
26860 (match_op_dup 1 [(match_dup 2) (match_dup 3)]))
26861 (clobber (match_dup 2))])])
26862 \f
26863 ;; Convert imul by three, five and nine into lea
;; Peephole: "r0 = r1 * C" (register source, C satisfying const359_operand,
;; FLAGS_REG clobber) becomes "r0 = r1 * (C - 1) + r1" with no clobber.
;; Applied when !TARGET_PARTIAL_REG_STALL, or the mode is SImode, or the
;; function is optimized for size.
26864 (define_peephole2
26865 [(parallel
26866 [(set (match_operand:SWI48 0 "register_operand")
26867 (mult:SWI48 (match_operand:SWI48 1 "register_operand")
26868 (match_operand:SWI48 2 "const359_operand")))
26869 (clobber (reg:CC FLAGS_REG))])]
26870 "!TARGET_PARTIAL_REG_STALL
26871 || <MODE>mode == SImode
26872 || optimize_function_for_size_p (cfun)"
26873 [(set (match_dup 0)
26874 (plus:SWI48 (mult:SWI48 (match_dup 1) (match_dup 2))
26875 (match_dup 1)))]
26876 "operands[2] = GEN_INT (INTVAL (operands[2]) - 1);")
26877
;; Peephole: "r0 = op1 * C" (op1 any nonimmediate operand, C satisfying
;; const359_operand, FLAGS_REG clobber) becomes "r0 = op1" followed by
;; "r0 = r0 * (C - 1) + r0" with no clobber.
;; Applied when optimizing for speed and either !TARGET_PARTIAL_REG_STALL
;; or the mode is SImode.
26878 (define_peephole2
26879 [(parallel
26880 [(set (match_operand:SWI48 0 "register_operand")
26881 (mult:SWI48 (match_operand:SWI48 1 "nonimmediate_operand")
26882 (match_operand:SWI48 2 "const359_operand")))
26883 (clobber (reg:CC FLAGS_REG))])]
26884 "optimize_insn_for_speed_p ()
26885 && (!TARGET_PARTIAL_REG_STALL || <MODE>mode == SImode)"
26886 [(set (match_dup 0) (match_dup 1))
26887 (set (match_dup 0)
26888 (plus:SWI48 (mult:SWI48 (match_dup 0) (match_dup 2))
26889 (match_dup 0)))]
26890 "operands[2] = GEN_INT (INTVAL (operands[2]) - 1);")
26891
26892 ;; imul $32bit_imm, mem, reg is vector decoded, while
26893 ;; imul $32bit_imm, reg, reg is direct decoded.
;; Peephole: "r0 = mem * imm" becomes a load of mem into a scratch register
;; followed by "r0 = scratch * imm".  Applied when
;; TARGET_SLOW_IMUL_IMM32_MEM, optimizing for speed, and the immediate does
;; not satisfy constraint K.
26894 (define_peephole2
26895 [(match_scratch:SWI48 3 "r")
26896 (parallel [(set (match_operand:SWI48 0 "register_operand")
26897 (mult:SWI48 (match_operand:SWI48 1 "memory_operand")
26898 (match_operand:SWI48 2 "immediate_operand")))
26899 (clobber (reg:CC FLAGS_REG))])]
26900 "TARGET_SLOW_IMUL_IMM32_MEM && optimize_insn_for_speed_p ()
26901 && !satisfies_constraint_K (operands[2])"
26902 [(set (match_dup 3) (match_dup 1))
26903 (parallel [(set (match_dup 0) (mult:SWI48 (match_dup 3) (match_dup 2)))
26904 (clobber (reg:CC FLAGS_REG))])])
26905
;; Peephole: zero-extended form of the memory-times-immediate rewrite.
;; "r0:DI = zero_extend (mem:SI * imm)" becomes a load of mem into an SImode
;; scratch register followed by "r0:DI = zero_extend (scratch * imm)".
;; Applied when TARGET_64BIT, TARGET_SLOW_IMUL_IMM32_MEM, optimizing for
;; speed, and the immediate does not satisfy constraint K.
26906 (define_peephole2
26907 [(match_scratch:SI 3 "r")
26908 (parallel [(set (match_operand:DI 0 "register_operand")
26909 (zero_extend:DI
26910 (mult:SI (match_operand:SI 1 "memory_operand")
26911 (match_operand:SI 2 "immediate_operand"))))
26912 (clobber (reg:CC FLAGS_REG))])]
26913 "TARGET_64BIT
26914 && TARGET_SLOW_IMUL_IMM32_MEM && optimize_insn_for_speed_p ()
26915 && !satisfies_constraint_K (operands[2])"
26916 [(set (match_dup 3) (match_dup 1))
26917 (parallel [(set (match_dup 0)
26918 (zero_extend:DI (mult:SI (match_dup 3) (match_dup 2))))
26919 (clobber (reg:CC FLAGS_REG))])])
26920
26921 ;; imul $8/16bit_imm, regmem, reg is vector decoded.
26922 ;; Convert it into imul reg, reg
26923 ;; It would be better to force assembler to encode instruction using long
26924 ;; immediate, but there is apparently no way to do so.
;; Peephole: "r0 = op1 * C" where C satisfies constraint K becomes a load of
;; C into a scratch register followed by "r0 = r0 * scratch".  The
;; preparation code first emits a move of op1 into r0 unless they are
;; already rtx_equal_p.  Applied when TARGET_SLOW_IMUL_IMM8 and optimizing
;; for speed.
26925 (define_peephole2
26926 [(parallel [(set (match_operand:SWI248 0 "register_operand")
26927 (mult:SWI248
26928 (match_operand:SWI248 1 "nonimmediate_operand")
26929 (match_operand:SWI248 2 "const_int_operand")))
26930 (clobber (reg:CC FLAGS_REG))])
26931 (match_scratch:SWI248 3 "r")]
26932 "TARGET_SLOW_IMUL_IMM8 && optimize_insn_for_speed_p ()
26933 && satisfies_constraint_K (operands[2])"
26934 [(set (match_dup 3) (match_dup 2))
26935 (parallel [(set (match_dup 0) (mult:SWI248 (match_dup 0) (match_dup 3)))
26936 (clobber (reg:CC FLAGS_REG))])]
26937 {
26938 if (!rtx_equal_p (operands[0], operands[1]))
26939 emit_move_insn (operands[0], operands[1]);
26940 })
26941
26942 ;; After splitting up read-modify operations, array accesses with memory
26943 ;; operands might end up in form:
26944 ;; sall $2, %eax
26945 ;; movl 4(%esp), %edx
26946 ;; addl %edx, %eax
26947 ;; instead of pre-splitting:
26948 ;; sall $2, %eax
26949 ;; addl 4(%esp), %eax
26950 ;; Turn it into:
26951 ;; movl 4(%esp), %edx
26952 ;; leal (%edx,%eax,4), %eax
26953
;; Peephole: "r0 = r1 << N; r3 = r0 + op4" (both with FLAGS_REG clobbers)
;; becomes "scratch = op4; r3 = scratch + r1 * (1 << N)".
;; Conditions: N is in the range 1..3; the mode of operand 0 is SImode,
;; DImode on TARGET_64BIT, or QImode/HImode when !TARGET_PARTIAL_REG_STALL;
;; operand 0 is either rtx_equal_p to operand 3 or passes peep2_reg_dead_p
;; at offset 2; operand 0 is not mentioned in operand 4.
;; The preparation code builds the address-style sum in word_mode from the
;; word_mode lowparts of operand 1 (index) and the scratch (base), wraps it
;; in a SUBREG when the result mode (DImode if operand 1 is DImode, else
;; SImode) differs from word_mode, narrows the scratch to operand 1's mode
;; for the load, and uses the result-mode lowpart of operand 3 as the
;; destination.
26954 (define_peephole2
26955 [(match_scratch:W 5 "r")
26956 (parallel [(set (match_operand 0 "register_operand")
26957 (ashift (match_operand 1 "register_operand")
26958 (match_operand 2 "const_int_operand")))
26959 (clobber (reg:CC FLAGS_REG))])
26960 (parallel [(set (match_operand 3 "register_operand")
26961 (plus (match_dup 0)
26962 (match_operand 4 "x86_64_general_operand")))
26963 (clobber (reg:CC FLAGS_REG))])]
26964 "IN_RANGE (INTVAL (operands[2]), 1, 3)
26965 /* Validate MODE for lea. */
26966 && ((!TARGET_PARTIAL_REG_STALL
26967 && (GET_MODE (operands[0]) == QImode
26968 || GET_MODE (operands[0]) == HImode))
26969 || GET_MODE (operands[0]) == SImode
26970 || (TARGET_64BIT && GET_MODE (operands[0]) == DImode))
26971 && (rtx_equal_p (operands[0], operands[3])
26972 || peep2_reg_dead_p (2, operands[0]))
26973 /* We reorder load and the shift. */
26974 && !reg_overlap_mentioned_p (operands[0], operands[4])"
26975 [(set (match_dup 5) (match_dup 4))
26976 (set (match_dup 0) (match_dup 1))]
26977 {
26978 machine_mode op1mode = GET_MODE (operands[1]);
26979 machine_mode mode = op1mode == DImode ? DImode : SImode;
26980 int scale = 1 << INTVAL (operands[2]);
26981 rtx index = gen_lowpart (word_mode, operands[1]);
26982 rtx base = gen_lowpart (word_mode, operands[5]);
26983 rtx dest = gen_lowpart (mode, operands[3]);
26984
26985 operands[1] = gen_rtx_PLUS (word_mode, base,
26986 gen_rtx_MULT (word_mode, index, GEN_INT (scale)));
26987 if (mode != word_mode)
26988 operands[1] = gen_rtx_SUBREG (mode, operands[1], 0);
26989
26990 operands[5] = base;
26991 if (op1mode != word_mode)
26992 operands[5] = gen_lowpart (op1mode, operands[5]);
26993
26994 operands[0] = dest;
26995 })
26996 \f
26997 ;; We used to use "int $5", in honor of #BR which maps to interrupt vector 5.
26998 ;; That, however, is usually mapped by the OS to SIGSEGV, which is often
26999 ;; caught for use by garbage collectors and the like. Using an insn that
27000 ;; maps to SIGILL makes it more likely the program will rightfully die.
27001 ;; Keeping with tradition, "6" is in honor of #UD.
;; Unconditional trap insn: (trap_if (const_int 1) (const_int 6)).
;; Outputs "ud2" when HAVE_AS_IX86_UD2 is defined, otherwise the same
;; two-byte encoding as raw data via ASM_SHORT "0x0b0f".  The "length"
;; attribute is 2.
27002 (define_insn "trap"
27003 [(trap_if (const_int 1) (const_int 6))]
27004 ""
27005 {
27006 #ifdef HAVE_AS_IX86_UD2
27007 return "ud2";
27008 #else
27009 return ASM_SHORT "0x0b0f";
27010 #endif
27011 }
27012 [(set_attr "length" "2")])
27013
;; Named "ud2" insn expressed as an unspec_volatile (UNSPECV_UD2) rather
;; than a trap_if.  The output is identical to the "trap" pattern: "ud2"
;; when HAVE_AS_IX86_UD2 is defined, otherwise ASM_SHORT "0x0b0f".  The
;; "length" attribute is 2.
27014 (define_insn "ud2"
27015 [(unspec_volatile [(const_int 0)] UNSPECV_UD2)]
27016 ""
27017 {
27018 #ifdef HAVE_AS_IX86_UD2
27019 return "ud2";
27020 #else
27021 return ASM_SHORT "0x0b0f";
27022 #endif
27023 }
27024 [(set_attr "length" "2")])
27025
;; Expander for the "prefetch" pattern.  Operand 0 is the address, operand 1
;; is treated as the write flag (nonzero means write) and operand 2 as the
;; locality, which is asserted to be in the range 0..3.
;; The preparation code adjusts operands 1 and 2:
;;   - write prefetch: with TARGET_PREFETCHWT1 the locality is raised to at
;;     least 2; else with TARGET_PRFCHW, or with TARGET_3DNOW && !TARGET_SSE2,
;;     the locality is forced to 3; else with TARGET_PREFETCH_SSE the
;;     request is downgraded to a read prefetch (operand 1 set to 0); else
;;     TARGET_3DNOW is asserted and the locality is forced to 3.
;;   - read prefetch: with TARGET_PREFETCH_SSE the operands are left alone;
;;     otherwise TARGET_3DNOW is asserted and the locality is forced to 3.
27026 (define_expand "prefetch"
27027 [(prefetch (match_operand 0 "address_operand")
27028 (match_operand:SI 1 "const_int_operand")
27029 (match_operand:SI 2 "const_int_operand"))]
27030 "TARGET_3DNOW || TARGET_PREFETCH_SSE || TARGET_PRFCHW || TARGET_PREFETCHWT1"
27031 {
27032 bool write = operands[1] != const0_rtx;
27033 int locality = INTVAL (operands[2]);
27034
27035 gcc_assert (IN_RANGE (locality, 0, 3));
27036
27037 /* Use 3dNOW prefetch in case we are asking for write prefetch not
27038 supported by SSE counterpart (non-SSE2 athlon machines) or the
27039 SSE prefetch is not available (K6 machines). Otherwise use SSE
27040 prefetch as it allows specifying of locality. */
27041
27042 if (write)
27043 {
27044 if (TARGET_PREFETCHWT1)
27045 operands[2] = GEN_INT (MAX (locality, 2));
27046 else if (TARGET_PRFCHW)
27047 operands[2] = GEN_INT (3);
27048 else if (TARGET_3DNOW && !TARGET_SSE2)
27049 operands[2] = GEN_INT (3);
27050 else if (TARGET_PREFETCH_SSE)
27051 operands[1] = const0_rtx;
27052 else
27053 {
27054 gcc_assert (TARGET_3DNOW);
27055 operands[2] = GEN_INT (3);
27056 }
27057 }
27058 else
27059 {
27060 if (TARGET_PREFETCH_SSE)
27061 ;
27062 else
27063 {
27064 gcc_assert (TARGET_3DNOW);
27065 operands[2] = GEN_INT (3);
27066 }
27067 }
27068 })
27069
;; SSE prefetch insn: matches a prefetch whose second argument is the
;; constant 0, under TARGET_PREFETCH_SSE.  Operand 1 is the locality
;; (asserted to be 0..3) and indexes the mnemonic table: 0 -> prefetchnta,
;; 1 -> prefetcht2, 2 -> prefetcht1, 3 -> prefetcht0.  The address length
;; attribute is computed by memory_address_length on operand 0.
27070 (define_insn "*prefetch_sse"
27071 [(prefetch (match_operand 0 "address_operand" "p")
27072 (const_int 0)
27073 (match_operand:SI 1 "const_int_operand"))]
27074 "TARGET_PREFETCH_SSE"
27075 {
27076 static const char * const patterns[4] = {
27077 "prefetchnta\t%a0", "prefetcht2\t%a0", "prefetcht1\t%a0", "prefetcht0\t%a0"
27078 };
27079
27080 int locality = INTVAL (operands[1]);
27081 gcc_assert (IN_RANGE (locality, 0, 3));
27082
27083 return patterns[locality];
27084 }
27085 [(set_attr "type" "sse")
27086 (set_attr "atom_sse_attr" "prefetch")
27087 (set (attr "length_address")
27088 (symbol_ref "memory_address_length (operands[0], false)"))
27089 (set_attr "memory" "none")])
27090
;; 3DNow!/PRFCHW-style prefetch insn: matches a prefetch whose third
;; argument is the constant 3, under TARGET_3DNOW, TARGET_PRFCHW or
;; TARGET_PREFETCHWT1.  Emits "prefetch" when operand 1 is const0_rtx and
;; "prefetchw" otherwise.
27091 (define_insn "*prefetch_3dnow"
27092 [(prefetch (match_operand 0 "address_operand" "p")
27093 (match_operand:SI 1 "const_int_operand")
27094 (const_int 3))]
27095 "TARGET_3DNOW || TARGET_PRFCHW || TARGET_PREFETCHWT1"
27096 {
27097 if (operands[1] == const0_rtx)
27098 return "prefetch\t%a0";
27099 else
27100 return "prefetchw\t%a0";
27101 }
27102 [(set_attr "type" "mmx")
27103 (set (attr "length_address")
27104 (symbol_ref "memory_address_length (operands[0], false)"))
27105 (set_attr "memory" "none")])
27106
;; PREFETCHWT1 insn: matches only a prefetch whose second and third
;; arguments are the constants 1 and 2, under TARGET_PREFETCHWT1, and emits
;; "prefetchwt1" on the address operand.  The address length attribute is
;; computed by memory_address_length on operand 0.
27107 (define_insn "*prefetch_prefetchwt1"
27108 [(prefetch (match_operand 0 "address_operand" "p")
27109 (const_int 1)
27110 (const_int 2))]
27111 "TARGET_PREFETCHWT1"
27112 "prefetchwt1\t%a0"
27113 [(set_attr "type" "sse")
27114 (set (attr "length_address")
27115 (symbol_ref "memory_address_length (operands[0], false)"))
27116 (set_attr "memory" "none")])
27117
;; Instruction prefetch, enabled for TARGET_PREFETCHI && TARGET_64BIT.
;; Operand 0 must satisfy local_func_symbolic_operand.  Operand 1 is a
;; constant asserted to be 2 or 3: 2 selects prefetchit1, 3 selects
;; prefetchit0 (the table is indexed with locality - 2).
27118 (define_insn "prefetchi"
27119 [(unspec_volatile [(match_operand 0 "local_func_symbolic_operand" "p")
27120 (match_operand:SI 1 "const_int_operand")]
27121 UNSPECV_PREFETCHI)]
27122 "TARGET_PREFETCHI && TARGET_64BIT"
27123 {
27124 static const char * const patterns[2] = {
27125 "prefetchit1\t%0", "prefetchit0\t%0"
27126 };
27127
27128 int locality = INTVAL (operands[1]);
27129 gcc_assert (IN_RANGE (locality, 2, 3));
27130
27131 return patterns[locality - 2];
27132 }
27133 [(set_attr "type" "sse")
27134 (set (attr "length_address")
27135 (symbol_ref "memory_address_length (operands[0], false)"))
27136 (set_attr "memory" "none")])
27137
;; CRC32 over an SWI124-mode data operand (operand 2, register or memory).
;; Operand 1 is the incoming SImode value and is tied to the destination
;; (constraint "0").  prefix_data16 is "1" when operand 2 is HImode;
;; prefix_rex is "1" when operand 2 is a QImode ext_QIreg_operand.
27138 (define_insn "sse4_2_crc32<mode>"
27139 [(set (match_operand:SI 0 "register_operand" "=r")
27140 (unspec:SI
27141 [(match_operand:SI 1 "register_operand" "0")
27142 (match_operand:SWI124 2 "nonimmediate_operand" "<r>m")]
27143 UNSPEC_CRC32))]
27144 "TARGET_CRC32"
27145 "crc32{<imodesuffix>}\t{%2, %0|%0, %2}"
27146 [(set_attr "type" "sselog1")
27147 (set_attr "prefix_rep" "1")
27148 (set_attr "prefix_extra" "1")
27149 (set (attr "prefix_data16")
27150 (if_then_else (match_operand:HI 2)
27151 (const_string "1")
27152 (const_string "*")))
27153 (set (attr "prefix_rex")
27154 (if_then_else (match_operand:QI 2 "ext_QIreg_operand")
27155 (const_string "1")
27156 (const_string "*")))
27157 (set_attr "mode" "SI")])
27158
;; CRC32 with a DImode data operand (operand 2), TARGET_64BIT only.  The
;; SImode unspec result is zero-extended into the DImode destination;
;; operand 1 (SImode) is tied to the destination register.
27159 (define_insn "sse4_2_crc32di"
27160 [(set (match_operand:DI 0 "register_operand" "=r")
27161 (zero_extend:DI
27162 (unspec:SI
27163 [(match_operand:SI 1 "register_operand" "0")
27164 (match_operand:DI 2 "nonimmediate_operand" "rm")]
27165 UNSPEC_CRC32)))]
27166 "TARGET_64BIT && TARGET_CRC32"
27167 "crc32{q}\t{%2, %0|%0, %2}"
27168 [(set_attr "type" "sselog1")
27169 (set_attr "prefix_rep" "1")
27170 (set_attr "prefix_extra" "1")
27171 (set_attr "mode" "DI")])
27172
;; RDPMC for !TARGET_64BIT.  SImode input operand 1 uses constraint "c";
;; the DImode result (operand 0) uses constraint "A".
27173 (define_insn "rdpmc"
27174 [(set (match_operand:DI 0 "register_operand" "=A")
27175 (unspec_volatile:DI [(match_operand:SI 1 "register_operand" "c")]
27176 UNSPECV_RDPMC))]
27177 "!TARGET_64BIT"
27178 "rdpmc"
27179 [(set_attr "type" "other")
27180 (set_attr "length" "2")])
27181
;; RDPMC for TARGET_64BIT.  The result is modelled as two DImode sets,
;; operand 0 (constraint "a") and operand 1 (constraint "d"), both derived
;; from the same SImode input operand 2 (constraint "c").
27182 (define_insn "rdpmc_rex64"
27183 [(set (match_operand:DI 0 "register_operand" "=a")
27184 (unspec_volatile:DI [(match_operand:SI 2 "register_operand" "c")]
27185 UNSPECV_RDPMC))
27186 (set (match_operand:DI 1 "register_operand" "=d")
27187 (unspec_volatile:DI [(match_dup 2)] UNSPECV_RDPMC))]
27188 "TARGET_64BIT"
27189 "rdpmc"
27190 [(set_attr "type" "other")
27191 (set_attr "length" "2")])
27192
;; RDTSC for !TARGET_64BIT.  No inputs; the DImode result (operand 0) uses
;; constraint "A".
27193 (define_insn "rdtsc"
27194 [(set (match_operand:DI 0 "register_operand" "=A")
27195 (unspec_volatile:DI [(const_int 0)] UNSPECV_RDTSC))]
27196 "!TARGET_64BIT"
27197 "rdtsc"
27198 [(set_attr "type" "other")
27199 (set_attr "length" "2")])
27200
;; RDTSC for TARGET_64BIT.  No inputs; the result is modelled as two DImode
;; sets, operand 0 (constraint "a") and operand 1 (constraint "d").
27201 (define_insn "rdtsc_rex64"
27202 [(set (match_operand:DI 0 "register_operand" "=a")
27203 (unspec_volatile:DI [(const_int 0)] UNSPECV_RDTSC))
27204 (set (match_operand:DI 1 "register_operand" "=d")
27205 (unspec_volatile:DI [(const_int 0)] UNSPECV_RDTSC))]
27206 "TARGET_64BIT"
27207 "rdtsc"
27208 [(set_attr "type" "other")
27209 (set_attr "length" "2")])
27210
;; RDTSCP for !TARGET_64BIT.  Produces a DImode result in operand 0
;; (constraint "A") plus an SImode result in operand 1 (constraint "c").
27211 (define_insn "rdtscp"
27212 [(set (match_operand:DI 0 "register_operand" "=A")
27213 (unspec_volatile:DI [(const_int 0)] UNSPECV_RDTSCP))
27214 (set (match_operand:SI 1 "register_operand" "=c")
27215 (unspec_volatile:SI [(const_int 0)] UNSPECV_RDTSCP))]
27216 "!TARGET_64BIT"
27217 "rdtscp"
27218 [(set_attr "type" "other")
27219 (set_attr "length" "3")])
27220
;; RDTSCP for TARGET_64BIT.  Three outputs: DImode operands 0 and 1
;; (constraints "a" and "d") and SImode operand 2 (constraint "c").
27221 (define_insn "rdtscp_rex64"
27222 [(set (match_operand:DI 0 "register_operand" "=a")
27223 (unspec_volatile:DI [(const_int 0)] UNSPECV_RDTSCP))
27224 (set (match_operand:DI 1 "register_operand" "=d")
27225 (unspec_volatile:DI [(const_int 0)] UNSPECV_RDTSCP))
27226 (set (match_operand:SI 2 "register_operand" "=c")
27227 (unspec_volatile:SI [(const_int 0)] UNSPECV_RDTSCP))]
27228 "TARGET_64BIT"
27229 "rdtscp"
27230 [(set_attr "type" "other")
27231 (set_attr "length" "3")])
27232
27233 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
27234 ;;
27235 ;; FXSR, XSAVE and XSAVEOPT instructions
27236 ;;
27237 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
27238
;; FXSAVE: stores into BLKmode memory operand 0.  Enabled for TARGET_FXSR.
;; Length is ix86_attr_length_address_default (insn) + 3.
27239 (define_insn "fxsave"
27240 [(set (match_operand:BLK 0 "memory_operand" "=m")
27241 (unspec_volatile:BLK [(const_int 0)] UNSPECV_FXSAVE))]
27242 "TARGET_FXSR"
27243 "fxsave\t%0"
27244 [(set_attr "type" "other")
27245 (set_attr "memory" "store")
27246 (set (attr "length")
27247 (symbol_ref "ix86_attr_length_address_default (insn) + 3"))])
27248
;; FXSAVE64: TARGET_64BIT form of fxsave.  The memory operand uses the "jm"
;; constraint with the "addr" attribute set to "gpr16", and the length is
;; ix86_attr_length_address_default (insn) + 4.
27249 (define_insn "fxsave64"
27250 [(set (match_operand:BLK 0 "memory_operand" "=jm")
27251 (unspec_volatile:BLK [(const_int 0)] UNSPECV_FXSAVE64))]
27252 "TARGET_64BIT && TARGET_FXSR"
27253 "fxsave64\t%0"
27254 [(set_attr "type" "other")
27255 (set_attr "addr" "gpr16")
27256 (set_attr "memory" "store")
27257 (set (attr "length")
27258 (symbol_ref "ix86_attr_length_address_default (insn) + 4"))])
27259
;; FXRSTOR: reads BLKmode memory operand 0 (memory attribute "load").
;; Enabled for TARGET_FXSR; there is no RTL destination.
27260 (define_insn "fxrstor"
27261 [(unspec_volatile [(match_operand:BLK 0 "memory_operand" "m")]
27262 UNSPECV_FXRSTOR)]
27263 "TARGET_FXSR"
27264 "fxrstor\t%0"
27265 [(set_attr "type" "other")
27266 (set_attr "memory" "load")
27267 (set (attr "length")
27268 (symbol_ref "ix86_attr_length_address_default (insn) + 3"))])
27269
;; FXRSTOR64: TARGET_64BIT form of fxrstor.  The memory operand uses the
;; "jm" constraint with "addr" set to "gpr16", and the length is
;; ix86_attr_length_address_default (insn) + 4.
27270 (define_insn "fxrstor64"
27271 [(unspec_volatile [(match_operand:BLK 0 "memory_operand" "jm")]
27272 UNSPECV_FXRSTOR64)]
27273 "TARGET_64BIT && TARGET_FXSR"
27274 "fxrstor64\t%0"
27275 [(set_attr "type" "other")
27276 (set_attr "addr" "gpr16")
27277 (set_attr "memory" "load")
27278 (set (attr "length")
27279 (symbol_ref "ix86_attr_length_address_default (insn) + 4"))])
27280
;; XSAVE-family unspecs iterated by the "<xsave>" (!TARGET_64BIT) and
;; "<xsave>_rex64" patterns.  Every entry except UNSPECV_XSAVE carries its
;; own TARGET_* enable condition.
27281 (define_int_iterator ANY_XSAVE
27282 [UNSPECV_XSAVE
27283 (UNSPECV_XSAVEOPT "TARGET_XSAVEOPT")
27284 (UNSPECV_XSAVEC "TARGET_XSAVEC")
27285 (UNSPECV_XSAVES "TARGET_XSAVES")])
27286
;; The "64" counterparts of ANY_XSAVE, with the same per-entry TARGET_*
;; enable conditions.
27287 (define_int_iterator ANY_XSAVE64
27288 [UNSPECV_XSAVE64
27289 (UNSPECV_XSAVEOPT64 "TARGET_XSAVEOPT")
27290 (UNSPECV_XSAVEC64 "TARGET_XSAVEC")
27291 (UNSPECV_XSAVES64 "TARGET_XSAVES")])
27292
;; Maps each XSAVE-family unspec to its mnemonic.  The "<xsave>" patterns
;; use the string both as pattern name and as output template.
27293 (define_int_attr xsave
27294 [(UNSPECV_XSAVE "xsave")
27295 (UNSPECV_XSAVE64 "xsave64")
27296 (UNSPECV_XSAVEOPT "xsaveopt")
27297 (UNSPECV_XSAVEOPT64 "xsaveopt64")
27298 (UNSPECV_XSAVEC "xsavec")
27299 (UNSPECV_XSAVEC64 "xsavec64")
27300 (UNSPECV_XSAVES "xsaves")
27301 (UNSPECV_XSAVES64 "xsaves64")])
27302
;; XRSTOR-family unspecs; UNSPECV_XRSTORS is enabled only for TARGET_XSAVES.
27303 (define_int_iterator ANY_XRSTOR
27304 [UNSPECV_XRSTOR
27305 (UNSPECV_XRSTORS "TARGET_XSAVES")])
27306
;; The "64" counterparts of ANY_XRSTOR; UNSPECV_XRSTORS64 is enabled only
;; for TARGET_XSAVES.
27307 (define_int_iterator ANY_XRSTOR64
27308 [UNSPECV_XRSTOR64
27309 (UNSPECV_XRSTORS64 "TARGET_XSAVES")])
27310
;; Maps each XRSTOR-family unspec to its base mnemonic.  The "64" unspecs
;; map to the same strings as the plain ones; the "<xrstor>64" pattern
;; appends the "64" suffix itself in its name and template.
27311 (define_int_attr xrstor
27312 [(UNSPECV_XRSTOR "xrstor")
27313 (UNSPECV_XRSTOR64 "xrstor")
27314 (UNSPECV_XRSTORS "xrstors")
27315 (UNSPECV_XRSTORS64 "xrstors")])
27316
;; XSAVE-family store for !TARGET_64BIT, iterated over ANY_XSAVE.
;; Operand 0 is the BLKmode destination memory; operand 1 is a DImode
;; register with constraint "A".
27317 (define_insn "<xsave>"
27318 [(set (match_operand:BLK 0 "memory_operand" "=m")
27319 (unspec_volatile:BLK
27320 [(match_operand:DI 1 "register_operand" "A")]
27321 ANY_XSAVE))]
27322 "!TARGET_64BIT && TARGET_XSAVE"
27323 "<xsave>\t%0"
27324 [(set_attr "type" "other")
27325 (set_attr "memory" "store")
27326 (set (attr "length")
27327 (symbol_ref "ix86_attr_length_address_default (insn) + 3"))])
27328
;; XSAVE-family store for TARGET_64BIT, iterated over ANY_XSAVE (the
;; non-"64" mnemonics).  The register input is split into two SImode
;; operands with constraints "a" and "d".  Length is the default address
;; length + 3.
27329 (define_insn "<xsave>_rex64"
27330 [(set (match_operand:BLK 0 "memory_operand" "=jm")
27331 (unspec_volatile:BLK
27332 [(match_operand:SI 1 "register_operand" "a")
27333 (match_operand:SI 2 "register_operand" "d")]
27334 ANY_XSAVE))]
27335 "TARGET_64BIT && TARGET_XSAVE"
27336 "<xsave>\t%0"
27337 [(set_attr "type" "other")
27338 (set_attr "memory" "store")
27339 (set_attr "addr" "gpr16")
27340 (set (attr "length")
27341 (symbol_ref "ix86_attr_length_address_default (insn) + 3"))])
27342
;; XSAVE-family store for TARGET_64BIT, iterated over ANY_XSAVE64, so
;; "<xsave>" expands to the "...64" mnemonics from the xsave attribute.
;; Same operands as "<xsave>_rex64"; length is the default address
;; length + 4.
27343 (define_insn "<xsave>"
27344 [(set (match_operand:BLK 0 "memory_operand" "=jm")
27345 (unspec_volatile:BLK
27346 [(match_operand:SI 1 "register_operand" "a")
27347 (match_operand:SI 2 "register_operand" "d")]
27348 ANY_XSAVE64))]
27349 "TARGET_64BIT && TARGET_XSAVE"
27350 "<xsave>\t%0"
27351 [(set_attr "type" "other")
27352 (set_attr "memory" "store")
27353 (set_attr "addr" "gpr16")
27354 (set (attr "length")
27355 (symbol_ref "ix86_attr_length_address_default (insn) + 4"))])
27356
;; XRSTOR-family load for !TARGET_64BIT, iterated over ANY_XRSTOR.
;; Operand 0 is the BLKmode source memory; operand 1 is a DImode register
;; with constraint "A".
27357 (define_insn "<xrstor>"
27358 [(unspec_volatile:BLK
27359 [(match_operand:BLK 0 "memory_operand" "m")
27360 (match_operand:DI 1 "register_operand" "A")]
27361 ANY_XRSTOR)]
27362 "!TARGET_64BIT && TARGET_XSAVE"
27363 "<xrstor>\t%0"
27364 [(set_attr "type" "other")
27365 (set_attr "memory" "load")
27366 (set (attr "length")
27367 (symbol_ref "ix86_attr_length_address_default (insn) + 3"))])
27368
;; XRSTOR-family load for TARGET_64BIT, iterated over ANY_XRSTOR.  The
;; register input is split into two SImode operands with constraints "a"
;; and "d".  Length is the default address length + 3.
27369 (define_insn "<xrstor>_rex64"
27370 [(unspec_volatile:BLK
27371 [(match_operand:BLK 0 "memory_operand" "jm")
27372 (match_operand:SI 1 "register_operand" "a")
27373 (match_operand:SI 2 "register_operand" "d")]
27374 ANY_XRSTOR)]
27375 "TARGET_64BIT && TARGET_XSAVE"
27376 "<xrstor>\t%0"
27377 [(set_attr "type" "other")
27378 (set_attr "memory" "load")
27379 (set_attr "addr" "gpr16")
27380 (set (attr "length")
27381 (symbol_ref "ix86_attr_length_address_default (insn) + 3"))])
27382
;; "64"-suffixed XRSTOR-family load for TARGET_64BIT, iterated over
;; ANY_XRSTOR64.  The "64" suffix is appended literally to the base
;; mnemonic from the xrstor attribute.  Length is the default address
;; length + 4.
27383 (define_insn "<xrstor>64"
27384 [(unspec_volatile:BLK
27385 [(match_operand:BLK 0 "memory_operand" "jm")
27386 (match_operand:SI 1 "register_operand" "a")
27387 (match_operand:SI 2 "register_operand" "d")]
27388 ANY_XRSTOR64)]
27389 "TARGET_64BIT && TARGET_XSAVE"
27390 "<xrstor>64\t%0"
27391 [(set_attr "type" "other")
27392 (set_attr "memory" "load")
27393 (set_attr "addr" "gpr16")
27394 (set (attr "length")
27395 (symbol_ref "ix86_attr_length_address_default (insn) + 4"))])
27396
;; XSETBV for !TARGET_64BIT.  Inputs: SImode operand 0 (constraint "c") and
;; DImode operand 1 (constraint "A").  No RTL destination.
27397 (define_insn "xsetbv"
27398 [(unspec_volatile:SI
27399 [(match_operand:SI 0 "register_operand" "c")
27400 (match_operand:DI 1 "register_operand" "A")]
27401 UNSPECV_XSETBV)]
27402 "!TARGET_64BIT && TARGET_XSAVE"
27403 "xsetbv"
27404 [(set_attr "type" "other")])
27405
;; XSETBV for TARGET_64BIT.  Three SImode register inputs with constraints
;; "c", "a" and "d".
27406 (define_insn "xsetbv_rex64"
27407 [(unspec_volatile:SI
27408 [(match_operand:SI 0 "register_operand" "c")
27409 (match_operand:SI 1 "register_operand" "a")
27410 (match_operand:SI 2 "register_operand" "d")]
27411 UNSPECV_XSETBV)]
27412 "TARGET_64BIT && TARGET_XSAVE"
27413 "xsetbv"
27414 [(set_attr "type" "other")])
27415
;; XGETBV for !TARGET_64BIT.  SImode input operand 1 (constraint "c");
;; DImode result in operand 0 (constraint "A").
27416 (define_insn "xgetbv"
27417 [(set (match_operand:DI 0 "register_operand" "=A")
27418 (unspec_volatile:DI [(match_operand:SI 1 "register_operand" "c")]
27419 UNSPECV_XGETBV))]
27420 "!TARGET_64BIT && TARGET_XSAVE"
27421 "xgetbv"
27422 [(set_attr "type" "other")])
27423
;; XGETBV for TARGET_64BIT.  The result is modelled as two DImode sets
;; (operand 0 "a", operand 1 "d"), both derived from SImode input
;; operand 2 (constraint "c").
27424 (define_insn "xgetbv_rex64"
27425 [(set (match_operand:DI 0 "register_operand" "=a")
27426 (unspec_volatile:DI [(match_operand:SI 2 "register_operand" "c")]
27427 UNSPECV_XGETBV))
27428 (set (match_operand:DI 1 "register_operand" "=d")
27429 (unspec_volatile:DI [(match_dup 2)] UNSPECV_XGETBV))]
27430 "TARGET_64BIT && TARGET_XSAVE"
27431 "xgetbv"
27432 [(set_attr "type" "other")])
27433
27434 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
27435 ;;
27436 ;; Floating-point instructions for atomic compound assignments
27437 ;;
27438 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
27439
27440 ; Clobber all floating-point registers on environment save and restore
27441 ; to ensure that the TOS value saved at fnstenv is valid after fldenv.
;; FNSTENV: stores into BLKmode memory operand 0 and clobbers all eight
;; XFmode stack registers ST0..ST7.  Enabled for TARGET_80387.
27442 (define_insn "fnstenv"
27443 [(set (match_operand:BLK 0 "memory_operand" "=m")
27444 (unspec_volatile:BLK [(const_int 0)] UNSPECV_FNSTENV))
27445 (clobber (reg:XF ST0_REG))
27446 (clobber (reg:XF ST1_REG))
27447 (clobber (reg:XF ST2_REG))
27448 (clobber (reg:XF ST3_REG))
27449 (clobber (reg:XF ST4_REG))
27450 (clobber (reg:XF ST5_REG))
27451 (clobber (reg:XF ST6_REG))
27452 (clobber (reg:XF ST7_REG))]
27453 "TARGET_80387"
27454 "fnstenv\t%0"
27455 [(set_attr "type" "other")
27456 (set_attr "memory" "store")
27457 (set (attr "length")
27458 (symbol_ref "ix86_attr_length_address_default (insn) + 2"))])
27459
;; FLDENV: reads BLKmode memory operand 0 and clobbers all eight XFmode
;; stack registers ST0..ST7.  Enabled for TARGET_80387.
27460 (define_insn "fldenv"
27461 [(unspec_volatile [(match_operand:BLK 0 "memory_operand" "m")]
27462 UNSPECV_FLDENV)
27463 (clobber (reg:XF ST0_REG))
27464 (clobber (reg:XF ST1_REG))
27465 (clobber (reg:XF ST2_REG))
27466 (clobber (reg:XF ST3_REG))
27467 (clobber (reg:XF ST4_REG))
27468 (clobber (reg:XF ST5_REG))
27469 (clobber (reg:XF ST6_REG))
27470 (clobber (reg:XF ST7_REG))]
27471 "TARGET_80387"
27472 "fldenv\t%0"
27473 [(set_attr "type" "other")
27474 (set_attr "memory" "load")
27475 (set (attr "length")
27476 (symbol_ref "ix86_attr_length_address_default (insn) + 2"))])
27477
;; FNSTSW: writes an HImode result to operand 0.  Two alternatives: a
;; register with constraint "a" (memory attribute "none") or a memory
;; destination (memory attribute "store").
27478 (define_insn "fnstsw"
27479 [(set (match_operand:HI 0 "nonimmediate_operand" "=a,m")
27480 (unspec_volatile:HI [(const_int 0)] UNSPECV_FNSTSW))]
27481 "TARGET_80387"
27482 "fnstsw\t%0"
27483 [(set_attr "type" "other,other")
27484 (set_attr "memory" "none,store")
27485 (set (attr "length")
27486 (symbol_ref "ix86_attr_length_address_default (insn) + 2"))])
27487
;; FNCLEX: no operands, no memory access; fixed length 2.  Enabled for
;; TARGET_80387.
27488 (define_insn "fnclex"
27489 [(unspec_volatile [(const_int 0)] UNSPECV_FNCLEX)]
27490 "TARGET_80387"
27491 "fnclex"
27492 [(set_attr "type" "other")
27493 (set_attr "memory" "none")
27494 (set_attr "length" "2")])
27495
27496 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
27497 ;;
27498 ;; LWP instructions
27499 ;;
27500 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
27501
;; LLWPCB: takes a single register input in a P-iterator mode.  Enabled for
;; TARGET_LWP; fixed length 5.
27502 (define_insn "@lwp_llwpcb<mode>"
27503 [(unspec_volatile [(match_operand:P 0 "register_operand" "r")]
27504 UNSPECV_LLWP_INTRINSIC)]
27505 "TARGET_LWP"
27506 "llwpcb\t%0"
27507 [(set_attr "type" "lwp")
27508 (set_attr "mode" "<MODE>")
27509 (set_attr "length" "5")])
27510
;; SLWPCB: produces a P-iterator-mode result in register operand 0; no
;; inputs.  Enabled for TARGET_LWP; fixed length 5.
27511 (define_insn "@lwp_slwpcb<mode>"
27512 [(set (match_operand:P 0 "register_operand" "=r")
27513 (unspec_volatile:P [(const_int 0)] UNSPECV_SLWP_INTRINSIC))]
27514 "TARGET_LWP"
27515 "slwpcb\t%0"
27516 [(set_attr "type" "lwp")
27517 (set_attr "mode" "<MODE>")
27518 (set_attr "length" "5")])
27519
;; LWPVAL: three inputs -- an SWI48 register (operand 0), an SImode
;; register-or-memory operand (operand 1) and an SImode constant
;; (operand 2).  No RTL destination.  Length is the default address
;; length + 9.
27520 (define_insn "@lwp_lwpval<mode>"
27521 [(unspec_volatile [(match_operand:SWI48 0 "register_operand" "r")
27522 (match_operand:SI 1 "nonimmediate_operand" "rm")
27523 (match_operand:SI 2 "const_int_operand")]
27524 UNSPECV_LWPVAL_INTRINSIC)]
27525 "TARGET_LWP"
27526 "lwpval\t{%2, %1, %0|%0, %1, %2}"
27527 [(set_attr "type" "lwp")
27528 (set_attr "mode" "<MODE>")
27529 (set (attr "length")
27530 (symbol_ref "ix86_attr_length_address_default (insn) + 9"))])
27531
;; LWPINS: same three inputs as lwpval, but the pattern sets FLAGS_REG in
;; CCCmode from the unspec.  Length is the default address length + 9.
27532 (define_insn "@lwp_lwpins<mode>"
27533 [(set (reg:CCC FLAGS_REG)
27534 (unspec_volatile:CCC [(match_operand:SWI48 0 "register_operand" "r")
27535 (match_operand:SI 1 "nonimmediate_operand" "rm")
27536 (match_operand:SI 2 "const_int_operand")]
27537 UNSPECV_LWPINS_INTRINSIC))]
27538 "TARGET_LWP"
27539 "lwpins\t{%2, %1, %0|%0, %1, %2}"
27540 [(set_attr "type" "lwp")
27541 (set_attr "mode" "<MODE>")
27542 (set (attr "length")
27543 (symbol_ref "ix86_attr_length_address_default (insn) + 9"))])
27544
;; Unspecs iterated by the "rd<fsgs>base<mode>" pattern.
27545 (define_int_iterator RDFSGSBASE
27546 [UNSPECV_RDFSBASE
27547 UNSPECV_RDGSBASE])
27548
;; Unspecs iterated by the "wr<fsgs>base<mode>" pattern.
27549 (define_int_iterator WRFSGSBASE
27550 [UNSPECV_WRFSBASE
27551 UNSPECV_WRGSBASE])
27552
;; Maps each RD/WR FS/GS-base unspec to the "fs" or "gs" infix used in the
;; pattern names and mnemonics.
27553 (define_int_attr fsgs
27554 [(UNSPECV_RDFSBASE "fs")
27555 (UNSPECV_RDGSBASE "gs")
27556 (UNSPECV_WRFSBASE "fs")
27557 (UNSPECV_WRGSBASE "gs")])
27558
;; rdfsbase / rdgsbase: produces an SWI48 result in register operand 0.
;; Enabled for TARGET_64BIT && TARGET_FSGSBASE.
27559 (define_insn "rd<fsgs>base<mode>"
27560 [(set (match_operand:SWI48 0 "register_operand" "=r")
27561 (unspec_volatile:SWI48 [(const_int 0)] RDFSGSBASE))]
27562 "TARGET_64BIT && TARGET_FSGSBASE"
27563 "rd<fsgs>base\t%0"
27564 [(set_attr "type" "other")
27565 (set_attr "prefix_0f" "1")
27566 (set_attr "prefix_rep" "1")])
27567
;; wrfsbase / wrgsbase: consumes an SWI48 register (operand 0); no RTL
;; destination.  Enabled for TARGET_64BIT && TARGET_FSGSBASE.
27568 (define_insn "wr<fsgs>base<mode>"
27569 [(unspec_volatile [(match_operand:SWI48 0 "register_operand" "r")]
27570 WRFSGSBASE)]
27571 "TARGET_64BIT && TARGET_FSGSBASE"
27572 "wr<fsgs>base\t%0"
27573 [(set_attr "type" "other")
27574 (set_attr "prefix_0f" "1")
27575 (set_attr "prefix_rep" "1")])
27576
;; PTWRITE: consumes an SWI48 register-or-memory operand; no RTL
;; destination.  Enabled for TARGET_PTWRITE.
27577 (define_insn "ptwrite<mode>"
27578 [(unspec_volatile [(match_operand:SWI48 0 "nonimmediate_operand" "rm")]
27579 UNSPECV_PTWRITE)]
27580 "TARGET_PTWRITE"
27581 "ptwrite\t%0"
27582 [(set_attr "type" "other")
27583 (set_attr "prefix_0f" "1")
27584 (set_attr "prefix_rep" "1")])
27585
;; RDRAND: sets an SWI248 register (operand 0) and, in the same insn,
;; FLAGS_REG in CCCmode.  Enabled for TARGET_RDRND.
27586 (define_insn "@rdrand<mode>"
27587 [(set (match_operand:SWI248 0 "register_operand" "=r")
27588 (unspec_volatile:SWI248 [(const_int 0)] UNSPECV_RDRAND))
27589 (set (reg:CCC FLAGS_REG)
27590 (unspec_volatile:CCC [(const_int 0)] UNSPECV_RDRAND))]
27591 "TARGET_RDRND"
27592 "rdrand\t%0"
27593 [(set_attr "type" "other")
27594 (set_attr "prefix_0f" "1")])
27595
;; RDSEED: sets an SWI248 register (operand 0) and, in the same insn,
;; FLAGS_REG in CCCmode.  Enabled for TARGET_RDSEED.
27596 (define_insn "@rdseed<mode>"
27597 [(set (match_operand:SWI248 0 "register_operand" "=r")
27598 (unspec_volatile:SWI248 [(const_int 0)] UNSPECV_RDSEED))
27599 (set (reg:CCC FLAGS_REG)
27600 (unspec_volatile:CCC [(const_int 0)] UNSPECV_RDSEED))]
27601 "TARGET_RDSEED"
27602 "rdseed\t%0"
27603 [(set_attr "type" "other")
27604 (set_attr "prefix_0f" "1")])
27605
;; Expander for "pause" with no enable condition.  It creates operand 0 as
;; a BLKmode MEM whose address is a Pmode SCRATCH, marks it volatile, and
;; emits a set of that MEM from an UNSPEC_PAUSE of itself.
27606 (define_expand "pause"
27607 [(set (match_dup 0)
27608 (unspec:BLK [(match_dup 0)] UNSPEC_PAUSE))]
27609 ""
27610 {
27611 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
27612 MEM_VOLATILE_P (operands[0]) = 1;
27613 })
27614
27615 ;; Use "rep; nop", instead of "pause", to support older assemblers.
27616 ;; They have the same encoding.
;; Matches a BLKmode operand set from an UNSPEC_PAUSE of itself, the shape
;; the "pause" expander emits.  Fixed length 2; memory attribute "unknown".
27617 (define_insn "*pause"
27618 [(set (match_operand:BLK 0)
27619 (unspec:BLK [(match_dup 0)] UNSPEC_PAUSE))]
27620 ""
27621 "rep%; nop"
27622 [(set_attr "length" "2")
27623 (set_attr "memory" "unknown")])
27624
27625 ;; CET instructions
;; RDSSP: SWI48 input operand 1 is tied to the output register ("0").
;; Enabled for TARGET_SHSTK or when flag_cf_protection includes CF_RETURN.
;; Fixed length 6.
27626 (define_insn "@rdssp<mode>"
27627 [(set (match_operand:SWI48 0 "register_operand" "=r")
27628 (unspec_volatile:SWI48 [(match_operand:SWI48 1 "register_operand" "0")]
27629 UNSPECV_NOP_RDSSP))]
27630 "TARGET_SHSTK || (flag_cf_protection & CF_RETURN)"
27631 "rdssp<mskmodesuffix>\t%0"
27632 [(set_attr "length" "6")
27633 (set_attr "type" "other")])
27634
;; INCSSP: consumes an SWI48 register (operand 0); no RTL destination.
;; Same enable condition as rdssp.  Fixed length 4.
27635 (define_insn "@incssp<mode>"
27636 [(unspec_volatile [(match_operand:SWI48 0 "register_operand" "r")]
27637 UNSPECV_INCSSP)]
27638 "TARGET_SHSTK || (flag_cf_protection & CF_RETURN)"
27639 "incssp<mskmodesuffix>\t%0"
27640 [(set_attr "length" "4")
27641 (set_attr "type" "other")])
27642
;; SAVEPREVSSP: no operands.  Enabled for TARGET_SHSTK; fixed length 5.
27643 (define_insn "saveprevssp"
27644 [(unspec_volatile [(const_int 0)] UNSPECV_SAVEPREVSSP)]
27645 "TARGET_SHSTK"
27646 "saveprevssp"
27647 [(set_attr "length" "5")
27648 (set_attr "type" "other")])
27649
;; RSTORSSP: takes a DImode memory operand.  Enabled for TARGET_SHSTK;
;; fixed length 5.
27650 (define_insn "rstorssp"
27651 [(unspec_volatile [(match_operand:DI 0 "memory_operand" "m")]
27652 UNSPECV_RSTORSSP)]
27653 "TARGET_SHSTK"
27654 "rstorssp\t%0"
27655 [(set_attr "length" "5")
27656 (set_attr "type" "other")])
27657
;; WRSS: operand 0 is an SWI48 register, operand 1 an SWI48 memory
;; operand.  The template prints them as "%0, %1".  Enabled for
;; TARGET_SHSTK; fixed length 3.
27658 (define_insn "@wrss<mode>"
27659 [(unspec_volatile [(match_operand:SWI48 0 "register_operand" "r")
27660 (match_operand:SWI48 1 "memory_operand" "m")]
27661 UNSPECV_WRSS)]
27662 "TARGET_SHSTK"
27663 "wrss<mskmodesuffix>\t%0, %1"
27664 [(set_attr "length" "3")
27665 (set_attr "type" "other")])
27666
;; WRUSS: same operand shape as wrss (SWI48 register, SWI48 memory).
;; Enabled for TARGET_SHSTK; fixed length 4.
27667 (define_insn "@wruss<mode>"
27668 [(unspec_volatile [(match_operand:SWI48 0 "register_operand" "r")
27669 (match_operand:SWI48 1 "memory_operand" "m")]
27670 UNSPECV_WRUSS)]
27671 "TARGET_SHSTK"
27672 "wruss<mskmodesuffix>\t%0, %1"
27673 [(set_attr "length" "4")
27674 (set_attr "type" "other")])
27675
;; SETSSBSY: no operands.  Enabled for TARGET_SHSTK; fixed length 4.
27676 (define_insn "setssbsy"
27677 [(unspec_volatile [(const_int 0)] UNSPECV_SETSSBSY)]
27678 "TARGET_SHSTK"
27679 "setssbsy"
27680 [(set_attr "length" "4")
27681 (set_attr "type" "other")])
27682
;; CLRSSBSY: takes a DImode memory operand.  Enabled for TARGET_SHSTK;
;; fixed length 4.
27683 (define_insn "clrssbsy"
27684 [(unspec_volatile [(match_operand:DI 0 "memory_operand" "m")]
27685 UNSPECV_CLRSSBSY)]
27686 "TARGET_SHSTK"
27687 "clrssbsy\t%0"
27688 [(set_attr "length" "4")
27689 (set_attr "type" "other")])
27690
;; ENDBR marker, enabled when flag_cf_protection includes CF_BRANCH.
;; Emits "endbr64" for TARGET_64BIT and "endbr32" otherwise.  Fixed
;; length 4.
27691 (define_insn "nop_endbr"
27692 [(unspec_volatile [(const_int 0)] UNSPECV_NOP_ENDBR)]
27693 "(flag_cf_protection & CF_BRANCH)"
27694 {
27695 return TARGET_64BIT ? "endbr64" : "endbr32";
27696 }
27697 [(set_attr "length" "4")
27698 (set_attr "length_immediate" "0")
27699 (set_attr "modrm" "0")])
27700
27701 ;; For RTM support
;; Expander for XBEGIN (TARGET_RTM).  Sequence emitted:
;;   1. move constm1_rtx into the SImode AX hard register;
;;   2. emit the xbegin_1 jump insn targeting a fresh label;
;;   3. emit that label immediately after the jump, with LABEL_NUSES = 1;
;;   4. copy the AX hard register into operand 0.
27702 (define_expand "xbegin"
27703 [(set (match_operand:SI 0 "register_operand")
27704 (unspec_volatile:SI [(const_int 0)] UNSPECV_XBEGIN))]
27705 "TARGET_RTM"
27706 {
27707 rtx_code_label *label = gen_label_rtx ();
27708
27709 /* xbegin is emitted as jump_insn, so reload won't be able
27710 to reload its operand. Force the value into AX hard register. */
27711 rtx ax_reg = gen_rtx_REG (SImode, AX_REG);
27712 emit_move_insn (ax_reg, constm1_rtx);
27713
27714 emit_jump_insn (gen_xbegin_1 (ax_reg, label));
27715
27716 emit_label (label);
27717 LABEL_NUSES (label) = 1;
27718
27719 emit_move_insn (operands[0], ax_reg);
27720
27721 DONE;
27722 })
27723
;; The XBEGIN jump insn proper.  Modelled as a conditional jump to label
;; operand 1 when UNSPEC_XBEGIN_ABORT is nonzero, in parallel with an
;; in-out update of SImode operand 0 (constraint "+a").  Fixed length 6.
27724 (define_insn "xbegin_1"
27725 [(set (pc)
27726 (if_then_else (ne (unspec [(const_int 0)] UNSPEC_XBEGIN_ABORT)
27727 (const_int 0))
27728 (label_ref (match_operand 1))
27729 (pc)))
27730 (set (match_operand:SI 0 "register_operand" "+a")
27731 (unspec_volatile:SI [(match_dup 0)] UNSPECV_XBEGIN))]
27732 "TARGET_RTM"
27733 "xbegin\t%l1"
27734 [(set_attr "type" "other")
27735 (set_attr "length" "6")])
27736
;; XEND: no operands.  Enabled for TARGET_RTM; fixed length 3.
27737 (define_insn "xend"
27738 [(unspec_volatile [(const_int 0)] UNSPECV_XEND)]
27739 "TARGET_RTM"
27740 "xend"
27741 [(set_attr "type" "other")
27742 (set_attr "length" "3")])
27743
;; XABORT: operand 0 must satisfy const_0_to_255_operand.  Enabled for
;; TARGET_RTM; fixed length 3.
27744 (define_insn "xabort"
27745 [(unspec_volatile [(match_operand:SI 0 "const_0_to_255_operand")]
27746 UNSPECV_XABORT)]
27747 "TARGET_RTM"
27748 "xabort\t%0"
27749 [(set_attr "type" "other")
27750 (set_attr "length" "3")])
27751
;; Expander for XTEST (TARGET_RTM).  Emits xtest_1, which sets the flags,
;; then calls ix86_expand_setcc to store the NE condition on the CCZmode
;; FLAGS_REG into QImode operand 0.
27752 (define_expand "xtest"
27753 [(set (match_operand:QI 0 "register_operand")
27754 (unspec_volatile:QI [(const_int 0)] UNSPECV_XTEST))]
27755 "TARGET_RTM"
27756 {
27757 emit_insn (gen_xtest_1 ());
27758
27759 ix86_expand_setcc (operands[0], NE,
27760 gen_rtx_REG (CCZmode, FLAGS_REG), const0_rtx);
27761 DONE;
27762 })
27763
;; The XTEST instruction itself: sets FLAGS_REG in CCZmode.  Fixed
;; length 3.
27764 (define_insn "xtest_1"
27765 [(set (reg:CCZ FLAGS_REG)
27766 (unspec_volatile:CCZ [(const_int 0)] UNSPECV_XTEST))]
27767 "TARGET_RTM"
27768 "xtest"
27769 [(set_attr "type" "other")
27770 (set_attr "length" "3")])
27771
;; CLWB: operand 0 is an address, printed with %a0.  Enabled for
;; TARGET_CLWB; memory attribute "unknown".
27772 (define_insn "clwb"
27773 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
27774 UNSPECV_CLWB)]
27775 "TARGET_CLWB"
27776 "clwb\t%a0"
27777 [(set_attr "type" "sse")
27778 (set_attr "atom_sse_attr" "fence")
27779 (set_attr "memory" "unknown")])
27780
;; CLFLUSHOPT: operand 0 is an address, printed with %a0.  Enabled for
;; TARGET_CLFLUSHOPT; memory attribute "unknown".
27781 (define_insn "clflushopt"
27782 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
27783 UNSPECV_CLFLUSHOPT)]
27784 "TARGET_CLFLUSHOPT"
27785 "clflushopt\t%a0"
27786 [(set_attr "type" "sse")
27787 (set_attr "atom_sse_attr" "fence")
27788 (set_attr "memory" "unknown")])
27789
27790 ;; MONITORX and MWAITX
;; MWAITX: three SImode register inputs with constraints "c", "a" and "b".
;; The template prints no operands.  Enabled for TARGET_MWAITX; fixed
;; length 3.
27791 (define_insn "mwaitx"
27792 [(unspec_volatile [(match_operand:SI 0 "register_operand" "c")
27793 (match_operand:SI 1 "register_operand" "a")
27794 (match_operand:SI 2 "register_operand" "b")]
27795 UNSPECV_MWAITX)]
27796 "TARGET_MWAITX"
27797 ;; 64bit version is "mwaitx %rax,%rcx,%rbx". But only lower 32bits are used.
27798 ;; Since 32bit register operands are implicitly zero extended to 64bit,
27799 ;; we only need to set up 32bit registers.
27800 "mwaitx"
27801 [(set_attr "length" "3")])
27802
;; MONITORX: operand 0 is a P-iterator-mode register (constraint "a");
;; operands 1 and 2 are SImode registers (constraints "c" and "d").  The
;; length is 3, plus 1 when Pmode differs from word_mode.
27803 (define_insn "@monitorx_<mode>"
27804 [(unspec_volatile [(match_operand:P 0 "register_operand" "a")
27805 (match_operand:SI 1 "register_operand" "c")
27806 (match_operand:SI 2 "register_operand" "d")]
27807 UNSPECV_MONITORX)]
27808 "TARGET_MWAITX"
27809 ;; 64bit version is "monitorx %rax,%rcx,%rdx". But only lower 32bits in
27810 ;; RCX and RDX are used. Since 32bit register operands are implicitly
27811 ;; zero extended to 64bit, we only need to set up 32bit registers.
27812 "%^monitorx"
27813 [(set (attr "length")
27814 (symbol_ref ("(Pmode != word_mode) + 3")))])
27815
27816 ;; CLZERO
;; CLZERO: takes one P-iterator-mode register input (constraint "a").  The
;; template prints no operands.  Enabled for TARGET_CLZERO; fixed
;; length 3; memory attribute "unknown".
27817 (define_insn "@clzero_<mode>"
27818 [(unspec_volatile [(match_operand:P 0 "register_operand" "a")]
27819 UNSPECV_CLZERO)]
27820 "TARGET_CLZERO"
27821 "clzero"
27822 [(set_attr "length" "3")
27823 (set_attr "memory" "unknown")])
27824
27825 ;; RDPKRU and WRPKRU
27826
;; Expander for RDPKRU (TARGET_PKU).  Operand 1 is const0_rtx forced into
;; an SImode register and used as the unspec input; operand 2 is a fresh
;; SImode pseudo that the parallel sets to (const_int 0).
27827 (define_expand "rdpkru"
27828 [(parallel
27829 [(set (match_operand:SI 0 "register_operand")
27830 (unspec_volatile:SI [(match_dup 1)] UNSPECV_PKU))
27831 (set (match_dup 2) (const_int 0))])]
27832 "TARGET_PKU"
27833 {
27834 operands[1] = force_reg (SImode, const0_rtx);
27835 operands[2] = gen_reg_rtx (SImode);
27836 })
27837
;; RDPKRU insn.  Result in operand 0 (constraint "a"), input operand 2
;; (constraint "c"), and operand 1 (constraint "d") is set to
;; (const_int 0).  Note the operand numbering here differs from the
;; expander's.
27838 (define_insn "*rdpkru"
27839 [(set (match_operand:SI 0 "register_operand" "=a")
27840 (unspec_volatile:SI [(match_operand:SI 2 "register_operand" "c")]
27841 UNSPECV_PKU))
27842 (set (match_operand:SI 1 "register_operand" "=d")
27843 (const_int 0))]
27844 "TARGET_PKU"
27845 "rdpkru"
27846 [(set_attr "type" "other")])
27847
;; Expander for WRPKRU (TARGET_PKU).  Operand 0 is the caller's SImode
;; value; operands 1 and 2 are each const0_rtx forced into an SImode
;; register.
27848 (define_expand "wrpkru"
27849 [(unspec_volatile:SI
27850 [(match_operand:SI 0 "register_operand")
27851 (match_dup 1) (match_dup 2)] UNSPECV_PKU)]
27852 "TARGET_PKU"
27853 {
27854 operands[1] = force_reg (SImode, const0_rtx);
27855 operands[2] = force_reg (SImode, const0_rtx);
27856 })
27857
;; WRPKRU insn.  Three SImode register inputs with constraints "a", "d"
;; and "c"; no RTL destination.
27858 (define_insn "*wrpkru"
27859 [(unspec_volatile:SI
27860 [(match_operand:SI 0 "register_operand" "a")
27861 (match_operand:SI 1 "register_operand" "d")
27862 (match_operand:SI 2 "register_operand" "c")] UNSPECV_PKU)]
27863 "TARGET_PKU"
27864 "wrpkru"
27865 [(set_attr "type" "other")])
27866
;; RDPID for !TARGET_64BIT: SImode result in register operand 0.
27867 (define_insn "rdpid"
27868 [(set (match_operand:SI 0 "register_operand" "=r")
27869 (unspec_volatile:SI [(const_int 0)] UNSPECV_RDPID))]
27870 "!TARGET_64BIT && TARGET_RDPID"
27871 "rdpid\t%0"
27872 [(set_attr "type" "other")])
27873
;; RDPID for TARGET_64BIT: DImode result in register operand 0.
27874 (define_insn "rdpid_rex64"
27875 [(set (match_operand:DI 0 "register_operand" "=r")
27876 (unspec_volatile:DI [(const_int 0)] UNSPECV_RDPID))]
27877 "TARGET_64BIT && TARGET_RDPID"
27878 "rdpid\t%0"
27879 [(set_attr "type" "other")])
27880
27881 ;; Intrinsics for > i486
27882
;; WBINVD: no operands and an empty enable condition, so the pattern is
;; always available.
27883 (define_insn "wbinvd"
27884 [(unspec_volatile [(const_int 0)] UNSPECV_WBINVD)]
27885 ""
27886 "wbinvd"
27887 [(set_attr "type" "other")])
27888
;; WBNOINVD: no operands.  Enabled for TARGET_WBNOINVD.
27889 (define_insn "wbnoinvd"
27890 [(unspec_volatile [(const_int 0)] UNSPECV_WBNOINVD)]
27891 "TARGET_WBNOINVD"
27892 "wbnoinvd"
27893 [(set_attr "type" "other")])
27894
27895 ;; MOVDIRI and MOVDIR64B
27896
;; MOVDIRI: stores SWI48 register operand 1 into SWI48 memory operand 0,
;; modelled as UNSPEC_MOVDIRI.  Enabled for TARGET_MOVDIRI.
27897 (define_insn "movdiri<mode>"
27898 [(set (match_operand:SWI48 0 "memory_operand" "=m")
27899 (unspec:SWI48 [(match_operand:SWI48 1 "register_operand" "r")]
27900 UNSPEC_MOVDIRI))]
27901 "TARGET_MOVDIRI"
27902 "movdiri\t{%1, %0|%0, %1}"
27903 [(set_attr "type" "other")])
27904
;; MOVDIR64B: the destination is an XImode MEM addressed by P-mode
;; register operand 0; the source is XImode memory operand 1.  Enabled for
;; TARGET_MOVDIR64B.
27905 (define_insn "@movdir64b_<mode>"
27906 [(set (mem:XI (match_operand:P 0 "register_operand" "r"))
27907 (unspec:XI [(match_operand:XI 1 "memory_operand" "m")]
27908 UNSPEC_MOVDIR64B))]
27909 "TARGET_MOVDIR64B"
27910 "movdir64b\t{%1, %0|%0, %1}"
27911 [(set_attr "type" "other")])
27912
27913 ;; TSXLDTRK
;; TSXLDTRK iterator, its unspec -> mnemonic attribute, and the single
;; pattern generated from them ("xsusldtrk" and "xresldtrk").  No operands;
;; enabled for TARGET_TSXLDTRK; fixed length 4.
27914 (define_int_iterator TSXLDTRK [UNSPECV_XSUSLDTRK UNSPECV_XRESLDTRK])
27915 (define_int_attr tsxldtrk [(UNSPECV_XSUSLDTRK "xsusldtrk")
27916 (UNSPECV_XRESLDTRK "xresldtrk")])
27917 (define_insn "<tsxldtrk>"
27918 [(unspec_volatile [(const_int 0)] TSXLDTRK)]
27919 "TARGET_TSXLDTRK"
27920 "<tsxldtrk>"
27921 [(set_attr "type" "other")
27922 (set_attr "length" "4")])
27923
27924 ;; ENQCMD and ENQCMDS
27925
;; ENQCMD iterator and the suffix attribute ("" or "s") used to build the
;; pattern name and mnemonic.
27926 (define_int_iterator ENQCMD [UNSPECV_ENQCMD UNSPECV_ENQCMDS])
27927 (define_int_attr enqcmd_sfx [(UNSPECV_ENQCMD "") (UNSPECV_ENQCMDS "s")])
27928
;; ENQCMD / ENQCMDS: inputs are a P-mode register (operand 0) and an XImode
;; memory operand (operand 1).  The pattern sets FLAGS_REG in CCZmode.
;; Enabled for TARGET_ENQCMD.
27929 (define_insn "@enqcmd<enqcmd_sfx>_<mode>"
27930 [(set (reg:CCZ FLAGS_REG)
27931 (unspec_volatile:CCZ [(match_operand:P 0 "register_operand" "r")
27932 (match_operand:XI 1 "memory_operand" "m")]
27933 ENQCMD))]
27934 "TARGET_ENQCMD"
27935 "enqcmd<enqcmd_sfx>\t{%1, %0|%0, %1}"
27936 [(set_attr "type" "other")])
27937
27938 ;; UINTR
;; UINTR iterator and its unspec -> mnemonic attribute ("clui", "stui").
27939 (define_int_iterator UINTR [UNSPECV_CLUI UNSPECV_STUI])
27940 (define_int_attr uintr [(UNSPECV_CLUI "clui") (UNSPECV_STUI "stui")])
27941
;; CLUI / STUI: no operands.  Enabled for TARGET_UINTR && TARGET_64BIT;
;; fixed length 4.
27942 (define_insn "<uintr>"
27943 [(unspec_volatile [(const_int 0)] UINTR)]
27944 "TARGET_UINTR && TARGET_64BIT"
27945 "<uintr>"
27946 [(set_attr "type" "other")
27947 (set_attr "length" "4")])
27948
;; TESTUI: no operands; sets FLAGS_REG in CCCmode.  Enabled for
;; TARGET_UINTR && TARGET_64BIT; fixed length 4.
27949 (define_insn "testui"
27950 [(set (reg:CCC FLAGS_REG)
27951 (unspec_volatile:CCC [(const_int 0)] UNSPECV_TESTUI))]
27952 "TARGET_UINTR && TARGET_64BIT"
27953 "testui"
27954 [(set_attr "type" "other")
27955 (set_attr "length" "4")])
27956
;; SENDUIPI: takes one DImode register input.  Enabled for
;; TARGET_UINTR && TARGET_64BIT; fixed length 4.
27957 (define_insn "senduipi"
27958 [(unspec_volatile
27959 [(match_operand:DI 0 "register_operand" "r")]
27960 UNSPECV_SENDUIPI)]
27961 "TARGET_UINTR && TARGET_64BIT"
27962 "senduipi\t%0"
27963 [(set_attr "type" "other")
27964 (set_attr "length" "4")])
27965
27966 ;; WAITPKG
27967
;; UMWAIT for !TARGET_64BIT.  Inputs: SImode register operand 0 (the only
;; operand printed) and DImode operand 1 with constraint "A".  Sets
;; FLAGS_REG in CCCmode.  Fixed length 3.
27968 (define_insn "umwait"
27969 [(set (reg:CCC FLAGS_REG)
27970 (unspec_volatile:CCC [(match_operand:SI 0 "register_operand" "r")
27971 (match_operand:DI 1 "register_operand" "A")]
27972 UNSPECV_UMWAIT))]
27973 "!TARGET_64BIT && TARGET_WAITPKG"
27974 "umwait\t%0"
27975 [(set_attr "length" "3")])
27976
;; UMWAIT for TARGET_64BIT.  The second input is split into two SImode
;; operands with constraints "a" and "d"; only operand 0 is printed.  Sets
;; FLAGS_REG in CCCmode.  Fixed length 3.
27977 (define_insn "umwait_rex64"
27978 [(set (reg:CCC FLAGS_REG)
27979 (unspec_volatile:CCC [(match_operand:SI 0 "register_operand" "r")
27980 (match_operand:SI 1 "register_operand" "a")
27981 (match_operand:SI 2 "register_operand" "d")]
27982 UNSPECV_UMWAIT))]
27983 "TARGET_64BIT && TARGET_WAITPKG"
27984 "umwait\t%0"
27985 [(set_attr "length" "3")])
27986
;; UMONITOR: takes one P-iterator-mode register input.  Enabled for
;; TARGET_WAITPKG.  The length is 3, plus 1 when Pmode differs from
;; word_mode.
27987 (define_insn "@umonitor_<mode>"
27988 [(unspec_volatile [(match_operand:P 0 "register_operand" "r")]
27989 UNSPECV_UMONITOR)]
27990 "TARGET_WAITPKG"
27991 "umonitor\t%0"
27992 [(set (attr "length")
27993 (symbol_ref ("(Pmode != word_mode) + 3")))])
27994
;; TPAUSE for !TARGET_64BIT.  Inputs: SImode register operand 0 (the only
;; operand printed) and DImode operand 1 with constraint "A".  Sets
;; FLAGS_REG in CCCmode.  Fixed length 3.
27995 (define_insn "tpause"
27996 [(set (reg:CCC FLAGS_REG)
27997 (unspec_volatile:CCC [(match_operand:SI 0 "register_operand" "r")
27998 (match_operand:DI 1 "register_operand" "A")]
27999 UNSPECV_TPAUSE))]
28000 "!TARGET_64BIT && TARGET_WAITPKG"
28001 "tpause\t%0"
28002 [(set_attr "length" "3")])
28003
;; TPAUSE for TARGET_64BIT.  The second input is split into two SImode
;; operands with constraints "a" and "d"; only operand 0 is printed.  Sets
;; FLAGS_REG in CCCmode.  Fixed length 3.
28004 (define_insn "tpause_rex64"
28005 [(set (reg:CCC FLAGS_REG)
28006 (unspec_volatile:CCC [(match_operand:SI 0 "register_operand" "r")
28007 (match_operand:SI 1 "register_operand" "a")
28008 (match_operand:SI 2 "register_operand" "d")]
28009 UNSPECV_TPAUSE))]
28010 "TARGET_64BIT && TARGET_WAITPKG"
28011 "tpause\t%0"
28012 [(set_attr "length" "3")])
28013
;; CLDEMOTE: operand 0 is an address, printed with %a0.  Enabled for
;; TARGET_CLDEMOTE; memory attribute "unknown".
28014 (define_insn "cldemote"
28015 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
28016 UNSPECV_CLDEMOTE)]
28017 "TARGET_CLDEMOTE"
28018 "cldemote\t%a0"
28019 [(set_attr "type" "other")
28020 (set_attr "memory" "unknown")])
28021
;; Speculation barrier: emits "lfence".  The enable condition is empty, so
;; the pattern is always available.  Fixed length 3.
28022 (define_insn "speculation_barrier"
28023 [(unspec_volatile [(const_int 0)] UNSPECV_SPECULATION_BARRIER)]
28024 ""
28025 "lfence"
28026 [(set_attr "type" "other")
28027 (set_attr "length" "3")])
28028
;; SERIALIZE: no operands.  Enabled for TARGET_SERIALIZE; fixed length 3.
28029 (define_insn "serialize"
28030 [(unspec_volatile [(const_int 0)] UNSPECV_SERIALIZE)]
28031 "TARGET_SERIALIZE"
28032 "serialize"
28033 [(set_attr "type" "other")
28034 (set_attr "length" "3")])
28035
;; Patchable area placeholder with two constant operands.  The output code
;; calls ix86_output_patchable_area with INTVAL (operands[0]) and the
;; boolean (operands[1] != 0), then returns an empty template.  The
;; "length" attribute is INTVAL (operands[0]).
28036 (define_insn "patchable_area"
28037 [(unspec_volatile [(match_operand 0 "const_int_operand")
28038 (match_operand 1 "const_int_operand")]
28039 UNSPECV_PATCHABLE_AREA)]
28040 ""
28041 {
28042 ix86_output_patchable_area (INTVAL (operands[0]),
28043 INTVAL (operands[1]) != 0);
28044 return "";
28045 }
28046 [(set (attr "length") (symbol_ref "INTVAL (operands[0])"))
28047 (set_attr "length_immediate" "0")
28048 (set_attr "modrm" "0")])
28049
;; HRESET: takes one SImode register input with constraint "a".  The
;; template prints a literal 0 rather than the operand.  Enabled for
;; TARGET_HRESET; fixed length 4.
28050 (define_insn "hreset"
28051 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")]
28052 UNSPECV_HRESET)]
28053 "TARGET_HRESET"
28054 "hreset\t{$0|0}"
28055 [(set_attr "type" "other")
28056 (set_attr "length" "4")])
28057
28058 ;; Spaceship optimization
;; Three-way floating-point comparison expander for the MODEF modes.
;; Operand 0 is the SImode result; operands 1 and 2 are the values compared.
;; Requires x87 or SSE math for the mode, plus either TARGET_CMOVE or
;; usable SAHF.  All work is delegated to ix86_expand_fp_spaceship.
28059 (define_expand "spaceship<mode>3"
28060 [(match_operand:SI 0 "register_operand")
28061 (match_operand:MODEF 1 "cmp_fp_expander_operand")
28062 (match_operand:MODEF 2 "cmp_fp_expander_operand")]
28063 "(TARGET_80387 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH))
28064 && (TARGET_CMOVE || (TARGET_SAHF && TARGET_USE_SAHF))"
28065 {
28066 ix86_expand_fp_spaceship (operands[0], operands[1], operands[2]);
28067 DONE;
28068 })
28069
;; XFmode variant of the floating-point three-way comparison expander.
;;   operand 0: SImode register receiving the result.
;;   operands 1, 2: XFmode values to compare; memory operands are not
;;                  accepted (predicate nonmemory_operand).
;; Requires the x87 (TARGET_80387) and either TARGET_CMOVE or usable SAHF
;; (TARGET_SAHF && TARGET_USE_SAHF).  Code generation is delegated to
;; ix86_expand_fp_spaceship, the same helper the MODEF expander uses.
28070 (define_expand "spaceshipxf3"
28071 [(match_operand:SI 0 "register_operand")
28072 (match_operand:XF 1 "nonmemory_operand")
28073 (match_operand:XF 2 "nonmemory_operand")]
28074 "TARGET_80387 && (TARGET_CMOVE || (TARGET_SAHF && TARGET_USE_SAHF))"
28075 {
28076 ix86_expand_fp_spaceship (operands[0], operands[1], operands[2]);
28077 DONE;
28078 })
28079
28080 ;; Defined because the generic expand_builtin_issignaling for XFmode
28081 ;; only tests for sNaNs, but i387 treats also pseudo numbers as always
28082 ;; signaling.
;;   operand 0: SImode register receiving the 0/1 result.
;;   operand 1: the XFmode value to classify (any general operand).
;; The value is examined purely with integer operations on its memory
;; image: a 16-bit piece at byte offset 8 and two 32-bit pieces at byte
;; offsets 4 and 0.  The expander is unconditional (empty condition).
28083 (define_expand "issignalingxf2"
28084 [(match_operand:SI 0 "register_operand")
28085 (match_operand:XF 1 "general_operand")]
28086 ""
28087 {
28088 rtx temp = operands[1];
/* The pieces are read through memory, so a non-memory operand is first
   copied into a stack temporary.  */
28089 if (!MEM_P (temp))
28090 {
28091 rtx mem = assign_stack_temp (XFmode, GET_MODE_SIZE (XFmode));
28092 emit_move_insn (mem, temp);
28093 temp = mem;
28094 }
/* ex: HImode at byte 8; hi: SImode at byte 4; lo: SImode at byte 0.
   NOTE(review): in the x87 extended layout these are presumably the
   sign/exponent word and the high/low halves of the significand --
   confirm.  */
28095 rtx ex = adjust_address (temp, HImode, 8);
28096 rtx hi = adjust_address (temp, SImode, 4);
28097 rtx lo = adjust_address (temp, SImode, 0);
/* Constants named in the formula below: val is all-ones shifted left by
   30, mask is 0x7fff, bit is 1 << 30.  */
28098 rtx val = GEN_INT (HOST_WIDE_INT_M1U << 30);
28099 rtx mask = GEN_INT (0x7fff);
28100 rtx bit = GEN_INT (HOST_WIDE_INT_1U << 30);
28101 /* Expand to:
28102 ((ex & mask) && (int) hi >= 0)
28103 || ((ex & mask) == mask && ((hi ^ bit) | ((lo | -lo) >> 31)) > val). */
/* lo = (lo | -lo) >> 31 with an unsigned shift: 1 if lo was nonzero,
   else 0.  */
28104 rtx nlo = expand_unop (SImode, neg_optab, lo, NULL_RTX, 0);
28105 lo = expand_binop (SImode, ior_optab, lo, nlo,
28106 NULL_RTX, 1, OPTAB_LIB_WIDEN);
28107 lo = expand_shift (RSHIFT_EXPR, SImode, lo, 31, NULL_RTX, 1);
/* temp = ((hi ^ bit) | lo) > val, compared unsigned (GTU).  */
28108 temp = expand_binop (SImode, xor_optab, hi, bit,
28109 NULL_RTX, 1, OPTAB_LIB_WIDEN);
28110 temp = expand_binop (SImode, ior_optab, temp, lo,
28111 NULL_RTX, 1, OPTAB_LIB_WIDEN);
28112 temp = emit_store_flag_force (gen_reg_rtx (SImode), GTU, temp, val,
28113 SImode, 1, 1);
/* ex = ex & mask, computed in HImode.  */
28114 ex = expand_binop (HImode, and_optab, ex, mask,
28115 NULL_RTX, 1, OPTAB_LIB_WIDEN);
/* temp2 = (ex & mask) != 0.
   NOTE(review): this comparison passes SImode as its mode although ex
   is HImode, while the EQ comparison just below passes HImode --
   confirm the mismatch is intentional.  */
28116 rtx temp2 = emit_store_flag_force (gen_reg_rtx (SImode), NE,
28117 ex, const0_rtx, SImode, 1, 1);
/* ex = (ex & mask) == mask; from here on ex is a 0/1 SImode flag.  */
28118 ex = emit_store_flag_force (gen_reg_rtx (SImode), EQ,
28119 ex, mask, HImode, 1, 1);
/* Second disjunct of the formula: exponent test AND significand test.  */
28120 temp = expand_binop (SImode, and_optab, temp, ex,
28121 NULL_RTX, 1, OPTAB_LIB_WIDEN);
/* temp3 = (int) hi >= 0, a signed comparison (unsignedp argument is 0).  */
28122 rtx temp3 = emit_store_flag_force (gen_reg_rtx (SImode), GE,
28123 hi, const0_rtx, SImode, 0, 1);
/* First disjunct of the formula: (ex & mask) != 0 AND (int) hi >= 0.  */
28124 temp2 = expand_binop (SImode, and_optab, temp2, temp3,
28125 NULL_RTX, 1, OPTAB_LIB_WIDEN);
/* Final result: OR of the two disjuncts, stored into operand 0.  */
28126 temp = expand_binop (SImode, ior_optab, temp, temp2,
28127 NULL_RTX, 1, OPTAB_LIB_WIDEN);
28128 emit_move_insn (operands[0], temp);
28129 DONE;
28130 })
28131
;; URDMSR instruction pattern, enabled only when both TARGET_USER_MSR and
;; TARGET_64BIT hold.
;;   operand 0: DImode destination register ("=r").
;;   operand 1: DImode register or immediate accepted by
;;              x86_64_szext_nonmemory_operand (constraints "reZ"); it is
;;              the sole input of the volatile unspec UNSPECV_URDMSR.
;;              NOTE(review): presumably the MSR selector -- confirm.
;; Printed source-first in AT&T syntax and destination-first in Intel
;; syntax.  The "prefix" attribute is "vex".
28132 (define_insn "urdmsr"
28133 [(set (match_operand:DI 0 "register_operand" "=r")
28134 (unspec_volatile:DI
28135 [(match_operand:DI 1 "x86_64_szext_nonmemory_operand" "reZ")]
28136 UNSPECV_URDMSR))]
28137 "TARGET_USER_MSR && TARGET_64BIT"
28138 "urdmsr\t{%1, %0|%0, %1}"
28139 [(set_attr "prefix" "vex")
28140 (set_attr "type" "other")])
28141
;; UWRMSR instruction pattern, enabled only when both TARGET_USER_MSR and
;; TARGET_64BIT hold.  The pattern has no SET: both operands are inputs
;; of the volatile unspec UNSPECV_UWRMSR.
;;   operand 0: DImode register or immediate accepted by
;;              x86_64_szext_nonmemory_operand (constraints "reZ").
;;   operand 1: DImode register ("r").
;; NOTE(review): presumably operand 0 selects the MSR and operand 1 holds
;; the value to write -- confirm against the instruction reference.
;; The template prints operand 1 first in AT&T syntax and operand 0 first
;; in Intel syntax.  The "prefix" attribute is "vex".
28142 (define_insn "uwrmsr"
28143 [(unspec_volatile
28144 [(match_operand:DI 0 "x86_64_szext_nonmemory_operand" "reZ")
28145 (match_operand:DI 1 "register_operand" "r")]
28146 UNSPECV_UWRMSR)]
28147 "TARGET_USER_MSR && TARGET_64BIT"
28148 "uwrmsr\t{%1, %0|%0, %1}"
28149 [(set_attr "prefix" "vex")
28150 (set_attr "type" "other")])
28151
28152 (include "mmx.md")
28153 (include "sse.md")
28154 (include "sync.md")