1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20 Boston, MA 02110-1301, USA. */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
44 #include "basic-block.h"
47 #include "target-def.h"
48 #include "langhooks.h"
50 #include "tree-gimple.h"
52 #include "tm-constrs.h"
/* Default stack-probe threshold used when the target configuration does not
   define one; -1 here presumably means "no checking" — NOTE(review): confirm
   against the users of CHECK_STACK_LIMIT.  */
54 #ifndef CHECK_STACK_LIMIT
55 #define CHECK_STACK_LIMIT (-1)
58 /* Return index of given mode in mult and division cost tables. */
59 #define MODE_INDEX(mode) \
60 ((mode) == QImode ? 0 \
61 : (mode) == HImode ? 1 \
62 : (mode) == SImode ? 2 \
63 : (mode) == DImode ? 3 \
66 /* Processor costs (relative to an add) */
67 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
68 #define COSTS_N_BYTES(N) ((N) * 2)
/* Cost table used when optimizing for code size: entries are byte-length
   estimates built with COSTS_N_BYTES rather than cycle latencies.  */
71 struct processor_costs size_cost
= { /* costs for tuning for size */
72 COSTS_N_BYTES (2), /* cost of an add instruction */
73 COSTS_N_BYTES (3), /* cost of a lea instruction */
74 COSTS_N_BYTES (2), /* variable shift costs */
75 COSTS_N_BYTES (3), /* constant shift costs */
76 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
77 COSTS_N_BYTES (3), /* HI */
78 COSTS_N_BYTES (3), /* SI */
79 COSTS_N_BYTES (3), /* DI */
80 COSTS_N_BYTES (5)}, /* other */
81 0, /* cost of multiply per each bit set */
82 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
83 COSTS_N_BYTES (3), /* HI */
84 COSTS_N_BYTES (3), /* SI */
85 COSTS_N_BYTES (3), /* DI */
86 COSTS_N_BYTES (5)}, /* other */
87 COSTS_N_BYTES (3), /* cost of movsx */
88 COSTS_N_BYTES (3), /* cost of movzx */
91 2, /* cost for loading QImode using movzbl */
92 {2, 2, 2}, /* cost of loading integer registers
93 in QImode, HImode and SImode.
94 Relative to reg-reg move (2). */
95 {2, 2, 2}, /* cost of storing integer registers */
96 2, /* cost of reg,reg fld/fst */
97 {2, 2, 2}, /* cost of loading fp registers
98 in SFmode, DFmode and XFmode */
99 {2, 2, 2}, /* cost of storing fp registers
100 in SFmode, DFmode and XFmode */
101 3, /* cost of moving MMX register */
102 {3, 3}, /* cost of loading MMX registers
103 in SImode and DImode */
104 {3, 3}, /* cost of storing MMX registers
105 in SImode and DImode */
106 3, /* cost of moving SSE register */
107 {3, 3, 3}, /* cost of loading SSE registers
108 in SImode, DImode and TImode */
109 {3, 3, 3}, /* cost of storing SSE registers
110 in SImode, DImode and TImode */
111 3, /* MMX or SSE register to integer */
112 0, /* size of prefetch block */
113 0, /* number of parallel prefetches */
115 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
116 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
117 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
118 COSTS_N_BYTES (2), /* cost of FABS instruction. */
119 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
120 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
123 /* Processor costs (relative to an add) */
/* Cost table for the original Intel 386.  */
125 struct processor_costs i386_cost
= { /* 386 specific costs */
126 COSTS_N_INSNS (1), /* cost of an add instruction */
127 COSTS_N_INSNS (1), /* cost of a lea instruction */
128 COSTS_N_INSNS (3), /* variable shift costs */
129 COSTS_N_INSNS (2), /* constant shift costs */
130 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
131 COSTS_N_INSNS (6), /* HI */
132 COSTS_N_INSNS (6), /* SI */
133 COSTS_N_INSNS (6), /* DI */
134 COSTS_N_INSNS (6)}, /* other */
135 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
136 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
137 COSTS_N_INSNS (23), /* HI */
138 COSTS_N_INSNS (23), /* SI */
139 COSTS_N_INSNS (23), /* DI */
140 COSTS_N_INSNS (23)}, /* other */
141 COSTS_N_INSNS (3), /* cost of movsx */
142 COSTS_N_INSNS (2), /* cost of movzx */
143 15, /* "large" insn */
145 4, /* cost for loading QImode using movzbl */
146 {2, 4, 2}, /* cost of loading integer registers
147 in QImode, HImode and SImode.
148 Relative to reg-reg move (2). */
149 {2, 4, 2}, /* cost of storing integer registers */
150 2, /* cost of reg,reg fld/fst */
151 {8, 8, 8}, /* cost of loading fp registers
152 in SFmode, DFmode and XFmode */
153 {8, 8, 8}, /* cost of storing fp registers
154 in SFmode, DFmode and XFmode */
155 2, /* cost of moving MMX register */
156 {4, 8}, /* cost of loading MMX registers
157 in SImode and DImode */
158 {4, 8}, /* cost of storing MMX registers
159 in SImode and DImode */
160 2, /* cost of moving SSE register */
161 {4, 8, 16}, /* cost of loading SSE registers
162 in SImode, DImode and TImode */
163 {4, 8, 16}, /* cost of storing SSE registers
164 in SImode, DImode and TImode */
165 3, /* MMX or SSE register to integer */
166 0, /* size of prefetch block */
167 0, /* number of parallel prefetches */
169 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
170 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
171 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
172 COSTS_N_INSNS (22), /* cost of FABS instruction. */
173 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
174 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
/* Cost table for the Intel 486.  */
178 struct processor_costs i486_cost
= { /* 486 specific costs */
179 COSTS_N_INSNS (1), /* cost of an add instruction */
180 COSTS_N_INSNS (1), /* cost of a lea instruction */
181 COSTS_N_INSNS (3), /* variable shift costs */
182 COSTS_N_INSNS (2), /* constant shift costs */
183 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
184 COSTS_N_INSNS (12), /* HI */
185 COSTS_N_INSNS (12), /* SI */
186 COSTS_N_INSNS (12), /* DI */
187 COSTS_N_INSNS (12)}, /* other */
188 1, /* cost of multiply per each bit set */
189 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
190 COSTS_N_INSNS (40), /* HI */
191 COSTS_N_INSNS (40), /* SI */
192 COSTS_N_INSNS (40), /* DI */
193 COSTS_N_INSNS (40)}, /* other */
194 COSTS_N_INSNS (3), /* cost of movsx */
195 COSTS_N_INSNS (2), /* cost of movzx */
196 15, /* "large" insn */
198 4, /* cost for loading QImode using movzbl */
199 {2, 4, 2}, /* cost of loading integer registers
200 in QImode, HImode and SImode.
201 Relative to reg-reg move (2). */
202 {2, 4, 2}, /* cost of storing integer registers */
203 2, /* cost of reg,reg fld/fst */
204 {8, 8, 8}, /* cost of loading fp registers
205 in SFmode, DFmode and XFmode */
206 {8, 8, 8}, /* cost of storing fp registers
207 in SFmode, DFmode and XFmode */
208 2, /* cost of moving MMX register */
209 {4, 8}, /* cost of loading MMX registers
210 in SImode and DImode */
211 {4, 8}, /* cost of storing MMX registers
212 in SImode and DImode */
213 2, /* cost of moving SSE register */
214 {4, 8, 16}, /* cost of loading SSE registers
215 in SImode, DImode and TImode */
216 {4, 8, 16}, /* cost of storing SSE registers
217 in SImode, DImode and TImode */
218 3, /* MMX or SSE register to integer */
219 0, /* size of prefetch block */
220 0, /* number of parallel prefetches */
222 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
223 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
224 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
225 COSTS_N_INSNS (3), /* cost of FABS instruction. */
226 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
227 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
/* Cost table for the Intel Pentium.  */
231 struct processor_costs pentium_cost
= {
232 COSTS_N_INSNS (1), /* cost of an add instruction */
233 COSTS_N_INSNS (1), /* cost of a lea instruction */
234 COSTS_N_INSNS (4), /* variable shift costs */
235 COSTS_N_INSNS (1), /* constant shift costs */
236 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
237 COSTS_N_INSNS (11), /* HI */
238 COSTS_N_INSNS (11), /* SI */
239 COSTS_N_INSNS (11), /* DI */
240 COSTS_N_INSNS (11)}, /* other */
241 0, /* cost of multiply per each bit set */
242 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
243 COSTS_N_INSNS (25), /* HI */
244 COSTS_N_INSNS (25), /* SI */
245 COSTS_N_INSNS (25), /* DI */
246 COSTS_N_INSNS (25)}, /* other */
247 COSTS_N_INSNS (3), /* cost of movsx */
248 COSTS_N_INSNS (2), /* cost of movzx */
249 8, /* "large" insn */
251 6, /* cost for loading QImode using movzbl */
252 {2, 4, 2}, /* cost of loading integer registers
253 in QImode, HImode and SImode.
254 Relative to reg-reg move (2). */
255 {2, 4, 2}, /* cost of storing integer registers */
256 2, /* cost of reg,reg fld/fst */
257 {2, 2, 6}, /* cost of loading fp registers
258 in SFmode, DFmode and XFmode */
259 {4, 4, 6}, /* cost of storing fp registers
260 in SFmode, DFmode and XFmode */
261 8, /* cost of moving MMX register */
262 {8, 8}, /* cost of loading MMX registers
263 in SImode and DImode */
264 {8, 8}, /* cost of storing MMX registers
265 in SImode and DImode */
266 2, /* cost of moving SSE register */
267 {4, 8, 16}, /* cost of loading SSE registers
268 in SImode, DImode and TImode */
269 {4, 8, 16}, /* cost of storing SSE registers
270 in SImode, DImode and TImode */
271 3, /* MMX or SSE register to integer */
272 0, /* size of prefetch block */
273 0, /* number of parallel prefetches */
275 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
276 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
277 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
278 COSTS_N_INSNS (1), /* cost of FABS instruction. */
279 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
280 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
/* Cost table for the Intel Pentium Pro family.  */
284 struct processor_costs pentiumpro_cost
= {
285 COSTS_N_INSNS (1), /* cost of an add instruction */
286 COSTS_N_INSNS (1), /* cost of a lea instruction */
287 COSTS_N_INSNS (1), /* variable shift costs */
288 COSTS_N_INSNS (1), /* constant shift costs */
289 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
290 COSTS_N_INSNS (4), /* HI */
291 COSTS_N_INSNS (4), /* SI */
292 COSTS_N_INSNS (4), /* DI */
293 COSTS_N_INSNS (4)}, /* other */
294 0, /* cost of multiply per each bit set */
295 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
296 COSTS_N_INSNS (17), /* HI */
297 COSTS_N_INSNS (17), /* SI */
298 COSTS_N_INSNS (17), /* DI */
299 COSTS_N_INSNS (17)}, /* other */
300 COSTS_N_INSNS (1), /* cost of movsx */
301 COSTS_N_INSNS (1), /* cost of movzx */
302 8, /* "large" insn */
304 2, /* cost for loading QImode using movzbl */
305 {4, 4, 4}, /* cost of loading integer registers
306 in QImode, HImode and SImode.
307 Relative to reg-reg move (2). */
308 {2, 2, 2}, /* cost of storing integer registers */
309 2, /* cost of reg,reg fld/fst */
310 {2, 2, 6}, /* cost of loading fp registers
311 in SFmode, DFmode and XFmode */
312 {4, 4, 6}, /* cost of storing fp registers
313 in SFmode, DFmode and XFmode */
314 2, /* cost of moving MMX register */
315 {2, 2}, /* cost of loading MMX registers
316 in SImode and DImode */
317 {2, 2}, /* cost of storing MMX registers
318 in SImode and DImode */
319 2, /* cost of moving SSE register */
320 {2, 2, 8}, /* cost of loading SSE registers
321 in SImode, DImode and TImode */
322 {2, 2, 8}, /* cost of storing SSE registers
323 in SImode, DImode and TImode */
324 3, /* MMX or SSE register to integer */
325 32, /* size of prefetch block */
326 6, /* number of parallel prefetches */
328 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
329 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
330 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
331 COSTS_N_INSNS (2), /* cost of FABS instruction. */
332 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
333 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
/* Cost table for the AMD K6.  */
337 struct processor_costs k6_cost
= {
338 COSTS_N_INSNS (1), /* cost of an add instruction */
339 COSTS_N_INSNS (2), /* cost of a lea instruction */
340 COSTS_N_INSNS (1), /* variable shift costs */
341 COSTS_N_INSNS (1), /* constant shift costs */
342 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
343 COSTS_N_INSNS (3), /* HI */
344 COSTS_N_INSNS (3), /* SI */
345 COSTS_N_INSNS (3), /* DI */
346 COSTS_N_INSNS (3)}, /* other */
347 0, /* cost of multiply per each bit set */
348 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
349 COSTS_N_INSNS (18), /* HI */
350 COSTS_N_INSNS (18), /* SI */
351 COSTS_N_INSNS (18), /* DI */
352 COSTS_N_INSNS (18)}, /* other */
353 COSTS_N_INSNS (2), /* cost of movsx */
354 COSTS_N_INSNS (2), /* cost of movzx */
355 8, /* "large" insn */
357 3, /* cost for loading QImode using movzbl */
358 {4, 5, 4}, /* cost of loading integer registers
359 in QImode, HImode and SImode.
360 Relative to reg-reg move (2). */
361 {2, 3, 2}, /* cost of storing integer registers */
362 4, /* cost of reg,reg fld/fst */
363 {6, 6, 6}, /* cost of loading fp registers
364 in SFmode, DFmode and XFmode */
365 {4, 4, 4}, /* cost of storing fp registers
366 in SFmode, DFmode and XFmode */
367 2, /* cost of moving MMX register */
368 {2, 2}, /* cost of loading MMX registers
369 in SImode and DImode */
370 {2, 2}, /* cost of storing MMX registers
371 in SImode and DImode */
372 2, /* cost of moving SSE register */
373 {2, 2, 8}, /* cost of loading SSE registers
374 in SImode, DImode and TImode */
375 {2, 2, 8}, /* cost of storing SSE registers
376 in SImode, DImode and TImode */
377 6, /* MMX or SSE register to integer */
378 32, /* size of prefetch block */
379 1, /* number of parallel prefetches */
381 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
382 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
383 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
384 COSTS_N_INSNS (2), /* cost of FABS instruction. */
385 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
386 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
/* Cost table for the AMD Athlon.  */
390 struct processor_costs athlon_cost
= {
391 COSTS_N_INSNS (1), /* cost of an add instruction */
392 COSTS_N_INSNS (2), /* cost of a lea instruction */
393 COSTS_N_INSNS (1), /* variable shift costs */
394 COSTS_N_INSNS (1), /* constant shift costs */
395 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
396 COSTS_N_INSNS (5), /* HI */
397 COSTS_N_INSNS (5), /* SI */
398 COSTS_N_INSNS (5), /* DI */
399 COSTS_N_INSNS (5)}, /* other */
400 0, /* cost of multiply per each bit set */
401 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
402 COSTS_N_INSNS (26), /* HI */
403 COSTS_N_INSNS (42), /* SI */
404 COSTS_N_INSNS (74), /* DI */
405 COSTS_N_INSNS (74)}, /* other */
406 COSTS_N_INSNS (1), /* cost of movsx */
407 COSTS_N_INSNS (1), /* cost of movzx */
408 8, /* "large" insn */
410 4, /* cost for loading QImode using movzbl */
411 {3, 4, 3}, /* cost of loading integer registers
412 in QImode, HImode and SImode.
413 Relative to reg-reg move (2). */
414 {3, 4, 3}, /* cost of storing integer registers */
415 4, /* cost of reg,reg fld/fst */
416 {4, 4, 12}, /* cost of loading fp registers
417 in SFmode, DFmode and XFmode */
418 {6, 6, 8}, /* cost of storing fp registers
419 in SFmode, DFmode and XFmode */
420 2, /* cost of moving MMX register */
421 {4, 4}, /* cost of loading MMX registers
422 in SImode and DImode */
423 {4, 4}, /* cost of storing MMX registers
424 in SImode and DImode */
425 2, /* cost of moving SSE register */
426 {4, 4, 6}, /* cost of loading SSE registers
427 in SImode, DImode and TImode */
428 {4, 4, 5}, /* cost of storing SSE registers
429 in SImode, DImode and TImode */
430 5, /* MMX or SSE register to integer */
431 64, /* size of prefetch block */
432 6, /* number of parallel prefetches */
434 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
435 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
436 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
437 COSTS_N_INSNS (2), /* cost of FABS instruction. */
438 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
439 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
/* Cost table for the AMD K8 family.  */
443 struct processor_costs k8_cost
= {
444 COSTS_N_INSNS (1), /* cost of an add instruction */
445 COSTS_N_INSNS (2), /* cost of a lea instruction */
446 COSTS_N_INSNS (1), /* variable shift costs */
447 COSTS_N_INSNS (1), /* constant shift costs */
448 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
449 COSTS_N_INSNS (4), /* HI */
450 COSTS_N_INSNS (3), /* SI */
451 COSTS_N_INSNS (4), /* DI */
452 COSTS_N_INSNS (5)}, /* other */
453 0, /* cost of multiply per each bit set */
454 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
455 COSTS_N_INSNS (26), /* HI */
456 COSTS_N_INSNS (42), /* SI */
457 COSTS_N_INSNS (74), /* DI */
458 COSTS_N_INSNS (74)}, /* other */
459 COSTS_N_INSNS (1), /* cost of movsx */
460 COSTS_N_INSNS (1), /* cost of movzx */
461 8, /* "large" insn */
463 4, /* cost for loading QImode using movzbl */
464 {3, 4, 3}, /* cost of loading integer registers
465 in QImode, HImode and SImode.
466 Relative to reg-reg move (2). */
467 {3, 4, 3}, /* cost of storing integer registers */
468 4, /* cost of reg,reg fld/fst */
469 {4, 4, 12}, /* cost of loading fp registers
470 in SFmode, DFmode and XFmode */
471 {6, 6, 8}, /* cost of storing fp registers
472 in SFmode, DFmode and XFmode */
473 2, /* cost of moving MMX register */
474 {3, 3}, /* cost of loading MMX registers
475 in SImode and DImode */
476 {4, 4}, /* cost of storing MMX registers
477 in SImode and DImode */
478 2, /* cost of moving SSE register */
479 {4, 3, 6}, /* cost of loading SSE registers
480 in SImode, DImode and TImode */
481 {4, 4, 5}, /* cost of storing SSE registers
482 in SImode, DImode and TImode */
483 5, /* MMX or SSE register to integer */
484 64, /* size of prefetch block */
485 6, /* number of parallel prefetches */
487 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
488 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
489 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
490 COSTS_N_INSNS (2), /* cost of FABS instruction. */
491 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
492 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
/* Cost table for the Intel Pentium 4.  */
496 struct processor_costs pentium4_cost
= {
497 COSTS_N_INSNS (1), /* cost of an add instruction */
498 COSTS_N_INSNS (3), /* cost of a lea instruction */
499 COSTS_N_INSNS (4), /* variable shift costs */
500 COSTS_N_INSNS (4), /* constant shift costs */
501 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
502 COSTS_N_INSNS (15), /* HI */
503 COSTS_N_INSNS (15), /* SI */
504 COSTS_N_INSNS (15), /* DI */
505 COSTS_N_INSNS (15)}, /* other */
506 0, /* cost of multiply per each bit set */
507 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
508 COSTS_N_INSNS (56), /* HI */
509 COSTS_N_INSNS (56), /* SI */
510 COSTS_N_INSNS (56), /* DI */
511 COSTS_N_INSNS (56)}, /* other */
512 COSTS_N_INSNS (1), /* cost of movsx */
513 COSTS_N_INSNS (1), /* cost of movzx */
514 16, /* "large" insn */
516 2, /* cost for loading QImode using movzbl */
517 {4, 5, 4}, /* cost of loading integer registers
518 in QImode, HImode and SImode.
519 Relative to reg-reg move (2). */
520 {2, 3, 2}, /* cost of storing integer registers */
521 2, /* cost of reg,reg fld/fst */
522 {2, 2, 6}, /* cost of loading fp registers
523 in SFmode, DFmode and XFmode */
524 {4, 4, 6}, /* cost of storing fp registers
525 in SFmode, DFmode and XFmode */
526 2, /* cost of moving MMX register */
527 {2, 2}, /* cost of loading MMX registers
528 in SImode and DImode */
529 {2, 2}, /* cost of storing MMX registers
530 in SImode and DImode */
531 12, /* cost of moving SSE register */
532 {12, 12, 12}, /* cost of loading SSE registers
533 in SImode, DImode and TImode */
534 {2, 2, 8}, /* cost of storing SSE registers
535 in SImode, DImode and TImode */
536 10, /* MMX or SSE register to integer */
537 64, /* size of prefetch block */
538 6, /* number of parallel prefetches */
540 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
541 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
542 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
543 COSTS_N_INSNS (2), /* cost of FABS instruction. */
544 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
545 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
/* Cost table for the Intel Nocona.  */
549 struct processor_costs nocona_cost
= {
550 COSTS_N_INSNS (1), /* cost of an add instruction */
551 COSTS_N_INSNS (1), /* cost of a lea instruction */
552 COSTS_N_INSNS (1), /* variable shift costs */
553 COSTS_N_INSNS (1), /* constant shift costs */
554 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
555 COSTS_N_INSNS (10), /* HI */
556 COSTS_N_INSNS (10), /* SI */
557 COSTS_N_INSNS (10), /* DI */
558 COSTS_N_INSNS (10)}, /* other */
559 0, /* cost of multiply per each bit set */
560 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
561 COSTS_N_INSNS (66), /* HI */
562 COSTS_N_INSNS (66), /* SI */
563 COSTS_N_INSNS (66), /* DI */
564 COSTS_N_INSNS (66)}, /* other */
565 COSTS_N_INSNS (1), /* cost of movsx */
566 COSTS_N_INSNS (1), /* cost of movzx */
567 16, /* "large" insn */
569 4, /* cost for loading QImode using movzbl */
570 {4, 4, 4}, /* cost of loading integer registers
571 in QImode, HImode and SImode.
572 Relative to reg-reg move (2). */
573 {4, 4, 4}, /* cost of storing integer registers */
574 3, /* cost of reg,reg fld/fst */
575 {12, 12, 12}, /* cost of loading fp registers
576 in SFmode, DFmode and XFmode */
577 {4, 4, 4}, /* cost of storing fp registers
578 in SFmode, DFmode and XFmode */
579 6, /* cost of moving MMX register */
580 {12, 12}, /* cost of loading MMX registers
581 in SImode and DImode */
582 {12, 12}, /* cost of storing MMX registers
583 in SImode and DImode */
584 6, /* cost of moving SSE register */
585 {12, 12, 12}, /* cost of loading SSE registers
586 in SImode, DImode and TImode */
587 {12, 12, 12}, /* cost of storing SSE registers
588 in SImode, DImode and TImode */
589 8, /* MMX or SSE register to integer */
590 128, /* size of prefetch block */
591 8, /* number of parallel prefetches */
593 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
594 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
595 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
596 COSTS_N_INSNS (3), /* cost of FABS instruction. */
597 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
598 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
601 /* Generic64 should produce code tuned for Nocona and K8. */
/* Blended cost table for generic 64-bit tuning.  */
603 struct processor_costs generic64_cost
= {
604 COSTS_N_INSNS (1), /* cost of an add instruction */
605 /* On all chips taken into consideration lea is 2 cycles and more. With
606 this cost however our current implementation of synth_mult results in
607 use of unnecessary temporary registers causing regression on several
608 SPECfp benchmarks. */
609 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
610 COSTS_N_INSNS (1), /* variable shift costs */
611 COSTS_N_INSNS (1), /* constant shift costs */
612 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
613 COSTS_N_INSNS (4), /* HI */
614 COSTS_N_INSNS (3), /* SI */
615 COSTS_N_INSNS (4), /* DI */
616 COSTS_N_INSNS (2)}, /* other */
617 0, /* cost of multiply per each bit set */
618 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
619 COSTS_N_INSNS (26), /* HI */
620 COSTS_N_INSNS (42), /* SI */
621 COSTS_N_INSNS (74), /* DI */
622 COSTS_N_INSNS (74)}, /* other */
623 COSTS_N_INSNS (1), /* cost of movsx */
624 COSTS_N_INSNS (1), /* cost of movzx */
625 8, /* "large" insn */
627 4, /* cost for loading QImode using movzbl */
628 {4, 4, 4}, /* cost of loading integer registers
629 in QImode, HImode and SImode.
630 Relative to reg-reg move (2). */
631 {4, 4, 4}, /* cost of storing integer registers */
632 4, /* cost of reg,reg fld/fst */
633 {12, 12, 12}, /* cost of loading fp registers
634 in SFmode, DFmode and XFmode */
635 {6, 6, 8}, /* cost of storing fp registers
636 in SFmode, DFmode and XFmode */
637 2, /* cost of moving MMX register */
638 {8, 8}, /* cost of loading MMX registers
639 in SImode and DImode */
640 {8, 8}, /* cost of storing MMX registers
641 in SImode and DImode */
642 2, /* cost of moving SSE register */
643 {8, 8, 8}, /* cost of loading SSE registers
644 in SImode, DImode and TImode */
645 {8, 8, 8}, /* cost of storing SSE registers
646 in SImode, DImode and TImode */
647 5, /* MMX or SSE register to integer */
648 64, /* size of prefetch block */
649 6, /* number of parallel prefetches */
650 /* Benchmarks shows large regressions on K8 sixtrack benchmark when this value
651 is increased to perhaps more appropriate value of 5. */
653 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
654 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
655 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
656 COSTS_N_INSNS (8), /* cost of FABS instruction. */
657 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
658 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
661 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
/* Blended cost table for generic 32-bit tuning.  */
663 struct processor_costs generic32_cost
= {
664 COSTS_N_INSNS (1), /* cost of an add instruction */
665 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
666 COSTS_N_INSNS (1), /* variable shift costs */
667 COSTS_N_INSNS (1), /* constant shift costs */
668 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
669 COSTS_N_INSNS (4), /* HI */
670 COSTS_N_INSNS (3), /* SI */
671 COSTS_N_INSNS (4), /* DI */
672 COSTS_N_INSNS (2)}, /* other */
673 0, /* cost of multiply per each bit set */
674 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
675 COSTS_N_INSNS (26), /* HI */
676 COSTS_N_INSNS (42), /* SI */
677 COSTS_N_INSNS (74), /* DI */
678 COSTS_N_INSNS (74)}, /* other */
679 COSTS_N_INSNS (1), /* cost of movsx */
680 COSTS_N_INSNS (1), /* cost of movzx */
681 8, /* "large" insn */
683 4, /* cost for loading QImode using movzbl */
684 {4, 4, 4}, /* cost of loading integer registers
685 in QImode, HImode and SImode.
686 Relative to reg-reg move (2). */
687 {4, 4, 4}, /* cost of storing integer registers */
688 4, /* cost of reg,reg fld/fst */
689 {12, 12, 12}, /* cost of loading fp registers
690 in SFmode, DFmode and XFmode */
691 {6, 6, 8}, /* cost of storing fp registers
692 in SFmode, DFmode and XFmode */
693 2, /* cost of moving MMX register */
694 {8, 8}, /* cost of loading MMX registers
695 in SImode and DImode */
696 {8, 8}, /* cost of storing MMX registers
697 in SImode and DImode */
698 2, /* cost of moving SSE register */
699 {8, 8, 8}, /* cost of loading SSE registers
700 in SImode, DImode and TImode */
701 {8, 8, 8}, /* cost of storing SSE registers
702 in SImode, DImode and TImode */
703 5, /* MMX or SSE register to integer */
704 64, /* size of prefetch block */
705 6, /* number of parallel prefetches */
707 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
708 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
709 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
710 COSTS_N_INSNS (8), /* cost of FABS instruction. */
711 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
712 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
715 const struct processor_costs
*ix86_cost
= &pentium_cost
;
717 /* Processor feature/optimization bitmasks. */
718 #define m_386 (1<<PROCESSOR_I386)
719 #define m_486 (1<<PROCESSOR_I486)
720 #define m_PENT (1<<PROCESSOR_PENTIUM)
721 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
722 #define m_K6 (1<<PROCESSOR_K6)
723 #define m_ATHLON (1<<PROCESSOR_ATHLON)
724 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
725 #define m_K8 (1<<PROCESSOR_K8)
726 #define m_ATHLON_K8 (m_K8 | m_ATHLON) /* any Athlon- or K8-family chip */
727 #define m_NOCONA (1<<PROCESSOR_NOCONA)
728 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
729 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
730 #define m_GENERIC (m_GENERIC32 | m_GENERIC64) /* generic tuning, either word size */
732 /* Generic instruction choice should be common subset of supported CPUs
733 (PPro/PENT4/NOCONA/Athlon/K8). */
735 /* Leave is not affecting Nocona SPEC2000 results negatively, so enabling for
736 Generic64 seems like good code size tradeoff. We can't enable it for 32bit
737 generic because it is not working well with PPro base chips. */
738 const int x86_use_leave
= m_386
| m_K6
| m_ATHLON_K8
| m_GENERIC64
;
739 const int x86_push_memory
= m_386
| m_K6
| m_ATHLON_K8
| m_PENT4
| m_NOCONA
| m_GENERIC
;
740 const int x86_zero_extend_with_and
= m_486
| m_PENT
;
741 const int x86_movx
= m_ATHLON_K8
| m_PPRO
| m_PENT4
| m_NOCONA
| m_GENERIC
/* m_386 | m_K6 */;
742 const int x86_double_with_add
= ~m_386
;
743 const int x86_use_bit_test
= m_386
;
744 const int x86_unroll_strlen
= m_486
| m_PENT
| m_PPRO
| m_ATHLON_K8
| m_K6
| m_GENERIC
;
745 const int x86_cmove
= m_PPRO
| m_ATHLON_K8
| m_PENT4
| m_NOCONA
;
746 const int x86_fisttp
= m_NOCONA
;
747 const int x86_3dnow_a
= m_ATHLON_K8
;
748 const int x86_deep_branch
= m_PPRO
| m_K6
| m_ATHLON_K8
| m_PENT4
| m_NOCONA
| m_GENERIC
;
749 /* Branch hints were put in P4 based on simulation result. But
750 after P4 was made, no performance benefit was observed with
751 branch hints. It also increases the code size. As the result,
752 icc never generates branch hints. */
753 const int x86_branch_hints
= 0;
754 const int x86_use_sahf
= m_PPRO
| m_K6
| m_PENT4
| m_NOCONA
| m_GENERIC32
; /*m_GENERIC | m_ATHLON_K8 ? */
755 /* We probably ought to watch for partial register stalls on Generic32
756 compilation setting as well. However in current implementation the
757 partial register stalls are not eliminated very well - they can
758 be introduced via subregs synthesized by combine and can happen
759 in caller/callee saving sequences.
760 Because this option pays back little on PPro based chips and is in conflict
761 with partial reg. dependencies used by Athlon/P4 based chips, it is better
762 to leave it off for generic32 for now. */
763 const int x86_partial_reg_stall
= m_PPRO
;
764 const int x86_use_himode_fiop
= m_386
| m_486
| m_K6
;
765 const int x86_use_simode_fiop
= ~(m_PPRO
| m_ATHLON_K8
| m_PENT
| m_GENERIC
);
766 const int x86_use_mov0
= m_K6
;
767 const int x86_use_cltd
= ~(m_PENT
| m_K6
| m_GENERIC
);
768 const int x86_read_modify_write
= ~m_PENT
;
769 const int x86_read_modify
= ~(m_PENT
| m_PPRO
);
770 const int x86_split_long_moves
= m_PPRO
;
771 const int x86_promote_QImode
= m_K6
| m_PENT
| m_386
| m_486
| m_ATHLON_K8
| m_GENERIC
; /* m_PENT4 ? */
772 const int x86_fast_prefix
= ~(m_PENT
| m_486
| m_386
);
773 const int x86_single_stringop
= m_386
| m_PENT4
| m_NOCONA
;
774 const int x86_qimode_math
= ~(0);
775 const int x86_promote_qi_regs
= 0;
776 /* On PPro this flag is meant to avoid partial register stalls. Just like
777 the x86_partial_reg_stall this option might be considered for Generic32
778 if our scheme for avoiding partial stalls was more effective. */
779 const int x86_himode_math
= ~(m_PPRO
);
780 const int x86_promote_hi_regs
= m_PPRO
;
781 const int x86_sub_esp_4
= m_ATHLON_K8
| m_PPRO
| m_PENT4
| m_NOCONA
| m_GENERIC
;
782 const int x86_sub_esp_8
= m_ATHLON_K8
| m_PPRO
| m_386
| m_486
| m_PENT4
| m_NOCONA
| m_GENERIC
;
783 const int x86_add_esp_4
= m_ATHLON_K8
| m_K6
| m_PENT4
| m_NOCONA
| m_GENERIC
;
784 const int x86_add_esp_8
= m_ATHLON_K8
| m_PPRO
| m_K6
| m_386
| m_486
| m_PENT4
| m_NOCONA
| m_GENERIC
;
785 const int x86_integer_DFmode_moves
= ~(m_ATHLON_K8
| m_PENT4
| m_NOCONA
| m_PPRO
| m_GENERIC
);
786 const int x86_partial_reg_dependency
= m_ATHLON_K8
| m_PENT4
| m_NOCONA
| m_GENERIC
;
787 const int x86_memory_mismatch_stall
= m_ATHLON_K8
| m_PENT4
| m_NOCONA
| m_GENERIC
;
788 const int x86_accumulate_outgoing_args
= m_ATHLON_K8
| m_PENT4
| m_NOCONA
| m_PPRO
| m_GENERIC
;
789 const int x86_prologue_using_move
= m_ATHLON_K8
| m_PPRO
| m_GENERIC
;
790 const int x86_epilogue_using_move
= m_ATHLON_K8
| m_PPRO
| m_GENERIC
;
791 const int x86_shift1
= ~m_486
;
792 const int x86_arch_always_fancy_math_387
= m_PENT
| m_PPRO
| m_ATHLON_K8
| m_PENT4
| m_NOCONA
| m_GENERIC
;
793 /* In Generic model we have an conflict here in between PPro/Pentium4 based chips
794 that thread 128bit SSE registers as single units versus K8 based chips that
795 divide SSE registers to two 64bit halves.
796 x86_sse_partial_reg_dependency promote all store destinations to be 128bit
797 to allow register renaming on 128bit SSE units, but usually results in one
798 extra microop on 64bit SSE units. Experimental results shows that disabling
799 this option on P4 brings over 20% SPECfp regression, while enabling it on
800 K8 brings roughly 2.4% regression that can be partly masked by careful scheduling
802 const int x86_sse_partial_reg_dependency
= m_PENT4
| m_NOCONA
| m_PPRO
| m_GENERIC
;
803 /* Set for machines where the type and dependencies are resolved on SSE
804 register parts instead of whole registers, so we may maintain just
805 lower part of scalar values in proper format leaving the upper part
807 const int x86_sse_split_regs
= m_ATHLON_K8
;
808 const int x86_sse_typeless_stores
= m_ATHLON_K8
;
809 const int x86_sse_load0_by_pxor
= m_PPRO
| m_PENT4
| m_NOCONA
;
810 const int x86_use_ffreep
= m_ATHLON_K8
;
811 const int x86_rep_movl_optimal
= m_386
| m_PENT
| m_PPRO
| m_K6
;
812 const int x86_use_incdec
= ~(m_PENT4
| m_NOCONA
| m_GENERIC
);
814 /* ??? Allowing interunit moves makes it all too easy for the compiler to put
815 integer data in xmm registers. Which results in pretty abysmal code. */
816 const int x86_inter_unit_moves
= 0 /* ~(m_ATHLON_K8) */;
818 const int x86_ext_80387_constants
= m_K6
| m_ATHLON
| m_PENT4
| m_NOCONA
| m_PPRO
| m_GENERIC32
;
819 /* Some CPU cores are not able to predict more than 4 branch instructions in
820 the 16 byte window. */
821 const int x86_four_jump_limit
= m_PPRO
| m_ATHLON_K8
| m_PENT4
| m_NOCONA
| m_GENERIC
;
822 const int x86_schedule
= m_PPRO
| m_ATHLON_K8
| m_K6
| m_PENT
| m_GENERIC
;
823 const int x86_use_bt
= m_ATHLON_K8
;
824 /* Compare and exchange was added for 80486. */
825 const int x86_cmpxchg
= ~m_386
;
826 /* Compare and exchange 8 bytes was added for pentium. */
827 const int x86_cmpxchg8b
= ~(m_386
| m_486
);
828 /* Compare and exchange 16 bytes was added for nocona. */
829 const int x86_cmpxchg16b
= m_NOCONA
;
830 /* Exchange and add was added for 80486. */
831 const int x86_xadd
= ~m_386
;
832 const int x86_pad_returns
= m_ATHLON_K8
| m_GENERIC
;
834 /* In case the average insn count for single function invocation is
835 lower than this constant, emit fast (but longer) prologue and
epilogue code.  */
837 #define FAST_PROLOGUE_INSN_COUNT 20
839 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
840 static const char *const qi_reg_name
[] = QI_REGISTER_NAMES
;
841 static const char *const qi_high_reg_name
[] = QI_HIGH_REGISTER_NAMES
;
842 static const char *const hi_reg_name
[] = HI_REGISTER_NAMES
;
844 /* Array of the smallest class containing reg number REGNO, indexed by
845 REGNO. Used by REGNO_REG_CLASS in i386.h. */
847 enum reg_class
const regclass_map
[FIRST_PSEUDO_REGISTER
] =
/* ax, dx, cx, bx */
850 AREG
, DREG
, CREG
, BREG
,
/* si, di, then two non-QImode regs — presumably bp, sp; confirm order
   against FIRST_PSEUDO_REGISTER layout in i386.h */
852 SIREG
, DIREG
, NON_Q_REGS
, NON_Q_REGS
,
/* x87 floating point stack registers */
854 FP_TOP_REG
, FP_SECOND_REG
, FLOAT_REGS
, FLOAT_REGS
,
855 FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
,
858 /* flags, fpsr, dirflag, frame */
859 NO_REGS
, NO_REGS
, NO_REGS
, NON_Q_REGS
,
/* SSE registers */
860 SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
/* MMX registers */
862 MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
,
/* extended (REX) integer registers — presumably r8..r15; confirm */
864 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
865 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
/* extended SSE registers */
866 SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
870 /* The "default" register map used in 32bit mode. */
872 int const dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
874 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
875 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
876 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
877 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
878 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
879 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
880 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
883 static int const x86_64_int_parameter_registers
[6] =
885 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
886 FIRST_REX_INT_REG
/*R8 */, FIRST_REX_INT_REG
+ 1 /*R9 */
/* The x86-64 ABI integer value-return registers (rax, rdx, rdi, rsi)
   as gcc register numbers.  gcc regno 1 is %rdx (see the SVR4 dwarf
   numbering comment below: "2 for %edx (gcc regno = 1)"); the previous
   comment mislabelled it as RDI.  */
static int const x86_64_int_return_registers[4] =
{
  0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
};
894 /* The "default" register map used in 64bit mode. */
895 int const dbx64_register_map
[FIRST_PSEUDO_REGISTER
] =
897 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
898 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
899 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
900 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
901 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
902 8,9,10,11,12,13,14,15, /* extended integer registers */
903 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
906 /* Define the register numbers to be used in Dwarf debugging information.
907 The SVR4 reference port C compiler uses the following register numbers
908 in its Dwarf output code:
909 0 for %eax (gcc regno = 0)
910 1 for %ecx (gcc regno = 2)
911 2 for %edx (gcc regno = 1)
912 3 for %ebx (gcc regno = 3)
913 4 for %esp (gcc regno = 7)
914 5 for %ebp (gcc regno = 6)
915 6 for %esi (gcc regno = 4)
916 7 for %edi (gcc regno = 5)
917 The following three DWARF register numbers are never generated by
918 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
919 believes these numbers have these meanings.
920 8 for %eip (no gcc equivalent)
921 9 for %eflags (gcc regno = 17)
922 10 for %trapno (no gcc equivalent)
923 It is not at all clear how we should number the FP stack registers
924 for the x86 architecture. If the version of SDB on x86/svr4 were
925 a bit less brain dead with respect to floating-point then we would
926 have a precedent to follow with respect to DWARF register numbers
927 for x86 FP registers, but the SDB on x86/svr4 is so completely
928 broken with respect to FP registers that it is hardly worth thinking
929 of it as something to strive for compatibility with.
930 The version of x86/svr4 SDB I have at the moment does (partially)
931 seem to believe that DWARF register number 11 is associated with
932 the x86 register %st(0), but that's about all. Higher DWARF
933 register numbers don't seem to be associated with anything in
934 particular, and even for DWARF regno 11, SDB only seems to under-
935 stand that it should say that a variable lives in %st(0) (when
936 asked via an `=' command) if we said it was in DWARF regno 11,
937 but SDB still prints garbage when asked for the value of the
938 variable in question (via a `/' command).
939 (Also note that the labels SDB prints for various FP stack regs
940 when doing an `x' command are all wrong.)
941 Note that these problems generally don't affect the native SVR4
942 C compiler because it doesn't allow the use of -O with -g and
943 because when it is *not* optimizing, it allocates a memory
944 location for each floating-point variable, and the memory
945 location is what gets described in the DWARF AT_location
946 attribute for the variable in question.
947 Regardless of the severe mental illness of the x86/svr4 SDB, we
948 do something sensible here and we use the following DWARF
949 register numbers. Note that these are all stack-top-relative
951 11 for %st(0) (gcc regno = 8)
952 12 for %st(1) (gcc regno = 9)
953 13 for %st(2) (gcc regno = 10)
954 14 for %st(3) (gcc regno = 11)
955 15 for %st(4) (gcc regno = 12)
956 16 for %st(5) (gcc regno = 13)
957 17 for %st(6) (gcc regno = 14)
958 18 for %st(7) (gcc regno = 15)
960 int const svr4_dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
962 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
963 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
964 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
965 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
966 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
967 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
968 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
971 /* Test and compare insns in i386.md store the information needed to
972 generate branch and scc insns here. */
974 rtx ix86_compare_op0
= NULL_RTX
;
975 rtx ix86_compare_op1
= NULL_RTX
;
976 rtx ix86_compare_emitted
= NULL_RTX
;
978 /* Size of the register save area. */
979 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
981 /* Define the structure for the machine field in struct function. */
983 struct stack_local_entry
GTY(())
/* NOTE(review): this struct's body looks truncated by the extraction --
   only the `next' chain link survives below; the fields describing the
   stack slot itself (mode/size/rtl, presumably) are missing.  Restore
   them from the original source before compiling.  */
988 struct stack_local_entry
*next
;
991 /* Structure describing stack frame layout.
992 Stack grows downward:
998 saved frame pointer if frame_pointer_needed
999 <- HARD_FRAME_POINTER
1004 [va_arg registers] (
1005 > to_allocate <- FRAME_POINTER
/* NOTE(review): the `struct ix86_frame {' opener and several members
   are missing from this extraction (the fused line numbers jump from
   1005 to 1015) -- restore from the original source; only the members
   below survive.  */
1015 HOST_WIDE_INT frame
;
/* Size of the outgoing-argument area -- TODO confirm against users.  */
1017 int outgoing_arguments_size
;
/* Total bytes the prologue must allocate -- TODO confirm.  */
1020 HOST_WIDE_INT to_allocate
;
1021 /* The offsets relative to ARG_POINTER. */
1022 HOST_WIDE_INT frame_pointer_offset
;
1023 HOST_WIDE_INT hard_frame_pointer_offset
;
1024 HOST_WIDE_INT stack_pointer_offset
;
1026 /* When save_regs_using_mov is set, emit prologue using
1027 move instead of push instructions. */
1028 bool save_regs_using_mov
;
1031 /* Code model option. */
1032 enum cmodel ix86_cmodel
;
1034 enum asm_dialect ix86_asm_dialect
= ASM_ATT
;
1036 enum tls_dialect ix86_tls_dialect
= TLS_DIALECT_GNU
;
1038 /* Which unit we are generating floating point math for. */
1039 enum fpmath_unit ix86_fpmath
;
1041 /* Which cpu are we scheduling for. */
1042 enum processor_type ix86_tune
;
1043 /* Which instruction set architecture to use. */
1044 enum processor_type ix86_arch
;
1046 /* true if sse prefetch instruction is not NOOP. */
1047 int x86_prefetch_sse
;
1049 /* ix86_regparm_string as a number */
1050 static int ix86_regparm
;
1052 /* -mstackrealign option */
1053 extern int ix86_force_align_arg_pointer
;
1054 static const char ix86_force_align_arg_pointer_string
[] = "force_align_arg_pointer";
1056 /* Preferred alignment for stack boundary in bits. */
1057 unsigned int ix86_preferred_stack_boundary
;
1059 /* Values 1-5: see jump.c */
1060 int ix86_branch_cost
;
1062 /* Variables which are this size or smaller are put in the data/bss
1063 or ldata/lbss sections. */
1065 int ix86_section_threshold
= 65536;
1067 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1068 char internal_label_prefix
[16];
1069 int internal_label_prefix_len
;
1071 static bool ix86_handle_option (size_t, const char *, int);
1072 static void output_pic_addr_const (FILE *, rtx
, int);
1073 static void put_condition_code (enum rtx_code
, enum machine_mode
,
1075 static const char *get_some_local_dynamic_name (void);
1076 static int get_some_local_dynamic_name_1 (rtx
*, void *);
1077 static rtx
ix86_expand_int_compare (enum rtx_code
, rtx
, rtx
);
1078 static enum rtx_code
ix86_prepare_fp_compare_args (enum rtx_code
, rtx
*,
1080 static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
1081 static enum machine_mode
ix86_cc_modes_compatible (enum machine_mode
,
1083 static rtx
get_thread_pointer (int);
1084 static rtx
legitimize_tls_address (rtx
, enum tls_model
, int);
1085 static void get_pc_thunk_name (char [32], unsigned int);
1086 static rtx
gen_push (rtx
);
1087 static int ix86_flags_dependent (rtx
, rtx
, enum attr_type
);
1088 static int ix86_agi_dependent (rtx
, rtx
, enum attr_type
);
1089 static struct machine_function
* ix86_init_machine_status (void);
1090 static int ix86_split_to_parts (rtx
, rtx
*, enum machine_mode
);
1091 static int ix86_nsaved_regs (void);
1092 static void ix86_emit_save_regs (void);
1093 static void ix86_emit_save_regs_using_mov (rtx
, HOST_WIDE_INT
);
1094 static void ix86_emit_restore_regs_using_mov (rtx
, HOST_WIDE_INT
, int);
1095 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT
);
1096 static HOST_WIDE_INT
ix86_GOT_alias_set (void);
1097 static void ix86_adjust_counter (rtx
, HOST_WIDE_INT
);
1098 static rtx
ix86_expand_aligntest (rtx
, int);
1099 static void ix86_expand_strlensi_unroll_1 (rtx
, rtx
, rtx
);
1100 static int ix86_issue_rate (void);
1101 static int ix86_adjust_cost (rtx
, rtx
, rtx
, int);
1102 static int ia32_multipass_dfa_lookahead (void);
1103 static void ix86_init_mmx_sse_builtins (void);
1104 static rtx
x86_this_parameter (tree
);
1105 static void x86_output_mi_thunk (FILE *, tree
, HOST_WIDE_INT
,
1106 HOST_WIDE_INT
, tree
);
1107 static bool x86_can_output_mi_thunk (tree
, HOST_WIDE_INT
, HOST_WIDE_INT
, tree
);
1108 static void x86_file_start (void);
1109 static void ix86_reorg (void);
1110 static bool ix86_expand_carry_flag_compare (enum rtx_code
, rtx
, rtx
, rtx
*);
1111 static tree
ix86_build_builtin_va_list (void);
1112 static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS
*, enum machine_mode
,
1114 static tree
ix86_gimplify_va_arg (tree
, tree
, tree
*, tree
*);
1115 static bool ix86_scalar_mode_supported_p (enum machine_mode
);
1116 static bool ix86_vector_mode_supported_p (enum machine_mode
);
1118 static int ix86_address_cost (rtx
);
1119 static bool ix86_cannot_force_const_mem (rtx
);
1120 static rtx
ix86_delegitimize_address (rtx
);
1122 static void i386_output_dwarf_dtprel (FILE *, int, rtx
) ATTRIBUTE_UNUSED
;
1124 struct builtin_description
;
1125 static rtx
ix86_expand_sse_comi (const struct builtin_description
*,
1127 static rtx
ix86_expand_sse_compare (const struct builtin_description
*,
1129 static rtx
ix86_expand_unop1_builtin (enum insn_code
, tree
, rtx
);
1130 static rtx
ix86_expand_unop_builtin (enum insn_code
, tree
, rtx
, int);
1131 static rtx
ix86_expand_binop_builtin (enum insn_code
, tree
, rtx
);
1132 static rtx
ix86_expand_store_builtin (enum insn_code
, tree
);
1133 static rtx
safe_vector_operand (rtx
, enum machine_mode
);
1134 static rtx
ix86_expand_fp_compare (enum rtx_code
, rtx
, rtx
, rtx
, rtx
*, rtx
*);
1135 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code
);
1136 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code
);
1137 static int ix86_fp_comparison_sahf_cost (enum rtx_code code
);
1138 static int ix86_fp_comparison_cost (enum rtx_code code
);
1139 static unsigned int ix86_select_alt_pic_regnum (void);
1140 static int ix86_save_reg (unsigned int, int);
1141 static void ix86_compute_frame_layout (struct ix86_frame
*);
1142 static int ix86_comp_type_attributes (tree
, tree
);
1143 static int ix86_function_regparm (tree
, tree
);
1144 const struct attribute_spec ix86_attribute_table
[];
1145 static bool ix86_function_ok_for_sibcall (tree
, tree
);
1146 static tree
ix86_handle_cconv_attribute (tree
*, tree
, tree
, int, bool *);
1147 static int ix86_value_regno (enum machine_mode
, tree
, tree
);
1148 static bool contains_128bit_aligned_vector_p (tree
);
1149 static rtx
ix86_struct_value_rtx (tree
, int);
1150 static bool ix86_ms_bitfield_layout_p (tree
);
1151 static tree
ix86_handle_struct_attribute (tree
*, tree
, tree
, int, bool *);
1152 static int extended_reg_mentioned_1 (rtx
*, void *);
1153 static bool ix86_rtx_costs (rtx
, int, int, int *);
1154 static int min_insn_size (rtx
);
1155 static tree
ix86_md_asm_clobbers (tree outputs
, tree inputs
, tree clobbers
);
1156 static bool ix86_must_pass_in_stack (enum machine_mode mode
, tree type
);
1157 static bool ix86_pass_by_reference (CUMULATIVE_ARGS
*, enum machine_mode
,
1159 static void ix86_init_builtins (void);
1160 static rtx
ix86_expand_builtin (tree
, rtx
, rtx
, enum machine_mode
, int);
1161 static const char *ix86_mangle_fundamental_type (tree
);
1162 static tree
ix86_stack_protect_fail (void);
1163 static rtx
ix86_internal_arg_pointer (void);
1164 static void ix86_dwarf_handle_frame_unspec (const char *, rtx
, int);
1166 /* This function is only used on Solaris. */
1167 static void i386_solaris_elf_named_section (const char *, unsigned int, tree
)
1170 /* Register class used for passing given 64bit part of the argument.
1171 These represent classes as documented by the PS ABI, with the exception
1172 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
1173 use SF or DFmode move instead of DImode to avoid reformatting penalties.
1175 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1176 whenever possible (upper half does contain padding).
1178 enum x86_64_reg_class
/* NOTE(review): most enumerators are missing from this extraction (the
   fused line numbers jump 1182 -> 1189); the x86_64_reg_class_name
   table below has 11 entries, so the full enum should too.  Restore
   from the original source.  */
1181 X86_64_INTEGER_CLASS
,
1182 X86_64_INTEGERSI_CLASS
,
1189 X86_64_COMPLEX_X87_CLASS
,
/* Printable names for the x86_64_reg_class values, in enum order.  */
static const char * const x86_64_reg_class_name[] = {
  "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
  "sseup", "x87", "x87up", "cplx87", "no"
};

/* Maximum number of 64-bit chunks an argument is classified into --
   presumably bounds the class arrays used during argument
   classification; TODO confirm against the classification code.  */
#define MAX_CLASSES 4
1199 /* Table of constants used by fldpi, fldln2, etc.... */
1200 static REAL_VALUE_TYPE ext_80387_constants_table
[5];
1201 static bool ext_80387_constants_init
= 0;
1202 static void init_ext_80387_constants (void);
1203 static bool ix86_in_large_data_p (tree
) ATTRIBUTE_UNUSED
;
1204 static void ix86_encode_section_info (tree
, rtx
, int) ATTRIBUTE_UNUSED
;
1205 static void x86_64_elf_unique_section (tree decl
, int reloc
) ATTRIBUTE_UNUSED
;
1206 static section
*x86_64_elf_select_section (tree decl
, int reloc
,
1207 unsigned HOST_WIDE_INT align
)
/* NOTE(review): several preprocessor lines are missing from this
   extraction (the fused line numbers are discontinuous): the
   #endif/#else partners for the #if/#ifndef/#ifdef directives below,
   and the first operand line of TARGET_DEFAULT_TARGET_FLAGS.  Restore
   the conditional structure from the original source before
   compiling.  */
1210 /* Initialize the GCC target structure. */
1211 #undef TARGET_ATTRIBUTE_TABLE
1212 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
1213 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1214 # undef TARGET_MERGE_DECL_ATTRIBUTES
1215 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
/* NOTE(review): the #endif closing TARGET_DLLIMPORT_DECL_ATTRIBUTES is
   missing here.  */
1218 #undef TARGET_COMP_TYPE_ATTRIBUTES
1219 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
1221 #undef TARGET_INIT_BUILTINS
1222 #define TARGET_INIT_BUILTINS ix86_init_builtins
1223 #undef TARGET_EXPAND_BUILTIN
1224 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
1226 #undef TARGET_ASM_FUNCTION_EPILOGUE
1227 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
1229 #undef TARGET_ENCODE_SECTION_INFO
1230 #ifndef SUBTARGET_ENCODE_SECTION_INFO
1231 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
/* NOTE(review): the #else between the two TARGET_ENCODE_SECTION_INFO
   definitions, and the matching #endif, are missing here.  */
1233 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
1236 #undef TARGET_ASM_OPEN_PAREN
1237 #define TARGET_ASM_OPEN_PAREN ""
1238 #undef TARGET_ASM_CLOSE_PAREN
1239 #define TARGET_ASM_CLOSE_PAREN ""
1241 #undef TARGET_ASM_ALIGNED_HI_OP
1242 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
1243 #undef TARGET_ASM_ALIGNED_SI_OP
1244 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
1246 #undef TARGET_ASM_ALIGNED_DI_OP
1247 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
1250 #undef TARGET_ASM_UNALIGNED_HI_OP
1251 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
1252 #undef TARGET_ASM_UNALIGNED_SI_OP
1253 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
1254 #undef TARGET_ASM_UNALIGNED_DI_OP
1255 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
1257 #undef TARGET_SCHED_ADJUST_COST
1258 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
1259 #undef TARGET_SCHED_ISSUE_RATE
1260 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
1261 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1262 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
1263 ia32_multipass_dfa_lookahead
1265 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1266 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
1269 #undef TARGET_HAVE_TLS
1270 #define TARGET_HAVE_TLS true
1272 #undef TARGET_CANNOT_FORCE_CONST_MEM
1273 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1274 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1275 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_rtx_true
1277 #undef TARGET_DELEGITIMIZE_ADDRESS
1278 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1280 #undef TARGET_MS_BITFIELD_LAYOUT_P
1281 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
/* NOTE(review): darwin_binds_local_p is presumably guarded by a
   TARGET_MACHO conditional in the original -- the guard lines are
   missing here.  */
1284 #undef TARGET_BINDS_LOCAL_P
1285 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1288 #undef TARGET_ASM_OUTPUT_MI_THUNK
1289 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1290 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1291 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1293 #undef TARGET_ASM_FILE_START
1294 #define TARGET_ASM_FILE_START x86_file_start
1296 #undef TARGET_DEFAULT_TARGET_FLAGS
1297 #define TARGET_DEFAULT_TARGET_FLAGS \
1299 | TARGET_64BIT_DEFAULT \
1300 | TARGET_SUBTARGET_DEFAULT \
1301 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
1303 #undef TARGET_HANDLE_OPTION
1304 #define TARGET_HANDLE_OPTION ix86_handle_option
1306 #undef TARGET_RTX_COSTS
1307 #define TARGET_RTX_COSTS ix86_rtx_costs
1308 #undef TARGET_ADDRESS_COST
1309 #define TARGET_ADDRESS_COST ix86_address_cost
1311 #undef TARGET_FIXED_CONDITION_CODE_REGS
1312 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1313 #undef TARGET_CC_MODES_COMPATIBLE
1314 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1316 #undef TARGET_MACHINE_DEPENDENT_REORG
1317 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1319 #undef TARGET_BUILD_BUILTIN_VA_LIST
1320 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1322 #undef TARGET_MD_ASM_CLOBBERS
1323 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1325 #undef TARGET_PROMOTE_PROTOTYPES
1326 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1327 #undef TARGET_STRUCT_VALUE_RTX
1328 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
1329 #undef TARGET_SETUP_INCOMING_VARARGS
1330 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1331 #undef TARGET_MUST_PASS_IN_STACK
1332 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
1333 #undef TARGET_PASS_BY_REFERENCE
1334 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
1335 #undef TARGET_INTERNAL_ARG_POINTER
1336 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
1337 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
1338 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
1340 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1341 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
1343 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1344 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
1346 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1347 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
1350 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1351 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
1354 #ifdef SUBTARGET_INSERT_ATTRIBUTES
1355 #undef TARGET_INSERT_ATTRIBUTES
1356 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
/* NOTE(review): the #endif closing SUBTARGET_INSERT_ATTRIBUTES is
   missing here.  */
1359 #undef TARGET_MANGLE_FUNDAMENTAL_TYPE
1360 #define TARGET_MANGLE_FUNDAMENTAL_TYPE ix86_mangle_fundamental_type
1362 #undef TARGET_STACK_PROTECT_FAIL
1363 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
1365 #undef TARGET_FUNCTION_VALUE
1366 #define TARGET_FUNCTION_VALUE ix86_function_value
1368 struct gcc_target targetm
= TARGET_INITIALIZER
;
/* The svr4 ABI for the i386 says that records and unions are returned
   in memory.  */
#ifndef DEFAULT_PCC_STRUCT_RETURN
#define DEFAULT_PCC_STRUCT_RETURN 1
#endif
1377 /* Implement TARGET_HANDLE_OPTION. */
1380 ix86_handle_option (size_t code
, const char *arg ATTRIBUTE_UNUSED
, int value
)
/* NOTE(review): the return type, the switch over `code', its case
   labels, braces and return statements are all missing from this
   extraction (the fused line numbers jump over them); only the
   flag-mask updates survive below.  Restore from the original source
   before compiling.  */
/* Fragment: clears MASK_3DNOW_A and records it as explicitly set.  */
1387 target_flags
&= ~MASK_3DNOW_A
;
1388 target_flags_explicit
|= MASK_3DNOW_A
;
/* Fragment: clears 3DNow! together with the 3DNow!-A extension.  */
1395 target_flags
&= ~(MASK_3DNOW
| MASK_3DNOW_A
);
1396 target_flags_explicit
|= MASK_3DNOW
| MASK_3DNOW_A
;
/* Fragment: clears SSE2 together with SSE3.  */
1403 target_flags
&= ~(MASK_SSE2
| MASK_SSE3
);
1404 target_flags_explicit
|= MASK_SSE2
| MASK_SSE3
;
/* Fragment: clears SSE3 alone.  */
1411 target_flags
&= ~MASK_SSE3
;
1412 target_flags_explicit
|= MASK_SSE3
;
1421 /* Sometimes certain combinations of command options do not make
1422 sense on a particular target machine. You can define a macro
1423 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1424 defined, is executed once just after all the command options have
1427 Don't use this macro to turn on various extra optimizations for
1428 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1431 override_options (void)
1434 int ix86_tune_defaulted
= 0;
1436 /* Comes from final.c -- no real reason to change it. */
1437 #define MAX_CODE_ALIGN 16
1441 const struct processor_costs
*cost
; /* Processor costs */
1442 const int target_enable
; /* Target flags to enable. */
1443 const int target_disable
; /* Target flags to disable. */
1444 const int align_loop
; /* Default alignments. */
1445 const int align_loop_max_skip
;
1446 const int align_jump
;
1447 const int align_jump_max_skip
;
1448 const int align_func
;
1450 const processor_target_table
[PROCESSOR_max
] =
1452 {&i386_cost
, 0, 0, 4, 3, 4, 3, 4},
1453 {&i486_cost
, 0, 0, 16, 15, 16, 15, 16},
1454 {&pentium_cost
, 0, 0, 16, 7, 16, 7, 16},
1455 {&pentiumpro_cost
, 0, 0, 16, 15, 16, 7, 16},
1456 {&k6_cost
, 0, 0, 32, 7, 32, 7, 32},
1457 {&athlon_cost
, 0, 0, 16, 7, 16, 7, 16},
1458 {&pentium4_cost
, 0, 0, 0, 0, 0, 0, 0},
1459 {&k8_cost
, 0, 0, 16, 7, 16, 7, 16},
1460 {&nocona_cost
, 0, 0, 0, 0, 0, 0, 0},
1461 {&generic32_cost
, 0, 0, 16, 7, 16, 7, 16},
1462 {&generic64_cost
, 0, 0, 16, 7, 16, 7, 16}
1465 static const char * const cpu_names
[] = TARGET_CPU_DEFAULT_NAMES
;
1468 const char *const name
; /* processor name or nickname. */
1469 const enum processor_type processor
;
1470 const enum pta_flags
1476 PTA_PREFETCH_SSE
= 16,
1482 const processor_alias_table
[] =
1484 {"i386", PROCESSOR_I386
, 0},
1485 {"i486", PROCESSOR_I486
, 0},
1486 {"i586", PROCESSOR_PENTIUM
, 0},
1487 {"pentium", PROCESSOR_PENTIUM
, 0},
1488 {"pentium-mmx", PROCESSOR_PENTIUM
, PTA_MMX
},
1489 {"winchip-c6", PROCESSOR_I486
, PTA_MMX
},
1490 {"winchip2", PROCESSOR_I486
, PTA_MMX
| PTA_3DNOW
},
1491 {"c3", PROCESSOR_I486
, PTA_MMX
| PTA_3DNOW
},
1492 {"c3-2", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_SSE
},
1493 {"i686", PROCESSOR_PENTIUMPRO
, 0},
1494 {"pentiumpro", PROCESSOR_PENTIUMPRO
, 0},
1495 {"pentium2", PROCESSOR_PENTIUMPRO
, PTA_MMX
},
1496 {"pentium3", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_SSE
| PTA_PREFETCH_SSE
},
1497 {"pentium3m", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_SSE
| PTA_PREFETCH_SSE
},
1498 {"pentium-m", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_SSE
| PTA_PREFETCH_SSE
| PTA_SSE2
},
1499 {"pentium4", PROCESSOR_PENTIUM4
, PTA_SSE
| PTA_SSE2
1500 | PTA_MMX
| PTA_PREFETCH_SSE
},
1501 {"pentium4m", PROCESSOR_PENTIUM4
, PTA_SSE
| PTA_SSE2
1502 | PTA_MMX
| PTA_PREFETCH_SSE
},
1503 {"prescott", PROCESSOR_NOCONA
, PTA_SSE
| PTA_SSE2
| PTA_SSE3
1504 | PTA_MMX
| PTA_PREFETCH_SSE
},
1505 {"nocona", PROCESSOR_NOCONA
, PTA_SSE
| PTA_SSE2
| PTA_SSE3
| PTA_64BIT
1506 | PTA_MMX
| PTA_PREFETCH_SSE
},
1507 {"k6", PROCESSOR_K6
, PTA_MMX
},
1508 {"k6-2", PROCESSOR_K6
, PTA_MMX
| PTA_3DNOW
},
1509 {"k6-3", PROCESSOR_K6
, PTA_MMX
| PTA_3DNOW
},
1510 {"athlon", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1512 {"athlon-tbird", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
1513 | PTA_3DNOW
| PTA_3DNOW_A
},
1514 {"athlon-4", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1515 | PTA_3DNOW_A
| PTA_SSE
},
1516 {"athlon-xp", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1517 | PTA_3DNOW_A
| PTA_SSE
},
1518 {"athlon-mp", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1519 | PTA_3DNOW_A
| PTA_SSE
},
1520 {"x86-64", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_64BIT
1521 | PTA_SSE
| PTA_SSE2
},
1522 {"k8", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
| PTA_64BIT
1523 | PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
},
1524 {"opteron", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
| PTA_64BIT
1525 | PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
},
1526 {"athlon64", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
| PTA_64BIT
1527 | PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
},
1528 {"athlon-fx", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
| PTA_64BIT
1529 | PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
},
1530 {"generic32", PROCESSOR_GENERIC32
, 0 /* flags are only used for -march switch. */ },
1531 {"generic64", PROCESSOR_GENERIC64
, PTA_64BIT
/* flags are only used for -march switch. */ },
1534 int const pta_size
= ARRAY_SIZE (processor_alias_table
);
1536 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1537 SUBTARGET_OVERRIDE_OPTIONS
;
1540 /* Set the default values for switches whose default depends on TARGET_64BIT
1541 in case they weren't overwritten by command line options. */
1544 if (flag_omit_frame_pointer
== 2)
1545 flag_omit_frame_pointer
= 1;
1546 if (flag_asynchronous_unwind_tables
== 2)
1547 flag_asynchronous_unwind_tables
= 1;
1548 if (flag_pcc_struct_return
== 2)
1549 flag_pcc_struct_return
= 0;
1553 if (flag_omit_frame_pointer
== 2)
1554 flag_omit_frame_pointer
= 0;
1555 if (flag_asynchronous_unwind_tables
== 2)
1556 flag_asynchronous_unwind_tables
= 0;
1557 if (flag_pcc_struct_return
== 2)
1558 flag_pcc_struct_return
= DEFAULT_PCC_STRUCT_RETURN
;
1561 /* Need to check -mtune=generic first. */
1562 if (ix86_tune_string
)
1564 if (!strcmp (ix86_tune_string
, "generic")
1565 || !strcmp (ix86_tune_string
, "i686")
1566 /* As special support for cross compilers we read -mtune=native
1567 as -mtune=generic. With native compilers we won't see the
1568 -mtune=native, as it was changed by the driver. */
1569 || !strcmp (ix86_tune_string
, "native"))
1572 ix86_tune_string
= "generic64";
1574 ix86_tune_string
= "generic32";
1576 else if (!strncmp (ix86_tune_string
, "generic", 7))
1577 error ("bad value (%s) for -mtune= switch", ix86_tune_string
);
1581 if (ix86_arch_string
)
1582 ix86_tune_string
= ix86_arch_string
;
1583 if (!ix86_tune_string
)
1585 ix86_tune_string
= cpu_names
[TARGET_CPU_DEFAULT
];
1586 ix86_tune_defaulted
= 1;
1589 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
1590 need to use a sensible tune option. */
1591 if (!strcmp (ix86_tune_string
, "generic")
1592 || !strcmp (ix86_tune_string
, "x86-64")
1593 || !strcmp (ix86_tune_string
, "i686"))
1596 ix86_tune_string
= "generic64";
1598 ix86_tune_string
= "generic32";
1601 if (!strcmp (ix86_tune_string
, "x86-64"))
1602 warning (OPT_Wdeprecated
, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
1603 "-mtune=generic instead as appropriate.");
1605 if (!ix86_arch_string
)
1606 ix86_arch_string
= TARGET_64BIT
? "x86-64" : "i386";
1607 if (!strcmp (ix86_arch_string
, "generic"))
1608 error ("generic CPU can be used only for -mtune= switch");
1609 if (!strncmp (ix86_arch_string
, "generic", 7))
1610 error ("bad value (%s) for -march= switch", ix86_arch_string
);
1612 if (ix86_cmodel_string
!= 0)
1614 if (!strcmp (ix86_cmodel_string
, "small"))
1615 ix86_cmodel
= flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
1616 else if (!strcmp (ix86_cmodel_string
, "medium"))
1617 ix86_cmodel
= flag_pic
? CM_MEDIUM_PIC
: CM_MEDIUM
;
1619 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string
);
1620 else if (!strcmp (ix86_cmodel_string
, "32"))
1621 ix86_cmodel
= CM_32
;
1622 else if (!strcmp (ix86_cmodel_string
, "kernel") && !flag_pic
)
1623 ix86_cmodel
= CM_KERNEL
;
1624 else if (!strcmp (ix86_cmodel_string
, "large") && !flag_pic
)
1625 ix86_cmodel
= CM_LARGE
;
1627 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string
);
1631 ix86_cmodel
= CM_32
;
1633 ix86_cmodel
= flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
1635 if (ix86_asm_string
!= 0)
1638 && !strcmp (ix86_asm_string
, "intel"))
1639 ix86_asm_dialect
= ASM_INTEL
;
1640 else if (!strcmp (ix86_asm_string
, "att"))
1641 ix86_asm_dialect
= ASM_ATT
;
1643 error ("bad value (%s) for -masm= switch", ix86_asm_string
);
1645 if ((TARGET_64BIT
== 0) != (ix86_cmodel
== CM_32
))
1646 error ("code model %qs not supported in the %s bit mode",
1647 ix86_cmodel_string
, TARGET_64BIT
? "64" : "32");
1648 if (ix86_cmodel
== CM_LARGE
)
1649 sorry ("code model %<large%> not supported yet");
1650 if ((TARGET_64BIT
!= 0) != ((target_flags
& MASK_64BIT
) != 0))
1651 sorry ("%i-bit mode not compiled in",
1652 (target_flags
& MASK_64BIT
) ? 64 : 32);
1654 for (i
= 0; i
< pta_size
; i
++)
1655 if (! strcmp (ix86_arch_string
, processor_alias_table
[i
].name
))
1657 ix86_arch
= processor_alias_table
[i
].processor
;
1658 /* Default cpu tuning to the architecture. */
1659 ix86_tune
= ix86_arch
;
1660 if (processor_alias_table
[i
].flags
& PTA_MMX
1661 && !(target_flags_explicit
& MASK_MMX
))
1662 target_flags
|= MASK_MMX
;
1663 if (processor_alias_table
[i
].flags
& PTA_3DNOW
1664 && !(target_flags_explicit
& MASK_3DNOW
))
1665 target_flags
|= MASK_3DNOW
;
1666 if (processor_alias_table
[i
].flags
& PTA_3DNOW_A
1667 && !(target_flags_explicit
& MASK_3DNOW_A
))
1668 target_flags
|= MASK_3DNOW_A
;
1669 if (processor_alias_table
[i
].flags
& PTA_SSE
1670 && !(target_flags_explicit
& MASK_SSE
))
1671 target_flags
|= MASK_SSE
;
1672 if (processor_alias_table
[i
].flags
& PTA_SSE2
1673 && !(target_flags_explicit
& MASK_SSE2
))
1674 target_flags
|= MASK_SSE2
;
1675 if (processor_alias_table
[i
].flags
& PTA_SSE3
1676 && !(target_flags_explicit
& MASK_SSE3
))
1677 target_flags
|= MASK_SSE3
;
1678 if (processor_alias_table
[i
].flags
& PTA_PREFETCH_SSE
)
1679 x86_prefetch_sse
= true;
1680 if (TARGET_64BIT
&& !(processor_alias_table
[i
].flags
& PTA_64BIT
))
1681 error ("CPU you selected does not support x86-64 "
1687 error ("bad value (%s) for -march= switch", ix86_arch_string
);
1689 for (i
= 0; i
< pta_size
; i
++)
1690 if (! strcmp (ix86_tune_string
, processor_alias_table
[i
].name
))
1692 ix86_tune
= processor_alias_table
[i
].processor
;
1693 if (TARGET_64BIT
&& !(processor_alias_table
[i
].flags
& PTA_64BIT
))
1695 if (ix86_tune_defaulted
)
1697 ix86_tune_string
= "x86-64";
1698 for (i
= 0; i
< pta_size
; i
++)
1699 if (! strcmp (ix86_tune_string
,
1700 processor_alias_table
[i
].name
))
1702 ix86_tune
= processor_alias_table
[i
].processor
;
1705 error ("CPU you selected does not support x86-64 "
1708 /* Intel CPUs have always interpreted SSE prefetch instructions as
1709 NOPs; so, we can enable SSE prefetch instructions even when
1710 -mtune (rather than -march) points us to a processor that has them.
1711 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
1712 higher processors. */
1713 if (TARGET_CMOVE
&& (processor_alias_table
[i
].flags
& PTA_PREFETCH_SSE
))
1714 x86_prefetch_sse
= true;
1718 error ("bad value (%s) for -mtune= switch", ix86_tune_string
);
1721 ix86_cost
= &size_cost
;
1723 ix86_cost
= processor_target_table
[ix86_tune
].cost
;
1724 target_flags
|= processor_target_table
[ix86_tune
].target_enable
;
1725 target_flags
&= ~processor_target_table
[ix86_tune
].target_disable
;
1727 /* Arrange to set up i386_stack_locals for all functions. */
1728 init_machine_status
= ix86_init_machine_status
;
1730 /* Validate -mregparm= value. */
1731 if (ix86_regparm_string
)
1733 i
= atoi (ix86_regparm_string
);
1734 if (i
< 0 || i
> REGPARM_MAX
)
1735 error ("-mregparm=%d is not between 0 and %d", i
, REGPARM_MAX
);
1741 ix86_regparm
= REGPARM_MAX
;
1743 /* If the user has provided any of the -malign-* options,
1744 warn and use that value only if -falign-* is not set.
1745 Remove this code in GCC 3.2 or later. */
1746 if (ix86_align_loops_string
)
1748 warning (0, "-malign-loops is obsolete, use -falign-loops");
1749 if (align_loops
== 0)
1751 i
= atoi (ix86_align_loops_string
);
1752 if (i
< 0 || i
> MAX_CODE_ALIGN
)
1753 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
1755 align_loops
= 1 << i
;
1759 if (ix86_align_jumps_string
)
1761 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
1762 if (align_jumps
== 0)
1764 i
= atoi (ix86_align_jumps_string
);
1765 if (i
< 0 || i
> MAX_CODE_ALIGN
)
1766 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
1768 align_jumps
= 1 << i
;
1772 if (ix86_align_funcs_string
)
1774 warning (0, "-malign-functions is obsolete, use -falign-functions");
1775 if (align_functions
== 0)
1777 i
= atoi (ix86_align_funcs_string
);
1778 if (i
< 0 || i
> MAX_CODE_ALIGN
)
1779 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
1781 align_functions
= 1 << i
;
1785 /* Default align_* from the processor table. */
1786 if (align_loops
== 0)
1788 align_loops
= processor_target_table
[ix86_tune
].align_loop
;
1789 align_loops_max_skip
= processor_target_table
[ix86_tune
].align_loop_max_skip
;
1791 if (align_jumps
== 0)
1793 align_jumps
= processor_target_table
[ix86_tune
].align_jump
;
1794 align_jumps_max_skip
= processor_target_table
[ix86_tune
].align_jump_max_skip
;
1796 if (align_functions
== 0)
1798 align_functions
= processor_target_table
[ix86_tune
].align_func
;
1801 /* Validate -mpreferred-stack-boundary= value, or provide default.
1802 The default of 128 bits is for Pentium III's SSE __m128, but we
1803 don't want additional code to keep the stack aligned when
1804 optimizing for code size. */
1805 ix86_preferred_stack_boundary
= ((TARGET_64BIT
|| TARGET_MACHO
|| !optimize_size
)
1807 if (ix86_preferred_stack_boundary_string
)
1809 i
= atoi (ix86_preferred_stack_boundary_string
);
1810 if (i
< (TARGET_64BIT
? 4 : 2) || i
> 12)
1811 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i
,
1812 TARGET_64BIT
? 4 : 2);
1814 ix86_preferred_stack_boundary
= (1 << i
) * BITS_PER_UNIT
;
1817 /* Validate -mbranch-cost= value, or provide default. */
1818 ix86_branch_cost
= ix86_cost
->branch_cost
;
1819 if (ix86_branch_cost_string
)
1821 i
= atoi (ix86_branch_cost_string
);
1823 error ("-mbranch-cost=%d is not between 0 and 5", i
);
1825 ix86_branch_cost
= i
;
1827 if (ix86_section_threshold_string
)
1829 i
= atoi (ix86_section_threshold_string
);
1831 error ("-mlarge-data-threshold=%d is negative", i
);
1833 ix86_section_threshold
= i
;
1836 if (ix86_tls_dialect_string
)
1838 if (strcmp (ix86_tls_dialect_string
, "gnu") == 0)
1839 ix86_tls_dialect
= TLS_DIALECT_GNU
;
1840 else if (strcmp (ix86_tls_dialect_string
, "gnu2") == 0)
1841 ix86_tls_dialect
= TLS_DIALECT_GNU2
;
1842 else if (strcmp (ix86_tls_dialect_string
, "sun") == 0)
1843 ix86_tls_dialect
= TLS_DIALECT_SUN
;
1845 error ("bad value (%s) for -mtls-dialect= switch",
1846 ix86_tls_dialect_string
);
1849 /* Keep nonleaf frame pointers. */
1850 if (flag_omit_frame_pointer
)
1851 target_flags
&= ~MASK_OMIT_LEAF_FRAME_POINTER
;
1852 else if (TARGET_OMIT_LEAF_FRAME_POINTER
)
1853 flag_omit_frame_pointer
= 1;
1855 /* If we're doing fast math, we don't care about comparison order
1856 wrt NaNs. This lets us use a shorter comparison sequence. */
1857 if (flag_unsafe_math_optimizations
)
1858 target_flags
&= ~MASK_IEEE_FP
;
1860 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1861 since the insns won't need emulation. */
1862 if (x86_arch_always_fancy_math_387
& (1 << ix86_arch
))
1863 target_flags
&= ~MASK_NO_FANCY_MATH_387
;
1865 /* Likewise, if the target doesn't have a 387, or we've specified
1866 software floating point, don't use 387 inline intrinsics. */
1868 target_flags
|= MASK_NO_FANCY_MATH_387
;
1870 /* Turn on SSE2 builtins for -msse3. */
1872 target_flags
|= MASK_SSE2
;
1874 /* Turn on SSE builtins for -msse2. */
1876 target_flags
|= MASK_SSE
;
1878 /* Turn on MMX builtins for -msse. */
1881 target_flags
|= MASK_MMX
& ~target_flags_explicit
;
1882 x86_prefetch_sse
= true;
1885 /* Turn on MMX builtins for 3Dnow. */
1887 target_flags
|= MASK_MMX
;
1891 if (TARGET_ALIGN_DOUBLE
)
1892 error ("-malign-double makes no sense in the 64bit mode");
1894 error ("-mrtd calling convention not supported in the 64bit mode");
1896 /* Enable by default the SSE and MMX builtins. Do allow the user to
1897 explicitly disable any of these. In particular, disabling SSE and
1898 MMX for kernel code is extremely useful. */
1900 |= ((MASK_SSE2
| MASK_SSE
| MASK_MMX
| MASK_128BIT_LONG_DOUBLE
)
1901 & ~target_flags_explicit
);
1905 /* The i386 ABI does not specify a red zone. It still makes sense to use it
1906 when the programmer takes care to keep the stack from being destroyed. */
1907 if (!(target_flags_explicit
& MASK_NO_RED_ZONE
))
1908 target_flags
|= MASK_NO_RED_ZONE
;
1911 /* Accept -msseregparm only if at least SSE support is enabled. */
1912 if (TARGET_SSEREGPARM
1914 error ("-msseregparm used without SSE enabled");
1916 ix86_fpmath
= TARGET_FPMATH_DEFAULT
;
1918 if (ix86_fpmath_string
!= 0)
1920 if (! strcmp (ix86_fpmath_string
, "387"))
1921 ix86_fpmath
= FPMATH_387
;
1922 else if (! strcmp (ix86_fpmath_string
, "sse"))
1926 warning (0, "SSE instruction set disabled, using 387 arithmetics");
1927 ix86_fpmath
= FPMATH_387
;
1930 ix86_fpmath
= FPMATH_SSE
;
1932 else if (! strcmp (ix86_fpmath_string
, "387,sse")
1933 || ! strcmp (ix86_fpmath_string
, "sse,387"))
1937 warning (0, "SSE instruction set disabled, using 387 arithmetics");
1938 ix86_fpmath
= FPMATH_387
;
1940 else if (!TARGET_80387
)
1942 warning (0, "387 instruction set disabled, using SSE arithmetics");
1943 ix86_fpmath
= FPMATH_SSE
;
1946 ix86_fpmath
= FPMATH_SSE
| FPMATH_387
;
1949 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string
);
1952 /* If the i387 is disabled, then do not return values in it. */
1954 target_flags
&= ~MASK_FLOAT_RETURNS
;
1956 if ((x86_accumulate_outgoing_args
& TUNEMASK
)
1957 && !(target_flags_explicit
& MASK_ACCUMULATE_OUTGOING_ARGS
)
1959 target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
1961 /* ??? Unwind info is not correct around the CFG unless either a frame
1962 pointer is present or M_A_O_A is set. Fixing this requires rewriting
1963 unwind info generation to be aware of the CFG and propagating states
around edges. */
1965 if ((flag_unwind_tables
|| flag_asynchronous_unwind_tables
1966 || flag_exceptions
|| flag_non_call_exceptions
)
1967 && flag_omit_frame_pointer
1968 && !(target_flags
& MASK_ACCUMULATE_OUTGOING_ARGS
))
1970 if (target_flags_explicit
& MASK_ACCUMULATE_OUTGOING_ARGS
)
1971 warning (0, "unwind tables currently require either a frame pointer "
1972 "or -maccumulate-outgoing-args for correctness");
1973 target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
1976 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1979 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix
, "LX", 0);
1980 p
= strchr (internal_label_prefix
, 'X');
1981 internal_label_prefix_len
= p
- internal_label_prefix
;
1985 /* When scheduling description is not available, disable scheduler pass
1986 so it won't slow down the compilation and make x87 code slower. */
1987 if (!TARGET_SCHEDULE
)
1988 flag_schedule_insns_after_reload
= flag_schedule_insns
= 0;
1991 /* switch to the appropriate section for output of DECL.
1992 DECL is either a `VAR_DECL' node or a constant of some sort.
1993 RELOC indicates whether forming the initial value of DECL requires
1994 link-time relocations. */
/* NOTE(review): lossy extraction -- each line below starts with its original
   i386.c line number; statements are split across lines and interior lines
   (opening/closing braces, break statements, several case labels) are
   missing, so this text does not compile as-is.
   Visible intent: choose the output section for DECL.  For medium-model
   x86-64 (ix86_cmodel == CM_MEDIUM or CM_MEDIUM_PIC) objects that
   ix86_in_large_data_p accepts are routed to the .ldata* family of sections
   (with SECTION_BSS flagged for BSS-like categories); everything else falls
   through to default_elf_select_section.  */
1997 x86_64_elf_select_section (tree decl
, int reloc
,
1998 unsigned HOST_WIDE_INT align
)
2000 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
2001 && ix86_in_large_data_p (decl
))
2003 const char *sname
= NULL
;
2004 unsigned int flags
= SECTION_WRITE
;
/* Dispatch on the generic section category of DECL; each writable
   category gets a .ldata* section name.  (Several case labels and the
   break statements between the cases were lost in extraction.)  */
2005 switch (categorize_decl_for_section (decl
, reloc
, flag_pic
))
2010 case SECCAT_DATA_REL
:
2011 sname
= ".ldata.rel";
2013 case SECCAT_DATA_REL_LOCAL
:
2014 sname
= ".ldata.rel.local";
2016 case SECCAT_DATA_REL_RO
:
2017 sname
= ".ldata.rel.ro";
2019 case SECCAT_DATA_REL_RO_LOCAL
:
2020 sname
= ".ldata.rel.ro.local";
2024 flags
|= SECTION_BSS
;
2027 case SECCAT_RODATA_MERGE_STR
:
2028 case SECCAT_RODATA_MERGE_STR_INIT
:
2029 case SECCAT_RODATA_MERGE_CONST
:
2033 case SECCAT_SRODATA
:
2040 /* We don't split these for medium model. Place them into
2041 default sections and hope for best. */
2046 /* We might get called with string constants, but get_named_section
2047 doesn't like them as they are not DECLs. Also, we need to set
2048 flags in that case. */
2050 return get_section (sname
, flags
, NULL
);
2051 return get_named_section (decl
, sname
, reloc
);
/* Not medium-model large data: use the generic ELF section selector.  */
2054 return default_elf_select_section (decl
, reloc
, align
);
2057 /* Build up a unique section name, expressed as a
2058 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2059 RELOC indicates whether the initial value of EXP requires
2060 link-time relocations. */
/* NOTE(review): lossy extraction -- original line numbers embedded,
   statements split across lines, interior lines (braces, breaks, some
   declarations such as those of plen/nlen/name/string) missing.
   Visible intent: build a unique section name for DECL.  For medium-model
   large data the name is a .ldata*/.lbss*/.lrodata* (or .gnu.linkonce.l*)
   prefix concatenated with DECL's stripped assembler name, stored via
   DECL_SECTION_NAME; otherwise default_unique_section is used.  */
2063 x86_64_elf_unique_section (tree decl
, int reloc
)
2065 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
2066 && ix86_in_large_data_p (decl
))
2068 const char *prefix
= NULL
;
2069 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2070 bool one_only
= DECL_ONE_ONLY (decl
) && !HAVE_COMDAT_GROUP
;
2072 switch (categorize_decl_for_section (decl
, reloc
, flag_pic
))
2075 case SECCAT_DATA_REL
:
2076 case SECCAT_DATA_REL_LOCAL
:
2077 case SECCAT_DATA_REL_RO
:
2078 case SECCAT_DATA_REL_RO_LOCAL
:
2079 prefix
= one_only
? ".gnu.linkonce.ld." : ".ldata.";
2082 prefix
= one_only
? ".gnu.linkonce.lb." : ".lbss.";
2085 case SECCAT_RODATA_MERGE_STR
:
2086 case SECCAT_RODATA_MERGE_STR_INIT
:
2087 case SECCAT_RODATA_MERGE_CONST
:
2088 prefix
= one_only
? ".gnu.linkonce.lr." : ".lrodata.";
2090 case SECCAT_SRODATA
:
2097 /* We don't split these for medium model. Place them into
2098 default sections and hope for best. */
/* Concatenate prefix + stripped assembler name into a stack buffer
   and record it as the decl's section name.  (The declarations of
   plen, nlen, name and string were lost in extraction.)  */
2106 plen
= strlen (prefix
);
2108 name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl
));
2109 name
= targetm
.strip_name_encoding (name
);
2110 nlen
= strlen (name
);
2112 string
= alloca (nlen
+ plen
+ 1);
2113 memcpy (string
, prefix
, plen
);
2114 memcpy (string
+ plen
, name
, nlen
+ 1);
2116 DECL_SECTION_NAME (decl
) = build_string (nlen
+ plen
, string
);
/* Non-large data: defer to the generic unique-section machinery.  */
2120 default_unique_section (decl
, reloc
);
2123 #ifdef COMMON_ASM_OP
2124 /* This says how to output assembler code to declare an
2125 uninitialized external linkage data object.
2127 For medium model x86-64 we need to use .largecomm opcode for
/* NOTE(review): lossy extraction -- original line numbers embedded,
   statements split, interior lines (parameter 'align', braces, else)
   missing.
   Visible intent: emit the assembler directive for an aligned common
   symbol.  Medium-model objects larger than ix86_section_threshold use
   the .largecomm pseudo-op; everything else uses COMMON_ASM_OP.  */
2130 x86_elf_aligned_common (FILE *file
,
2131 const char *name
, unsigned HOST_WIDE_INT size
,
2134 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
2135 && size
> (unsigned int)ix86_section_threshold
)
2136 fprintf (file
, ".largecomm\t");
2138 fprintf (file
, "%s", COMMON_ASM_OP
);
2139 assemble_name (file
, name
);
/* Directive operands: "name,size,alignment-in-bytes".  */
2140 fprintf (file
, ","HOST_WIDE_INT_PRINT_UNSIGNED
",%u\n",
2141 size
, align
/ BITS_PER_UNIT
);
2144 /* Utility function for targets to use in implementing
2145 ASM_OUTPUT_ALIGNED_BSS. */
/* NOTE(review): lossy extraction -- original line numbers embedded,
   statements split, interior lines (parameter 'align', braces, else)
   missing.
   Visible intent: implement ASM_OUTPUT_ALIGNED_BSS.  Medium-model objects
   larger than ix86_section_threshold go into the named .lbss section;
   otherwise the regular bss section is used.  Then emit alignment, the
   object's label (via ASM_DECLARE_OBJECT_NAME when available), and the
   zero-fill skip.  */
2148 x86_output_aligned_bss (FILE *file
, tree decl ATTRIBUTE_UNUSED
,
2149 const char *name
, unsigned HOST_WIDE_INT size
,
2152 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
2153 && size
> (unsigned int)ix86_section_threshold
)
2154 switch_to_section (get_named_section (decl
, ".lbss", 0));
2156 switch_to_section (bss_section
);
2157 ASM_OUTPUT_ALIGN (file
, floor_log2 (align
/ BITS_PER_UNIT
));
2158 #ifdef ASM_DECLARE_OBJECT_NAME
2159 last_assemble_variable_decl
= decl
;
2160 ASM_DECLARE_OBJECT_NAME (file
, name
, decl
);
2162 /* Standard thing is just output label for the object. */
2163 ASM_OUTPUT_LABEL (file
, name
);
2164 #endif /* ASM_DECLARE_OBJECT_NAME */
/* Reserve at least one byte so the label refers to real storage.  */
2165 ASM_OUTPUT_SKIP (file
, size
? size
: 1);
/* NOTE(review): lossy extraction -- original line numbers embedded,
   statements split, interior lines (braces, the conditions guarding the
   flag assignments, #endif lines) missing.
   Visible intent: set i386 optimization defaults.  Several flags are set
   to the sentinel value 2 because the real default depends on TARGET_64BIT,
   which is not yet known here; override_options resolves them later.  */
2170 optimization_options (int level
, int size ATTRIBUTE_UNUSED
)
2172 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
2173 make the problem with not enough registers even worse. */
2174 #ifdef INSN_SCHEDULING
2176 flag_schedule_insns
= 0;
2180 /* The Darwin libraries never set errno, so we might as well
2181 avoid calling them when that's the only reason we would. */
2182 flag_errno_math
= 0;
2184 /* The default values of these switches depend on the TARGET_64BIT
2185 that is not known at this moment. Mark these values with 2 and
2186 let the user override these. In case there is no command line option
2187 specifying them, we will set the defaults in override_options. */
2189 flag_omit_frame_pointer
= 2;
2190 flag_pcc_struct_return
= 2;
2191 flag_asynchronous_unwind_tables
= 2;
2192 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
2193 SUBTARGET_OPTIMIZATION_OPTIONS
;
2197 /* Table of valid machine attributes. */
/* NOTE(review): lossy extraction -- original line numbers embedded,
   entries split across lines, the opening '{', some #endif lines, and the
   closing '};' missing.
   Visible intent: the i386 machine-attribute table.  The calling-convention
   attributes (stdcall, fastcall, cdecl, regparm, sseregparm,
   force_align_arg_pointer) all share ix86_handle_cconv_attribute as their
   handler; DLL and struct-layout attributes have their own handlers.  */
2198 const struct attribute_spec ix86_attribute_table
[] =
2200 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
2201 /* Stdcall attribute says callee is responsible for popping arguments
2202 if they are not variable. */
2203 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute
},
2204 /* Fastcall attribute says callee is responsible for popping arguments
2205 if they are not variable. */
2206 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute
},
2207 /* Cdecl attribute says the callee is a normal C declaration */
2208 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute
},
2209 /* Regparm attribute specifies how many integer arguments are to be
2210 passed in registers. */
2211 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute
},
2212 /* Sseregparm attribute says we are using x86_64 calling conventions
2213 for FP arguments. */
2214 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute
},
2215 /* force_align_arg_pointer says this function realigns the stack at entry. */
2216 { (const char *)&ix86_force_align_arg_pointer_string
, 0, 0,
2217 false, true, true, ix86_handle_cconv_attribute
},
2218 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2219 { "dllimport", 0, 0, false, false, false, handle_dll_attribute
},
2220 { "dllexport", 0, 0, false, false, false, handle_dll_attribute
},
2221 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute
},
2223 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute
},
2224 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute
},
2225 #ifdef SUBTARGET_ATTRIBUTE_TABLE
2226 SUBTARGET_ATTRIBUTE_TABLE
,
/* Sentinel entry terminating the table.  */
2228 { NULL
, 0, 0, false, false, false, NULL
}
2231 /* Decide whether we can make a sibling call to a function. DECL is the
2232 declaration of the function being targeted by the call and EXP is the
2233 CALL_EXPR representing the call. */
/* NOTE(review): lossy extraction -- original line numbers embedded,
   statements split, interior lines (braces, 'return false'/'return true'
   statements after each rejection test, local declarations of a/b/func/
   type) missing.
   Visible intent: decide whether a call may be turned into a sibling
   (tail) call.  Rejections visible here: 32-bit PIC calls needing the PLT
   (%ebx must stay live), mismatched return-value locations (x87 stack vs
   register), indirect 32-bit calls whose regparm leaves no call-clobbered
   register for the target address, dllimport'd functions, and functions
   that force-realign the stack.  */
2236 ix86_function_ok_for_sibcall (tree decl
, tree exp
)
2241 /* If we are generating position-independent code, we cannot sibcall
2242 optimize any indirect call, or a direct call to a global function,
2243 as the PLT requires %ebx be live. */
2244 if (!TARGET_64BIT
&& flag_pic
&& (!decl
|| !targetm
.binds_local_p (decl
)))
2251 func
= TREE_TYPE (TREE_OPERAND (exp
, 0));
2252 if (POINTER_TYPE_P (func
))
2253 func
= TREE_TYPE (func
);
2256 /* Check that the return value locations are the same. Like
2257 if we are returning floats on the 80387 register stack, we cannot
2258 make a sibcall from a function that doesn't return a float to a
2259 function that does or, conversely, from a function that does return
2260 a float to a function that doesn't; the necessary stack adjustment
2261 would not be executed. This is also the place we notice
2262 differences in the return value ABI. Note that it is ok for one
2263 of the functions to have void return type as long as the return
2264 value of the other is passed in a register. */
2265 a
= ix86_function_value (TREE_TYPE (exp
), func
, false);
2266 b
= ix86_function_value (TREE_TYPE (DECL_RESULT (cfun
->decl
)),
2268 if (STACK_REG_P (a
) || STACK_REG_P (b
))
2270 if (!rtx_equal_p (a
, b
))
2273 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun
->decl
))))
2275 else if (!rtx_equal_p (a
, b
))
2278 /* If this call is indirect, we'll need to be able to use a call-clobbered
2279 register for the address of the target function. Make sure that all
2280 such registers are not used for passing parameters. */
2281 if (!decl
&& !TARGET_64BIT
)
2285 /* We're looking at the CALL_EXPR, we need the type of the function. */
2286 type
= TREE_OPERAND (exp
, 0); /* pointer expression */
2287 type
= TREE_TYPE (type
); /* pointer type */
2288 type
= TREE_TYPE (type
); /* function type */
2290 if (ix86_function_regparm (type
, NULL
) >= 3)
2292 /* ??? Need to count the actual number of registers to be used,
2293 not the possible number of registers. Fix later. */
2298 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2299 /* Dllimport'd functions are also called indirectly. */
2300 if (decl
&& DECL_DLLIMPORT_P (decl
)
2301 && ix86_function_regparm (TREE_TYPE (decl
), NULL
) >= 3)
2305 /* If we forced aligned the stack, then sibcalling would unalign the
2306 stack, which may break the called function. */
2307 if (cfun
->machine
->force_align_arg_pointer
)
2310 /* Otherwise okay. That also includes certain types of indirect calls. */
2314 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
2315 calling convention attributes;
2316 arguments as in struct attribute_spec.handler. */
/* NOTE(review): lossy extraction -- original line numbers embedded,
   statements split, interior lines (braces, 'return NULL_TREE' statements,
   the 'tree args' / 'bool *no_add_attrs' parameters, TARGET_64BIT guard
   branches) missing.
   Visible intent: shared handler for the calling-convention attributes.
   It rejects non-function targets, validates regparm's integer argument
   against REGPARM_MAX (and against REGPARM_MAX-1 when combined with
   force_align_arg_pointer, whose prologue clobbers a register), and
   diagnoses every incompatible attribute pairing (fastcall/cdecl,
   fastcall/stdcall, fastcall/regparm, stdcall/cdecl).  */
2319 ix86_handle_cconv_attribute (tree
*node
, tree name
,
2321 int flags ATTRIBUTE_UNUSED
,
2324 if (TREE_CODE (*node
) != FUNCTION_TYPE
2325 && TREE_CODE (*node
) != METHOD_TYPE
2326 && TREE_CODE (*node
) != FIELD_DECL
2327 && TREE_CODE (*node
) != TYPE_DECL
)
2329 warning (OPT_Wattributes
, "%qs attribute only applies to functions",
2330 IDENTIFIER_POINTER (name
));
2331 *no_add_attrs
= true;
2335 /* Can combine regparm with all attributes but fastcall. */
2336 if (is_attribute_p ("regparm", name
))
2340 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
2342 error ("fastcall and regparm attributes are not compatible");
2345 cst
= TREE_VALUE (args
);
2346 if (TREE_CODE (cst
) != INTEGER_CST
)
2348 warning (OPT_Wattributes
,
2349 "%qs attribute requires an integer constant argument",
2350 IDENTIFIER_POINTER (name
));
2351 *no_add_attrs
= true;
2353 else if (compare_tree_int (cst
, REGPARM_MAX
) > 0)
2355 warning (OPT_Wattributes
, "argument to %qs attribute larger than %d",
2356 IDENTIFIER_POINTER (name
), REGPARM_MAX
);
2357 *no_add_attrs
= true;
2361 && lookup_attribute (ix86_force_align_arg_pointer_string
,
2362 TYPE_ATTRIBUTES (*node
))
2363 && compare_tree_int (cst
, REGPARM_MAX
-1))
2365 error ("%s functions limited to %d register parameters",
2366 ix86_force_align_arg_pointer_string
, REGPARM_MAX
-1);
2374 warning (OPT_Wattributes
, "%qs attribute ignored",
2375 IDENTIFIER_POINTER (name
));
2376 *no_add_attrs
= true;
2380 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
2381 if (is_attribute_p ("fastcall", name
))
2383 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
2385 error ("fastcall and cdecl attributes are not compatible");
2387 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
2389 error ("fastcall and stdcall attributes are not compatible");
2391 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node
)))
2393 error ("fastcall and regparm attributes are not compatible");
2397 /* Can combine stdcall with fastcall (redundant), regparm and
   sseregparm. */
2399 else if (is_attribute_p ("stdcall", name
))
2401 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
2403 error ("stdcall and cdecl attributes are not compatible");
2405 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
2407 error ("stdcall and fastcall attributes are not compatible");
2411 /* Can combine cdecl with regparm and sseregparm. */
2412 else if (is_attribute_p ("cdecl", name
))
2414 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
2416 error ("stdcall and cdecl attributes are not compatible");
2418 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
2420 error ("fastcall and cdecl attributes are not compatible");
2424 /* Can combine sseregparm with all attributes. */
2429 /* Return 0 if the attributes for two types are incompatible, 1 if they
2430 are compatible, and 2 if they are nearly compatible (which causes a
2431 warning to be generated). */
/* NOTE(review): lossy extraction -- original line numbers embedded,
   statements split, interior lines (braces and the 'return 0;'/'return 1;'
   results after each comparison) missing.
   Visible intent: compare the calling-convention attributes of two
   function types; a mismatch in fastcall, regparm count, sseregparm, or
   the cdecl/stdcall-sensitive return convention makes them incompatible.  */
2434 ix86_comp_type_attributes (tree type1
, tree type2
)
2436 /* Check for mismatch of non-default calling convention. */
2437 const char *const rtdstr
= TARGET_RTD
? "cdecl" : "stdcall";
2439 if (TREE_CODE (type1
) != FUNCTION_TYPE
)
2442 /* Check for mismatched fastcall/regparm types. */
2443 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1
))
2444 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2
)))
2445 || (ix86_function_regparm (type1
, NULL
)
2446 != ix86_function_regparm (type2
, NULL
)))
2449 /* Check for mismatched sseregparm types. */
2450 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1
))
2451 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2
)))
2454 /* Check for mismatched return types (cdecl vs stdcall). */
2455 if (!lookup_attribute (rtdstr
, TYPE_ATTRIBUTES (type1
))
2456 != !lookup_attribute (rtdstr
, TYPE_ATTRIBUTES (type2
)))
2462 /* Return the regparm value for a function with the indicated TYPE and DECL.
2463 DECL may be NULL when calling function indirectly
2464 or considering a libcall. */
/* NOTE(review): lossy extraction -- original line numbers embedded,
   statements split, interior lines (braces, the 'attr' declaration, the
   fastcall 'return 2', 'break'/'return' statements, the 'return regparm;'
   tail) missing.
   Visible intent: compute how many integer arguments go in registers.
   Starts from the global ix86_regparm, honors explicit regparm/fastcall
   attributes, and for 32-bit local (unit-at-a-time, non-profiled)
   functions tries to raise regparm automatically, backing off for global
   register variables, nested functions needing a static chain, and
   stack-realigning functions whose prologue clobbers %ecx.  */
2467 ix86_function_regparm (tree type
, tree decl
)
2470 int regparm
= ix86_regparm
;
2471 bool user_convention
= false;
2475 attr
= lookup_attribute ("regparm", TYPE_ATTRIBUTES (type
));
2478 regparm
= TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr
)));
2479 user_convention
= true;
2482 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type
)))
2485 user_convention
= true;
2488 /* Use register calling convention for local functions when possible. */
2489 if (!TARGET_64BIT
&& !user_convention
&& decl
2490 && flag_unit_at_a_time
&& !profile_flag
)
2492 struct cgraph_local_info
*i
= cgraph_local_info (decl
);
2495 int local_regparm
, globals
= 0, regno
;
2497 /* Make sure no regparm register is taken by a global register
   variable. */
2499 for (local_regparm
= 0; local_regparm
< 3; local_regparm
++)
2500 if (global_regs
[local_regparm
])
2502 /* We can't use regparm(3) for nested functions as these use
2503 static chain pointer in third argument. */
2504 if (local_regparm
== 3
2505 && decl_function_context (decl
)
2506 && !DECL_NO_STATIC_CHAIN (decl
))
2508 /* If the function realigns its stackpointer, the
2509 prologue will clobber %ecx. If we've already
2510 generated code for the callee, the callee
2511 DECL_STRUCT_FUNCTION is gone, so we fall back to
2512 scanning the attributes for the self-realigning
   property. */
2514 if ((DECL_STRUCT_FUNCTION (decl
)
2515 && DECL_STRUCT_FUNCTION (decl
)->machine
->force_align_arg_pointer
)
2516 || (!DECL_STRUCT_FUNCTION (decl
)
2517 && lookup_attribute (ix86_force_align_arg_pointer_string
,
2518 TYPE_ATTRIBUTES (TREE_TYPE (decl
)))))
2520 /* Each global register variable increases register pressure,
2521 so the more global reg vars there are, the smaller regparm
2522 optimization use, unless requested by the user explicitly. */
2523 for (regno
= 0; regno
< 6; regno
++)
2524 if (global_regs
[regno
])
2527 = globals
< local_regparm
? local_regparm
- globals
: 0;
2529 if (local_regparm
> regparm
)
2530 regparm
= local_regparm
;
2537 /* Return 1 or 2, if we can pass up to 8 SFmode (1) and DFmode (2) arguments
2538 in SSE registers for a function with the indicated TYPE and DECL.
2539 DECL may be NULL when calling function indirectly
2540 or considering a libcall. Otherwise return 0. */
/* NOTE(review): lossy extraction -- original line numbers embedded,
   statements split, interior lines (braces, part of the TARGET_SSEREGPARM
   condition, 'return 2'/'return 0' statements, the local-function check on
   i->local) missing.
   Visible intent: return 2 (SFmode and DFmode in SSE registers), 1
   (SFmode only), or 0.  The sseregparm attribute forces SSE passing and
   is an error without SSE; 32-bit local functions compiled with
   -mfpmath=sse also get SSE argument passing (2 with SSE2, else 1).  */
2543 ix86_function_sseregparm (tree type
, tree decl
)
2545 /* Use SSE registers to pass SFmode and DFmode arguments if requested
2546 by the sseregparm attribute. */
2547 if (TARGET_SSEREGPARM
2549 && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type
))))
2554 error ("Calling %qD with attribute sseregparm without "
2555 "SSE/SSE2 enabled", decl
);
2557 error ("Calling %qT with attribute sseregparm without "
2558 "SSE/SSE2 enabled", type
);
2565 /* For local functions, pass SFmode (and DFmode for SSE2) arguments
2566 in SSE registers even for 32-bit mode and not just 3, but up to
2567 8 SSE arguments in registers. */
2568 if (!TARGET_64BIT
&& decl
2569 && TARGET_SSE_MATH
&& flag_unit_at_a_time
&& !profile_flag
)
2571 struct cgraph_local_info
*i
= cgraph_local_info (decl
);
2573 return TARGET_SSE2
? 2 : 1;
2579 /* Return true if EAX is live at the start of the function. Used by
2580 ix86_expand_prologue to determine if we need special help before
2581 calling allocate_stack_worker. */
/* NOTE(review): lossy extraction -- original line numbers embedded and
   the surrounding braces missing.
   Visible intent: report whether hard register 0 (eax) is live at function
   entry by querying the entry block's global_live_at_end set, instead of
   deriving it from the argument-passing convention.  */
2584 ix86_eax_live_at_start_p (void)
2586 /* Cheat. Don't bother working forward from ix86_function_regparm
2587 to the function type to whether an actual argument is located in
2588 eax. Instead just look at cfg info, which is still close enough
2589 to correct at this point. This gives false positives for broken
2590 functions that might use uninitialized data that happens to be
2591 allocated in eax, but who cares? */
2592 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR
->il
.rtl
->global_live_at_end
, 0);
2595 /* Value is the number of bytes of arguments automatically
2596 popped when returning from a subroutine call.
2597 FUNDECL is the declaration node of the function (as a tree),
2598 FUNTYPE is the data type of the function (as a tree),
2599 or for a library call it is an identifier node for the subroutine name.
2600 SIZE is the number of bytes of arguments passed on the stack.
2602 On the 80386, the RTD insn may be used to pop them if the number
2603 of args is fixed, but if the number is variable then the caller
2604 must pop them all. RTD can't be used for library calls now
2605 because the library is compiled with the Unix compiler.
2606 Use of RTD is a selectable option, since it is incompatible with
2607 standard Unix calling sequences. If the option is not selected,
2608 the caller must always pop the args.
2610 The attribute stdcall is equivalent to RTD on a per module basis. */
/* NOTE(review): lossy extraction -- original line numbers embedded,
   statements split, interior lines (braces, 'return size;'/'return 0;'
   results, part of the varargs condition) missing.
   Visible intent: compute how many argument bytes the callee pops.
   -mrtd (unless cdecl) and the stdcall/fastcall attributes make the
   callee pop SIZE bytes for fixed-argument functions; a stack-passed
   hidden aggregate-return pointer adds GET_MODE_SIZE (Pmode).  */
2613 ix86_return_pops_args (tree fundecl
, tree funtype
, int size
)
2615 int rtd
= TARGET_RTD
&& (!fundecl
|| TREE_CODE (fundecl
) != IDENTIFIER_NODE
);
2617 /* Cdecl functions override -mrtd, and never pop the stack. */
2618 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype
))) {
2620 /* Stdcall and fastcall functions will pop the stack if not
   variable args. */
2622 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype
))
2623 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype
)))
2627 && (TYPE_ARG_TYPES (funtype
) == NULL_TREE
2628 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype
)))
2629 == void_type_node
)))
2633 /* Lose any fake structure return argument if it is passed on the stack. */
2634 if (aggregate_value_p (TREE_TYPE (funtype
), fundecl
)
2636 && !KEEP_AGGREGATE_RETURN_POINTER
)
2638 int nregs
= ix86_function_regparm (funtype
, fundecl
);
2641 return GET_MODE_SIZE (Pmode
);
2647 /* Argument support functions. */
2649 /* Return true when register may be used to pass function parameters. */
/* NOTE(review): lossy extraction -- original line numbers embedded,
   statements split, interior lines (braces, the TARGET_64BIT guard, the
   'int i' declaration, 'return' statements) missing.
   Visible intent: predicate for registers usable for argument passing.
   The first visible return covers 32-bit mode (integer regparm regs, MMX
   and SSE argument registers); the remaining tests cover 64-bit mode (SSE
   argument registers, rax as the varargs hidden argument, and the
   x86_64_int_parameter_registers table).  */
2651 ix86_function_arg_regno_p (int regno
)
2655 return (regno
< REGPARM_MAX
2656 || (TARGET_MMX
&& MMX_REGNO_P (regno
)
2657 && (regno
< FIRST_MMX_REG
+ MMX_REGPARM_MAX
))
2658 || (TARGET_SSE
&& SSE_REGNO_P (regno
)
2659 && (regno
< FIRST_SSE_REG
+ SSE_REGPARM_MAX
)));
2661 if (TARGET_SSE
&& SSE_REGNO_P (regno
)
2662 && (regno
< FIRST_SSE_REG
+ SSE_REGPARM_MAX
))
2664 /* RAX is used as hidden argument to va_arg functions. */
2667 for (i
= 0; i
< REGPARM_MAX
; i
++)
2668 if (regno
== x86_64_int_parameter_registers
[i
])
2673 /* Return if we do not know how to pass TYPE solely in registers. */
/* NOTE(review): lossy extraction -- original line numbers embedded,
   statements split, braces and the 'return true;' after the first test
   missing.
   Visible intent: return whether TYPE cannot be passed solely in
   registers: variable-sized/padded types always go on the stack, and in
   32-bit mode TImode non-vector aggregates do too.  */
2676 ix86_must_pass_in_stack (enum machine_mode mode
, tree type
)
2678 if (must_pass_in_stack_var_size_or_pad (mode
, type
))
2681 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
2682 The layout_type routine is crafty and tries to trick us into passing
2683 currently unsupported vector types on the stack by using TImode. */
2684 return (!TARGET_64BIT
&& mode
== TImode
2685 && type
&& TREE_CODE (type
) != VECTOR_TYPE
);
2688 /* Initialize a variable CUM of type CUMULATIVE_ARGS
2689 for a call to a function whose data type is FNTYPE.
2690 For a library call, FNTYPE is 0. */
/* NOTE(review): lossy extraction -- original line numbers embedded,
   statements split, interior lines (braces, the 'fndecl' parameter, the
   '*cum = zero_cum;' reset, TARGET_64BIT guards, else branches, the
   fastcall register assignments) missing.
   Visible intent: initialize the CUMULATIVE_ARGS record for a call:
   set integer/SSE/MMX register counts from ix86_regparm and the *_REGPARM_MAX
   limits, apply fastcall/regparm/sseregparm conventions for 32-bit calls,
   and detect varargs functions (last argument type not void_type_node, or
   an unprototyped call), which disables register passing in 32-bit mode
   and sets maybe_vaarg.  */
2693 init_cumulative_args (CUMULATIVE_ARGS
*cum
, /* Argument info to initialize */
2694 tree fntype
, /* tree ptr for function decl */
2695 rtx libname
, /* SYMBOL_REF of library name or 0 */
2698 static CUMULATIVE_ARGS zero_cum
;
2699 tree param
, next_param
;
2701 if (TARGET_DEBUG_ARG
)
2703 fprintf (stderr
, "\ninit_cumulative_args (");
2705 fprintf (stderr
, "fntype code = %s, ret code = %s",
2706 tree_code_name
[(int) TREE_CODE (fntype
)],
2707 tree_code_name
[(int) TREE_CODE (TREE_TYPE (fntype
))]);
2709 fprintf (stderr
, "no fntype");
2712 fprintf (stderr
, ", libname = %s", XSTR (libname
, 0));
2717 /* Set up the number of registers to use for passing arguments. */
2718 cum
->nregs
= ix86_regparm
;
2720 cum
->sse_nregs
= SSE_REGPARM_MAX
;
2722 cum
->mmx_nregs
= MMX_REGPARM_MAX
;
2723 cum
->warn_sse
= true;
2724 cum
->warn_mmx
= true;
2725 cum
->maybe_vaarg
= false;
2727 /* Use ecx and edx registers if function has fastcall attribute,
2728 else look for regparm information. */
2729 if (fntype
&& !TARGET_64BIT
)
2731 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype
)))
2737 cum
->nregs
= ix86_function_regparm (fntype
, fndecl
);
2740 /* Set up the number of SSE registers used for passing SFmode
2741 and DFmode arguments. Warn for mismatching ABI. */
2742 cum
->float_in_sse
= ix86_function_sseregparm (fntype
, fndecl
);
2744 /* Determine if this function has variable arguments. This is
2745 indicated by the last argument being 'void_type_mode' if there
2746 are no variable arguments. If there are variable arguments, then
2747 we won't pass anything in registers in 32-bit mode. */
2749 if (cum
->nregs
|| cum
->mmx_nregs
|| cum
->sse_nregs
)
2751 for (param
= (fntype
) ? TYPE_ARG_TYPES (fntype
) : 0;
2752 param
!= 0; param
= next_param
)
2754 next_param
= TREE_CHAIN (param
);
2755 if (next_param
== 0 && TREE_VALUE (param
) != void_type_node
)
2765 cum
->float_in_sse
= 0;
2767 cum
->maybe_vaarg
= true;
2771 if ((!fntype
&& !libname
)
2772 || (fntype
&& !TYPE_ARG_TYPES (fntype
)))
2773 cum
->maybe_vaarg
= true;
2775 if (TARGET_DEBUG_ARG
)
2776 fprintf (stderr
, ", nregs=%d )\n", cum
->nregs
);
2781 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
2782 But in the case of vector types, it is some vector mode.
2784 When we have only some of our vector isa extensions enabled, then there
2785 are some modes for which vector_mode_supported_p is false. For these
2786 modes, the generic vector support in gcc will choose some non-vector mode
2787 in order to implement the type. By computing the natural mode, we'll
2788 select the proper ABI location for the operand and not depend on whatever
2789 the middle-end decides to do with these vector types. */
/* NOTE(review): lossy extraction -- original line numbers embedded,
   statements split, interior lines (braces, else, the 'return mode;'
   results) missing.
   Visible intent: map TYPE to the vector machine mode the ABI should use.
   For 8- or 16-byte vector types whose TYPE_MODE is not itself a vector
   mode, walk the vector modes (starting from MIN_MODE_VECTOR_FLOAT or
   MIN_MODE_VECTOR_INT depending on the element type) looking for one with
   matching element mode and element count.  */
2791 static enum machine_mode
2792 type_natural_mode (tree type
)
2794 enum machine_mode mode
= TYPE_MODE (type
);
2796 if (TREE_CODE (type
) == VECTOR_TYPE
&& !VECTOR_MODE_P (mode
))
2798 HOST_WIDE_INT size
= int_size_in_bytes (type
);
2799 if ((size
== 8 || size
== 16)
2800 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
2801 && TYPE_VECTOR_SUBPARTS (type
) > 1)
2803 enum machine_mode innermode
= TYPE_MODE (TREE_TYPE (type
));
2805 if (TREE_CODE (TREE_TYPE (type
)) == REAL_TYPE
)
2806 mode
= MIN_MODE_VECTOR_FLOAT
;
2808 mode
= MIN_MODE_VECTOR_INT
;
2810 /* Get the mode which has this inner mode and number of units. */
2811 for (; mode
!= VOIDmode
; mode
= GET_MODE_WIDER_MODE (mode
))
2812 if (GET_MODE_NUNITS (mode
) == TYPE_VECTOR_SUBPARTS (type
)
2813 && GET_MODE_INNER (mode
) == innermode
)
2823 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
2824 this may not agree with the mode that the type system has chosen for the
2825 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
2826 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
/* NOTE(review): lossy extraction -- original line numbers embedded,
   statements split, interior lines (the 'unsigned int regno' parameter,
   'rtx tmp;' declaration, braces, else, 'return tmp;') missing.
   Visible intent: build the RTL for passing a value of natural mode MODE
   in register REGNO: a plain REG in ORIG_MODE when ORIG_MODE is not
   BLKmode, otherwise a one-element PARALLEL wrapping a REG in MODE at
   offset 0 (const0_rtx).  */
2829 gen_reg_or_parallel (enum machine_mode mode
, enum machine_mode orig_mode
,
2834 if (orig_mode
!= BLKmode
)
2835 tmp
= gen_rtx_REG (orig_mode
, regno
);
2838 tmp
= gen_rtx_REG (mode
, regno
);
2839 tmp
= gen_rtx_EXPR_LIST (VOIDmode
, tmp
, const0_rtx
);
2840 tmp
= gen_rtx_PARALLEL (orig_mode
, gen_rtvec (1, tmp
));
2846 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
2847 of this code is to classify each 8bytes of incoming argument by the register
2848 class and assign registers accordingly. */
2850 /* Return the union class of CLASS1 and CLASS2.
2851 See the x86-64 PS ABI for details. */
2853 static enum x86_64_reg_class
2854 merge_classes (enum x86_64_reg_class class1
, enum x86_64_reg_class class2
)
2856 /* Rule #1: If both classes are equal, this is the resulting class. */
2857 if (class1
== class2
)
2860 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
2862 if (class1
== X86_64_NO_CLASS
)
2864 if (class2
== X86_64_NO_CLASS
)
2867 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
2868 if (class1
== X86_64_MEMORY_CLASS
|| class2
== X86_64_MEMORY_CLASS
)
2869 return X86_64_MEMORY_CLASS
;
2871 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
2872 if ((class1
== X86_64_INTEGERSI_CLASS
&& class2
== X86_64_SSESF_CLASS
)
2873 || (class2
== X86_64_INTEGERSI_CLASS
&& class1
== X86_64_SSESF_CLASS
))
2874 return X86_64_INTEGERSI_CLASS
;
2875 if (class1
== X86_64_INTEGER_CLASS
|| class1
== X86_64_INTEGERSI_CLASS
2876 || class2
== X86_64_INTEGER_CLASS
|| class2
== X86_64_INTEGERSI_CLASS
)
2877 return X86_64_INTEGER_CLASS
;
2879 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
2881 if (class1
== X86_64_X87_CLASS
2882 || class1
== X86_64_X87UP_CLASS
2883 || class1
== X86_64_COMPLEX_X87_CLASS
2884 || class2
== X86_64_X87_CLASS
2885 || class2
== X86_64_X87UP_CLASS
2886 || class2
== X86_64_COMPLEX_X87_CLASS
)
2887 return X86_64_MEMORY_CLASS
;
2889 /* Rule #6: Otherwise class SSE is used. */
2890 return X86_64_SSE_CLASS
;
2893 /* Classify the argument of type TYPE and mode MODE.
2894 CLASSES will be filled by the register class used to pass each word
2895 of the operand. The number of words is returned. In case the parameter
2896 should be passed in memory, 0 is returned. As a special case for zero
2897 sized containers, classes[0] will be NO_CLASS and 1 is returned.
2899 BIT_OFFSET is used internally for handling records and specifies offset
2900 of the offset in bits modulo 256 to avoid overflow cases.
2902 See the x86-64 PS ABI for details.
2906 classify_argument (enum machine_mode mode
, tree type
,
2907 enum x86_64_reg_class classes
[MAX_CLASSES
], int bit_offset
)
2909 HOST_WIDE_INT bytes
=
2910 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
2911 int words
= (bytes
+ (bit_offset
% 64) / 8 + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
2913 /* Variable sized entities are always passed/returned in memory. */
2917 if (mode
!= VOIDmode
2918 && targetm
.calls
.must_pass_in_stack (mode
, type
))
2921 if (type
&& AGGREGATE_TYPE_P (type
))
2925 enum x86_64_reg_class subclasses
[MAX_CLASSES
];
2927 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
2931 for (i
= 0; i
< words
; i
++)
2932 classes
[i
] = X86_64_NO_CLASS
;
2934 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
2935 signalize memory class, so handle it as special case. */
2938 classes
[0] = X86_64_NO_CLASS
;
2942 /* Classify each field of record and merge classes. */
2943 switch (TREE_CODE (type
))
2946 /* For classes first merge in the field of the subclasses. */
2947 if (TYPE_BINFO (type
))
2949 tree binfo
, base_binfo
;
2952 for (binfo
= TYPE_BINFO (type
), basenum
= 0;
2953 BINFO_BASE_ITERATE (binfo
, basenum
, base_binfo
); basenum
++)
2956 int offset
= tree_low_cst (BINFO_OFFSET (base_binfo
), 0) * 8;
2957 tree type
= BINFO_TYPE (base_binfo
);
2959 num
= classify_argument (TYPE_MODE (type
),
2961 (offset
+ bit_offset
) % 256);
2964 for (i
= 0; i
< num
; i
++)
2966 int pos
= (offset
+ (bit_offset
% 64)) / 8 / 8;
2968 merge_classes (subclasses
[i
], classes
[i
+ pos
]);
2972 /* And now merge the fields of structure. */
2973 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
2975 if (TREE_CODE (field
) == FIELD_DECL
)
2979 if (TREE_TYPE (field
) == error_mark_node
)
2982 /* Bitfields are always classified as integer. Handle them
2983 early, since later code would consider them to be
2984 misaligned integers. */
2985 if (DECL_BIT_FIELD (field
))
2987 for (i
= (int_bit_position (field
) + (bit_offset
% 64)) / 8 / 8;
2988 i
< ((int_bit_position (field
) + (bit_offset
% 64))
2989 + tree_low_cst (DECL_SIZE (field
), 0)
2992 merge_classes (X86_64_INTEGER_CLASS
,
2997 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
2998 TREE_TYPE (field
), subclasses
,
2999 (int_bit_position (field
)
3000 + bit_offset
) % 256);
3003 for (i
= 0; i
< num
; i
++)
3006 (int_bit_position (field
) + (bit_offset
% 64)) / 8 / 8;
3008 merge_classes (subclasses
[i
], classes
[i
+ pos
]);
3016 /* Arrays are handled as small records. */
3019 num
= classify_argument (TYPE_MODE (TREE_TYPE (type
)),
3020 TREE_TYPE (type
), subclasses
, bit_offset
);
3024 /* The partial classes are now full classes. */
3025 if (subclasses
[0] == X86_64_SSESF_CLASS
&& bytes
!= 4)
3026 subclasses
[0] = X86_64_SSE_CLASS
;
3027 if (subclasses
[0] == X86_64_INTEGERSI_CLASS
&& bytes
!= 4)
3028 subclasses
[0] = X86_64_INTEGER_CLASS
;
3030 for (i
= 0; i
< words
; i
++)
3031 classes
[i
] = subclasses
[i
% num
];
3036 case QUAL_UNION_TYPE
:
3037 /* Unions are similar to RECORD_TYPE but offset is always 0.
3040 /* Unions are not derived. */
3041 gcc_assert (!TYPE_BINFO (type
)
3042 || !BINFO_N_BASE_BINFOS (TYPE_BINFO (type
)));
3043 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
3045 if (TREE_CODE (field
) == FIELD_DECL
)
3049 if (TREE_TYPE (field
) == error_mark_node
)
3052 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
3053 TREE_TYPE (field
), subclasses
,
3057 for (i
= 0; i
< num
; i
++)
3058 classes
[i
] = merge_classes (subclasses
[i
], classes
[i
]);
3067 /* Final merger cleanup. */
3068 for (i
= 0; i
< words
; i
++)
3070 /* If one class is MEMORY, everything should be passed in
3072 if (classes
[i
] == X86_64_MEMORY_CLASS
)
3075 /* The X86_64_SSEUP_CLASS should be always preceded by
3076 X86_64_SSE_CLASS. */
3077 if (classes
[i
] == X86_64_SSEUP_CLASS
3078 && (i
== 0 || classes
[i
- 1] != X86_64_SSE_CLASS
))
3079 classes
[i
] = X86_64_SSE_CLASS
;
3081 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
3082 if (classes
[i
] == X86_64_X87UP_CLASS
3083 && (i
== 0 || classes
[i
- 1] != X86_64_X87_CLASS
))
3084 classes
[i
] = X86_64_SSE_CLASS
;
3089 /* Compute alignment needed. We align all types to natural boundaries with
3090 exception of XFmode that is aligned to 64bits. */
3091 if (mode
!= VOIDmode
&& mode
!= BLKmode
)
3093 int mode_alignment
= GET_MODE_BITSIZE (mode
);
3096 mode_alignment
= 128;
3097 else if (mode
== XCmode
)
3098 mode_alignment
= 256;
3099 if (COMPLEX_MODE_P (mode
))
3100 mode_alignment
/= 2;
3101 /* Misaligned fields are always returned in memory. */
3102 if (bit_offset
% mode_alignment
)
3106 /* for V1xx modes, just use the base mode */
3107 if (VECTOR_MODE_P (mode
)
3108 && GET_MODE_SIZE (GET_MODE_INNER (mode
)) == bytes
)
3109 mode
= GET_MODE_INNER (mode
);
3111 /* Classification of atomic types. */
3116 classes
[0] = X86_64_SSE_CLASS
;
3119 classes
[0] = X86_64_SSE_CLASS
;
3120 classes
[1] = X86_64_SSEUP_CLASS
;
3129 if (bit_offset
+ GET_MODE_BITSIZE (mode
) <= 32)
3130 classes
[0] = X86_64_INTEGERSI_CLASS
;
3132 classes
[0] = X86_64_INTEGER_CLASS
;
3136 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
3141 if (!(bit_offset
% 64))
3142 classes
[0] = X86_64_SSESF_CLASS
;
3144 classes
[0] = X86_64_SSE_CLASS
;
3147 classes
[0] = X86_64_SSEDF_CLASS
;
3150 classes
[0] = X86_64_X87_CLASS
;
3151 classes
[1] = X86_64_X87UP_CLASS
;
3154 classes
[0] = X86_64_SSE_CLASS
;
3155 classes
[1] = X86_64_SSEUP_CLASS
;
3158 classes
[0] = X86_64_SSE_CLASS
;
3161 classes
[0] = X86_64_SSEDF_CLASS
;
3162 classes
[1] = X86_64_SSEDF_CLASS
;
3165 classes
[0] = X86_64_COMPLEX_X87_CLASS
;
3168 /* This modes is larger than 16 bytes. */
3176 classes
[0] = X86_64_SSE_CLASS
;
3177 classes
[1] = X86_64_SSEUP_CLASS
;
3183 classes
[0] = X86_64_SSE_CLASS
;
3189 gcc_assert (VECTOR_MODE_P (mode
));
3194 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode
)) == MODE_INT
);
3196 if (bit_offset
+ GET_MODE_BITSIZE (mode
) <= 32)
3197 classes
[0] = X86_64_INTEGERSI_CLASS
;
3199 classes
[0] = X86_64_INTEGER_CLASS
;
3200 classes
[1] = X86_64_INTEGER_CLASS
;
3201 return 1 + (bytes
> 8);
3205 /* Examine the argument and return set number of register required in each
3206 class. Return 0 iff parameter should be passed in memory. */
3208 examine_argument (enum machine_mode mode
, tree type
, int in_return
,
3209 int *int_nregs
, int *sse_nregs
)
3211 enum x86_64_reg_class
class[MAX_CLASSES
];
3212 int n
= classify_argument (mode
, type
, class, 0);
3218 for (n
--; n
>= 0; n
--)
3221 case X86_64_INTEGER_CLASS
:
3222 case X86_64_INTEGERSI_CLASS
:
3225 case X86_64_SSE_CLASS
:
3226 case X86_64_SSESF_CLASS
:
3227 case X86_64_SSEDF_CLASS
:
3230 case X86_64_NO_CLASS
:
3231 case X86_64_SSEUP_CLASS
:
3233 case X86_64_X87_CLASS
:
3234 case X86_64_X87UP_CLASS
:
3238 case X86_64_COMPLEX_X87_CLASS
:
3239 return in_return
? 2 : 0;
3240 case X86_64_MEMORY_CLASS
:
3246 /* Construct container for the argument used by GCC interface. See
3247 FUNCTION_ARG for the detailed description. */
3250 construct_container (enum machine_mode mode
, enum machine_mode orig_mode
,
3251 tree type
, int in_return
, int nintregs
, int nsseregs
,
3252 const int *intreg
, int sse_regno
)
3254 /* The following variables hold the static issued_error state. */
3255 static bool issued_sse_arg_error
;
3256 static bool issued_sse_ret_error
;
3257 static bool issued_x87_ret_error
;
3259 enum machine_mode tmpmode
;
3261 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
3262 enum x86_64_reg_class
class[MAX_CLASSES
];
3266 int needed_sseregs
, needed_intregs
;
3267 rtx exp
[MAX_CLASSES
];
3270 n
= classify_argument (mode
, type
, class, 0);
3271 if (TARGET_DEBUG_ARG
)
3274 fprintf (stderr
, "Memory class\n");
3277 fprintf (stderr
, "Classes:");
3278 for (i
= 0; i
< n
; i
++)
3280 fprintf (stderr
, " %s", x86_64_reg_class_name
[class[i
]]);
3282 fprintf (stderr
, "\n");
3287 if (!examine_argument (mode
, type
, in_return
, &needed_intregs
,
3290 if (needed_intregs
> nintregs
|| needed_sseregs
> nsseregs
)
3293 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
3294 some less clueful developer tries to use floating-point anyway. */
3295 if (needed_sseregs
&& !TARGET_SSE
)
3299 if (!issued_sse_ret_error
)
3301 error ("SSE register return with SSE disabled");
3302 issued_sse_ret_error
= true;
3305 else if (!issued_sse_arg_error
)
3307 error ("SSE register argument with SSE disabled");
3308 issued_sse_arg_error
= true;
3313 /* Likewise, error if the ABI requires us to return values in the
3314 x87 registers and the user specified -mno-80387. */
3315 if (!TARGET_80387
&& in_return
)
3316 for (i
= 0; i
< n
; i
++)
3317 if (class[i
] == X86_64_X87_CLASS
3318 || class[i
] == X86_64_X87UP_CLASS
3319 || class[i
] == X86_64_COMPLEX_X87_CLASS
)
3321 if (!issued_x87_ret_error
)
3323 error ("x87 register return with x87 disabled");
3324 issued_x87_ret_error
= true;
3329 /* First construct simple cases. Avoid SCmode, since we want to use
3330 single register to pass this type. */
3331 if (n
== 1 && mode
!= SCmode
)
3334 case X86_64_INTEGER_CLASS
:
3335 case X86_64_INTEGERSI_CLASS
:
3336 return gen_rtx_REG (mode
, intreg
[0]);
3337 case X86_64_SSE_CLASS
:
3338 case X86_64_SSESF_CLASS
:
3339 case X86_64_SSEDF_CLASS
:
3340 return gen_reg_or_parallel (mode
, orig_mode
, SSE_REGNO (sse_regno
));
3341 case X86_64_X87_CLASS
:
3342 case X86_64_COMPLEX_X87_CLASS
:
3343 return gen_rtx_REG (mode
, FIRST_STACK_REG
);
3344 case X86_64_NO_CLASS
:
3345 /* Zero sized array, struct or class. */
3350 if (n
== 2 && class[0] == X86_64_SSE_CLASS
&& class[1] == X86_64_SSEUP_CLASS
3352 return gen_rtx_REG (mode
, SSE_REGNO (sse_regno
));
3354 && class[0] == X86_64_X87_CLASS
&& class[1] == X86_64_X87UP_CLASS
)
3355 return gen_rtx_REG (XFmode
, FIRST_STACK_REG
);
3356 if (n
== 2 && class[0] == X86_64_INTEGER_CLASS
3357 && class[1] == X86_64_INTEGER_CLASS
3358 && (mode
== CDImode
|| mode
== TImode
|| mode
== TFmode
)
3359 && intreg
[0] + 1 == intreg
[1])
3360 return gen_rtx_REG (mode
, intreg
[0]);
3362 /* Otherwise figure out the entries of the PARALLEL. */
3363 for (i
= 0; i
< n
; i
++)
3367 case X86_64_NO_CLASS
:
3369 case X86_64_INTEGER_CLASS
:
3370 case X86_64_INTEGERSI_CLASS
:
3371 /* Merge TImodes on aligned occasions here too. */
3372 if (i
* 8 + 8 > bytes
)
3373 tmpmode
= mode_for_size ((bytes
- i
* 8) * BITS_PER_UNIT
, MODE_INT
, 0);
3374 else if (class[i
] == X86_64_INTEGERSI_CLASS
)
3378 /* We've requested 24 bytes we don't have mode for. Use DImode. */
3379 if (tmpmode
== BLKmode
)
3381 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
3382 gen_rtx_REG (tmpmode
, *intreg
),
3386 case X86_64_SSESF_CLASS
:
3387 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
3388 gen_rtx_REG (SFmode
,
3389 SSE_REGNO (sse_regno
)),
3393 case X86_64_SSEDF_CLASS
:
3394 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
3395 gen_rtx_REG (DFmode
,
3396 SSE_REGNO (sse_regno
)),
3400 case X86_64_SSE_CLASS
:
3401 if (i
< n
- 1 && class[i
+ 1] == X86_64_SSEUP_CLASS
)
3405 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
3406 gen_rtx_REG (tmpmode
,
3407 SSE_REGNO (sse_regno
)),
3409 if (tmpmode
== TImode
)
3418 /* Empty aligned struct, union or class. */
3422 ret
= gen_rtx_PARALLEL (mode
, rtvec_alloc (nexps
));
3423 for (i
= 0; i
< nexps
; i
++)
3424 XVECEXP (ret
, 0, i
) = exp
[i
];
3428 /* Update the data in CUM to advance over an argument
3429 of mode MODE and data type TYPE.
3430 (TYPE is null for libcalls where that information may not be available.) */
3433 function_arg_advance (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
3434 tree type
, int named
)
3437 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
3438 int words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
3441 mode
= type_natural_mode (type
);
3443 if (TARGET_DEBUG_ARG
)
3444 fprintf (stderr
, "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, "
3445 "mode=%s, named=%d)\n\n",
3446 words
, cum
->words
, cum
->nregs
, cum
->sse_nregs
,
3447 GET_MODE_NAME (mode
), named
);
3451 int int_nregs
, sse_nregs
;
3452 if (!examine_argument (mode
, type
, 0, &int_nregs
, &sse_nregs
))
3453 cum
->words
+= words
;
3454 else if (sse_nregs
<= cum
->sse_nregs
&& int_nregs
<= cum
->nregs
)
3456 cum
->nregs
-= int_nregs
;
3457 cum
->sse_nregs
-= sse_nregs
;
3458 cum
->regno
+= int_nregs
;
3459 cum
->sse_regno
+= sse_nregs
;
3462 cum
->words
+= words
;
3480 cum
->words
+= words
;
3481 cum
->nregs
-= words
;
3482 cum
->regno
+= words
;
3484 if (cum
->nregs
<= 0)
3492 if (cum
->float_in_sse
< 2)
3495 if (cum
->float_in_sse
< 1)
3506 if (!type
|| !AGGREGATE_TYPE_P (type
))
3508 cum
->sse_words
+= words
;
3509 cum
->sse_nregs
-= 1;
3510 cum
->sse_regno
+= 1;
3511 if (cum
->sse_nregs
<= 0)
3523 if (!type
|| !AGGREGATE_TYPE_P (type
))
3525 cum
->mmx_words
+= words
;
3526 cum
->mmx_nregs
-= 1;
3527 cum
->mmx_regno
+= 1;
3528 if (cum
->mmx_nregs
<= 0)
3539 /* Define where to put the arguments to a function.
3540 Value is zero to push the argument on the stack,
3541 or a hard register in which to store the argument.
3543 MODE is the argument's machine mode.
3544 TYPE is the data type of the argument (as a tree).
3545 This is null for libcalls where that information may
3547 CUM is a variable of type CUMULATIVE_ARGS which gives info about
3548 the preceding args and about the function being called.
3549 NAMED is nonzero if this argument is a named parameter
3550 (otherwise it is an extra parameter matching an ellipsis). */
3553 function_arg (CUMULATIVE_ARGS
*cum
, enum machine_mode orig_mode
,
3554 tree type
, int named
)
3556 enum machine_mode mode
= orig_mode
;
3559 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
3560 int words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
3561 static bool warnedsse
, warnedmmx
;
3563 /* To simplify the code below, represent vector types with a vector mode
3564 even if MMX/SSE are not active. */
3565 if (type
&& TREE_CODE (type
) == VECTOR_TYPE
)
3566 mode
= type_natural_mode (type
);
3568 /* Handle a hidden AL argument containing number of registers for varargs
3569 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
3571 if (mode
== VOIDmode
)
3574 return GEN_INT (cum
->maybe_vaarg
3575 ? (cum
->sse_nregs
< 0
3583 ret
= construct_container (mode
, orig_mode
, type
, 0, cum
->nregs
,
3585 &x86_64_int_parameter_registers
[cum
->regno
],
3590 /* For now, pass fp/complex values on the stack. */
3602 if (words
<= cum
->nregs
)
3604 int regno
= cum
->regno
;
3606 /* Fastcall allocates the first two DWORD (SImode) or
3607 smaller arguments to ECX and EDX. */
3610 if (mode
== BLKmode
|| mode
== DImode
)
3613 /* ECX not EAX is the first allocated register. */
3617 ret
= gen_rtx_REG (mode
, regno
);
3621 if (cum
->float_in_sse
< 2)
3624 if (cum
->float_in_sse
< 1)
3634 if (!type
|| !AGGREGATE_TYPE_P (type
))
3636 if (!TARGET_SSE
&& !warnedsse
&& cum
->warn_sse
)
3639 warning (0, "SSE vector argument without SSE enabled "
3643 ret
= gen_reg_or_parallel (mode
, orig_mode
,
3644 cum
->sse_regno
+ FIRST_SSE_REG
);
3651 if (!type
|| !AGGREGATE_TYPE_P (type
))
3653 if (!TARGET_MMX
&& !warnedmmx
&& cum
->warn_mmx
)
3656 warning (0, "MMX vector argument without MMX enabled "
3660 ret
= gen_reg_or_parallel (mode
, orig_mode
,
3661 cum
->mmx_regno
+ FIRST_MMX_REG
);
3666 if (TARGET_DEBUG_ARG
)
3669 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
3670 words
, cum
->words
, cum
->nregs
, GET_MODE_NAME (mode
), named
);
3673 print_simple_rtl (stderr
, ret
);
3675 fprintf (stderr
, ", stack");
3677 fprintf (stderr
, " )\n");
3683 /* A C expression that indicates when an argument must be passed by
3684 reference. If nonzero for an argument, a copy of that argument is
3685 made in memory and a pointer to the argument is passed instead of
3686 the argument itself. The pointer is passed in whatever way is
3687 appropriate for passing a pointer to that type. */
3690 ix86_pass_by_reference (CUMULATIVE_ARGS
*cum ATTRIBUTE_UNUSED
,
3691 enum machine_mode mode ATTRIBUTE_UNUSED
,
3692 tree type
, bool named ATTRIBUTE_UNUSED
)
3697 if (type
&& int_size_in_bytes (type
) == -1)
3699 if (TARGET_DEBUG_ARG
)
3700 fprintf (stderr
, "function_arg_pass_by_reference\n");
3707 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
3708 ABI. Only called if TARGET_SSE. */
3710 contains_128bit_aligned_vector_p (tree type
)
3712 enum machine_mode mode
= TYPE_MODE (type
);
3713 if (SSE_REG_MODE_P (mode
)
3714 && (!TYPE_USER_ALIGN (type
) || TYPE_ALIGN (type
) > 128))
3716 if (TYPE_ALIGN (type
) < 128)
3719 if (AGGREGATE_TYPE_P (type
))
3721 /* Walk the aggregates recursively. */
3722 switch (TREE_CODE (type
))
3726 case QUAL_UNION_TYPE
:
3730 if (TYPE_BINFO (type
))
3732 tree binfo
, base_binfo
;
3735 for (binfo
= TYPE_BINFO (type
), i
= 0;
3736 BINFO_BASE_ITERATE (binfo
, i
, base_binfo
); i
++)
3737 if (contains_128bit_aligned_vector_p
3738 (BINFO_TYPE (base_binfo
)))
3741 /* And now merge the fields of structure. */
3742 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
3744 if (TREE_CODE (field
) == FIELD_DECL
3745 && contains_128bit_aligned_vector_p (TREE_TYPE (field
)))
3752 /* Just for use if some languages passes arrays by value. */
3753 if (contains_128bit_aligned_vector_p (TREE_TYPE (type
)))
3764 /* Gives the alignment boundary, in bits, of an argument with the
3765 specified mode and type. */
3768 ix86_function_arg_boundary (enum machine_mode mode
, tree type
)
3772 align
= TYPE_ALIGN (type
);
3774 align
= GET_MODE_ALIGNMENT (mode
);
3775 if (align
< PARM_BOUNDARY
)
3776 align
= PARM_BOUNDARY
;
3779 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
3780 make an exception for SSE modes since these require 128bit
3783 The handling here differs from field_alignment. ICC aligns MMX
3784 arguments to 4 byte boundaries, while structure fields are aligned
3785 to 8 byte boundaries. */
3787 align
= PARM_BOUNDARY
;
3790 if (!SSE_REG_MODE_P (mode
))
3791 align
= PARM_BOUNDARY
;
3795 if (!contains_128bit_aligned_vector_p (type
))
3796 align
= PARM_BOUNDARY
;
3804 /* Return true if N is a possible register number of function value. */
3806 ix86_function_value_regno_p (int regno
)
3809 || (regno
== FIRST_FLOAT_REG
&& TARGET_FLOAT_RETURNS_IN_80387
)
3810 || (regno
== FIRST_SSE_REG
&& TARGET_SSE
))
3814 && (regno
== FIRST_MMX_REG
&& TARGET_MMX
))
3820 /* Define how to find the value returned by a function.
3821 VALTYPE is the data type of the value (as a tree).
3822 If the precise function being called is known, FUNC is its FUNCTION_DECL;
3823 otherwise, FUNC is 0. */
3825 ix86_function_value (tree valtype
, tree fntype_or_decl
,
3826 bool outgoing ATTRIBUTE_UNUSED
)
3828 enum machine_mode natmode
= type_natural_mode (valtype
);
3832 rtx ret
= construct_container (natmode
, TYPE_MODE (valtype
), valtype
,
3833 1, REGPARM_MAX
, SSE_REGPARM_MAX
,
3834 x86_64_int_return_registers
, 0);
3835 /* For zero sized structures, construct_container return NULL, but we
3836 need to keep rest of compiler happy by returning meaningful value. */
3838 ret
= gen_rtx_REG (TYPE_MODE (valtype
), 0);
3843 tree fn
= NULL_TREE
, fntype
;
3845 && DECL_P (fntype_or_decl
))
3846 fn
= fntype_or_decl
;
3847 fntype
= fn
? TREE_TYPE (fn
) : fntype_or_decl
;
3848 return gen_rtx_REG (TYPE_MODE (valtype
),
3849 ix86_value_regno (natmode
, fn
, fntype
));
3853 /* Return true iff type is returned in memory. */
3855 ix86_return_in_memory (tree type
)
3857 int needed_intregs
, needed_sseregs
, size
;
3858 enum machine_mode mode
= type_natural_mode (type
);
3861 return !examine_argument (mode
, type
, 1, &needed_intregs
, &needed_sseregs
);
3863 if (mode
== BLKmode
)
3866 size
= int_size_in_bytes (type
);
3868 if (MS_AGGREGATE_RETURN
&& AGGREGATE_TYPE_P (type
) && size
<= 8)
3871 if (VECTOR_MODE_P (mode
) || mode
== TImode
)
3873 /* User-created vectors small enough to fit in EAX. */
3877 /* MMX/3dNow values are returned in MM0,
3878 except when it doesn't exits. */
3880 return (TARGET_MMX
? 0 : 1);
3882 /* SSE values are returned in XMM0, except when it doesn't exist. */
3884 return (TARGET_SSE
? 0 : 1);
3898 /* When returning SSE vector types, we have a choice of either
3899 (1) being abi incompatible with a -march switch, or
3900 (2) generating an error.
3901 Given no good solution, I think the safest thing is one warning.
3902 The user won't be able to use -Werror, but....
3904 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
3905 called in response to actually generating a caller or callee that
3906 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
3907 via aggregate_value_p for general type probing from tree-ssa. */
3910 ix86_struct_value_rtx (tree type
, int incoming ATTRIBUTE_UNUSED
)
3912 static bool warnedsse
, warnedmmx
;
3916 /* Look at the return type of the function, not the function type. */
3917 enum machine_mode mode
= TYPE_MODE (TREE_TYPE (type
));
3919 if (!TARGET_SSE
&& !warnedsse
)
3922 || (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 16))
3925 warning (0, "SSE vector return without SSE enabled "
3930 if (!TARGET_MMX
&& !warnedmmx
)
3932 if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 8)
3935 warning (0, "MMX vector return without MMX enabled "
3944 /* Define how to find the value returned by a library function
3945 assuming the value has mode MODE. */
3947 ix86_libcall_value (enum machine_mode mode
)
3961 return gen_rtx_REG (mode
, FIRST_SSE_REG
);
3964 return gen_rtx_REG (mode
, FIRST_FLOAT_REG
);
3968 return gen_rtx_REG (mode
, 0);
3972 return gen_rtx_REG (mode
, ix86_value_regno (mode
, NULL
, NULL
));
3975 /* Given a mode, return the register to use for a return value. */
3978 ix86_value_regno (enum machine_mode mode
, tree func
, tree fntype
)
3980 gcc_assert (!TARGET_64BIT
);
3982 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
3983 we prevent this case when mmx is not available. */
3984 if ((VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 8))
3985 return FIRST_MMX_REG
;
3987 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
3988 we prevent this case when sse is not available. */
3989 if (mode
== TImode
|| (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 16))
3990 return FIRST_SSE_REG
;
3992 /* Decimal floating point values can go in %eax, unlike other float modes. */
3993 if (DECIMAL_FLOAT_MODE_P (mode
))
3996 /* Most things go in %eax, except (unless -mno-fp-ret-in-387) fp values. */
3997 if (!SCALAR_FLOAT_MODE_P (mode
) || !TARGET_FLOAT_RETURNS_IN_80387
)
4000 /* Floating point return values in %st(0), except for local functions when
4001 SSE math is enabled or for functions with sseregparm attribute. */
4002 if ((func
|| fntype
)
4003 && (mode
== SFmode
|| mode
== DFmode
))
4005 int sse_level
= ix86_function_sseregparm (fntype
, func
);
4006 if ((sse_level
>= 1 && mode
== SFmode
)
4007 || (sse_level
== 2 && mode
== DFmode
))
4008 return FIRST_SSE_REG
;
4011 return FIRST_FLOAT_REG
;
4014 /* Create the va_list data type. */
4017 ix86_build_builtin_va_list (void)
4019 tree f_gpr
, f_fpr
, f_ovf
, f_sav
, record
, type_decl
;
4021 /* For i386 we use plain pointer to argument area. */
4023 return build_pointer_type (char_type_node
);
4025 record
= (*lang_hooks
.types
.make_type
) (RECORD_TYPE
);
4026 type_decl
= build_decl (TYPE_DECL
, get_identifier ("__va_list_tag"), record
);
4028 f_gpr
= build_decl (FIELD_DECL
, get_identifier ("gp_offset"),
4029 unsigned_type_node
);
4030 f_fpr
= build_decl (FIELD_DECL
, get_identifier ("fp_offset"),
4031 unsigned_type_node
);
4032 f_ovf
= build_decl (FIELD_DECL
, get_identifier ("overflow_arg_area"),
4034 f_sav
= build_decl (FIELD_DECL
, get_identifier ("reg_save_area"),
4037 va_list_gpr_counter_field
= f_gpr
;
4038 va_list_fpr_counter_field
= f_fpr
;
4040 DECL_FIELD_CONTEXT (f_gpr
) = record
;
4041 DECL_FIELD_CONTEXT (f_fpr
) = record
;
4042 DECL_FIELD_CONTEXT (f_ovf
) = record
;
4043 DECL_FIELD_CONTEXT (f_sav
) = record
;
4045 TREE_CHAIN (record
) = type_decl
;
4046 TYPE_NAME (record
) = type_decl
;
4047 TYPE_FIELDS (record
) = f_gpr
;
4048 TREE_CHAIN (f_gpr
) = f_fpr
;
4049 TREE_CHAIN (f_fpr
) = f_ovf
;
4050 TREE_CHAIN (f_ovf
) = f_sav
;
4052 layout_type (record
);
4054 /* The correct type is an array type of one element. */
4055 return build_array_type (record
, build_index_type (size_zero_node
));
4058 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
4061 ix86_setup_incoming_varargs (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
4062 tree type
, int *pretend_size ATTRIBUTE_UNUSED
,
4065 CUMULATIVE_ARGS next_cum
;
4066 rtx save_area
= NULL_RTX
, mem
;
4079 if (! cfun
->va_list_gpr_size
&& ! cfun
->va_list_fpr_size
)
4082 /* Indicate to allocate space on the stack for varargs save area. */
4083 ix86_save_varrargs_registers
= 1;
4085 cfun
->stack_alignment_needed
= 128;
4087 fntype
= TREE_TYPE (current_function_decl
);
4088 stdarg_p
= (TYPE_ARG_TYPES (fntype
) != 0
4089 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype
)))
4090 != void_type_node
));
4092 /* For varargs, we do not want to skip the dummy va_dcl argument.
4093 For stdargs, we do want to skip the last named argument. */
4096 function_arg_advance (&next_cum
, mode
, type
, 1);
4099 save_area
= frame_pointer_rtx
;
4101 set
= get_varargs_alias_set ();
4103 for (i
= next_cum
.regno
;
4105 && i
< next_cum
.regno
+ cfun
->va_list_gpr_size
/ UNITS_PER_WORD
;
4108 mem
= gen_rtx_MEM (Pmode
,
4109 plus_constant (save_area
, i
* UNITS_PER_WORD
));
4110 MEM_NOTRAP_P (mem
) = 1;
4111 set_mem_alias_set (mem
, set
);
4112 emit_move_insn (mem
, gen_rtx_REG (Pmode
,
4113 x86_64_int_parameter_registers
[i
]));
4116 if (next_cum
.sse_nregs
&& cfun
->va_list_fpr_size
)
4118 /* Now emit code to save SSE registers. The AX parameter contains number
4119 of SSE parameter registers used to call this function. We use
4120 sse_prologue_save insn template that produces computed jump across
4121 SSE saves. We need some preparation work to get this working. */
4123 label
= gen_label_rtx ();
4124 label_ref
= gen_rtx_LABEL_REF (Pmode
, label
);
4126 /* Compute address to jump to :
4127 label - 5*eax + nnamed_sse_arguments*5 */
4128 tmp_reg
= gen_reg_rtx (Pmode
);
4129 nsse_reg
= gen_reg_rtx (Pmode
);
4130 emit_insn (gen_zero_extendqidi2 (nsse_reg
, gen_rtx_REG (QImode
, 0)));
4131 emit_insn (gen_rtx_SET (VOIDmode
, tmp_reg
,
4132 gen_rtx_MULT (Pmode
, nsse_reg
,
4134 if (next_cum
.sse_regno
)
4137 gen_rtx_CONST (DImode
,
4138 gen_rtx_PLUS (DImode
,
4140 GEN_INT (next_cum
.sse_regno
* 4))));
4142 emit_move_insn (nsse_reg
, label_ref
);
4143 emit_insn (gen_subdi3 (nsse_reg
, nsse_reg
, tmp_reg
));
4145 /* Compute address of memory block we save into. We always use pointer
4146 pointing 127 bytes after first byte to store - this is needed to keep
4147 instruction size limited by 4 bytes. */
4148 tmp_reg
= gen_reg_rtx (Pmode
);
4149 emit_insn (gen_rtx_SET (VOIDmode
, tmp_reg
,
4150 plus_constant (save_area
,
4151 8 * REGPARM_MAX
+ 127)));
4152 mem
= gen_rtx_MEM (BLKmode
, plus_constant (tmp_reg
, -127));
4153 MEM_NOTRAP_P (mem
) = 1;
4154 set_mem_alias_set (mem
, set
);
4155 set_mem_align (mem
, BITS_PER_WORD
);
4157 /* And finally do the dirty job! */
4158 emit_insn (gen_sse_prologue_save (mem
, nsse_reg
,
4159 GEN_INT (next_cum
.sse_regno
), label
));
4164 /* Implement va_start. */
4167 ix86_va_start (tree valist
, rtx nextarg
)
4169 HOST_WIDE_INT words
, n_gpr
, n_fpr
;
4170 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
4171 tree gpr
, fpr
, ovf
, sav
, t
;
4174 /* Only 64bit target needs something special. */
4177 std_expand_builtin_va_start (valist
, nextarg
);
4181 f_gpr
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
4182 f_fpr
= TREE_CHAIN (f_gpr
);
4183 f_ovf
= TREE_CHAIN (f_fpr
);
4184 f_sav
= TREE_CHAIN (f_ovf
);
4186 valist
= build1 (INDIRECT_REF
, TREE_TYPE (TREE_TYPE (valist
)), valist
);
4187 gpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_gpr
), valist
, f_gpr
, NULL_TREE
);
4188 fpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
, NULL_TREE
);
4189 ovf
= build3 (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
, NULL_TREE
);
4190 sav
= build3 (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
, NULL_TREE
);
4192 /* Count number of gp and fp argument registers used. */
4193 words
= current_function_args_info
.words
;
4194 n_gpr
= current_function_args_info
.regno
;
4195 n_fpr
= current_function_args_info
.sse_regno
;
4197 if (TARGET_DEBUG_ARG
)
4198 fprintf (stderr
, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
4199 (int) words
, (int) n_gpr
, (int) n_fpr
);
4201 if (cfun
->va_list_gpr_size
)
4203 type
= TREE_TYPE (gpr
);
4204 t
= build2 (MODIFY_EXPR
, type
, gpr
,
4205 build_int_cst (type
, n_gpr
* 8));
4206 TREE_SIDE_EFFECTS (t
) = 1;
4207 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
4210 if (cfun
->va_list_fpr_size
)
4212 type
= TREE_TYPE (fpr
);
4213 t
= build2 (MODIFY_EXPR
, type
, fpr
,
4214 build_int_cst (type
, n_fpr
* 16 + 8*REGPARM_MAX
));
4215 TREE_SIDE_EFFECTS (t
) = 1;
4216 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
4219 /* Find the overflow area. */
4220 type
= TREE_TYPE (ovf
);
4221 t
= make_tree (type
, virtual_incoming_args_rtx
);
4223 t
= build2 (PLUS_EXPR
, type
, t
,
4224 build_int_cst (type
, words
* UNITS_PER_WORD
));
4225 t
= build2 (MODIFY_EXPR
, type
, ovf
, t
);
4226 TREE_SIDE_EFFECTS (t
) = 1;
4227 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
4229 if (cfun
->va_list_gpr_size
|| cfun
->va_list_fpr_size
)
4231 /* Find the register save area.
4232 Prologue of the function save it right above stack frame. */
4233 type
= TREE_TYPE (sav
);
4234 t
= make_tree (type
, frame_pointer_rtx
);
4235 t
= build2 (MODIFY_EXPR
, type
, sav
, t
);
4236 TREE_SIDE_EFFECTS (t
) = 1;
4237 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
4241 /* Implement va_arg. */
4244 ix86_gimplify_va_arg (tree valist
, tree type
, tree
*pre_p
, tree
*post_p
)
4246 static const int intreg
[6] = { 0, 1, 2, 3, 4, 5 };
4247 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
4248 tree gpr
, fpr
, ovf
, sav
, t
;
4250 tree lab_false
, lab_over
= NULL_TREE
;
4255 enum machine_mode nat_mode
;
4257 /* Only 64bit target needs something special. */
4259 return std_gimplify_va_arg_expr (valist
, type
, pre_p
, post_p
);
4261 f_gpr
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
4262 f_fpr
= TREE_CHAIN (f_gpr
);
4263 f_ovf
= TREE_CHAIN (f_fpr
);
4264 f_sav
= TREE_CHAIN (f_ovf
);
4266 valist
= build_va_arg_indirect_ref (valist
);
4267 gpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_gpr
), valist
, f_gpr
, NULL_TREE
);
4268 fpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
, NULL_TREE
);
4269 ovf
= build3 (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
, NULL_TREE
);
4270 sav
= build3 (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
, NULL_TREE
);
4272 indirect_p
= pass_by_reference (NULL
, TYPE_MODE (type
), type
, false);
4274 type
= build_pointer_type (type
);
4275 size
= int_size_in_bytes (type
);
4276 rsize
= (size
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
4278 nat_mode
= type_natural_mode (type
);
4279 container
= construct_container (nat_mode
, TYPE_MODE (type
), type
, 0,
4280 REGPARM_MAX
, SSE_REGPARM_MAX
, intreg
, 0);
4282 /* Pull the value out of the saved registers. */
4284 addr
= create_tmp_var (ptr_type_node
, "addr");
4285 DECL_POINTER_ALIAS_SET (addr
) = get_varargs_alias_set ();
4289 int needed_intregs
, needed_sseregs
;
4291 tree int_addr
, sse_addr
;
4293 lab_false
= create_artificial_label ();
4294 lab_over
= create_artificial_label ();
4296 examine_argument (nat_mode
, type
, 0, &needed_intregs
, &needed_sseregs
);
4298 need_temp
= (!REG_P (container
)
4299 && ((needed_intregs
&& TYPE_ALIGN (type
) > 64)
4300 || TYPE_ALIGN (type
) > 128));
4302 /* In case we are passing structure, verify that it is consecutive block
4303 on the register save area. If not we need to do moves. */
4304 if (!need_temp
&& !REG_P (container
))
4306 /* Verify that all registers are strictly consecutive */
4307 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container
, 0, 0), 0))))
4311 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
4313 rtx slot
= XVECEXP (container
, 0, i
);
4314 if (REGNO (XEXP (slot
, 0)) != FIRST_SSE_REG
+ (unsigned int) i
4315 || INTVAL (XEXP (slot
, 1)) != i
* 16)
4323 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
4325 rtx slot
= XVECEXP (container
, 0, i
);
4326 if (REGNO (XEXP (slot
, 0)) != (unsigned int) i
4327 || INTVAL (XEXP (slot
, 1)) != i
* 8)
4339 int_addr
= create_tmp_var (ptr_type_node
, "int_addr");
4340 DECL_POINTER_ALIAS_SET (int_addr
) = get_varargs_alias_set ();
4341 sse_addr
= create_tmp_var (ptr_type_node
, "sse_addr");
4342 DECL_POINTER_ALIAS_SET (sse_addr
) = get_varargs_alias_set ();
4345 /* First ensure that we fit completely in registers. */
4348 t
= build_int_cst (TREE_TYPE (gpr
),
4349 (REGPARM_MAX
- needed_intregs
+ 1) * 8);
4350 t
= build2 (GE_EXPR
, boolean_type_node
, gpr
, t
);
4351 t2
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
4352 t
= build3 (COND_EXPR
, void_type_node
, t
, t2
, NULL_TREE
);
4353 gimplify_and_add (t
, pre_p
);
4357 t
= build_int_cst (TREE_TYPE (fpr
),
4358 (SSE_REGPARM_MAX
- needed_sseregs
+ 1) * 16
4360 t
= build2 (GE_EXPR
, boolean_type_node
, fpr
, t
);
4361 t2
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
4362 t
= build3 (COND_EXPR
, void_type_node
, t
, t2
, NULL_TREE
);
4363 gimplify_and_add (t
, pre_p
);
4366 /* Compute index to start of area used for integer regs. */
4369 /* int_addr = gpr + sav; */
4370 t
= fold_convert (ptr_type_node
, gpr
);
4371 t
= build2 (PLUS_EXPR
, ptr_type_node
, sav
, t
);
4372 t
= build2 (MODIFY_EXPR
, void_type_node
, int_addr
, t
);
4373 gimplify_and_add (t
, pre_p
);
4377 /* sse_addr = fpr + sav; */
4378 t
= fold_convert (ptr_type_node
, fpr
);
4379 t
= build2 (PLUS_EXPR
, ptr_type_node
, sav
, t
);
4380 t
= build2 (MODIFY_EXPR
, void_type_node
, sse_addr
, t
);
4381 gimplify_and_add (t
, pre_p
);
4386 tree temp
= create_tmp_var (type
, "va_arg_tmp");
4389 t
= build1 (ADDR_EXPR
, build_pointer_type (type
), temp
);
4390 t
= build2 (MODIFY_EXPR
, void_type_node
, addr
, t
);
4391 gimplify_and_add (t
, pre_p
);
4393 for (i
= 0; i
< XVECLEN (container
, 0); i
++)
4395 rtx slot
= XVECEXP (container
, 0, i
);
4396 rtx reg
= XEXP (slot
, 0);
4397 enum machine_mode mode
= GET_MODE (reg
);
4398 tree piece_type
= lang_hooks
.types
.type_for_mode (mode
, 1);
4399 tree addr_type
= build_pointer_type (piece_type
);
4402 tree dest_addr
, dest
;
4404 if (SSE_REGNO_P (REGNO (reg
)))
4406 src_addr
= sse_addr
;
4407 src_offset
= (REGNO (reg
) - FIRST_SSE_REG
) * 16;
4411 src_addr
= int_addr
;
4412 src_offset
= REGNO (reg
) * 8;
4414 src_addr
= fold_convert (addr_type
, src_addr
);
4415 src_addr
= fold (build2 (PLUS_EXPR
, addr_type
, src_addr
,
4416 size_int (src_offset
)));
4417 src
= build_va_arg_indirect_ref (src_addr
);
4419 dest_addr
= fold_convert (addr_type
, addr
);
4420 dest_addr
= fold (build2 (PLUS_EXPR
, addr_type
, dest_addr
,
4421 size_int (INTVAL (XEXP (slot
, 1)))));
4422 dest
= build_va_arg_indirect_ref (dest_addr
);
4424 t
= build2 (MODIFY_EXPR
, void_type_node
, dest
, src
);
4425 gimplify_and_add (t
, pre_p
);
4431 t
= build2 (PLUS_EXPR
, TREE_TYPE (gpr
), gpr
,
4432 build_int_cst (TREE_TYPE (gpr
), needed_intregs
* 8));
4433 t
= build2 (MODIFY_EXPR
, TREE_TYPE (gpr
), gpr
, t
);
4434 gimplify_and_add (t
, pre_p
);
4438 t
= build2 (PLUS_EXPR
, TREE_TYPE (fpr
), fpr
,
4439 build_int_cst (TREE_TYPE (fpr
), needed_sseregs
* 16));
4440 t
= build2 (MODIFY_EXPR
, TREE_TYPE (fpr
), fpr
, t
);
4441 gimplify_and_add (t
, pre_p
);
4444 t
= build1 (GOTO_EXPR
, void_type_node
, lab_over
);
4445 gimplify_and_add (t
, pre_p
);
4447 t
= build1 (LABEL_EXPR
, void_type_node
, lab_false
);
4448 append_to_statement_list (t
, pre_p
);
4451 /* ... otherwise out of the overflow area. */
4453 /* Care for on-stack alignment if needed. */
4454 if (FUNCTION_ARG_BOUNDARY (VOIDmode
, type
) <= 64
4455 || integer_zerop (TYPE_SIZE (type
)))
4459 HOST_WIDE_INT align
= FUNCTION_ARG_BOUNDARY (VOIDmode
, type
) / 8;
4460 t
= build2 (PLUS_EXPR
, TREE_TYPE (ovf
), ovf
,
4461 build_int_cst (TREE_TYPE (ovf
), align
- 1));
4462 t
= build2 (BIT_AND_EXPR
, TREE_TYPE (t
), t
,
4463 build_int_cst (TREE_TYPE (t
), -align
));
4465 gimplify_expr (&t
, pre_p
, NULL
, is_gimple_val
, fb_rvalue
);
4467 t2
= build2 (MODIFY_EXPR
, void_type_node
, addr
, t
);
4468 gimplify_and_add (t2
, pre_p
);
4470 t
= build2 (PLUS_EXPR
, TREE_TYPE (t
), t
,
4471 build_int_cst (TREE_TYPE (t
), rsize
* UNITS_PER_WORD
));
4472 t
= build2 (MODIFY_EXPR
, TREE_TYPE (ovf
), ovf
, t
);
4473 gimplify_and_add (t
, pre_p
);
4477 t
= build1 (LABEL_EXPR
, void_type_node
, lab_over
);
4478 append_to_statement_list (t
, pre_p
);
4481 ptrtype
= build_pointer_type (type
);
4482 addr
= fold_convert (ptrtype
, addr
);
4485 addr
= build_va_arg_indirect_ref (addr
);
4486 return build_va_arg_indirect_ref (addr
);
4489 /* Return nonzero if OPNUM's MEM should be matched
4490 in movabs* patterns. */
4493 ix86_check_movabs (rtx insn
, int opnum
)
4497 set
= PATTERN (insn
);
4498 if (GET_CODE (set
) == PARALLEL
)
4499 set
= XVECEXP (set
, 0, 0);
4500 gcc_assert (GET_CODE (set
) == SET
);
4501 mem
= XEXP (set
, opnum
);
4502 while (GET_CODE (mem
) == SUBREG
)
4503 mem
= SUBREG_REG (mem
);
4504 gcc_assert (GET_CODE (mem
) == MEM
);
4505 return (volatile_ok
|| !MEM_VOLATILE_P (mem
));
4508 /* Initialize the table of extra 80387 mathematical constants. */
4511 init_ext_80387_constants (void)
4513 static const char * cst
[5] =
4515 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4516 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4517 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4518 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4519 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4523 for (i
= 0; i
< 5; i
++)
4525 real_from_string (&ext_80387_constants_table
[i
], cst
[i
]);
4526 /* Ensure each constant is rounded to XFmode precision. */
4527 real_convert (&ext_80387_constants_table
[i
],
4528 XFmode
, &ext_80387_constants_table
[i
]);
4531 ext_80387_constants_init
= 1;
4534 /* Return true if the constant is something that can be loaded with
4535 a special instruction. */
4538 standard_80387_constant_p (rtx x
)
4540 if (GET_CODE (x
) != CONST_DOUBLE
|| !FLOAT_MODE_P (GET_MODE (x
)))
4543 if (x
== CONST0_RTX (GET_MODE (x
)))
4545 if (x
== CONST1_RTX (GET_MODE (x
)))
4548 /* For XFmode constants, try to find a special 80387 instruction when
4549 optimizing for size or on those CPUs that benefit from them. */
4550 if (GET_MODE (x
) == XFmode
4551 && (optimize_size
|| x86_ext_80387_constants
& TUNEMASK
))
4556 if (! ext_80387_constants_init
)
4557 init_ext_80387_constants ();
4559 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
4560 for (i
= 0; i
< 5; i
++)
4561 if (real_identical (&r
, &ext_80387_constants_table
[i
]))
4568 /* Return the opcode of the special instruction to be used to load
4572 standard_80387_constant_opcode (rtx x
)
4574 switch (standard_80387_constant_p (x
))
4595 /* Return the CONST_DOUBLE representing the 80387 constant that is
4596 loaded by the specified special instruction. The argument IDX
4597 matches the return value from standard_80387_constant_p. */
4600 standard_80387_constant_rtx (int idx
)
4604 if (! ext_80387_constants_init
)
4605 init_ext_80387_constants ();
4621 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table
[i
],
4625 /* Return 1 if mode is a valid mode for sse. */
4627 standard_sse_mode_p (enum machine_mode mode
)
4644 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
4647 standard_sse_constant_p (rtx x
)
4649 enum machine_mode mode
= GET_MODE (x
);
4651 if (x
== const0_rtx
|| x
== CONST0_RTX (GET_MODE (x
)))
4653 if (vector_all_ones_operand (x
, mode
)
4654 && standard_sse_mode_p (mode
))
4655 return TARGET_SSE2
? 2 : -1;
4660 /* Return the opcode of the special instruction to be used to load
4664 standard_sse_constant_opcode (rtx insn
, rtx x
)
4666 switch (standard_sse_constant_p (x
))
4669 if (get_attr_mode (insn
) == MODE_V4SF
)
4670 return "xorps\t%0, %0";
4671 else if (get_attr_mode (insn
) == MODE_V2DF
)
4672 return "xorpd\t%0, %0";
4674 return "pxor\t%0, %0";
4676 return "pcmpeqd\t%0, %0";
4681 /* Returns 1 if OP contains a symbol reference */
4684 symbolic_reference_mentioned_p (rtx op
)
4689 if (GET_CODE (op
) == SYMBOL_REF
|| GET_CODE (op
) == LABEL_REF
)
4692 fmt
= GET_RTX_FORMAT (GET_CODE (op
));
4693 for (i
= GET_RTX_LENGTH (GET_CODE (op
)) - 1; i
>= 0; i
--)
4699 for (j
= XVECLEN (op
, i
) - 1; j
>= 0; j
--)
4700 if (symbolic_reference_mentioned_p (XVECEXP (op
, i
, j
)))
4704 else if (fmt
[i
] == 'e' && symbolic_reference_mentioned_p (XEXP (op
, i
)))
4711 /* Return 1 if it is appropriate to emit `ret' instructions in the
4712 body of a function. Do this only if the epilogue is simple, needing a
4713 couple of insns. Prior to reloading, we can't tell how many registers
4714 must be saved, so return 0 then. Return 0 if there is no frame
4715 marker to de-allocate. */
4718 ix86_can_use_return_insn_p (void)
4720 struct ix86_frame frame
;
4722 if (! reload_completed
|| frame_pointer_needed
)
4725 /* Don't allow more than 32 pop, since that's all we can do
4726 with one instruction. */
4727 if (current_function_pops_args
4728 && current_function_args_size
>= 32768)
4731 ix86_compute_frame_layout (&frame
);
4732 return frame
.to_allocate
== 0 && frame
.nregs
== 0;
4735 /* Value should be nonzero if functions must have frame pointers.
4736 Zero means the frame pointer need not be set up (and parms may
4737 be accessed via the stack pointer) in functions that seem suitable. */
4740 ix86_frame_pointer_required (void)
4742 /* If we accessed previous frames, then the generated code expects
4743 to be able to access the saved ebp value in our frame. */
4744 if (cfun
->machine
->accesses_prev_frame
)
4747 /* Several x86 os'es need a frame pointer for other reasons,
4748 usually pertaining to setjmp. */
4749 if (SUBTARGET_FRAME_POINTER_REQUIRED
)
4752 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4753 the frame pointer by default. Turn it back on now if we've not
4754 got a leaf function. */
4755 if (TARGET_OMIT_LEAF_FRAME_POINTER
4756 && (!current_function_is_leaf
4757 || ix86_current_function_calls_tls_descriptor
))
4760 if (current_function_profile
)
4766 /* Record that the current function accesses previous call frames. */
4769 ix86_setup_frame_addresses (void)
4771 cfun
->machine
->accesses_prev_frame
= 1;
4774 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
4775 # define USE_HIDDEN_LINKONCE 1
4777 # define USE_HIDDEN_LINKONCE 0
4780 static int pic_labels_used
;
4782 /* Fills in the label name that should be used for a pc thunk for
4783 the given register. */
4786 get_pc_thunk_name (char name
[32], unsigned int regno
)
4788 if (USE_HIDDEN_LINKONCE
)
4789 sprintf (name
, "__i686.get_pc_thunk.%s", reg_names
[regno
]);
4791 ASM_GENERATE_INTERNAL_LABEL (name
, "LPR", regno
);
4795 /* This function generates code for -fpic that loads %ebx with
4796 the return address of the caller and then returns. */
4799 ix86_file_end (void)
4804 for (regno
= 0; regno
< 8; ++regno
)
4808 if (! ((pic_labels_used
>> regno
) & 1))
4811 get_pc_thunk_name (name
, regno
);
4816 switch_to_section (darwin_sections
[text_coal_section
]);
4817 fputs ("\t.weak_definition\t", asm_out_file
);
4818 assemble_name (asm_out_file
, name
);
4819 fputs ("\n\t.private_extern\t", asm_out_file
);
4820 assemble_name (asm_out_file
, name
);
4821 fputs ("\n", asm_out_file
);
4822 ASM_OUTPUT_LABEL (asm_out_file
, name
);
4826 if (USE_HIDDEN_LINKONCE
)
4830 decl
= build_decl (FUNCTION_DECL
, get_identifier (name
),
4832 TREE_PUBLIC (decl
) = 1;
4833 TREE_STATIC (decl
) = 1;
4834 DECL_ONE_ONLY (decl
) = 1;
4836 (*targetm
.asm_out
.unique_section
) (decl
, 0);
4837 switch_to_section (get_named_section (decl
, NULL
, 0));
4839 (*targetm
.asm_out
.globalize_label
) (asm_out_file
, name
);
4840 fputs ("\t.hidden\t", asm_out_file
);
4841 assemble_name (asm_out_file
, name
);
4842 fputc ('\n', asm_out_file
);
4843 ASM_DECLARE_FUNCTION_NAME (asm_out_file
, name
, decl
);
4847 switch_to_section (text_section
);
4848 ASM_OUTPUT_LABEL (asm_out_file
, name
);
4851 xops
[0] = gen_rtx_REG (SImode
, regno
);
4852 xops
[1] = gen_rtx_MEM (SImode
, stack_pointer_rtx
);
4853 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops
);
4854 output_asm_insn ("ret", xops
);
4857 if (NEED_INDICATE_EXEC_STACK
)
4858 file_end_indicate_exec_stack ();
4861 /* Emit code for the SET_GOT patterns. */
4864 output_set_got (rtx dest
, rtx label ATTRIBUTE_UNUSED
)
4869 xops
[1] = gen_rtx_SYMBOL_REF (Pmode
, GOT_SYMBOL_NAME
);
4871 if (! TARGET_DEEP_BRANCH_PREDICTION
|| !flag_pic
)
4873 xops
[2] = gen_rtx_LABEL_REF (Pmode
, label
? label
: gen_label_rtx ());
4876 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops
);
4878 output_asm_insn ("call\t%a2", xops
);
4881 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
4882 is what will be referenced by the Mach-O PIC subsystem. */
4884 ASM_OUTPUT_LABEL (asm_out_file
, machopic_function_base_name ());
4887 (*targetm
.asm_out
.internal_label
) (asm_out_file
, "L",
4888 CODE_LABEL_NUMBER (XEXP (xops
[2], 0)));
4891 output_asm_insn ("pop{l}\t%0", xops
);
4896 get_pc_thunk_name (name
, REGNO (dest
));
4897 pic_labels_used
|= 1 << REGNO (dest
);
4899 xops
[2] = gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (name
));
4900 xops
[2] = gen_rtx_MEM (QImode
, xops
[2]);
4901 output_asm_insn ("call\t%X2", xops
);
4902 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
4903 is what will be referenced by the Mach-O PIC subsystem. */
4906 ASM_OUTPUT_LABEL (asm_out_file
, machopic_function_base_name ());
4908 targetm
.asm_out
.internal_label (asm_out_file
, "L",
4909 CODE_LABEL_NUMBER (label
));
4916 if (!flag_pic
|| TARGET_DEEP_BRANCH_PREDICTION
)
4917 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops
);
4919 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops
);
4924 /* Generate an "push" pattern for input ARG. */
4929 return gen_rtx_SET (VOIDmode
,
4931 gen_rtx_PRE_DEC (Pmode
,
4932 stack_pointer_rtx
)),
4936 /* Return >= 0 if there is an unused call-clobbered register available
4937 for the entire function. */
4940 ix86_select_alt_pic_regnum (void)
4942 if (current_function_is_leaf
&& !current_function_profile
4943 && !ix86_current_function_calls_tls_descriptor
)
4946 for (i
= 2; i
>= 0; --i
)
4947 if (!regs_ever_live
[i
])
4951 return INVALID_REGNUM
;
4954 /* Return 1 if we need to save REGNO. */
4956 ix86_save_reg (unsigned int regno
, int maybe_eh_return
)
4958 if (pic_offset_table_rtx
4959 && regno
== REAL_PIC_OFFSET_TABLE_REGNUM
4960 && (regs_ever_live
[REAL_PIC_OFFSET_TABLE_REGNUM
]
4961 || current_function_profile
4962 || current_function_calls_eh_return
4963 || current_function_uses_const_pool
))
4965 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM
)
4970 if (current_function_calls_eh_return
&& maybe_eh_return
)
4975 unsigned test
= EH_RETURN_DATA_REGNO (i
);
4976 if (test
== INVALID_REGNUM
)
4983 if (cfun
->machine
->force_align_arg_pointer
4984 && regno
== REGNO (cfun
->machine
->force_align_arg_pointer
))
4987 return (regs_ever_live
[regno
]
4988 && !call_used_regs
[regno
]
4989 && !fixed_regs
[regno
]
4990 && (regno
!= HARD_FRAME_POINTER_REGNUM
|| !frame_pointer_needed
));
4993 /* Return number of registers to be saved on the stack. */
4996 ix86_nsaved_regs (void)
5001 for (regno
= FIRST_PSEUDO_REGISTER
- 1; regno
>= 0; regno
--)
5002 if (ix86_save_reg (regno
, true))
5007 /* Return the offset between two registers, one to be eliminated, and the other
5008 its replacement, at the start of a routine. */
5011 ix86_initial_elimination_offset (int from
, int to
)
5013 struct ix86_frame frame
;
5014 ix86_compute_frame_layout (&frame
);
5016 if (from
== ARG_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
5017 return frame
.hard_frame_pointer_offset
;
5018 else if (from
== FRAME_POINTER_REGNUM
5019 && to
== HARD_FRAME_POINTER_REGNUM
)
5020 return frame
.hard_frame_pointer_offset
- frame
.frame_pointer_offset
;
5023 gcc_assert (to
== STACK_POINTER_REGNUM
);
5025 if (from
== ARG_POINTER_REGNUM
)
5026 return frame
.stack_pointer_offset
;
5028 gcc_assert (from
== FRAME_POINTER_REGNUM
);
5029 return frame
.stack_pointer_offset
- frame
.frame_pointer_offset
;
5033 /* Fill structure ix86_frame about frame of currently computed function. */
5036 ix86_compute_frame_layout (struct ix86_frame
*frame
)
5038 HOST_WIDE_INT total_size
;
5039 unsigned int stack_alignment_needed
;
5040 HOST_WIDE_INT offset
;
5041 unsigned int preferred_alignment
;
5042 HOST_WIDE_INT size
= get_frame_size ();
5044 frame
->nregs
= ix86_nsaved_regs ();
5047 stack_alignment_needed
= cfun
->stack_alignment_needed
/ BITS_PER_UNIT
;
5048 preferred_alignment
= cfun
->preferred_stack_boundary
/ BITS_PER_UNIT
;
5050 /* During reload iteration the amount of registers saved can change.
5051 Recompute the value as needed. Do not recompute when amount of registers
5052 didn't change as reload does multiple calls to the function and does not
5053 expect the decision to change within single iteration. */
5055 && cfun
->machine
->use_fast_prologue_epilogue_nregs
!= frame
->nregs
)
5057 int count
= frame
->nregs
;
5059 cfun
->machine
->use_fast_prologue_epilogue_nregs
= count
;
5060 /* The fast prologue uses move instead of push to save registers. This
5061 is significantly longer, but also executes faster as modern hardware
5062 can execute the moves in parallel, but can't do that for push/pop.
5064 Be careful about choosing what prologue to emit: When function takes
5065 many instructions to execute we may use slow version as well as in
5066 case function is known to be outside hot spot (this is known with
5067 feedback only). Weight the size of function by number of registers
5068 to save as it is cheap to use one or two push instructions but very
5069 slow to use many of them. */
5071 count
= (count
- 1) * FAST_PROLOGUE_INSN_COUNT
;
5072 if (cfun
->function_frequency
< FUNCTION_FREQUENCY_NORMAL
5073 || (flag_branch_probabilities
5074 && cfun
->function_frequency
< FUNCTION_FREQUENCY_HOT
))
5075 cfun
->machine
->use_fast_prologue_epilogue
= false;
5077 cfun
->machine
->use_fast_prologue_epilogue
5078 = !expensive_function_p (count
);
5080 if (TARGET_PROLOGUE_USING_MOVE
5081 && cfun
->machine
->use_fast_prologue_epilogue
)
5082 frame
->save_regs_using_mov
= true;
5084 frame
->save_regs_using_mov
= false;
5087 /* Skip return address and saved base pointer. */
5088 offset
= frame_pointer_needed
? UNITS_PER_WORD
* 2 : UNITS_PER_WORD
;
5090 frame
->hard_frame_pointer_offset
= offset
;
5092 /* Do some sanity checking of stack_alignment_needed and
5093 preferred_alignment, since i386 port is the only using those features
5094 that may break easily. */
5096 gcc_assert (!size
|| stack_alignment_needed
);
5097 gcc_assert (preferred_alignment
>= STACK_BOUNDARY
/ BITS_PER_UNIT
);
5098 gcc_assert (preferred_alignment
<= PREFERRED_STACK_BOUNDARY
/ BITS_PER_UNIT
);
5099 gcc_assert (stack_alignment_needed
5100 <= PREFERRED_STACK_BOUNDARY
/ BITS_PER_UNIT
);
5102 if (stack_alignment_needed
< STACK_BOUNDARY
/ BITS_PER_UNIT
)
5103 stack_alignment_needed
= STACK_BOUNDARY
/ BITS_PER_UNIT
;
5105 /* Register save area */
5106 offset
+= frame
->nregs
* UNITS_PER_WORD
;
5109 if (ix86_save_varrargs_registers
)
5111 offset
+= X86_64_VARARGS_SIZE
;
5112 frame
->va_arg_size
= X86_64_VARARGS_SIZE
;
5115 frame
->va_arg_size
= 0;
5117 /* Align start of frame for local function. */
5118 frame
->padding1
= ((offset
+ stack_alignment_needed
- 1)
5119 & -stack_alignment_needed
) - offset
;
5121 offset
+= frame
->padding1
;
5123 /* Frame pointer points here. */
5124 frame
->frame_pointer_offset
= offset
;
5128 /* Add outgoing arguments area. Can be skipped if we eliminated
5129 all the function calls as dead code.
5130 Skipping is however impossible when function calls alloca. Alloca
5131 expander assumes that last current_function_outgoing_args_size
5132 of stack frame are unused. */
5133 if (ACCUMULATE_OUTGOING_ARGS
5134 && (!current_function_is_leaf
|| current_function_calls_alloca
5135 || ix86_current_function_calls_tls_descriptor
))
5137 offset
+= current_function_outgoing_args_size
;
5138 frame
->outgoing_arguments_size
= current_function_outgoing_args_size
;
5141 frame
->outgoing_arguments_size
= 0;
5143 /* Align stack boundary. Only needed if we're calling another function
5145 if (!current_function_is_leaf
|| current_function_calls_alloca
5146 || ix86_current_function_calls_tls_descriptor
)
5147 frame
->padding2
= ((offset
+ preferred_alignment
- 1)
5148 & -preferred_alignment
) - offset
;
5150 frame
->padding2
= 0;
5152 offset
+= frame
->padding2
;
5154 /* We've reached end of stack frame. */
5155 frame
->stack_pointer_offset
= offset
;
5157 /* Size prologue needs to allocate. */
5158 frame
->to_allocate
=
5159 (size
+ frame
->padding1
+ frame
->padding2
5160 + frame
->outgoing_arguments_size
+ frame
->va_arg_size
);
5162 if ((!frame
->to_allocate
&& frame
->nregs
<= 1)
5163 || (TARGET_64BIT
&& frame
->to_allocate
>= (HOST_WIDE_INT
) 0x80000000))
5164 frame
->save_regs_using_mov
= false;
5166 if (TARGET_RED_ZONE
&& current_function_sp_is_unchanging
5167 && current_function_is_leaf
5168 && !ix86_current_function_calls_tls_descriptor
)
5170 frame
->red_zone_size
= frame
->to_allocate
;
5171 if (frame
->save_regs_using_mov
)
5172 frame
->red_zone_size
+= frame
->nregs
* UNITS_PER_WORD
;
5173 if (frame
->red_zone_size
> RED_ZONE_SIZE
- RED_ZONE_RESERVE
)
5174 frame
->red_zone_size
= RED_ZONE_SIZE
- RED_ZONE_RESERVE
;
5177 frame
->red_zone_size
= 0;
5178 frame
->to_allocate
-= frame
->red_zone_size
;
5179 frame
->stack_pointer_offset
-= frame
->red_zone_size
;
5181 fprintf (stderr
, "nregs: %i\n", frame
->nregs
);
5182 fprintf (stderr
, "size: %i\n", size
);
5183 fprintf (stderr
, "alignment1: %i\n", stack_alignment_needed
);
5184 fprintf (stderr
, "padding1: %i\n", frame
->padding1
);
5185 fprintf (stderr
, "va_arg: %i\n", frame
->va_arg_size
);
5186 fprintf (stderr
, "padding2: %i\n", frame
->padding2
);
5187 fprintf (stderr
, "to_allocate: %i\n", frame
->to_allocate
);
5188 fprintf (stderr
, "red_zone_size: %i\n", frame
->red_zone_size
);
5189 fprintf (stderr
, "frame_pointer_offset: %i\n", frame
->frame_pointer_offset
);
5190 fprintf (stderr
, "hard_frame_pointer_offset: %i\n",
5191 frame
->hard_frame_pointer_offset
);
5192 fprintf (stderr
, "stack_pointer_offset: %i\n", frame
->stack_pointer_offset
);
5196 /* Emit code to save registers in the prologue. */
5199 ix86_emit_save_regs (void)
5204 for (regno
= FIRST_PSEUDO_REGISTER
; regno
-- > 0; )
5205 if (ix86_save_reg (regno
, true))
5207 insn
= emit_insn (gen_push (gen_rtx_REG (Pmode
, regno
)));
5208 RTX_FRAME_RELATED_P (insn
) = 1;
5212 /* Emit code to save registers using MOV insns. First register
5213 is restored from POINTER + OFFSET. */
5215 ix86_emit_save_regs_using_mov (rtx pointer
, HOST_WIDE_INT offset
)
5220 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
5221 if (ix86_save_reg (regno
, true))
5223 insn
= emit_move_insn (adjust_address (gen_rtx_MEM (Pmode
, pointer
),
5225 gen_rtx_REG (Pmode
, regno
));
5226 RTX_FRAME_RELATED_P (insn
) = 1;
5227 offset
+= UNITS_PER_WORD
;
5231 /* Expand prologue or epilogue stack adjustment.
5232 The pattern exist to put a dependency on all ebp-based memory accesses.
5233 STYLE should be negative if instructions should be marked as frame related,
5234 zero if %r11 register is live and cannot be freely used and positive
5238 pro_epilogue_adjust_stack (rtx dest
, rtx src
, rtx offset
, int style
)
5243 insn
= emit_insn (gen_pro_epilogue_adjust_stack_1 (dest
, src
, offset
));
5244 else if (x86_64_immediate_operand (offset
, DImode
))
5245 insn
= emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest
, src
, offset
));
5249 /* r11 is used by indirect sibcall return as well, set before the
5250 epilogue and used after the epilogue. ATM indirect sibcall
5251 shouldn't be used together with huge frame sizes in one
5252 function because of the frame_size check in sibcall.c. */
5254 r11
= gen_rtx_REG (DImode
, FIRST_REX_INT_REG
+ 3 /* R11 */);
5255 insn
= emit_insn (gen_rtx_SET (DImode
, r11
, offset
));
5257 RTX_FRAME_RELATED_P (insn
) = 1;
5258 insn
= emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest
, src
, r11
,
5262 RTX_FRAME_RELATED_P (insn
) = 1;
5265 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
5268 ix86_internal_arg_pointer (void)
5270 bool has_force_align_arg_pointer
=
5271 (0 != lookup_attribute (ix86_force_align_arg_pointer_string
,
5272 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl
))));
5273 if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
5274 && DECL_NAME (current_function_decl
)
5275 && MAIN_NAME_P (DECL_NAME (current_function_decl
))
5276 && DECL_FILE_SCOPE_P (current_function_decl
))
5277 || ix86_force_align_arg_pointer
5278 || has_force_align_arg_pointer
)
5280 /* Nested functions can't realign the stack due to a register
5282 if (DECL_CONTEXT (current_function_decl
)
5283 && TREE_CODE (DECL_CONTEXT (current_function_decl
)) == FUNCTION_DECL
)
5285 if (ix86_force_align_arg_pointer
)
5286 warning (0, "-mstackrealign ignored for nested functions");
5287 if (has_force_align_arg_pointer
)
5288 error ("%s not supported for nested functions",
5289 ix86_force_align_arg_pointer_string
);
5290 return virtual_incoming_args_rtx
;
5292 cfun
->machine
->force_align_arg_pointer
= gen_rtx_REG (Pmode
, 2);
5293 return copy_to_reg (cfun
->machine
->force_align_arg_pointer
);
5296 return virtual_incoming_args_rtx
;
5299 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
5300 This is called from dwarf2out.c to emit call frame instructions
5301 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
5303 ix86_dwarf_handle_frame_unspec (const char *label
, rtx pattern
, int index
)
5305 rtx unspec
= SET_SRC (pattern
);
5306 gcc_assert (GET_CODE (unspec
) == UNSPEC
);
5310 case UNSPEC_REG_SAVE
:
5311 dwarf2out_reg_save_reg (label
, XVECEXP (unspec
, 0, 0),
5312 SET_DEST (pattern
));
5314 case UNSPEC_DEF_CFA
:
5315 dwarf2out_def_cfa (label
, REGNO (SET_DEST (pattern
)),
5316 INTVAL (XVECEXP (unspec
, 0, 0)));
5323 /* Expand the prologue into a bunch of separate insns. */
5326 ix86_expand_prologue (void)
5330 struct ix86_frame frame
;
5331 HOST_WIDE_INT allocate
;
5333 ix86_compute_frame_layout (&frame
);
5335 if (cfun
->machine
->force_align_arg_pointer
)
5339 /* Grab the argument pointer. */
5340 x
= plus_constant (stack_pointer_rtx
, 4);
5341 y
= cfun
->machine
->force_align_arg_pointer
;
5342 insn
= emit_insn (gen_rtx_SET (VOIDmode
, y
, x
));
5343 RTX_FRAME_RELATED_P (insn
) = 1;
5345 /* The unwind info consists of two parts: install the fafp as the cfa,
5346 and record the fafp as the "save register" of the stack pointer.
5347 The later is there in order that the unwinder can see where it
5348 should restore the stack pointer across the and insn. */
5349 x
= gen_rtx_UNSPEC (VOIDmode
, gen_rtvec (1, const0_rtx
), UNSPEC_DEF_CFA
);
5350 x
= gen_rtx_SET (VOIDmode
, y
, x
);
5351 RTX_FRAME_RELATED_P (x
) = 1;
5352 y
= gen_rtx_UNSPEC (VOIDmode
, gen_rtvec (1, stack_pointer_rtx
),
5354 y
= gen_rtx_SET (VOIDmode
, cfun
->machine
->force_align_arg_pointer
, y
);
5355 RTX_FRAME_RELATED_P (y
) = 1;
5356 x
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, x
, y
));
5357 x
= gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
, x
, NULL
);
5358 REG_NOTES (insn
) = x
;
5360 /* Align the stack. */
5361 emit_insn (gen_andsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
5364 /* And here we cheat like madmen with the unwind info. We force the
5365 cfa register back to sp+4, which is exactly what it was at the
5366 start of the function. Re-pushing the return address results in
5367 the return at the same spot relative to the cfa, and thus is
5368 correct wrt the unwind info. */
5369 x
= cfun
->machine
->force_align_arg_pointer
;
5370 x
= gen_frame_mem (Pmode
, plus_constant (x
, -4));
5371 insn
= emit_insn (gen_push (x
));
5372 RTX_FRAME_RELATED_P (insn
) = 1;
5375 x
= gen_rtx_UNSPEC (VOIDmode
, gen_rtvec (1, x
), UNSPEC_DEF_CFA
);
5376 x
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, x
);
5377 x
= gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
, x
, NULL
);
5378 REG_NOTES (insn
) = x
;
5381 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5382 slower on all targets. Also sdb doesn't like it. */
5384 if (frame_pointer_needed
)
5386 insn
= emit_insn (gen_push (hard_frame_pointer_rtx
));
5387 RTX_FRAME_RELATED_P (insn
) = 1;
5389 insn
= emit_move_insn (hard_frame_pointer_rtx
, stack_pointer_rtx
);
5390 RTX_FRAME_RELATED_P (insn
) = 1;
5393 allocate
= frame
.to_allocate
;
5395 if (!frame
.save_regs_using_mov
)
5396 ix86_emit_save_regs ();
5398 allocate
+= frame
.nregs
* UNITS_PER_WORD
;
5400 /* When using red zone we may start register saving before allocating
5401 the stack frame saving one cycle of the prologue. */
5402 if (TARGET_RED_ZONE
&& frame
.save_regs_using_mov
)
5403 ix86_emit_save_regs_using_mov (frame_pointer_needed
? hard_frame_pointer_rtx
5404 : stack_pointer_rtx
,
5405 -frame
.nregs
* UNITS_PER_WORD
);
5409 else if (! TARGET_STACK_PROBE
|| allocate
< CHECK_STACK_LIMIT
)
5410 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
5411 GEN_INT (-allocate
), -1);
5414 /* Only valid for Win32. */
5415 rtx eax
= gen_rtx_REG (SImode
, 0);
5416 bool eax_live
= ix86_eax_live_at_start_p ();
5419 gcc_assert (!TARGET_64BIT
);
5423 emit_insn (gen_push (eax
));
5427 emit_move_insn (eax
, GEN_INT (allocate
));
5429 insn
= emit_insn (gen_allocate_stack_worker (eax
));
5430 RTX_FRAME_RELATED_P (insn
) = 1;
5431 t
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, GEN_INT (-allocate
));
5432 t
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, t
);
5433 REG_NOTES (insn
) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
,
5434 t
, REG_NOTES (insn
));
5438 if (frame_pointer_needed
)
5439 t
= plus_constant (hard_frame_pointer_rtx
,
5442 - frame
.nregs
* UNITS_PER_WORD
);
5444 t
= plus_constant (stack_pointer_rtx
, allocate
);
5445 emit_move_insn (eax
, gen_rtx_MEM (SImode
, t
));
5449 if (frame
.save_regs_using_mov
&& !TARGET_RED_ZONE
)
5451 if (!frame_pointer_needed
|| !frame
.to_allocate
)
5452 ix86_emit_save_regs_using_mov (stack_pointer_rtx
, frame
.to_allocate
);
5454 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx
,
5455 -frame
.nregs
* UNITS_PER_WORD
);
5458 pic_reg_used
= false;
5459 if (pic_offset_table_rtx
5460 && (regs_ever_live
[REAL_PIC_OFFSET_TABLE_REGNUM
]
5461 || current_function_profile
))
5463 unsigned int alt_pic_reg_used
= ix86_select_alt_pic_regnum ();
5465 if (alt_pic_reg_used
!= INVALID_REGNUM
)
5466 REGNO (pic_offset_table_rtx
) = alt_pic_reg_used
;
5468 pic_reg_used
= true;
5474 insn
= emit_insn (gen_set_got_rex64 (pic_offset_table_rtx
));
5476 insn
= emit_insn (gen_set_got (pic_offset_table_rtx
));
5478 /* Even with accurate pre-reload life analysis, we can wind up
5479 deleting all references to the pic register after reload.
5480 Consider if cross-jumping unifies two sides of a branch
5481 controlled by a comparison vs the only read from a global.
5482 In which case, allow the set_got to be deleted, though we're
5483 too late to do anything about the ebx save in the prologue. */
5484 REG_NOTES (insn
) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD
, const0_rtx
, NULL
);
5487 /* Prevent function calls from be scheduled before the call to mcount.
5488 In the pic_reg_used case, make sure that the got load isn't deleted. */
5489 if (current_function_profile
)
5490 emit_insn (gen_blockage (pic_reg_used
? pic_offset_table_rtx
: const0_rtx
));
5493 /* Emit code to restore saved registers using MOV insns. First register
5494 is restored from POINTER + OFFSET. */
5496 ix86_emit_restore_regs_using_mov (rtx pointer
, HOST_WIDE_INT offset
,
5497 int maybe_eh_return
)
5500 rtx base_address
= gen_rtx_MEM (Pmode
, pointer
);
5502 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
5503 if (ix86_save_reg (regno
, maybe_eh_return
))
5505 /* Ensure that adjust_address won't be forced to produce pointer
5506 out of range allowed by x86-64 instruction set. */
5507 if (TARGET_64BIT
&& offset
!= trunc_int_for_mode (offset
, SImode
))
5511 r11
= gen_rtx_REG (DImode
, FIRST_REX_INT_REG
+ 3 /* R11 */);
5512 emit_move_insn (r11
, GEN_INT (offset
));
5513 emit_insn (gen_adddi3 (r11
, r11
, pointer
));
5514 base_address
= gen_rtx_MEM (Pmode
, r11
);
5517 emit_move_insn (gen_rtx_REG (Pmode
, regno
),
5518 adjust_address (base_address
, Pmode
, offset
));
5519 offset
+= UNITS_PER_WORD
;
5523 /* Restore function stack, frame, and registers. */
5526 ix86_expand_epilogue (int style
)
5529 int sp_valid
= !frame_pointer_needed
|| current_function_sp_is_unchanging
;
5530 struct ix86_frame frame
;
5531 HOST_WIDE_INT offset
;
5533 ix86_compute_frame_layout (&frame
);
5535 /* Calculate start of saved registers relative to ebp. Special care
5536 must be taken for the normal return case of a function using
5537 eh_return: the eax and edx registers are marked as saved, but not
5538 restored along this path. */
5539 offset
= frame
.nregs
;
5540 if (current_function_calls_eh_return
&& style
!= 2)
5542 offset
*= -UNITS_PER_WORD
;
5544 /* If we're only restoring one register and sp is not valid then
5545 using a move instruction to restore the register since it's
5546 less work than reloading sp and popping the register.
5548 The default code result in stack adjustment using add/lea instruction,
5549 while this code results in LEAVE instruction (or discrete equivalent),
5550 so it is profitable in some other cases as well. Especially when there
5551 are no registers to restore. We also use this code when TARGET_USE_LEAVE
5552 and there is exactly one register to pop. This heuristic may need some
5553 tuning in future. */
5554 if ((!sp_valid
&& frame
.nregs
<= 1)
5555 || (TARGET_EPILOGUE_USING_MOVE
5556 && cfun
->machine
->use_fast_prologue_epilogue
5557 && (frame
.nregs
> 1 || frame
.to_allocate
))
5558 || (frame_pointer_needed
&& !frame
.nregs
&& frame
.to_allocate
)
5559 || (frame_pointer_needed
&& TARGET_USE_LEAVE
5560 && cfun
->machine
->use_fast_prologue_epilogue
5561 && frame
.nregs
== 1)
5562 || current_function_calls_eh_return
)
5564 /* Restore registers. We can use ebp or esp to address the memory
5565 locations. If both are available, default to ebp, since offsets
5566 are known to be small. Only exception is esp pointing directly to the
5567 end of block of saved registers, where we may simplify addressing
5570 if (!frame_pointer_needed
|| (sp_valid
&& !frame
.to_allocate
))
5571 ix86_emit_restore_regs_using_mov (stack_pointer_rtx
,
5572 frame
.to_allocate
, style
== 2);
5574 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx
,
5575 offset
, style
== 2);
5577 /* eh_return epilogues need %ecx added to the stack pointer. */
5580 rtx tmp
, sa
= EH_RETURN_STACKADJ_RTX
;
5582 if (frame_pointer_needed
)
5584 tmp
= gen_rtx_PLUS (Pmode
, hard_frame_pointer_rtx
, sa
);
5585 tmp
= plus_constant (tmp
, UNITS_PER_WORD
);
5586 emit_insn (gen_rtx_SET (VOIDmode
, sa
, tmp
));
5588 tmp
= gen_rtx_MEM (Pmode
, hard_frame_pointer_rtx
);
5589 emit_move_insn (hard_frame_pointer_rtx
, tmp
);
5591 pro_epilogue_adjust_stack (stack_pointer_rtx
, sa
,
5596 tmp
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, sa
);
5597 tmp
= plus_constant (tmp
, (frame
.to_allocate
5598 + frame
.nregs
* UNITS_PER_WORD
));
5599 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, tmp
));
5602 else if (!frame_pointer_needed
)
5603 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
5604 GEN_INT (frame
.to_allocate
5605 + frame
.nregs
* UNITS_PER_WORD
),
5607 /* If not an i386, mov & pop is faster than "leave". */
5608 else if (TARGET_USE_LEAVE
|| optimize_size
5609 || !cfun
->machine
->use_fast_prologue_epilogue
)
5610 emit_insn (TARGET_64BIT
? gen_leave_rex64 () : gen_leave ());
5613 pro_epilogue_adjust_stack (stack_pointer_rtx
,
5614 hard_frame_pointer_rtx
,
5617 emit_insn (gen_popdi1 (hard_frame_pointer_rtx
));
5619 emit_insn (gen_popsi1 (hard_frame_pointer_rtx
));
5624 /* First step is to deallocate the stack frame so that we can
5625 pop the registers. */
5628 gcc_assert (frame_pointer_needed
);
5629 pro_epilogue_adjust_stack (stack_pointer_rtx
,
5630 hard_frame_pointer_rtx
,
5631 GEN_INT (offset
), style
);
5633 else if (frame
.to_allocate
)
5634 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
5635 GEN_INT (frame
.to_allocate
), style
);
5637 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
5638 if (ix86_save_reg (regno
, false))
5641 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode
, regno
)));
5643 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode
, regno
)));
5645 if (frame_pointer_needed
)
5647 /* Leave results in shorter dependency chains on CPUs that are
5648 able to grok it fast. */
5649 if (TARGET_USE_LEAVE
)
5650 emit_insn (TARGET_64BIT
? gen_leave_rex64 () : gen_leave ());
5651 else if (TARGET_64BIT
)
5652 emit_insn (gen_popdi1 (hard_frame_pointer_rtx
));
5654 emit_insn (gen_popsi1 (hard_frame_pointer_rtx
));
5658 if (cfun
->machine
->force_align_arg_pointer
)
5660 emit_insn (gen_addsi3 (stack_pointer_rtx
,
5661 cfun
->machine
->force_align_arg_pointer
,
5665 /* Sibcall epilogues don't want a return instruction. */
5669 if (current_function_pops_args
&& current_function_args_size
)
5671 rtx popc
= GEN_INT (current_function_pops_args
);
5673 /* i386 can only pop 64K bytes. If asked to pop more, pop
5674 return address, do explicit add, and jump indirectly to the
5677 if (current_function_pops_args
>= 65536)
5679 rtx ecx
= gen_rtx_REG (SImode
, 2);
5681 /* There is no "pascal" calling convention in 64bit ABI. */
5682 gcc_assert (!TARGET_64BIT
);
5684 emit_insn (gen_popsi1 (ecx
));
5685 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, popc
));
5686 emit_jump_insn (gen_return_indirect_internal (ecx
));
5689 emit_jump_insn (gen_return_pop_internal (popc
));
5692 emit_jump_insn (gen_return_internal ());
5695 /* Reset from the function's potential modifications. */
5698 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED
,
5699 HOST_WIDE_INT size ATTRIBUTE_UNUSED
)
5701 if (pic_offset_table_rtx
)
5702 REGNO (pic_offset_table_rtx
) = REAL_PIC_OFFSET_TABLE_REGNUM
;
5704 /* Mach-O doesn't support labels at the end of objects, so if
5705 it looks like we might want one, insert a NOP. */
5707 rtx insn
= get_last_insn ();
5710 && NOTE_LINE_NUMBER (insn
) != NOTE_INSN_DELETED_LABEL
)
5711 insn
= PREV_INSN (insn
);
5715 && NOTE_LINE_NUMBER (insn
) == NOTE_INSN_DELETED_LABEL
)))
5716 fputs ("\tnop\n", file
);
5722 /* Extract the parts of an RTL expression that is a valid memory address
5723 for an instruction. Return 0 if the structure of the address is
5724 grossly off. Return -1 if the address contains ASHIFT, so it is not
5725 strictly valid, but still used for computing length of lea instruction. */
5728 ix86_decompose_address (rtx addr
, struct ix86_address
*out
)
5730 rtx base
= NULL_RTX
, index
= NULL_RTX
, disp
= NULL_RTX
;
5731 rtx base_reg
, index_reg
;
5732 HOST_WIDE_INT scale
= 1;
5733 rtx scale_rtx
= NULL_RTX
;
5735 enum ix86_address_seg seg
= SEG_DEFAULT
;
5737 if (GET_CODE (addr
) == REG
|| GET_CODE (addr
) == SUBREG
)
5739 else if (GET_CODE (addr
) == PLUS
)
5749 addends
[n
++] = XEXP (op
, 1);
5752 while (GET_CODE (op
) == PLUS
);
5757 for (i
= n
; i
>= 0; --i
)
5760 switch (GET_CODE (op
))
5765 index
= XEXP (op
, 0);
5766 scale_rtx
= XEXP (op
, 1);
5770 if (XINT (op
, 1) == UNSPEC_TP
5771 && TARGET_TLS_DIRECT_SEG_REFS
5772 && seg
== SEG_DEFAULT
)
5773 seg
= TARGET_64BIT
? SEG_FS
: SEG_GS
;
5802 else if (GET_CODE (addr
) == MULT
)
5804 index
= XEXP (addr
, 0); /* index*scale */
5805 scale_rtx
= XEXP (addr
, 1);
5807 else if (GET_CODE (addr
) == ASHIFT
)
5811 /* We're called for lea too, which implements ashift on occasion. */
5812 index
= XEXP (addr
, 0);
5813 tmp
= XEXP (addr
, 1);
5814 if (GET_CODE (tmp
) != CONST_INT
)
5816 scale
= INTVAL (tmp
);
5817 if ((unsigned HOST_WIDE_INT
) scale
> 3)
5823 disp
= addr
; /* displacement */
5825 /* Extract the integral value of scale. */
5828 if (GET_CODE (scale_rtx
) != CONST_INT
)
5830 scale
= INTVAL (scale_rtx
);
5833 base_reg
= base
&& GET_CODE (base
) == SUBREG
? SUBREG_REG (base
) : base
;
5834 index_reg
= index
&& GET_CODE (index
) == SUBREG
? SUBREG_REG (index
) : index
;
5836 /* Allow arg pointer and stack pointer as index if there is not scaling. */
5837 if (base_reg
&& index_reg
&& scale
== 1
5838 && (index_reg
== arg_pointer_rtx
5839 || index_reg
== frame_pointer_rtx
5840 || (REG_P (index_reg
) && REGNO (index_reg
) == STACK_POINTER_REGNUM
)))
5843 tmp
= base
, base
= index
, index
= tmp
;
5844 tmp
= base_reg
, base_reg
= index_reg
, index_reg
= tmp
;
5847 /* Special case: %ebp cannot be encoded as a base without a displacement. */
5848 if ((base_reg
== hard_frame_pointer_rtx
5849 || base_reg
== frame_pointer_rtx
5850 || base_reg
== arg_pointer_rtx
) && !disp
)
5853 /* Special case: on K6, [%esi] makes the instruction vector decoded.
5854 Avoid this by transforming to [%esi+0]. */
5855 if (ix86_tune
== PROCESSOR_K6
&& !optimize_size
5856 && base_reg
&& !index_reg
&& !disp
5858 && REGNO_REG_CLASS (REGNO (base_reg
)) == SIREG
)
5861 /* Special case: encode reg+reg instead of reg*2. */
5862 if (!base
&& index
&& scale
&& scale
== 2)
5863 base
= index
, base_reg
= index_reg
, scale
= 1;
5865 /* Special case: scaling cannot be encoded without base or displacement. */
5866 if (!base
&& !disp
&& index
&& scale
!= 1)
5878 /* Return cost of the memory address x.
5879 For i386, it is better to use a complex address than let gcc copy
5880 the address into a reg and make a new pseudo. But not if the address
5881 requires to two regs - that would mean more pseudos with longer
5884 ix86_address_cost (rtx x
)
5886 struct ix86_address parts
;
5888 int ok
= ix86_decompose_address (x
, &parts
);
5892 if (parts
.base
&& GET_CODE (parts
.base
) == SUBREG
)
5893 parts
.base
= SUBREG_REG (parts
.base
);
5894 if (parts
.index
&& GET_CODE (parts
.index
) == SUBREG
)
5895 parts
.index
= SUBREG_REG (parts
.index
);
5897 /* More complex memory references are better. */
5898 if (parts
.disp
&& parts
.disp
!= const0_rtx
)
5900 if (parts
.seg
!= SEG_DEFAULT
)
5903 /* Attempt to minimize number of registers in the address. */
5905 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
))
5907 && (!REG_P (parts
.index
)
5908 || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)))
5912 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
)
5914 && (!REG_P (parts
.index
) || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)
5915 && parts
.base
!= parts
.index
)
5918 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
5919 since it's predecode logic can't detect the length of instructions
5920 and it degenerates to vector decoded. Increase cost of such
5921 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
5922 to split such addresses or even refuse such addresses at all.
5924 Following addressing modes are affected:
5929 The first and last case may be avoidable by explicitly coding the zero in
5930 memory address, but I don't have AMD-K6 machine handy to check this
5934 && ((!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
!= 1)
5935 || (parts
.disp
&& !parts
.base
&& parts
.index
&& parts
.scale
!= 1)
5936 || (!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
== 1)))
5942 /* If X is a machine specific address (i.e. a symbol or label being
5943 referenced as a displacement from the GOT implemented using an
5944 UNSPEC), then return the base term. Otherwise return X. */
5947 ix86_find_base_term (rtx x
)
5953 if (GET_CODE (x
) != CONST
)
5956 if (GET_CODE (term
) == PLUS
5957 && (GET_CODE (XEXP (term
, 1)) == CONST_INT
5958 || GET_CODE (XEXP (term
, 1)) == CONST_DOUBLE
))
5959 term
= XEXP (term
, 0);
5960 if (GET_CODE (term
) != UNSPEC
5961 || XINT (term
, 1) != UNSPEC_GOTPCREL
)
5964 term
= XVECEXP (term
, 0, 0);
5966 if (GET_CODE (term
) != SYMBOL_REF
5967 && GET_CODE (term
) != LABEL_REF
)
5973 term
= ix86_delegitimize_address (x
);
5975 if (GET_CODE (term
) != SYMBOL_REF
5976 && GET_CODE (term
) != LABEL_REF
)
5982 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
5983 this is used for to form addresses to local data when -fPIC is in
5987 darwin_local_data_pic (rtx disp
)
5989 if (GET_CODE (disp
) == MINUS
)
5991 if (GET_CODE (XEXP (disp
, 0)) == LABEL_REF
5992 || GET_CODE (XEXP (disp
, 0)) == SYMBOL_REF
)
5993 if (GET_CODE (XEXP (disp
, 1)) == SYMBOL_REF
)
5995 const char *sym_name
= XSTR (XEXP (disp
, 1), 0);
5996 if (! strcmp (sym_name
, "<pic base>"))
6004 /* Determine if a given RTX is a valid constant. We already know this
6005 satisfies CONSTANT_P. */
6008 legitimate_constant_p (rtx x
)
6010 switch (GET_CODE (x
))
6015 if (GET_CODE (x
) == PLUS
)
6017 if (GET_CODE (XEXP (x
, 1)) != CONST_INT
)
6022 if (TARGET_MACHO
&& darwin_local_data_pic (x
))
6025 /* Only some unspecs are valid as "constants". */
6026 if (GET_CODE (x
) == UNSPEC
)
6027 switch (XINT (x
, 1))
6030 return TARGET_64BIT
;
6033 x
= XVECEXP (x
, 0, 0);
6034 return (GET_CODE (x
) == SYMBOL_REF
6035 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_EXEC
);
6037 x
= XVECEXP (x
, 0, 0);
6038 return (GET_CODE (x
) == SYMBOL_REF
6039 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_DYNAMIC
);
6044 /* We must have drilled down to a symbol. */
6045 if (GET_CODE (x
) == LABEL_REF
)
6047 if (GET_CODE (x
) != SYMBOL_REF
)
6052 /* TLS symbols are never valid. */
6053 if (SYMBOL_REF_TLS_MODEL (x
))
6058 if (GET_MODE (x
) == TImode
6059 && x
!= CONST0_RTX (TImode
)
6065 if (x
== CONST0_RTX (GET_MODE (x
)))
6073 /* Otherwise we handle everything else in the move patterns. */
6077 /* Determine if it's legal to put X into the constant pool. This
6078 is not possible for the address of thread-local symbols, which
6079 is checked above. */
6082 ix86_cannot_force_const_mem (rtx x
)
6084 /* We can always put integral constants and vectors in memory. */
6085 switch (GET_CODE (x
))
6095 return !legitimate_constant_p (x
);
6098 /* Determine if a given RTX is a valid constant address. */
6101 constant_address_p (rtx x
)
6103 return CONSTANT_P (x
) && legitimate_address_p (Pmode
, x
, 1);
6106 /* Nonzero if the constant value X is a legitimate general operand
6107 when generating PIC code. It is given that flag_pic is on and
6108 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
6111 legitimate_pic_operand_p (rtx x
)
6115 switch (GET_CODE (x
))
6118 inner
= XEXP (x
, 0);
6119 if (GET_CODE (inner
) == PLUS
6120 && GET_CODE (XEXP (inner
, 1)) == CONST_INT
)
6121 inner
= XEXP (inner
, 0);
6123 /* Only some unspecs are valid as "constants". */
6124 if (GET_CODE (inner
) == UNSPEC
)
6125 switch (XINT (inner
, 1))
6128 return TARGET_64BIT
;
6130 x
= XVECEXP (inner
, 0, 0);
6131 return (GET_CODE (x
) == SYMBOL_REF
6132 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_EXEC
);
6140 return legitimate_pic_address_disp_p (x
);
6147 /* Determine if a given CONST RTX is a valid memory displacement
6151 legitimate_pic_address_disp_p (rtx disp
)
6155 /* In 64bit mode we can allow direct addresses of symbols and labels
6156 when they are not dynamic symbols. */
6159 rtx op0
= disp
, op1
;
6161 switch (GET_CODE (disp
))
6167 if (GET_CODE (XEXP (disp
, 0)) != PLUS
)
6169 op0
= XEXP (XEXP (disp
, 0), 0);
6170 op1
= XEXP (XEXP (disp
, 0), 1);
6171 if (GET_CODE (op1
) != CONST_INT
6172 || INTVAL (op1
) >= 16*1024*1024
6173 || INTVAL (op1
) < -16*1024*1024)
6175 if (GET_CODE (op0
) == LABEL_REF
)
6177 if (GET_CODE (op0
) != SYMBOL_REF
)
6182 /* TLS references should always be enclosed in UNSPEC. */
6183 if (SYMBOL_REF_TLS_MODEL (op0
))
6185 if (!SYMBOL_REF_FAR_ADDR_P (op0
) && SYMBOL_REF_LOCAL_P (op0
))
6193 if (GET_CODE (disp
) != CONST
)
6195 disp
= XEXP (disp
, 0);
6199 /* We are unsafe to allow PLUS expressions. This limit allowed distance
6200 of GOT tables. We should not need these anyway. */
6201 if (GET_CODE (disp
) != UNSPEC
6202 || (XINT (disp
, 1) != UNSPEC_GOTPCREL
6203 && XINT (disp
, 1) != UNSPEC_GOTOFF
))
6206 if (GET_CODE (XVECEXP (disp
, 0, 0)) != SYMBOL_REF
6207 && GET_CODE (XVECEXP (disp
, 0, 0)) != LABEL_REF
)
6213 if (GET_CODE (disp
) == PLUS
)
6215 if (GET_CODE (XEXP (disp
, 1)) != CONST_INT
)
6217 disp
= XEXP (disp
, 0);
6221 if (TARGET_MACHO
&& darwin_local_data_pic (disp
))
6224 if (GET_CODE (disp
) != UNSPEC
)
6227 switch (XINT (disp
, 1))
6232 return GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
;
6234 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
6235 While ABI specify also 32bit relocation but we don't produce it in
6236 small PIC model at all. */
6237 if ((GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
6238 || GET_CODE (XVECEXP (disp
, 0, 0)) == LABEL_REF
)
6240 return local_symbolic_operand (XVECEXP (disp
, 0, 0), Pmode
);
6242 case UNSPEC_GOTTPOFF
:
6243 case UNSPEC_GOTNTPOFF
:
6244 case UNSPEC_INDNTPOFF
:
6247 disp
= XVECEXP (disp
, 0, 0);
6248 return (GET_CODE (disp
) == SYMBOL_REF
6249 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_INITIAL_EXEC
);
6251 disp
= XVECEXP (disp
, 0, 0);
6252 return (GET_CODE (disp
) == SYMBOL_REF
6253 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_LOCAL_EXEC
);
6255 disp
= XVECEXP (disp
, 0, 0);
6256 return (GET_CODE (disp
) == SYMBOL_REF
6257 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_LOCAL_DYNAMIC
);
6263 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
6264 memory address for an instruction. The MODE argument is the machine mode
6265 for the MEM expression that wants to use this address.
6267 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
6268 convert common non-canonical forms to canonical form so that they will
6272 legitimate_address_p (enum machine_mode mode
, rtx addr
, int strict
)
6274 struct ix86_address parts
;
6275 rtx base
, index
, disp
;
6276 HOST_WIDE_INT scale
;
6277 const char *reason
= NULL
;
6278 rtx reason_rtx
= NULL_RTX
;
6280 if (TARGET_DEBUG_ADDR
)
6283 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
6284 GET_MODE_NAME (mode
), strict
);
6288 if (ix86_decompose_address (addr
, &parts
) <= 0)
6290 reason
= "decomposition failed";
6295 index
= parts
.index
;
6297 scale
= parts
.scale
;
6299 /* Validate base register.
6301 Don't allow SUBREG's that span more than a word here. It can lead to spill
6302 failures when the base is one word out of a two word structure, which is
6303 represented internally as a DImode int. */
6312 else if (GET_CODE (base
) == SUBREG
6313 && REG_P (SUBREG_REG (base
))
6314 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base
)))
6316 reg
= SUBREG_REG (base
);
6319 reason
= "base is not a register";
6323 if (GET_MODE (base
) != Pmode
)
6325 reason
= "base is not in Pmode";
6329 if ((strict
&& ! REG_OK_FOR_BASE_STRICT_P (reg
))
6330 || (! strict
&& ! REG_OK_FOR_BASE_NONSTRICT_P (reg
)))
6332 reason
= "base is not valid";
6337 /* Validate index register.
6339 Don't allow SUBREG's that span more than a word here -- same as above. */
6348 else if (GET_CODE (index
) == SUBREG
6349 && REG_P (SUBREG_REG (index
))
6350 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index
)))
6352 reg
= SUBREG_REG (index
);
6355 reason
= "index is not a register";
6359 if (GET_MODE (index
) != Pmode
)
6361 reason
= "index is not in Pmode";
6365 if ((strict
&& ! REG_OK_FOR_INDEX_STRICT_P (reg
))
6366 || (! strict
&& ! REG_OK_FOR_INDEX_NONSTRICT_P (reg
)))
6368 reason
= "index is not valid";
6373 /* Validate scale factor. */
6376 reason_rtx
= GEN_INT (scale
);
6379 reason
= "scale without index";
6383 if (scale
!= 2 && scale
!= 4 && scale
!= 8)
6385 reason
= "scale is not a valid multiplier";
6390 /* Validate displacement. */
6395 if (GET_CODE (disp
) == CONST
6396 && GET_CODE (XEXP (disp
, 0)) == UNSPEC
)
6397 switch (XINT (XEXP (disp
, 0), 1))
6399 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
6400 used. While ABI specify also 32bit relocations, we don't produce
6401 them at all and use IP relative instead. */
6404 gcc_assert (flag_pic
);
6406 goto is_legitimate_pic
;
6407 reason
= "64bit address unspec";
6410 case UNSPEC_GOTPCREL
:
6411 gcc_assert (flag_pic
);
6412 goto is_legitimate_pic
;
6414 case UNSPEC_GOTTPOFF
:
6415 case UNSPEC_GOTNTPOFF
:
6416 case UNSPEC_INDNTPOFF
:
6422 reason
= "invalid address unspec";
6426 else if (flag_pic
&& (SYMBOLIC_CONST (disp
)
6428 && !machopic_operand_p (disp
)
6433 if (TARGET_64BIT
&& (index
|| base
))
6435 /* foo@dtpoff(%rX) is ok. */
6436 if (GET_CODE (disp
) != CONST
6437 || GET_CODE (XEXP (disp
, 0)) != PLUS
6438 || GET_CODE (XEXP (XEXP (disp
, 0), 0)) != UNSPEC
6439 || GET_CODE (XEXP (XEXP (disp
, 0), 1)) != CONST_INT
6440 || (XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_DTPOFF
6441 && XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_NTPOFF
))
6443 reason
= "non-constant pic memory reference";
6447 else if (! legitimate_pic_address_disp_p (disp
))
6449 reason
= "displacement is an invalid pic construct";
6453 /* This code used to verify that a symbolic pic displacement
6454 includes the pic_offset_table_rtx register.
6456 While this is good idea, unfortunately these constructs may
6457 be created by "adds using lea" optimization for incorrect
6466 This code is nonsensical, but results in addressing
6467 GOT table with pic_offset_table_rtx base. We can't
6468 just refuse it easily, since it gets matched by
6469 "addsi3" pattern, that later gets split to lea in the
6470 case output register differs from input. While this
6471 can be handled by separate addsi pattern for this case
6472 that never results in lea, this seems to be easier and
6473 correct fix for crash to disable this test. */
6475 else if (GET_CODE (disp
) != LABEL_REF
6476 && GET_CODE (disp
) != CONST_INT
6477 && (GET_CODE (disp
) != CONST
6478 || !legitimate_constant_p (disp
))
6479 && (GET_CODE (disp
) != SYMBOL_REF
6480 || !legitimate_constant_p (disp
)))
6482 reason
= "displacement is not constant";
6485 else if (TARGET_64BIT
6486 && !x86_64_immediate_operand (disp
, VOIDmode
))
6488 reason
= "displacement is out of range";
6493 /* Everything looks valid. */
6494 if (TARGET_DEBUG_ADDR
)
6495 fprintf (stderr
, "Success.\n");
6499 if (TARGET_DEBUG_ADDR
)
6501 fprintf (stderr
, "Error: %s\n", reason
);
6502 debug_rtx (reason_rtx
);
6507 /* Return a unique alias set for the GOT. */
6509 static HOST_WIDE_INT
6510 ix86_GOT_alias_set (void)
6512 static HOST_WIDE_INT set
= -1;
6514 set
= new_alias_set ();
6518 /* Return a legitimate reference for ORIG (an address) using the
6519 register REG. If REG is 0, a new pseudo is generated.
6521 There are two types of references that must be handled:
6523 1. Global data references must load the address from the GOT, via
6524 the PIC reg. An insn is emitted to do this load, and the reg is
6527 2. Static data references, constant pool addresses, and code labels
6528 compute the address as an offset from the GOT, whose base is in
6529 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
6530 differentiate them from global data objects. The returned
6531 address is the PIC reg + an unspec constant.
6533 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
6534 reg also appears in the address. */
6537 legitimize_pic_address (rtx orig
, rtx reg
)
6545 reg
= gen_reg_rtx (Pmode
);
6546 /* Use the generic Mach-O PIC machinery. */
6547 return machopic_legitimize_pic_address (orig
, GET_MODE (orig
), reg
);
6550 if (TARGET_64BIT
&& legitimate_pic_address_disp_p (addr
))
6552 else if (TARGET_64BIT
6553 && ix86_cmodel
!= CM_SMALL_PIC
6554 && local_symbolic_operand (addr
, Pmode
))
6557 /* This symbol may be referenced via a displacement from the PIC
6558 base address (@GOTOFF). */
6560 if (reload_in_progress
)
6561 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
6562 if (GET_CODE (addr
) == CONST
)
6563 addr
= XEXP (addr
, 0);
6564 if (GET_CODE (addr
) == PLUS
)
6566 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, XEXP (addr
, 0)), UNSPEC_GOTOFF
);
6567 new = gen_rtx_PLUS (Pmode
, new, XEXP (addr
, 1));
6570 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
6571 new = gen_rtx_CONST (Pmode
, new);
6573 tmpreg
= gen_reg_rtx (Pmode
);
6576 emit_move_insn (tmpreg
, new);
6580 new = expand_simple_binop (Pmode
, PLUS
, reg
, pic_offset_table_rtx
,
6581 tmpreg
, 1, OPTAB_DIRECT
);
6584 else new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, tmpreg
);
6586 else if (!TARGET_64BIT
&& local_symbolic_operand (addr
, Pmode
))
6588 /* This symbol may be referenced via a displacement from the PIC
6589 base address (@GOTOFF). */
6591 if (reload_in_progress
)
6592 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
6593 if (GET_CODE (addr
) == CONST
)
6594 addr
= XEXP (addr
, 0);
6595 if (GET_CODE (addr
) == PLUS
)
6597 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, XEXP (addr
, 0)), UNSPEC_GOTOFF
);
6598 new = gen_rtx_PLUS (Pmode
, new, XEXP (addr
, 1));
6601 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
6602 new = gen_rtx_CONST (Pmode
, new);
6603 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
6607 emit_move_insn (reg
, new);
6611 else if (GET_CODE (addr
) == SYMBOL_REF
)
6615 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTPCREL
);
6616 new = gen_rtx_CONST (Pmode
, new);
6617 new = gen_const_mem (Pmode
, new);
6618 set_mem_alias_set (new, ix86_GOT_alias_set ());
6621 reg
= gen_reg_rtx (Pmode
);
6622 /* Use directly gen_movsi, otherwise the address is loaded
6623 into register for CSE. We don't want to CSE this addresses,
6624 instead we CSE addresses from the GOT table, so skip this. */
6625 emit_insn (gen_movsi (reg
, new));
6630 /* This symbol must be referenced via a load from the
6631 Global Offset Table (@GOT). */
6633 if (reload_in_progress
)
6634 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
6635 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOT
);
6636 new = gen_rtx_CONST (Pmode
, new);
6637 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
6638 new = gen_const_mem (Pmode
, new);
6639 set_mem_alias_set (new, ix86_GOT_alias_set ());
6642 reg
= gen_reg_rtx (Pmode
);
6643 emit_move_insn (reg
, new);
6649 if (GET_CODE (addr
) == CONST_INT
6650 && !x86_64_immediate_operand (addr
, VOIDmode
))
6654 emit_move_insn (reg
, addr
);
6658 new = force_reg (Pmode
, addr
);
6660 else if (GET_CODE (addr
) == CONST
)
6662 addr
= XEXP (addr
, 0);
6664 /* We must match stuff we generate before. Assume the only
6665 unspecs that can get here are ours. Not that we could do
6666 anything with them anyway.... */
6667 if (GET_CODE (addr
) == UNSPEC
6668 || (GET_CODE (addr
) == PLUS
6669 && GET_CODE (XEXP (addr
, 0)) == UNSPEC
))
6671 gcc_assert (GET_CODE (addr
) == PLUS
);
6673 if (GET_CODE (addr
) == PLUS
)
6675 rtx op0
= XEXP (addr
, 0), op1
= XEXP (addr
, 1);
6677 /* Check first to see if this is a constant offset from a @GOTOFF
6678 symbol reference. */
6679 if (local_symbolic_operand (op0
, Pmode
)
6680 && GET_CODE (op1
) == CONST_INT
)
6684 if (reload_in_progress
)
6685 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
6686 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, op0
),
6688 new = gen_rtx_PLUS (Pmode
, new, op1
);
6689 new = gen_rtx_CONST (Pmode
, new);
6690 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
6694 emit_move_insn (reg
, new);
6700 if (INTVAL (op1
) < -16*1024*1024
6701 || INTVAL (op1
) >= 16*1024*1024)
6703 if (!x86_64_immediate_operand (op1
, Pmode
))
6704 op1
= force_reg (Pmode
, op1
);
6705 new = gen_rtx_PLUS (Pmode
, force_reg (Pmode
, op0
), op1
);
6711 base
= legitimize_pic_address (XEXP (addr
, 0), reg
);
6712 new = legitimize_pic_address (XEXP (addr
, 1),
6713 base
== reg
? NULL_RTX
: reg
);
6715 if (GET_CODE (new) == CONST_INT
)
6716 new = plus_constant (base
, INTVAL (new));
6719 if (GET_CODE (new) == PLUS
&& CONSTANT_P (XEXP (new, 1)))
6721 base
= gen_rtx_PLUS (Pmode
, base
, XEXP (new, 0));
6722 new = XEXP (new, 1);
6724 new = gen_rtx_PLUS (Pmode
, base
, new);
6732 /* Load the thread pointer. If TO_REG is true, force it into a register. */
6735 get_thread_pointer (int to_reg
)
6739 tp
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const0_rtx
), UNSPEC_TP
);
6743 reg
= gen_reg_rtx (Pmode
);
6744 insn
= gen_rtx_SET (VOIDmode
, reg
, tp
);
6745 insn
= emit_insn (insn
);
6750 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
6751 false if we expect this to be used for a memory address and true if
6752 we expect to load the address into a register. */
6755 legitimize_tls_address (rtx x
, enum tls_model model
, int for_mov
)
6757 rtx dest
, base
, off
, pic
, tp
;
6762 case TLS_MODEL_GLOBAL_DYNAMIC
:
6763 dest
= gen_reg_rtx (Pmode
);
6764 tp
= TARGET_GNU2_TLS
? get_thread_pointer (1) : 0;
6766 if (TARGET_64BIT
&& ! TARGET_GNU2_TLS
)
6768 rtx rax
= gen_rtx_REG (Pmode
, 0), insns
;
6771 emit_call_insn (gen_tls_global_dynamic_64 (rax
, x
));
6772 insns
= get_insns ();
6775 emit_libcall_block (insns
, dest
, rax
, x
);
6777 else if (TARGET_64BIT
&& TARGET_GNU2_TLS
)
6778 emit_insn (gen_tls_global_dynamic_64 (dest
, x
));
6780 emit_insn (gen_tls_global_dynamic_32 (dest
, x
));
6782 if (TARGET_GNU2_TLS
)
6784 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, tp
, dest
));
6786 set_unique_reg_note (get_last_insn (), REG_EQUIV
, x
);
6790 case TLS_MODEL_LOCAL_DYNAMIC
:
6791 base
= gen_reg_rtx (Pmode
);
6792 tp
= TARGET_GNU2_TLS
? get_thread_pointer (1) : 0;
6794 if (TARGET_64BIT
&& ! TARGET_GNU2_TLS
)
6796 rtx rax
= gen_rtx_REG (Pmode
, 0), insns
, note
;
6799 emit_call_insn (gen_tls_local_dynamic_base_64 (rax
));
6800 insns
= get_insns ();
6803 note
= gen_rtx_EXPR_LIST (VOIDmode
, const0_rtx
, NULL
);
6804 note
= gen_rtx_EXPR_LIST (VOIDmode
, ix86_tls_get_addr (), note
);
6805 emit_libcall_block (insns
, base
, rax
, note
);
6807 else if (TARGET_64BIT
&& TARGET_GNU2_TLS
)
6808 emit_insn (gen_tls_local_dynamic_base_64 (base
));
6810 emit_insn (gen_tls_local_dynamic_base_32 (base
));
6812 if (TARGET_GNU2_TLS
)
6814 rtx x
= ix86_tls_module_base ();
6816 set_unique_reg_note (get_last_insn (), REG_EQUIV
,
6817 gen_rtx_MINUS (Pmode
, x
, tp
));
6820 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
), UNSPEC_DTPOFF
);
6821 off
= gen_rtx_CONST (Pmode
, off
);
6823 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, base
, off
));
6825 if (TARGET_GNU2_TLS
)
6827 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, dest
, tp
));
6829 set_unique_reg_note (get_last_insn (), REG_EQUIV
, x
);
6834 case TLS_MODEL_INITIAL_EXEC
:
6838 type
= UNSPEC_GOTNTPOFF
;
6842 if (reload_in_progress
)
6843 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
6844 pic
= pic_offset_table_rtx
;
6845 type
= TARGET_ANY_GNU_TLS
? UNSPEC_GOTNTPOFF
: UNSPEC_GOTTPOFF
;
6847 else if (!TARGET_ANY_GNU_TLS
)
6849 pic
= gen_reg_rtx (Pmode
);
6850 emit_insn (gen_set_got (pic
));
6851 type
= UNSPEC_GOTTPOFF
;
6856 type
= UNSPEC_INDNTPOFF
;
6859 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
), type
);
6860 off
= gen_rtx_CONST (Pmode
, off
);
6862 off
= gen_rtx_PLUS (Pmode
, pic
, off
);
6863 off
= gen_const_mem (Pmode
, off
);
6864 set_mem_alias_set (off
, ix86_GOT_alias_set ());
6866 if (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
6868 base
= get_thread_pointer (for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
6869 off
= force_reg (Pmode
, off
);
6870 return gen_rtx_PLUS (Pmode
, base
, off
);
6874 base
= get_thread_pointer (true);
6875 dest
= gen_reg_rtx (Pmode
);
6876 emit_insn (gen_subsi3 (dest
, base
, off
));
6880 case TLS_MODEL_LOCAL_EXEC
:
6881 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
),
6882 (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
6883 ? UNSPEC_NTPOFF
: UNSPEC_TPOFF
);
6884 off
= gen_rtx_CONST (Pmode
, off
);
6886 if (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
6888 base
= get_thread_pointer (for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
6889 return gen_rtx_PLUS (Pmode
, base
, off
);
6893 base
= get_thread_pointer (true);
6894 dest
= gen_reg_rtx (Pmode
);
6895 emit_insn (gen_subsi3 (dest
, base
, off
));
6906 /* Try machine-dependent ways of modifying an illegitimate address
6907 to be legitimate. If we find one, return the new, valid address.
6908 This macro is used in only one place: `memory_address' in explow.c.
6910 OLDX is the address as it was before break_out_memory_refs was called.
6911 In some cases it is useful to look at this to decide what needs to be done.
6913 MODE and WIN are passed so that this macro can use
6914 GO_IF_LEGITIMATE_ADDRESS.
6916 It is always safe for this macro to do nothing. It exists to recognize
6917 opportunities to optimize the output.
6919 For the 80386, we handle X+REG by loading X into a register R and
6920 using R+REG. R will go in a general reg and indexing will be used.
6921 However, if REG is a broken-out memory address or multiplication,
6922 nothing needs to be done because REG can certainly go in a general reg.
6924 When -fpic is used, special handling is needed for symbolic references.
6925 See comments by legitimize_pic_address in i386.c for details. */
6928 legitimize_address (rtx x
, rtx oldx ATTRIBUTE_UNUSED
, enum machine_mode mode
)
6933 if (TARGET_DEBUG_ADDR
)
6935 fprintf (stderr
, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
6936 GET_MODE_NAME (mode
));
6940 log
= GET_CODE (x
) == SYMBOL_REF
? SYMBOL_REF_TLS_MODEL (x
) : 0;
6942 return legitimize_tls_address (x
, log
, false);
6943 if (GET_CODE (x
) == CONST
6944 && GET_CODE (XEXP (x
, 0)) == PLUS
6945 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
6946 && (log
= SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x
, 0), 0))))
6948 rtx t
= legitimize_tls_address (XEXP (XEXP (x
, 0), 0), log
, false);
6949 return gen_rtx_PLUS (Pmode
, t
, XEXP (XEXP (x
, 0), 1));
6952 if (flag_pic
&& SYMBOLIC_CONST (x
))
6953 return legitimize_pic_address (x
, 0);
6955 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
6956 if (GET_CODE (x
) == ASHIFT
6957 && GET_CODE (XEXP (x
, 1)) == CONST_INT
6958 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (x
, 1)) < 4)
6961 log
= INTVAL (XEXP (x
, 1));
6962 x
= gen_rtx_MULT (Pmode
, force_reg (Pmode
, XEXP (x
, 0)),
6963 GEN_INT (1 << log
));
6966 if (GET_CODE (x
) == PLUS
)
6968 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
6970 if (GET_CODE (XEXP (x
, 0)) == ASHIFT
6971 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == CONST_INT
6972 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (XEXP (x
, 0), 1)) < 4)
6975 log
= INTVAL (XEXP (XEXP (x
, 0), 1));
6976 XEXP (x
, 0) = gen_rtx_MULT (Pmode
,
6977 force_reg (Pmode
, XEXP (XEXP (x
, 0), 0)),
6978 GEN_INT (1 << log
));
6981 if (GET_CODE (XEXP (x
, 1)) == ASHIFT
6982 && GET_CODE (XEXP (XEXP (x
, 1), 1)) == CONST_INT
6983 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (XEXP (x
, 1), 1)) < 4)
6986 log
= INTVAL (XEXP (XEXP (x
, 1), 1));
6987 XEXP (x
, 1) = gen_rtx_MULT (Pmode
,
6988 force_reg (Pmode
, XEXP (XEXP (x
, 1), 0)),
6989 GEN_INT (1 << log
));
6992 /* Put multiply first if it isn't already. */
6993 if (GET_CODE (XEXP (x
, 1)) == MULT
)
6995 rtx tmp
= XEXP (x
, 0);
6996 XEXP (x
, 0) = XEXP (x
, 1);
7001 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
7002 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
7003 created by virtual register instantiation, register elimination, and
7004 similar optimizations. */
7005 if (GET_CODE (XEXP (x
, 0)) == MULT
&& GET_CODE (XEXP (x
, 1)) == PLUS
)
7008 x
= gen_rtx_PLUS (Pmode
,
7009 gen_rtx_PLUS (Pmode
, XEXP (x
, 0),
7010 XEXP (XEXP (x
, 1), 0)),
7011 XEXP (XEXP (x
, 1), 1));
7015 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
7016 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
7017 else if (GET_CODE (x
) == PLUS
&& GET_CODE (XEXP (x
, 0)) == PLUS
7018 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
7019 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == PLUS
7020 && CONSTANT_P (XEXP (x
, 1)))
7023 rtx other
= NULL_RTX
;
7025 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
7027 constant
= XEXP (x
, 1);
7028 other
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
7030 else if (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 1), 1)) == CONST_INT
)
7032 constant
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
7033 other
= XEXP (x
, 1);
7041 x
= gen_rtx_PLUS (Pmode
,
7042 gen_rtx_PLUS (Pmode
, XEXP (XEXP (x
, 0), 0),
7043 XEXP (XEXP (XEXP (x
, 0), 1), 0)),
7044 plus_constant (other
, INTVAL (constant
)));
7048 if (changed
&& legitimate_address_p (mode
, x
, FALSE
))
7051 if (GET_CODE (XEXP (x
, 0)) == MULT
)
7054 XEXP (x
, 0) = force_operand (XEXP (x
, 0), 0);
7057 if (GET_CODE (XEXP (x
, 1)) == MULT
)
7060 XEXP (x
, 1) = force_operand (XEXP (x
, 1), 0);
7064 && GET_CODE (XEXP (x
, 1)) == REG
7065 && GET_CODE (XEXP (x
, 0)) == REG
)
7068 if (flag_pic
&& SYMBOLIC_CONST (XEXP (x
, 1)))
7071 x
= legitimize_pic_address (x
, 0);
7074 if (changed
&& legitimate_address_p (mode
, x
, FALSE
))
7077 if (GET_CODE (XEXP (x
, 0)) == REG
)
7079 rtx temp
= gen_reg_rtx (Pmode
);
7080 rtx val
= force_operand (XEXP (x
, 1), temp
);
7082 emit_move_insn (temp
, val
);
7088 else if (GET_CODE (XEXP (x
, 1)) == REG
)
7090 rtx temp
= gen_reg_rtx (Pmode
);
7091 rtx val
= force_operand (XEXP (x
, 0), temp
);
7093 emit_move_insn (temp
, val
);
7103 /* Print an integer constant expression in assembler syntax. Addition
7104 and subtraction are the only arithmetic that may appear in these
7105 expressions. FILE is the stdio stream to write to, X is the rtx, and
7106 CODE is the operand print code from the output string. */
7109 output_pic_addr_const (FILE *file
, rtx x
, int code
)
7113 switch (GET_CODE (x
))
7116 gcc_assert (flag_pic
);
7121 output_addr_const (file
, x
);
7122 if (!TARGET_MACHO
&& code
== 'P' && ! SYMBOL_REF_LOCAL_P (x
))
7123 fputs ("@PLT", file
);
7130 ASM_GENERATE_INTERNAL_LABEL (buf
, "L", CODE_LABEL_NUMBER (x
));
7131 assemble_name (asm_out_file
, buf
);
7135 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
7139 /* This used to output parentheses around the expression,
7140 but that does not work on the 386 (either ATT or BSD assembler). */
7141 output_pic_addr_const (file
, XEXP (x
, 0), code
);
7145 if (GET_MODE (x
) == VOIDmode
)
7147 /* We can use %d if the number is <32 bits and positive. */
7148 if (CONST_DOUBLE_HIGH (x
) || CONST_DOUBLE_LOW (x
) < 0)
7149 fprintf (file
, "0x%lx%08lx",
7150 (unsigned long) CONST_DOUBLE_HIGH (x
),
7151 (unsigned long) CONST_DOUBLE_LOW (x
));
7153 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, CONST_DOUBLE_LOW (x
));
7156 /* We can't handle floating point constants;
7157 PRINT_OPERAND must handle them. */
7158 output_operand_lossage ("floating constant misused");
7162 /* Some assemblers need integer constants to appear first. */
7163 if (GET_CODE (XEXP (x
, 0)) == CONST_INT
)
7165 output_pic_addr_const (file
, XEXP (x
, 0), code
);
7167 output_pic_addr_const (file
, XEXP (x
, 1), code
);
7171 gcc_assert (GET_CODE (XEXP (x
, 1)) == CONST_INT
);
7172 output_pic_addr_const (file
, XEXP (x
, 1), code
);
7174 output_pic_addr_const (file
, XEXP (x
, 0), code
);
7180 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? '(' : '[', file
);
7181 output_pic_addr_const (file
, XEXP (x
, 0), code
);
7183 output_pic_addr_const (file
, XEXP (x
, 1), code
);
7185 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? ')' : ']', file
);
7189 gcc_assert (XVECLEN (x
, 0) == 1);
7190 output_pic_addr_const (file
, XVECEXP (x
, 0, 0), code
);
7191 switch (XINT (x
, 1))
7194 fputs ("@GOT", file
);
7197 fputs ("@GOTOFF", file
);
7199 case UNSPEC_GOTPCREL
:
7200 fputs ("@GOTPCREL(%rip)", file
);
7202 case UNSPEC_GOTTPOFF
:
7203 /* FIXME: This might be @TPOFF in Sun ld too. */
7204 fputs ("@GOTTPOFF", file
);
7207 fputs ("@TPOFF", file
);
7211 fputs ("@TPOFF", file
);
7213 fputs ("@NTPOFF", file
);
7216 fputs ("@DTPOFF", file
);
7218 case UNSPEC_GOTNTPOFF
:
7220 fputs ("@GOTTPOFF(%rip)", file
);
7222 fputs ("@GOTNTPOFF", file
);
7224 case UNSPEC_INDNTPOFF
:
7225 fputs ("@INDNTPOFF", file
);
7228 output_operand_lossage ("invalid UNSPEC as operand");
7234 output_operand_lossage ("invalid expression as operand");
7238 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7239 We need to emit DTP-relative relocations. */
7242 i386_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
7244 fputs (ASM_LONG
, file
);
7245 output_addr_const (file
, x
);
7246 fputs ("@DTPOFF", file
);
7252 fputs (", 0", file
);
7259 /* In the name of slightly smaller debug output, and to cater to
7260 general assembler lossage, recognize PIC+GOTOFF and turn it back
7261 into a direct symbol reference.
7263 On Darwin, this is necessary to avoid a crash, because Darwin
7264 has a different PIC label for each routine but the DWARF debugging
7265 information is not associated with any particular routine, so it's
7266 necessary to remove references to the PIC label from RTL stored by
7267 the DWARF output code. */
7270 ix86_delegitimize_address (rtx orig_x
)
7273 /* reg_addend is NULL or a multiple of some register. */
7274 rtx reg_addend
= NULL_RTX
;
7275 /* const_addend is NULL or a const_int. */
7276 rtx const_addend
= NULL_RTX
;
7277 /* This is the result, or NULL. */
7278 rtx result
= NULL_RTX
;
7280 if (GET_CODE (x
) == MEM
)
7285 if (GET_CODE (x
) != CONST
7286 || GET_CODE (XEXP (x
, 0)) != UNSPEC
7287 || XINT (XEXP (x
, 0), 1) != UNSPEC_GOTPCREL
7288 || GET_CODE (orig_x
) != MEM
)
7290 return XVECEXP (XEXP (x
, 0), 0, 0);
7293 if (GET_CODE (x
) != PLUS
7294 || GET_CODE (XEXP (x
, 1)) != CONST
)
7297 if (GET_CODE (XEXP (x
, 0)) == REG
7298 && REGNO (XEXP (x
, 0)) == PIC_OFFSET_TABLE_REGNUM
)
7299 /* %ebx + GOT/GOTOFF */
7301 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
7303 /* %ebx + %reg * scale + GOT/GOTOFF */
7304 reg_addend
= XEXP (x
, 0);
7305 if (GET_CODE (XEXP (reg_addend
, 0)) == REG
7306 && REGNO (XEXP (reg_addend
, 0)) == PIC_OFFSET_TABLE_REGNUM
)
7307 reg_addend
= XEXP (reg_addend
, 1);
7308 else if (GET_CODE (XEXP (reg_addend
, 1)) == REG
7309 && REGNO (XEXP (reg_addend
, 1)) == PIC_OFFSET_TABLE_REGNUM
)
7310 reg_addend
= XEXP (reg_addend
, 0);
7313 if (GET_CODE (reg_addend
) != REG
7314 && GET_CODE (reg_addend
) != MULT
7315 && GET_CODE (reg_addend
) != ASHIFT
)
7321 x
= XEXP (XEXP (x
, 1), 0);
7322 if (GET_CODE (x
) == PLUS
7323 && GET_CODE (XEXP (x
, 1)) == CONST_INT
)
7325 const_addend
= XEXP (x
, 1);
7329 if (GET_CODE (x
) == UNSPEC
7330 && ((XINT (x
, 1) == UNSPEC_GOT
&& GET_CODE (orig_x
) == MEM
)
7331 || (XINT (x
, 1) == UNSPEC_GOTOFF
&& GET_CODE (orig_x
) != MEM
)))
7332 result
= XVECEXP (x
, 0, 0);
7334 if (TARGET_MACHO
&& darwin_local_data_pic (x
)
7335 && GET_CODE (orig_x
) != MEM
)
7336 result
= XEXP (x
, 0);
7342 result
= gen_rtx_PLUS (Pmode
, result
, const_addend
);
7344 result
= gen_rtx_PLUS (Pmode
, reg_addend
, result
);
7349 put_condition_code (enum rtx_code code
, enum machine_mode mode
, int reverse
,
7354 if (mode
== CCFPmode
|| mode
== CCFPUmode
)
7356 enum rtx_code second_code
, bypass_code
;
7357 ix86_fp_comparison_codes (code
, &bypass_code
, &code
, &second_code
);
7358 gcc_assert (bypass_code
== UNKNOWN
&& second_code
== UNKNOWN
);
7359 code
= ix86_fp_compare_code_to_integer (code
);
7363 code
= reverse_condition (code
);
7374 gcc_assert (mode
== CCmode
|| mode
== CCNOmode
|| mode
== CCGCmode
);
7378 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
7379 Those same assemblers have the same but opposite lossage on cmov. */
7380 gcc_assert (mode
== CCmode
);
7381 suffix
= fp
? "nbe" : "a";
7401 gcc_assert (mode
== CCmode
);
7423 gcc_assert (mode
== CCmode
);
7424 suffix
= fp
? "nb" : "ae";
7427 gcc_assert (mode
== CCmode
|| mode
== CCGCmode
|| mode
== CCNOmode
);
7431 gcc_assert (mode
== CCmode
);
7435 suffix
= fp
? "u" : "p";
7438 suffix
= fp
? "nu" : "np";
7443 fputs (suffix
, file
);
7446 /* Print the name of register X to FILE based on its machine mode and number.
7447 If CODE is 'w', pretend the mode is HImode.
7448 If CODE is 'b', pretend the mode is QImode.
7449 If CODE is 'k', pretend the mode is SImode.
7450 If CODE is 'q', pretend the mode is DImode.
7451 If CODE is 'h', pretend the reg is the 'high' byte register.
7452 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
7455 print_reg (rtx x
, int code
, FILE *file
)
7457 gcc_assert (REGNO (x
) != ARG_POINTER_REGNUM
7458 && REGNO (x
) != FRAME_POINTER_REGNUM
7459 && REGNO (x
) != FLAGS_REG
7460 && REGNO (x
) != FPSR_REG
);
7462 if (ASSEMBLER_DIALECT
== ASM_ATT
|| USER_LABEL_PREFIX
[0] == 0)
7465 if (code
== 'w' || MMX_REG_P (x
))
7467 else if (code
== 'b')
7469 else if (code
== 'k')
7471 else if (code
== 'q')
7473 else if (code
== 'y')
7475 else if (code
== 'h')
7478 code
= GET_MODE_SIZE (GET_MODE (x
));
7480 /* Irritatingly, AMD extended registers use different naming convention
7481 from the normal registers. */
7482 if (REX_INT_REG_P (x
))
7484 gcc_assert (TARGET_64BIT
);
7488 error ("extended registers have no high halves");
7491 fprintf (file
, "r%ib", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
7494 fprintf (file
, "r%iw", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
7497 fprintf (file
, "r%id", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
7500 fprintf (file
, "r%i", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
7503 error ("unsupported operand size for extended register");
7511 if (STACK_TOP_P (x
))
7513 fputs ("st(0)", file
);
7520 if (! ANY_FP_REG_P (x
))
7521 putc (code
== 8 && TARGET_64BIT
? 'r' : 'e', file
);
7526 fputs (hi_reg_name
[REGNO (x
)], file
);
7529 if (REGNO (x
) >= ARRAY_SIZE (qi_reg_name
))
7531 fputs (qi_reg_name
[REGNO (x
)], file
);
7534 if (REGNO (x
) >= ARRAY_SIZE (qi_high_reg_name
))
7536 fputs (qi_high_reg_name
[REGNO (x
)], file
);
7543 /* Locate some local-dynamic symbol still in use by this function
7544 so that we can print its name in some tls_local_dynamic_base
7548 get_some_local_dynamic_name (void)
7552 if (cfun
->machine
->some_ld_name
)
7553 return cfun
->machine
->some_ld_name
;
7555 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
7557 && for_each_rtx (&PATTERN (insn
), get_some_local_dynamic_name_1
, 0))
7558 return cfun
->machine
->some_ld_name
;
7564 get_some_local_dynamic_name_1 (rtx
*px
, void *data ATTRIBUTE_UNUSED
)
7568 if (GET_CODE (x
) == SYMBOL_REF
7569 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_DYNAMIC
)
7571 cfun
->machine
->some_ld_name
= XSTR (x
, 0);
7579 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
7580 C -- print opcode suffix for set/cmov insn.
7581 c -- like C, but print reversed condition
7582 F,f -- likewise, but for floating-point.
7583 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
7585 R -- print the prefix for register names.
7586 z -- print the opcode suffix for the size of the current operand.
7587 * -- print a star (in certain assembler syntax)
7588 A -- print an absolute memory reference.
7589 w -- print the operand as if it's a "word" (HImode) even if it isn't.
7590 s -- print a shift double count, followed by the assemblers argument
7592 b -- print the QImode name of the register for the indicated operand.
7593 %b0 would print %al if operands[0] is reg 0.
7594 w -- likewise, print the HImode name of the register.
7595 k -- likewise, print the SImode name of the register.
7596 q -- likewise, print the DImode name of the register.
7597 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
7598 y -- print "st(0)" instead of "st" as a register.
7599 D -- print condition for SSE cmp instruction.
7600 P -- if PIC, print an @PLT suffix.
7601 X -- don't print any sort of PIC '@' suffix for a symbol.
7602 & -- print some in-use local-dynamic symbol name.
7603 H -- print a memory address offset by 8; used for sse high-parts
7607 print_operand (FILE *file
, rtx x
, int code
)
7614 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7619 assemble_name (file
, get_some_local_dynamic_name ());
7623 switch (ASSEMBLER_DIALECT
)
7630 /* Intel syntax. For absolute addresses, registers should not
7631 be surrounded by braces. */
7632 if (GET_CODE (x
) != REG
)
7635 PRINT_OPERAND (file
, x
, 0);
7645 PRINT_OPERAND (file
, x
, 0);
7650 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7655 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7660 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7665 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7670 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7675 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7680 /* 387 opcodes don't get size suffixes if the operands are
7682 if (STACK_REG_P (x
))
7685 /* Likewise if using Intel opcodes. */
7686 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
7689 /* This is the size of op from size of operand. */
7690 switch (GET_MODE_SIZE (GET_MODE (x
)))
7693 #ifdef HAVE_GAS_FILDS_FISTS
7699 if (GET_MODE (x
) == SFmode
)
7714 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
7716 #ifdef GAS_MNEMONICS
7742 if (GET_CODE (x
) == CONST_INT
|| ! SHIFT_DOUBLE_OMITS_COUNT
)
7744 PRINT_OPERAND (file
, x
, 0);
7750 /* Little bit of braindamage here. The SSE compare instructions
7751 does use completely different names for the comparisons that the
7752 fp conditional moves. */
7753 switch (GET_CODE (x
))
7768 fputs ("unord", file
);
7772 fputs ("neq", file
);
7776 fputs ("nlt", file
);
7780 fputs ("nle", file
);
7783 fputs ("ord", file
);
7790 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7791 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7793 switch (GET_MODE (x
))
7795 case HImode
: putc ('w', file
); break;
7797 case SFmode
: putc ('l', file
); break;
7799 case DFmode
: putc ('q', file
); break;
7800 default: gcc_unreachable ();
7807 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 0, 0, file
);
7810 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7811 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7814 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 0, 1, file
);
7817 /* Like above, but reverse condition */
7819 /* Check to see if argument to %c is really a constant
7820 and not a condition code which needs to be reversed. */
7821 if (!COMPARISON_P (x
))
7823 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
7826 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 1, 0, file
);
7829 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7830 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7833 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 1, 1, file
);
7837 /* It doesn't actually matter what mode we use here, as we're
7838 only going to use this for printing. */
7839 x
= adjust_address_nv (x
, DImode
, 8);
7846 if (!optimize
|| optimize_size
|| !TARGET_BRANCH_PREDICTION_HINTS
)
7849 x
= find_reg_note (current_output_insn
, REG_BR_PROB
, 0);
7852 int pred_val
= INTVAL (XEXP (x
, 0));
7854 if (pred_val
< REG_BR_PROB_BASE
* 45 / 100
7855 || pred_val
> REG_BR_PROB_BASE
* 55 / 100)
7857 int taken
= pred_val
> REG_BR_PROB_BASE
/ 2;
7858 int cputaken
= final_forward_branch_p (current_output_insn
) == 0;
7860 /* Emit hints only in the case default branch prediction
7861 heuristics would fail. */
7862 if (taken
!= cputaken
)
7864 /* We use 3e (DS) prefix for taken branches and
7865 2e (CS) prefix for not taken branches. */
7867 fputs ("ds ; ", file
);
7869 fputs ("cs ; ", file
);
7876 output_operand_lossage ("invalid operand code '%c'", code
);
7880 if (GET_CODE (x
) == REG
)
7881 print_reg (x
, code
, file
);
7883 else if (GET_CODE (x
) == MEM
)
7885 /* No `byte ptr' prefix for call instructions. */
7886 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& code
!= 'X' && code
!= 'P')
7889 switch (GET_MODE_SIZE (GET_MODE (x
)))
7891 case 1: size
= "BYTE"; break;
7892 case 2: size
= "WORD"; break;
7893 case 4: size
= "DWORD"; break;
7894 case 8: size
= "QWORD"; break;
7895 case 12: size
= "XWORD"; break;
7896 case 16: size
= "XMMWORD"; break;
7901 /* Check for explicit size override (codes 'b', 'w' and 'k') */
7904 else if (code
== 'w')
7906 else if (code
== 'k')
7910 fputs (" PTR ", file
);
7914 /* Avoid (%rip) for call operands. */
7915 if (CONSTANT_ADDRESS_P (x
) && code
== 'P'
7916 && GET_CODE (x
) != CONST_INT
)
7917 output_addr_const (file
, x
);
7918 else if (this_is_asm_operands
&& ! address_operand (x
, VOIDmode
))
7919 output_operand_lossage ("invalid constraints for operand");
7924 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == SFmode
)
7929 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
7930 REAL_VALUE_TO_TARGET_SINGLE (r
, l
);
7932 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7934 fprintf (file
, "0x%08lx", l
);
7937 /* These float cases don't actually occur as immediate operands. */
7938 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == DFmode
)
7942 real_to_decimal (dstr
, CONST_DOUBLE_REAL_VALUE (x
), sizeof (dstr
), 0, 1);
7943 fprintf (file
, "%s", dstr
);
7946 else if (GET_CODE (x
) == CONST_DOUBLE
7947 && GET_MODE (x
) == XFmode
)
7951 real_to_decimal (dstr
, CONST_DOUBLE_REAL_VALUE (x
), sizeof (dstr
), 0, 1);
7952 fprintf (file
, "%s", dstr
);
7957 /* We have patterns that allow zero sets of memory, for instance.
7958 In 64-bit mode, we should probably support all 8-byte vectors,
7959 since we can in fact encode that into an immediate. */
7960 if (GET_CODE (x
) == CONST_VECTOR
)
7962 gcc_assert (x
== CONST0_RTX (GET_MODE (x
)));
7968 if (GET_CODE (x
) == CONST_INT
|| GET_CODE (x
) == CONST_DOUBLE
)
7970 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7973 else if (GET_CODE (x
) == CONST
|| GET_CODE (x
) == SYMBOL_REF
7974 || GET_CODE (x
) == LABEL_REF
)
7976 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7979 fputs ("OFFSET FLAT:", file
);
7982 if (GET_CODE (x
) == CONST_INT
)
7983 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
7985 output_pic_addr_const (file
, x
, code
);
7987 output_addr_const (file
, x
);
7991 /* Print a memory operand whose address is ADDR. */
7994 print_operand_address (FILE *file
, rtx addr
)
7996 struct ix86_address parts
;
7997 rtx base
, index
, disp
;
7999 int ok
= ix86_decompose_address (addr
, &parts
);
8004 index
= parts
.index
;
8006 scale
= parts
.scale
;
8014 if (USER_LABEL_PREFIX
[0] == 0)
8016 fputs ((parts
.seg
== SEG_FS
? "fs:" : "gs:"), file
);
8022 if (!base
&& !index
)
8024 /* Displacement only requires special attention. */
8026 if (GET_CODE (disp
) == CONST_INT
)
8028 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& parts
.seg
== SEG_DEFAULT
)
8030 if (USER_LABEL_PREFIX
[0] == 0)
8032 fputs ("ds:", file
);
8034 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (disp
));
8037 output_pic_addr_const (file
, disp
, 0);
8039 output_addr_const (file
, disp
);
8041 /* Use one byte shorter RIP relative addressing for 64bit mode. */
8044 if (GET_CODE (disp
) == CONST
8045 && GET_CODE (XEXP (disp
, 0)) == PLUS
8046 && GET_CODE (XEXP (XEXP (disp
, 0), 1)) == CONST_INT
)
8047 disp
= XEXP (XEXP (disp
, 0), 0);
8048 if (GET_CODE (disp
) == LABEL_REF
8049 || (GET_CODE (disp
) == SYMBOL_REF
8050 && SYMBOL_REF_TLS_MODEL (disp
) == 0))
8051 fputs ("(%rip)", file
);
8056 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8061 output_pic_addr_const (file
, disp
, 0);
8062 else if (GET_CODE (disp
) == LABEL_REF
)
8063 output_asm_label (disp
);
8065 output_addr_const (file
, disp
);
8070 print_reg (base
, 0, file
);
8074 print_reg (index
, 0, file
);
8076 fprintf (file
, ",%d", scale
);
8082 rtx offset
= NULL_RTX
;
8086 /* Pull out the offset of a symbol; print any symbol itself. */
8087 if (GET_CODE (disp
) == CONST
8088 && GET_CODE (XEXP (disp
, 0)) == PLUS
8089 && GET_CODE (XEXP (XEXP (disp
, 0), 1)) == CONST_INT
)
8091 offset
= XEXP (XEXP (disp
, 0), 1);
8092 disp
= gen_rtx_CONST (VOIDmode
,
8093 XEXP (XEXP (disp
, 0), 0));
8097 output_pic_addr_const (file
, disp
, 0);
8098 else if (GET_CODE (disp
) == LABEL_REF
)
8099 output_asm_label (disp
);
8100 else if (GET_CODE (disp
) == CONST_INT
)
8103 output_addr_const (file
, disp
);
8109 print_reg (base
, 0, file
);
8112 if (INTVAL (offset
) >= 0)
8114 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
8118 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
8125 print_reg (index
, 0, file
);
8127 fprintf (file
, "*%d", scale
);
8135 output_addr_const_extra (FILE *file
, rtx x
)
8139 if (GET_CODE (x
) != UNSPEC
)
8142 op
= XVECEXP (x
, 0, 0);
8143 switch (XINT (x
, 1))
8145 case UNSPEC_GOTTPOFF
:
8146 output_addr_const (file
, op
);
8147 /* FIXME: This might be @TPOFF in Sun ld. */
8148 fputs ("@GOTTPOFF", file
);
8151 output_addr_const (file
, op
);
8152 fputs ("@TPOFF", file
);
8155 output_addr_const (file
, op
);
8157 fputs ("@TPOFF", file
);
8159 fputs ("@NTPOFF", file
);
8162 output_addr_const (file
, op
);
8163 fputs ("@DTPOFF", file
);
8165 case UNSPEC_GOTNTPOFF
:
8166 output_addr_const (file
, op
);
8168 fputs ("@GOTTPOFF(%rip)", file
);
8170 fputs ("@GOTNTPOFF", file
);
8172 case UNSPEC_INDNTPOFF
:
8173 output_addr_const (file
, op
);
8174 fputs ("@INDNTPOFF", file
);
8184 /* Split one or more DImode RTL references into pairs of SImode
8185 references. The RTL can be REG, offsettable MEM, integer constant, or
8186 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
8187 split and "num" is its length. lo_half and hi_half are output arrays
8188 that parallel "operands". */
8191 split_di (rtx operands
[], int num
, rtx lo_half
[], rtx hi_half
[])
8195 rtx op
= operands
[num
];
8197 /* simplify_subreg refuse to split volatile memory addresses,
8198 but we still have to handle it. */
8199 if (GET_CODE (op
) == MEM
)
8201 lo_half
[num
] = adjust_address (op
, SImode
, 0);
8202 hi_half
[num
] = adjust_address (op
, SImode
, 4);
8206 lo_half
[num
] = simplify_gen_subreg (SImode
, op
,
8207 GET_MODE (op
) == VOIDmode
8208 ? DImode
: GET_MODE (op
), 0);
8209 hi_half
[num
] = simplify_gen_subreg (SImode
, op
,
8210 GET_MODE (op
) == VOIDmode
8211 ? DImode
: GET_MODE (op
), 4);
8215 /* Split one or more TImode RTL references into pairs of DImode
8216 references. The RTL can be REG, offsettable MEM, integer constant, or
8217 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
8218 split and "num" is its length. lo_half and hi_half are output arrays
8219 that parallel "operands". */
8222 split_ti (rtx operands
[], int num
, rtx lo_half
[], rtx hi_half
[])
8226 rtx op
= operands
[num
];
8228 /* simplify_subreg refuse to split volatile memory addresses, but we
8229 still have to handle it. */
8230 if (GET_CODE (op
) == MEM
)
8232 lo_half
[num
] = adjust_address (op
, DImode
, 0);
8233 hi_half
[num
] = adjust_address (op
, DImode
, 8);
8237 lo_half
[num
] = simplify_gen_subreg (DImode
, op
, TImode
, 0);
8238 hi_half
[num
] = simplify_gen_subreg (DImode
, op
, TImode
, 8);
8243 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
8244 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
8245 is the expression of the binary operation. The output may either be
8246 emitted here, or returned to the caller, like all output_* functions.
8248 There is no guarantee that the operands are the same mode, as they
8249 might be within FLOAT or FLOAT_EXTEND expressions. */
8251 #ifndef SYSV386_COMPAT
8252 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
8253 wants to fix the assemblers because that causes incompatibility
8254 with gcc. No-one wants to fix gcc because that causes
8255 incompatibility with assemblers... You can use the option of
8256 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
8257 #define SYSV386_COMPAT 1
8261 output_387_binary_op (rtx insn
, rtx
*operands
)
8263 static char buf
[30];
8266 int is_sse
= SSE_REG_P (operands
[0]) || SSE_REG_P (operands
[1]) || SSE_REG_P (operands
[2]);
8268 #ifdef ENABLE_CHECKING
8269 /* Even if we do not want to check the inputs, this documents input
8270 constraints. Which helps in understanding the following code. */
8271 if (STACK_REG_P (operands
[0])
8272 && ((REG_P (operands
[1])
8273 && REGNO (operands
[0]) == REGNO (operands
[1])
8274 && (STACK_REG_P (operands
[2]) || GET_CODE (operands
[2]) == MEM
))
8275 || (REG_P (operands
[2])
8276 && REGNO (operands
[0]) == REGNO (operands
[2])
8277 && (STACK_REG_P (operands
[1]) || GET_CODE (operands
[1]) == MEM
)))
8278 && (STACK_TOP_P (operands
[1]) || STACK_TOP_P (operands
[2])))
8281 gcc_assert (is_sse
);
8284 switch (GET_CODE (operands
[3]))
8287 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
8288 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
8296 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
8297 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
8305 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
8306 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
8314 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
8315 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
8329 if (GET_MODE (operands
[0]) == SFmode
)
8330 strcat (buf
, "ss\t{%2, %0|%0, %2}");
8332 strcat (buf
, "sd\t{%2, %0|%0, %2}");
8337 switch (GET_CODE (operands
[3]))
8341 if (REG_P (operands
[2]) && REGNO (operands
[0]) == REGNO (operands
[2]))
8343 rtx temp
= operands
[2];
8344 operands
[2] = operands
[1];
8348 /* know operands[0] == operands[1]. */
8350 if (GET_CODE (operands
[2]) == MEM
)
8356 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
8358 if (STACK_TOP_P (operands
[0]))
8359 /* How is it that we are storing to a dead operand[2]?
8360 Well, presumably operands[1] is dead too. We can't
8361 store the result to st(0) as st(0) gets popped on this
8362 instruction. Instead store to operands[2] (which I
8363 think has to be st(1)). st(1) will be popped later.
8364 gcc <= 2.8.1 didn't have this check and generated
8365 assembly code that the Unixware assembler rejected. */
8366 p
= "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8368 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8372 if (STACK_TOP_P (operands
[0]))
8373 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8375 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8380 if (GET_CODE (operands
[1]) == MEM
)
8386 if (GET_CODE (operands
[2]) == MEM
)
8392 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
8395 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
8396 derived assemblers, confusingly reverse the direction of
8397 the operation for fsub{r} and fdiv{r} when the
8398 destination register is not st(0). The Intel assembler
8399 doesn't have this brain damage. Read !SYSV386_COMPAT to
8400 figure out what the hardware really does. */
8401 if (STACK_TOP_P (operands
[0]))
8402 p
= "{p\t%0, %2|rp\t%2, %0}";
8404 p
= "{rp\t%2, %0|p\t%0, %2}";
8406 if (STACK_TOP_P (operands
[0]))
8407 /* As above for fmul/fadd, we can't store to st(0). */
8408 p
= "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8410 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8415 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
8418 if (STACK_TOP_P (operands
[0]))
8419 p
= "{rp\t%0, %1|p\t%1, %0}";
8421 p
= "{p\t%1, %0|rp\t%0, %1}";
8423 if (STACK_TOP_P (operands
[0]))
8424 p
= "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
8426 p
= "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
8431 if (STACK_TOP_P (operands
[0]))
8433 if (STACK_TOP_P (operands
[1]))
8434 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8436 p
= "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
8439 else if (STACK_TOP_P (operands
[1]))
8442 p
= "{\t%1, %0|r\t%0, %1}";
8444 p
= "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
8450 p
= "{r\t%2, %0|\t%0, %2}";
8452 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8465 /* Return needed mode for entity in optimize_mode_switching pass. */
8468 ix86_mode_needed (int entity
, rtx insn
)
8470 enum attr_i387_cw mode
;
8472 /* The mode UNINITIALIZED is used to store control word after a
8473 function call or ASM pattern. The mode ANY specify that function
8474 has no requirements on the control word and make no changes in the
8475 bits we are interested in. */
8478 || (NONJUMP_INSN_P (insn
)
8479 && (asm_noperands (PATTERN (insn
)) >= 0
8480 || GET_CODE (PATTERN (insn
)) == ASM_INPUT
)))
8481 return I387_CW_UNINITIALIZED
;
8483 if (recog_memoized (insn
) < 0)
8486 mode
= get_attr_i387_cw (insn
);
8491 if (mode
== I387_CW_TRUNC
)
8496 if (mode
== I387_CW_FLOOR
)
8501 if (mode
== I387_CW_CEIL
)
8506 if (mode
== I387_CW_MASK_PM
)
8517 /* Output code to initialize control word copies used by trunc?f?i and
8518 rounding patterns. CURRENT_MODE is set to current control word,
8519 while NEW_MODE is set to new control word. */
8522 emit_i387_cw_initialization (int mode
)
8524 rtx stored_mode
= assign_386_stack_local (HImode
, SLOT_CW_STORED
);
8529 rtx reg
= gen_reg_rtx (HImode
);
8531 emit_insn (gen_x86_fnstcw_1 (stored_mode
));
8532 emit_move_insn (reg
, stored_mode
);
8534 if (TARGET_64BIT
|| TARGET_PARTIAL_REG_STALL
|| optimize_size
)
8539 /* round toward zero (truncate) */
8540 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0c00)));
8541 slot
= SLOT_CW_TRUNC
;
8545 /* round down toward -oo */
8546 emit_insn (gen_andhi3 (reg
, reg
, GEN_INT (~0x0c00)));
8547 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0400)));
8548 slot
= SLOT_CW_FLOOR
;
8552 /* round up toward +oo */
8553 emit_insn (gen_andhi3 (reg
, reg
, GEN_INT (~0x0c00)));
8554 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0800)));
8555 slot
= SLOT_CW_CEIL
;
8558 case I387_CW_MASK_PM
:
8559 /* mask precision exception for nearbyint() */
8560 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0020)));
8561 slot
= SLOT_CW_MASK_PM
;
8573 /* round toward zero (truncate) */
8574 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0xc)));
8575 slot
= SLOT_CW_TRUNC
;
8579 /* round down toward -oo */
8580 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0x4)));
8581 slot
= SLOT_CW_FLOOR
;
8585 /* round up toward +oo */
8586 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0x8)));
8587 slot
= SLOT_CW_CEIL
;
8590 case I387_CW_MASK_PM
:
8591 /* mask precision exception for nearbyint() */
8592 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0020)));
8593 slot
= SLOT_CW_MASK_PM
;
8601 gcc_assert (slot
< MAX_386_STACK_LOCALS
);
8603 new_mode
= assign_386_stack_local (HImode
, slot
);
8604 emit_move_insn (new_mode
, reg
);
8607 /* Output code for INSN to convert a float to a signed int. OPERANDS
8608 are the insn operands. The output may be [HSD]Imode and the input
8609 operand may be [SDX]Fmode. */
8612 output_fix_trunc (rtx insn
, rtx
*operands
, int fisttp
)
8614 int stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
8615 int dimode_p
= GET_MODE (operands
[0]) == DImode
;
8616 int round_mode
= get_attr_i387_cw (insn
);
8618 /* Jump through a hoop or two for DImode, since the hardware has no
8619 non-popping instruction. We used to do this a different way, but
8620 that was somewhat fragile and broke with post-reload splitters. */
8621 if ((dimode_p
|| fisttp
) && !stack_top_dies
)
8622 output_asm_insn ("fld\t%y1", operands
);
8624 gcc_assert (STACK_TOP_P (operands
[1]));
8625 gcc_assert (GET_CODE (operands
[0]) == MEM
);
8628 output_asm_insn ("fisttp%z0\t%0", operands
);
8631 if (round_mode
!= I387_CW_ANY
)
8632 output_asm_insn ("fldcw\t%3", operands
);
8633 if (stack_top_dies
|| dimode_p
)
8634 output_asm_insn ("fistp%z0\t%0", operands
);
8636 output_asm_insn ("fist%z0\t%0", operands
);
8637 if (round_mode
!= I387_CW_ANY
)
8638 output_asm_insn ("fldcw\t%2", operands
);
8644 /* Output code for x87 ffreep insn. The OPNO argument, which may only
8645 have the values zero or one, indicates the ffreep insn's operand
8646 from the OPERANDS array. */
8649 output_387_ffreep (rtx
*operands ATTRIBUTE_UNUSED
, int opno
)
8651 if (TARGET_USE_FFREEP
)
8652 #if HAVE_AS_IX86_FFREEP
8653 return opno
? "ffreep\t%y1" : "ffreep\t%y0";
8655 switch (REGNO (operands
[opno
]))
8657 case FIRST_STACK_REG
+ 0: return ".word\t0xc0df";
8658 case FIRST_STACK_REG
+ 1: return ".word\t0xc1df";
8659 case FIRST_STACK_REG
+ 2: return ".word\t0xc2df";
8660 case FIRST_STACK_REG
+ 3: return ".word\t0xc3df";
8661 case FIRST_STACK_REG
+ 4: return ".word\t0xc4df";
8662 case FIRST_STACK_REG
+ 5: return ".word\t0xc5df";
8663 case FIRST_STACK_REG
+ 6: return ".word\t0xc6df";
8664 case FIRST_STACK_REG
+ 7: return ".word\t0xc7df";
8668 return opno
? "fstp\t%y1" : "fstp\t%y0";
8672 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
8673 should be used. UNORDERED_P is true when fucom should be used. */
8676 output_fp_compare (rtx insn
, rtx
*operands
, int eflags_p
, int unordered_p
)
8679 rtx cmp_op0
, cmp_op1
;
8680 int is_sse
= SSE_REG_P (operands
[0]) || SSE_REG_P (operands
[1]);
8684 cmp_op0
= operands
[0];
8685 cmp_op1
= operands
[1];
8689 cmp_op0
= operands
[1];
8690 cmp_op1
= operands
[2];
8695 if (GET_MODE (operands
[0]) == SFmode
)
8697 return "ucomiss\t{%1, %0|%0, %1}";
8699 return "comiss\t{%1, %0|%0, %1}";
8702 return "ucomisd\t{%1, %0|%0, %1}";
8704 return "comisd\t{%1, %0|%0, %1}";
8707 gcc_assert (STACK_TOP_P (cmp_op0
));
8709 stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
8711 if (cmp_op1
== CONST0_RTX (GET_MODE (cmp_op1
)))
8715 output_asm_insn ("ftst\n\tfnstsw\t%0", operands
);
8716 return output_387_ffreep (operands
, 1);
8719 return "ftst\n\tfnstsw\t%0";
8722 if (STACK_REG_P (cmp_op1
)
8724 && find_regno_note (insn
, REG_DEAD
, REGNO (cmp_op1
))
8725 && REGNO (cmp_op1
) != FIRST_STACK_REG
)
8727 /* If both the top of the 387 stack dies, and the other operand
8728 is also a stack register that dies, then this must be a
8729 `fcompp' float compare */
8733 /* There is no double popping fcomi variant. Fortunately,
8734 eflags is immune from the fstp's cc clobbering. */
8736 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands
);
8738 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands
);
8739 return output_387_ffreep (operands
, 0);
8744 return "fucompp\n\tfnstsw\t%0";
8746 return "fcompp\n\tfnstsw\t%0";
8751 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
8753 static const char * const alt
[16] =
8755 "fcom%z2\t%y2\n\tfnstsw\t%0",
8756 "fcomp%z2\t%y2\n\tfnstsw\t%0",
8757 "fucom%z2\t%y2\n\tfnstsw\t%0",
8758 "fucomp%z2\t%y2\n\tfnstsw\t%0",
8760 "ficom%z2\t%y2\n\tfnstsw\t%0",
8761 "ficomp%z2\t%y2\n\tfnstsw\t%0",
8765 "fcomi\t{%y1, %0|%0, %y1}",
8766 "fcomip\t{%y1, %0|%0, %y1}",
8767 "fucomi\t{%y1, %0|%0, %y1}",
8768 "fucomip\t{%y1, %0|%0, %y1}",
8779 mask
= eflags_p
<< 3;
8780 mask
|= (GET_MODE_CLASS (GET_MODE (cmp_op1
)) == MODE_INT
) << 2;
8781 mask
|= unordered_p
<< 1;
8782 mask
|= stack_top_dies
;
8784 gcc_assert (mask
< 16);
8793 ix86_output_addr_vec_elt (FILE *file
, int value
)
8795 const char *directive
= ASM_LONG
;
8799 directive
= ASM_QUAD
;
8801 gcc_assert (!TARGET_64BIT
);
8804 fprintf (file
, "%s%s%d\n", directive
, LPREFIX
, value
);
8808 ix86_output_addr_diff_elt (FILE *file
, int value
, int rel
)
8811 fprintf (file
, "%s%s%d-%s%d\n",
8812 ASM_LONG
, LPREFIX
, value
, LPREFIX
, rel
);
8813 else if (HAVE_AS_GOTOFF_IN_DATA
)
8814 fprintf (file
, "%s%s%d@GOTOFF\n", ASM_LONG
, LPREFIX
, value
);
8816 else if (TARGET_MACHO
)
8818 fprintf (file
, "%s%s%d-", ASM_LONG
, LPREFIX
, value
);
8819 machopic_output_function_base_name (file
);
8820 fprintf(file
, "\n");
8824 asm_fprintf (file
, "%s%U%s+[.-%s%d]\n",
8825 ASM_LONG
, GOT_SYMBOL_NAME
, LPREFIX
, value
);
8828 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
8832 ix86_expand_clear (rtx dest
)
8836 /* We play register width games, which are only valid after reload. */
8837 gcc_assert (reload_completed
);
8839 /* Avoid HImode and its attendant prefix byte. */
8840 if (GET_MODE_SIZE (GET_MODE (dest
)) < 4)
8841 dest
= gen_rtx_REG (SImode
, REGNO (dest
));
8843 tmp
= gen_rtx_SET (VOIDmode
, dest
, const0_rtx
);
8845 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
8846 if (reload_completed
&& (!TARGET_USE_MOV0
|| optimize_size
))
8848 rtx clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, 17));
8849 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, tmp
, clob
));
8855 /* X is an unchanging MEM. If it is a constant pool reference, return
8856 the constant pool rtx, else NULL. */
8859 maybe_get_pool_constant (rtx x
)
8861 x
= ix86_delegitimize_address (XEXP (x
, 0));
8863 if (GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
8864 return get_pool_constant (x
);
8870 ix86_expand_move (enum machine_mode mode
, rtx operands
[])
8872 int strict
= (reload_in_progress
|| reload_completed
);
8874 enum tls_model model
;
8879 if (GET_CODE (op1
) == SYMBOL_REF
)
8881 model
= SYMBOL_REF_TLS_MODEL (op1
);
8884 op1
= legitimize_tls_address (op1
, model
, true);
8885 op1
= force_operand (op1
, op0
);
8890 else if (GET_CODE (op1
) == CONST
8891 && GET_CODE (XEXP (op1
, 0)) == PLUS
8892 && GET_CODE (XEXP (XEXP (op1
, 0), 0)) == SYMBOL_REF
)
8894 model
= SYMBOL_REF_TLS_MODEL (XEXP (XEXP (op1
, 0), 0));
8897 rtx addend
= XEXP (XEXP (op1
, 0), 1);
8898 op1
= legitimize_tls_address (XEXP (XEXP (op1
, 0), 0), model
, true);
8899 op1
= force_operand (op1
, NULL
);
8900 op1
= expand_simple_binop (Pmode
, PLUS
, op1
, addend
,
8901 op0
, 1, OPTAB_DIRECT
);
8907 if (flag_pic
&& mode
== Pmode
&& symbolic_operand (op1
, Pmode
))
8912 rtx temp
= ((reload_in_progress
8913 || ((op0
&& GET_CODE (op0
) == REG
)
8915 ? op0
: gen_reg_rtx (Pmode
));
8916 op1
= machopic_indirect_data_reference (op1
, temp
);
8917 op1
= machopic_legitimize_pic_address (op1
, mode
,
8918 temp
== op1
? 0 : temp
);
8920 else if (MACHOPIC_INDIRECT
)
8921 op1
= machopic_indirect_data_reference (op1
, 0);
8925 if (GET_CODE (op0
) == MEM
)
8926 op1
= force_reg (Pmode
, op1
);
8928 op1
= legitimize_address (op1
, op1
, Pmode
);
8929 #endif /* TARGET_MACHO */
8933 if (GET_CODE (op0
) == MEM
8934 && (PUSH_ROUNDING (GET_MODE_SIZE (mode
)) != GET_MODE_SIZE (mode
)
8935 || !push_operand (op0
, mode
))
8936 && GET_CODE (op1
) == MEM
)
8937 op1
= force_reg (mode
, op1
);
8939 if (push_operand (op0
, mode
)
8940 && ! general_no_elim_operand (op1
, mode
))
8941 op1
= copy_to_mode_reg (mode
, op1
);
8943 /* Force large constants in 64bit compilation into register
8944 to get them CSEed. */
8945 if (TARGET_64BIT
&& mode
== DImode
8946 && immediate_operand (op1
, mode
)
8947 && !x86_64_zext_immediate_operand (op1
, VOIDmode
)
8948 && !register_operand (op0
, mode
)
8949 && optimize
&& !reload_completed
&& !reload_in_progress
)
8950 op1
= copy_to_mode_reg (mode
, op1
);
8952 if (FLOAT_MODE_P (mode
))
8954 /* If we are loading a floating point constant to a register,
8955 force the value to memory now, since we'll get better code
8956 out the back end. */
8960 else if (GET_CODE (op1
) == CONST_DOUBLE
)
8962 op1
= validize_mem (force_const_mem (mode
, op1
));
8963 if (!register_operand (op0
, mode
))
8965 rtx temp
= gen_reg_rtx (mode
);
8966 emit_insn (gen_rtx_SET (VOIDmode
, temp
, op1
));
8967 emit_move_insn (op0
, temp
);
8974 emit_insn (gen_rtx_SET (VOIDmode
, op0
, op1
));
8978 ix86_expand_vector_move (enum machine_mode mode
, rtx operands
[])
8980 rtx op0
= operands
[0], op1
= operands
[1];
8982 /* Force constants other than zero into memory. We do not know how
8983 the instructions used to build constants modify the upper 64 bits
8984 of the register, once we have that information we may be able
8985 to handle some of them more efficiently. */
8986 if ((reload_in_progress
| reload_completed
) == 0
8987 && register_operand (op0
, mode
)
8989 && standard_sse_constant_p (op1
) <= 0)
8990 op1
= validize_mem (force_const_mem (mode
, op1
));
8992 /* Make operand1 a register if it isn't already. */
8994 && !register_operand (op0
, mode
)
8995 && !register_operand (op1
, mode
))
8997 emit_move_insn (op0
, force_reg (GET_MODE (op0
), op1
));
9001 emit_insn (gen_rtx_SET (VOIDmode
, op0
, op1
));
9004 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
9005 straight to ix86_expand_vector_move. */
9008 ix86_expand_vector_move_misalign (enum machine_mode mode
, rtx operands
[])
9017 /* If we're optimizing for size, movups is the smallest. */
9020 op0
= gen_lowpart (V4SFmode
, op0
);
9021 op1
= gen_lowpart (V4SFmode
, op1
);
9022 emit_insn (gen_sse_movups (op0
, op1
));
9026 /* ??? If we have typed data, then it would appear that using
9027 movdqu is the only way to get unaligned data loaded with
9029 if (TARGET_SSE2
&& GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
9031 op0
= gen_lowpart (V16QImode
, op0
);
9032 op1
= gen_lowpart (V16QImode
, op1
);
9033 emit_insn (gen_sse2_movdqu (op0
, op1
));
9037 if (TARGET_SSE2
&& mode
== V2DFmode
)
9041 /* When SSE registers are split into halves, we can avoid
9042 writing to the top half twice. */
9043 if (TARGET_SSE_SPLIT_REGS
)
9045 emit_insn (gen_rtx_CLOBBER (VOIDmode
, op0
));
9050 /* ??? Not sure about the best option for the Intel chips.
9051 The following would seem to satisfy; the register is
9052 entirely cleared, breaking the dependency chain. We
9053 then store to the upper half, with a dependency depth
9054 of one. A rumor has it that Intel recommends two movsd
9055 followed by an unpacklpd, but this is unconfirmed. And
9056 given that the dependency depth of the unpacklpd would
9057 still be one, I'm not sure why this would be better. */
9058 zero
= CONST0_RTX (V2DFmode
);
9061 m
= adjust_address (op1
, DFmode
, 0);
9062 emit_insn (gen_sse2_loadlpd (op0
, zero
, m
));
9063 m
= adjust_address (op1
, DFmode
, 8);
9064 emit_insn (gen_sse2_loadhpd (op0
, op0
, m
));
9068 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY
)
9069 emit_move_insn (op0
, CONST0_RTX (mode
));
9071 emit_insn (gen_rtx_CLOBBER (VOIDmode
, op0
));
9073 if (mode
!= V4SFmode
)
9074 op0
= gen_lowpart (V4SFmode
, op0
);
9075 m
= adjust_address (op1
, V2SFmode
, 0);
9076 emit_insn (gen_sse_loadlps (op0
, op0
, m
));
9077 m
= adjust_address (op1
, V2SFmode
, 8);
9078 emit_insn (gen_sse_loadhps (op0
, op0
, m
));
9081 else if (MEM_P (op0
))
9083 /* If we're optimizing for size, movups is the smallest. */
9086 op0
= gen_lowpart (V4SFmode
, op0
);
9087 op1
= gen_lowpart (V4SFmode
, op1
);
9088 emit_insn (gen_sse_movups (op0
, op1
));
9092 /* ??? Similar to above, only less clear because of quote
9093 typeless stores unquote. */
9094 if (TARGET_SSE2
&& !TARGET_SSE_TYPELESS_STORES
9095 && GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
9097 op0
= gen_lowpart (V16QImode
, op0
);
9098 op1
= gen_lowpart (V16QImode
, op1
);
9099 emit_insn (gen_sse2_movdqu (op0
, op1
));
9103 if (TARGET_SSE2
&& mode
== V2DFmode
)
9105 m
= adjust_address (op0
, DFmode
, 0);
9106 emit_insn (gen_sse2_storelpd (m
, op1
));
9107 m
= adjust_address (op0
, DFmode
, 8);
9108 emit_insn (gen_sse2_storehpd (m
, op1
));
9112 if (mode
!= V4SFmode
)
9113 op1
= gen_lowpart (V4SFmode
, op1
);
9114 m
= adjust_address (op0
, V2SFmode
, 0);
9115 emit_insn (gen_sse_storelps (m
, op1
));
9116 m
= adjust_address (op0
, V2SFmode
, 8);
9117 emit_insn (gen_sse_storehps (m
, op1
));
9124 /* Expand a push in MODE. This is some mode for which we do not support
9125 proper push instructions, at least from the registers that we expect
9126 the value to live in. */
9129 ix86_expand_push (enum machine_mode mode
, rtx x
)
9133 tmp
= expand_simple_binop (Pmode
, PLUS
, stack_pointer_rtx
,
9134 GEN_INT (-GET_MODE_SIZE (mode
)),
9135 stack_pointer_rtx
, 1, OPTAB_DIRECT
);
9136 if (tmp
!= stack_pointer_rtx
)
9137 emit_move_insn (stack_pointer_rtx
, tmp
);
9139 tmp
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
9140 emit_move_insn (tmp
, x
);
9143 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
9144 destination to use for the operation. If different from the true
9145 destination in operands[0], a copy operation will be required. */
9148 ix86_fixup_binary_operands (enum rtx_code code
, enum machine_mode mode
,
9151 int matching_memory
;
9152 rtx src1
, src2
, dst
;
9158 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
9159 if (GET_RTX_CLASS (code
) == RTX_COMM_ARITH
9160 && (rtx_equal_p (dst
, src2
)
9161 || immediate_operand (src1
, mode
)))
9168 /* If the destination is memory, and we do not have matching source
9169 operands, do things in registers. */
9170 matching_memory
= 0;
9171 if (GET_CODE (dst
) == MEM
)
9173 if (rtx_equal_p (dst
, src1
))
9174 matching_memory
= 1;
9175 else if (GET_RTX_CLASS (code
) == RTX_COMM_ARITH
9176 && rtx_equal_p (dst
, src2
))
9177 matching_memory
= 2;
9179 dst
= gen_reg_rtx (mode
);
9182 /* Both source operands cannot be in memory. */
9183 if (GET_CODE (src1
) == MEM
&& GET_CODE (src2
) == MEM
)
9185 if (matching_memory
!= 2)
9186 src2
= force_reg (mode
, src2
);
9188 src1
= force_reg (mode
, src1
);
9191 /* If the operation is not commutable, source 1 cannot be a constant
9192 or non-matching memory. */
9193 if ((CONSTANT_P (src1
)
9194 || (!matching_memory
&& GET_CODE (src1
) == MEM
))
9195 && GET_RTX_CLASS (code
) != RTX_COMM_ARITH
)
9196 src1
= force_reg (mode
, src1
);
9198 src1
= operands
[1] = src1
;
9199 src2
= operands
[2] = src2
;
9203 /* Similarly, but assume that the destination has already been
9207 ix86_fixup_binary_operands_no_copy (enum rtx_code code
,
9208 enum machine_mode mode
, rtx operands
[])
9210 rtx dst
= ix86_fixup_binary_operands (code
, mode
, operands
);
9211 gcc_assert (dst
== operands
[0]);
9214 /* Attempt to expand a binary operator. Make the expansion closer to the
9215 actual machine, then just general_operand, which will allow 3 separate
9216 memory references (one output, two input) in a single insn. */
9219 ix86_expand_binary_operator (enum rtx_code code
, enum machine_mode mode
,
9222 rtx src1
, src2
, dst
, op
, clob
;
9224 dst
= ix86_fixup_binary_operands (code
, mode
, operands
);
9228 /* Emit the instruction. */
9230 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_ee (code
, mode
, src1
, src2
));
9231 if (reload_in_progress
)
9233 /* Reload doesn't know about the flags register, and doesn't know that
9234 it doesn't want to clobber it. We can only do this with PLUS. */
9235 gcc_assert (code
== PLUS
);
9240 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
9241 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
9244 /* Fix up the destination if needed. */
9245 if (dst
!= operands
[0])
9246 emit_move_insn (operands
[0], dst
);
9249 /* Return TRUE or FALSE depending on whether the binary operator meets the
9250 appropriate constraints. */
9253 ix86_binary_operator_ok (enum rtx_code code
,
9254 enum machine_mode mode ATTRIBUTE_UNUSED
,
9257 /* Both source operands cannot be in memory. */
9258 if (GET_CODE (operands
[1]) == MEM
&& GET_CODE (operands
[2]) == MEM
)
9260 /* If the operation is not commutable, source 1 cannot be a constant. */
9261 if (CONSTANT_P (operands
[1]) && GET_RTX_CLASS (code
) != RTX_COMM_ARITH
)
9263 /* If the destination is memory, we must have a matching source operand. */
9264 if (GET_CODE (operands
[0]) == MEM
9265 && ! (rtx_equal_p (operands
[0], operands
[1])
9266 || (GET_RTX_CLASS (code
) == RTX_COMM_ARITH
9267 && rtx_equal_p (operands
[0], operands
[2]))))
9269 /* If the operation is not commutable and the source 1 is memory, we must
9270 have a matching destination. */
9271 if (GET_CODE (operands
[1]) == MEM
9272 && GET_RTX_CLASS (code
) != RTX_COMM_ARITH
9273 && ! rtx_equal_p (operands
[0], operands
[1]))
9278 /* Attempt to expand a unary operator. Make the expansion closer to the
9279 actual machine, then just general_operand, which will allow 2 separate
9280 memory references (one output, one input) in a single insn. */
9283 ix86_expand_unary_operator (enum rtx_code code
, enum machine_mode mode
,
9286 int matching_memory
;
9287 rtx src
, dst
, op
, clob
;
9292 /* If the destination is memory, and we do not have matching source
9293 operands, do things in registers. */
9294 matching_memory
= 0;
9297 if (rtx_equal_p (dst
, src
))
9298 matching_memory
= 1;
9300 dst
= gen_reg_rtx (mode
);
9303 /* When source operand is memory, destination must match. */
9304 if (MEM_P (src
) && !matching_memory
)
9305 src
= force_reg (mode
, src
);
9307 /* Emit the instruction. */
9309 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_e (code
, mode
, src
));
9310 if (reload_in_progress
|| code
== NOT
)
9312 /* Reload doesn't know about the flags register, and doesn't know that
9313 it doesn't want to clobber it. */
9314 gcc_assert (code
== NOT
);
9319 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
9320 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
9323 /* Fix up the destination if needed. */
9324 if (dst
!= operands
[0])
9325 emit_move_insn (operands
[0], dst
);
9328 /* Return TRUE or FALSE depending on whether the unary operator meets the
9329 appropriate constraints. */
9332 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED
,
9333 enum machine_mode mode ATTRIBUTE_UNUSED
,
9334 rtx operands
[2] ATTRIBUTE_UNUSED
)
9336 /* If one of operands is memory, source and destination must match. */
9337 if ((GET_CODE (operands
[0]) == MEM
9338 || GET_CODE (operands
[1]) == MEM
)
9339 && ! rtx_equal_p (operands
[0], operands
[1]))
9344 /* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders.
9345 Create a mask for the sign bit in MODE for an SSE register. If VECT is
9346 true, then replicate the mask for all elements of the vector register.
9347 If INVERT is true, then create a mask excluding the sign bit. */
9350 ix86_build_signbit_mask (enum machine_mode mode
, bool vect
, bool invert
)
9352 enum machine_mode vec_mode
;
9353 HOST_WIDE_INT hi
, lo
;
9358 /* Find the sign bit, sign extended to 2*HWI. */
9360 lo
= 0x80000000, hi
= lo
< 0;
9361 else if (HOST_BITS_PER_WIDE_INT
>= 64)
9362 lo
= (HOST_WIDE_INT
)1 << shift
, hi
= -1;
9364 lo
= 0, hi
= (HOST_WIDE_INT
)1 << (shift
- HOST_BITS_PER_WIDE_INT
);
9369 /* Force this value into the low part of a fp vector constant. */
9370 mask
= immed_double_const (lo
, hi
, mode
== SFmode
? SImode
: DImode
);
9371 mask
= gen_lowpart (mode
, mask
);
9376 v
= gen_rtvec (4, mask
, mask
, mask
, mask
);
9378 v
= gen_rtvec (4, mask
, CONST0_RTX (SFmode
),
9379 CONST0_RTX (SFmode
), CONST0_RTX (SFmode
));
9380 vec_mode
= V4SFmode
;
9385 v
= gen_rtvec (2, mask
, mask
);
9387 v
= gen_rtvec (2, mask
, CONST0_RTX (DFmode
));
9388 vec_mode
= V2DFmode
;
9391 return force_reg (vec_mode
, gen_rtx_CONST_VECTOR (vec_mode
, v
));
9394 /* Generate code for floating point ABS or NEG. */
9397 ix86_expand_fp_absneg_operator (enum rtx_code code
, enum machine_mode mode
,
9400 rtx mask
, set
, use
, clob
, dst
, src
;
9401 bool matching_memory
;
9402 bool use_sse
= false;
9403 bool vector_mode
= VECTOR_MODE_P (mode
);
9404 enum machine_mode elt_mode
= mode
;
9408 elt_mode
= GET_MODE_INNER (mode
);
9411 else if (TARGET_SSE_MATH
)
9412 use_sse
= SSE_FLOAT_MODE_P (mode
);
9414 /* NEG and ABS performed with SSE use bitwise mask operations.
9415 Create the appropriate mask now. */
9417 mask
= ix86_build_signbit_mask (elt_mode
, vector_mode
, code
== ABS
);
9424 /* If the destination is memory, and we don't have matching source
9425 operands or we're using the x87, do things in registers. */
9426 matching_memory
= false;
9429 if (use_sse
&& rtx_equal_p (dst
, src
))
9430 matching_memory
= true;
9432 dst
= gen_reg_rtx (mode
);
9434 if (MEM_P (src
) && !matching_memory
)
9435 src
= force_reg (mode
, src
);
9439 set
= gen_rtx_fmt_ee (code
== NEG
? XOR
: AND
, mode
, src
, mask
);
9440 set
= gen_rtx_SET (VOIDmode
, dst
, set
);
9445 set
= gen_rtx_fmt_e (code
, mode
, src
);
9446 set
= gen_rtx_SET (VOIDmode
, dst
, set
);
9449 use
= gen_rtx_USE (VOIDmode
, mask
);
9450 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
9451 emit_insn (gen_rtx_PARALLEL (VOIDmode
,
9452 gen_rtvec (3, set
, use
, clob
)));
9458 if (dst
!= operands
[0])
9459 emit_move_insn (operands
[0], dst
);
9462 /* Expand a copysign operation. Special case operand 0 being a constant. */
9465 ix86_expand_copysign (rtx operands
[])
9467 enum machine_mode mode
, vmode
;
9468 rtx dest
, op0
, op1
, mask
, nmask
;
9474 mode
= GET_MODE (dest
);
9475 vmode
= mode
== SFmode
? V4SFmode
: V2DFmode
;
9477 if (GET_CODE (op0
) == CONST_DOUBLE
)
9481 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0
)))
9482 op0
= simplify_unary_operation (ABS
, mode
, op0
, mode
);
9484 if (op0
== CONST0_RTX (mode
))
9485 op0
= CONST0_RTX (vmode
);
9489 v
= gen_rtvec (4, op0
, CONST0_RTX (SFmode
),
9490 CONST0_RTX (SFmode
), CONST0_RTX (SFmode
));
9492 v
= gen_rtvec (2, op0
, CONST0_RTX (DFmode
));
9493 op0
= force_reg (vmode
, gen_rtx_CONST_VECTOR (vmode
, v
));
9496 mask
= ix86_build_signbit_mask (mode
, 0, 0);
9499 emit_insn (gen_copysignsf3_const (dest
, op0
, op1
, mask
));
9501 emit_insn (gen_copysigndf3_const (dest
, op0
, op1
, mask
));
9505 nmask
= ix86_build_signbit_mask (mode
, 0, 1);
9506 mask
= ix86_build_signbit_mask (mode
, 0, 0);
9509 emit_insn (gen_copysignsf3_var (dest
, NULL
, op0
, op1
, nmask
, mask
));
9511 emit_insn (gen_copysigndf3_var (dest
, NULL
, op0
, op1
, nmask
, mask
));
9515 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
9516 be a constant, and so has already been expanded into a vector constant. */
9519 ix86_split_copysign_const (rtx operands
[])
9521 enum machine_mode mode
, vmode
;
9522 rtx dest
, op0
, op1
, mask
, x
;
9529 mode
= GET_MODE (dest
);
9530 vmode
= GET_MODE (mask
);
9532 dest
= simplify_gen_subreg (vmode
, dest
, mode
, 0);
9533 x
= gen_rtx_AND (vmode
, dest
, mask
);
9534 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
9536 if (op0
!= CONST0_RTX (vmode
))
9538 x
= gen_rtx_IOR (vmode
, dest
, op0
);
9539 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
9543 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
9544 so we have to do two masks. */
9547 ix86_split_copysign_var (rtx operands
[])
9549 enum machine_mode mode
, vmode
;
9550 rtx dest
, scratch
, op0
, op1
, mask
, nmask
, x
;
9553 scratch
= operands
[1];
9556 nmask
= operands
[4];
9559 mode
= GET_MODE (dest
);
9560 vmode
= GET_MODE (mask
);
9562 if (rtx_equal_p (op0
, op1
))
9564 /* Shouldn't happen often (it's useless, obviously), but when it does
9565 we'd generate incorrect code if we continue below. */
9566 emit_move_insn (dest
, op0
);
9570 if (REG_P (mask
) && REGNO (dest
) == REGNO (mask
)) /* alternative 0 */
9572 gcc_assert (REGNO (op1
) == REGNO (scratch
));
9574 x
= gen_rtx_AND (vmode
, scratch
, mask
);
9575 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, x
));
9578 op0
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
9579 x
= gen_rtx_NOT (vmode
, dest
);
9580 x
= gen_rtx_AND (vmode
, x
, op0
);
9581 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
9585 if (REGNO (op1
) == REGNO (scratch
)) /* alternative 1,3 */
9587 x
= gen_rtx_AND (vmode
, scratch
, mask
);
9589 else /* alternative 2,4 */
9591 gcc_assert (REGNO (mask
) == REGNO (scratch
));
9592 op1
= simplify_gen_subreg (vmode
, op1
, mode
, 0);
9593 x
= gen_rtx_AND (vmode
, scratch
, op1
);
9595 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, x
));
9597 if (REGNO (op0
) == REGNO (dest
)) /* alternative 1,2 */
9599 dest
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
9600 x
= gen_rtx_AND (vmode
, dest
, nmask
);
9602 else /* alternative 3,4 */
9604 gcc_assert (REGNO (nmask
) == REGNO (dest
));
9606 op0
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
9607 x
= gen_rtx_AND (vmode
, dest
, op0
);
9609 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
9612 x
= gen_rtx_IOR (vmode
, dest
, scratch
);
9613 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
9616 /* Return TRUE or FALSE depending on whether the first SET in INSN
9617 has source and destination with matching CC modes, and that the
9618 CC mode is at least as constrained as REQ_MODE. */
9621 ix86_match_ccmode (rtx insn
, enum machine_mode req_mode
)
9624 enum machine_mode set_mode
;
9626 set
= PATTERN (insn
);
9627 if (GET_CODE (set
) == PARALLEL
)
9628 set
= XVECEXP (set
, 0, 0);
9629 gcc_assert (GET_CODE (set
) == SET
);
9630 gcc_assert (GET_CODE (SET_SRC (set
)) == COMPARE
);
9632 set_mode
= GET_MODE (SET_DEST (set
));
9636 if (req_mode
!= CCNOmode
9637 && (req_mode
!= CCmode
9638 || XEXP (SET_SRC (set
), 1) != const0_rtx
))
9642 if (req_mode
== CCGCmode
)
9646 if (req_mode
== CCGOCmode
|| req_mode
== CCNOmode
)
9650 if (req_mode
== CCZmode
)
9660 return (GET_MODE (SET_SRC (set
)) == set_mode
);
9663 /* Generate insn patterns to do an integer compare of OPERANDS. */
9666 ix86_expand_int_compare (enum rtx_code code
, rtx op0
, rtx op1
)
9668 enum machine_mode cmpmode
;
9671 cmpmode
= SELECT_CC_MODE (code
, op0
, op1
);
9672 flags
= gen_rtx_REG (cmpmode
, FLAGS_REG
);
9674 /* This is very simple, but making the interface the same as in the
9675 FP case makes the rest of the code easier. */
9676 tmp
= gen_rtx_COMPARE (cmpmode
, op0
, op1
);
9677 emit_insn (gen_rtx_SET (VOIDmode
, flags
, tmp
));
9679 /* Return the test that should be put into the flags user, i.e.
9680 the bcc, scc, or cmov instruction. */
9681 return gen_rtx_fmt_ee (code
, VOIDmode
, flags
, const0_rtx
);
9684 /* Figure out whether to use ordered or unordered fp comparisons.
9685 Return the appropriate mode to use. */
9688 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED
)
9690 /* ??? In order to make all comparisons reversible, we do all comparisons
9691 non-trapping when compiling for IEEE. Once gcc is able to distinguish
9692 all forms trapping and nontrapping comparisons, we can make inequality
9693 comparisons trapping again, since it results in better code when using
9694 FCOM based compares. */
9695 return TARGET_IEEE_FP
? CCFPUmode
: CCFPmode
;
9699 ix86_cc_mode (enum rtx_code code
, rtx op0
, rtx op1
)
9701 if (SCALAR_FLOAT_MODE_P (GET_MODE (op0
)))
9702 return ix86_fp_compare_mode (code
);
9705 /* Only zero flag is needed. */
9707 case NE
: /* ZF!=0 */
9709 /* Codes needing carry flag. */
9710 case GEU
: /* CF=0 */
9711 case GTU
: /* CF=0 & ZF=0 */
9712 case LTU
: /* CF=1 */
9713 case LEU
: /* CF=1 | ZF=1 */
9715 /* Codes possibly doable only with sign flag when
9716 comparing against zero. */
9717 case GE
: /* SF=OF or SF=0 */
9718 case LT
: /* SF<>OF or SF=1 */
9719 if (op1
== const0_rtx
)
9722 /* For other cases Carry flag is not required. */
9724 /* Codes doable only with sign flag when comparing
9725 against zero, but we miss jump instruction for it
9726 so we need to use relational tests against overflow
9727 that thus needs to be zero. */
9728 case GT
: /* ZF=0 & SF=OF */
9729 case LE
: /* ZF=1 | SF<>OF */
9730 if (op1
== const0_rtx
)
9734 /* strcmp pattern do (use flags) and combine may ask us for proper
9743 /* Return the fixed registers used for condition codes. */
9746 ix86_fixed_condition_code_regs (unsigned int *p1
, unsigned int *p2
)
9753 /* If two condition code modes are compatible, return a condition code
9754 mode which is compatible with both. Otherwise, return
9757 static enum machine_mode
9758 ix86_cc_modes_compatible (enum machine_mode m1
, enum machine_mode m2
)
9763 if (GET_MODE_CLASS (m1
) != MODE_CC
|| GET_MODE_CLASS (m2
) != MODE_CC
)
9766 if ((m1
== CCGCmode
&& m2
== CCGOCmode
)
9767 || (m1
== CCGOCmode
&& m2
== CCGCmode
))
9795 /* These are only compatible with themselves, which we already
9801 /* Return true if we should use an FCOMI instruction for this fp comparison. */
9804 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED
)
9806 enum rtx_code swapped_code
= swap_condition (code
);
9807 return ((ix86_fp_comparison_cost (code
) == ix86_fp_comparison_fcomi_cost (code
))
9808 || (ix86_fp_comparison_cost (swapped_code
)
9809 == ix86_fp_comparison_fcomi_cost (swapped_code
)));
9812 /* Swap, force into registers, or otherwise massage the two operands
9813 to a fp comparison. The operands are updated in place; the new
9814 comparison code is returned. */
9816 static enum rtx_code
9817 ix86_prepare_fp_compare_args (enum rtx_code code
, rtx
*pop0
, rtx
*pop1
)
9819 enum machine_mode fpcmp_mode
= ix86_fp_compare_mode (code
);
9820 rtx op0
= *pop0
, op1
= *pop1
;
9821 enum machine_mode op_mode
= GET_MODE (op0
);
9822 int is_sse
= TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (op_mode
);
9824 /* All of the unordered compare instructions only work on registers.
9825 The same is true of the fcomi compare instructions. The XFmode
9826 compare instructions require registers except when comparing
9827 against zero or when converting operand 1 from fixed point to
9831 && (fpcmp_mode
== CCFPUmode
9832 || (op_mode
== XFmode
9833 && ! (standard_80387_constant_p (op0
) == 1
9834 || standard_80387_constant_p (op1
) == 1)
9835 && GET_CODE (op1
) != FLOAT
)
9836 || ix86_use_fcomi_compare (code
)))
9838 op0
= force_reg (op_mode
, op0
);
9839 op1
= force_reg (op_mode
, op1
);
9843 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
9844 things around if they appear profitable, otherwise force op0
9847 if (standard_80387_constant_p (op0
) == 0
9848 || (GET_CODE (op0
) == MEM
9849 && ! (standard_80387_constant_p (op1
) == 0
9850 || GET_CODE (op1
) == MEM
)))
9853 tmp
= op0
, op0
= op1
, op1
= tmp
;
9854 code
= swap_condition (code
);
9857 if (GET_CODE (op0
) != REG
)
9858 op0
= force_reg (op_mode
, op0
);
9860 if (CONSTANT_P (op1
))
9862 int tmp
= standard_80387_constant_p (op1
);
9864 op1
= validize_mem (force_const_mem (op_mode
, op1
));
9868 op1
= force_reg (op_mode
, op1
);
9871 op1
= force_reg (op_mode
, op1
);
9875 /* Try to rearrange the comparison to make it cheaper. */
9876 if (ix86_fp_comparison_cost (code
)
9877 > ix86_fp_comparison_cost (swap_condition (code
))
9878 && (GET_CODE (op1
) == REG
|| !no_new_pseudos
))
9881 tmp
= op0
, op0
= op1
, op1
= tmp
;
9882 code
= swap_condition (code
);
9883 if (GET_CODE (op0
) != REG
)
9884 op0
= force_reg (op_mode
, op0
);
9892 /* Convert comparison codes we use to represent FP comparison to integer
9893 code that will result in proper branch. Return UNKNOWN if no such code
9897 ix86_fp_compare_code_to_integer (enum rtx_code code
)
9926 /* Split comparison code CODE into comparisons we can do using branch
9927 instructions. BYPASS_CODE is comparison code for branch that will
9928 branch around FIRST_CODE and SECOND_CODE. If some of branches
9929 is not required, set value to UNKNOWN.
9930 We never require more than two branches. */
9933 ix86_fp_comparison_codes (enum rtx_code code
, enum rtx_code
*bypass_code
,
9934 enum rtx_code
*first_code
,
9935 enum rtx_code
*second_code
)
9938 *bypass_code
= UNKNOWN
;
9939 *second_code
= UNKNOWN
;
9941 /* The fcomi comparison sets flags as follows:
9951 case GT
: /* GTU - CF=0 & ZF=0 */
9952 case GE
: /* GEU - CF=0 */
9953 case ORDERED
: /* PF=0 */
9954 case UNORDERED
: /* PF=1 */
9955 case UNEQ
: /* EQ - ZF=1 */
9956 case UNLT
: /* LTU - CF=1 */
9957 case UNLE
: /* LEU - CF=1 | ZF=1 */
9958 case LTGT
: /* EQ - ZF=0 */
9960 case LT
: /* LTU - CF=1 - fails on unordered */
9962 *bypass_code
= UNORDERED
;
9964 case LE
: /* LEU - CF=1 | ZF=1 - fails on unordered */
9966 *bypass_code
= UNORDERED
;
9968 case EQ
: /* EQ - ZF=1 - fails on unordered */
9970 *bypass_code
= UNORDERED
;
9972 case NE
: /* NE - ZF=0 - fails on unordered */
9974 *second_code
= UNORDERED
;
9976 case UNGE
: /* GEU - CF=0 - fails on unordered */
9978 *second_code
= UNORDERED
;
9980 case UNGT
: /* GTU - CF=0 & ZF=0 - fails on unordered */
9982 *second_code
= UNORDERED
;
9987 if (!TARGET_IEEE_FP
)
9989 *second_code
= UNKNOWN
;
9990 *bypass_code
= UNKNOWN
;
9994 /* Return cost of comparison done fcom + arithmetics operations on AX.
9995 All following functions do use number of instructions as a cost metrics.
9996 In future this should be tweaked to compute bytes for optimize_size and
9997 take into account performance of various instructions on various CPUs. */
9999 ix86_fp_comparison_arithmetics_cost (enum rtx_code code
)
10001 if (!TARGET_IEEE_FP
)
10003 /* The cost of code output by ix86_expand_fp_compare. */
10027 gcc_unreachable ();
10031 /* Return cost of comparison done using fcomi operation.
10032 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10034 ix86_fp_comparison_fcomi_cost (enum rtx_code code
)
10036 enum rtx_code bypass_code
, first_code
, second_code
;
10037 /* Return arbitrarily high cost when instruction is not supported - this
10038 prevents gcc from using it. */
10041 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
10042 return (bypass_code
!= UNKNOWN
|| second_code
!= UNKNOWN
) + 2;
10045 /* Return cost of comparison done using sahf operation.
10046 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10048 ix86_fp_comparison_sahf_cost (enum rtx_code code
)
10050 enum rtx_code bypass_code
, first_code
, second_code
;
10051 /* Return arbitrarily high cost when instruction is not preferred - this
10052 avoids gcc from using it. */
10053 if (!TARGET_USE_SAHF
&& !optimize_size
)
10055 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
10056 return (bypass_code
!= UNKNOWN
|| second_code
!= UNKNOWN
) + 3;
10059 /* Compute cost of the comparison done using any method.
10060 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10062 ix86_fp_comparison_cost (enum rtx_code code
)
10064 int fcomi_cost
, sahf_cost
, arithmetics_cost
= 1024;
10067 fcomi_cost
= ix86_fp_comparison_fcomi_cost (code
);
10068 sahf_cost
= ix86_fp_comparison_sahf_cost (code
);
10070 min
= arithmetics_cost
= ix86_fp_comparison_arithmetics_cost (code
);
10071 if (min
> sahf_cost
)
10073 if (min
> fcomi_cost
)
10078 /* Generate insn patterns to do a floating point compare of OPERANDS. */
10081 ix86_expand_fp_compare (enum rtx_code code
, rtx op0
, rtx op1
, rtx scratch
,
10082 rtx
*second_test
, rtx
*bypass_test
)
10084 enum machine_mode fpcmp_mode
, intcmp_mode
;
10086 int cost
= ix86_fp_comparison_cost (code
);
10087 enum rtx_code bypass_code
, first_code
, second_code
;
10089 fpcmp_mode
= ix86_fp_compare_mode (code
);
10090 code
= ix86_prepare_fp_compare_args (code
, &op0
, &op1
);
10093 *second_test
= NULL_RTX
;
10095 *bypass_test
= NULL_RTX
;
10097 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
10099 /* Do fcomi/sahf based test when profitable. */
10100 if ((bypass_code
== UNKNOWN
|| bypass_test
)
10101 && (second_code
== UNKNOWN
|| second_test
)
10102 && ix86_fp_comparison_arithmetics_cost (code
) > cost
)
10106 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
10107 tmp
= gen_rtx_SET (VOIDmode
, gen_rtx_REG (fpcmp_mode
, FLAGS_REG
),
10113 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
10114 tmp2
= gen_rtx_UNSPEC (HImode
, gen_rtvec (1, tmp
), UNSPEC_FNSTSW
);
10116 scratch
= gen_reg_rtx (HImode
);
10117 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, tmp2
));
10118 emit_insn (gen_x86_sahf_1 (scratch
));
10121 /* The FP codes work out to act like unsigned. */
10122 intcmp_mode
= fpcmp_mode
;
10124 if (bypass_code
!= UNKNOWN
)
10125 *bypass_test
= gen_rtx_fmt_ee (bypass_code
, VOIDmode
,
10126 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
10128 if (second_code
!= UNKNOWN
)
10129 *second_test
= gen_rtx_fmt_ee (second_code
, VOIDmode
,
10130 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
10135 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
10136 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
10137 tmp2
= gen_rtx_UNSPEC (HImode
, gen_rtvec (1, tmp
), UNSPEC_FNSTSW
);
10139 scratch
= gen_reg_rtx (HImode
);
10140 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, tmp2
));
10142 /* In the unordered case, we have to check C2 for NaN's, which
10143 doesn't happen to work out to anything nice combination-wise.
10144 So do some bit twiddling on the value we've got in AH to come
10145 up with an appropriate set of condition codes. */
10147 intcmp_mode
= CCNOmode
;
10152 if (code
== GT
|| !TARGET_IEEE_FP
)
10154 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
10159 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
10160 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
10161 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x44)));
10162 intcmp_mode
= CCmode
;
10168 if (code
== LT
&& TARGET_IEEE_FP
)
10170 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
10171 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x01)));
10172 intcmp_mode
= CCmode
;
10177 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x01)));
10183 if (code
== GE
|| !TARGET_IEEE_FP
)
10185 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x05)));
10190 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
10191 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
10198 if (code
== LE
&& TARGET_IEEE_FP
)
10200 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
10201 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
10202 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
10203 intcmp_mode
= CCmode
;
10208 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
10214 if (code
== EQ
&& TARGET_IEEE_FP
)
10216 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
10217 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
10218 intcmp_mode
= CCmode
;
10223 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
10230 if (code
== NE
&& TARGET_IEEE_FP
)
10232 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
10233 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
10239 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
10245 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
10249 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
10254 gcc_unreachable ();
10258 /* Return the test that should be put into the flags user, i.e.
10259 the bcc, scc, or cmov instruction. */
10260 return gen_rtx_fmt_ee (code
, VOIDmode
,
10261 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
10266 ix86_expand_compare (enum rtx_code code
, rtx
*second_test
, rtx
*bypass_test
)
10269 op0
= ix86_compare_op0
;
10270 op1
= ix86_compare_op1
;
10273 *second_test
= NULL_RTX
;
10275 *bypass_test
= NULL_RTX
;
10277 if (ix86_compare_emitted
)
10279 ret
= gen_rtx_fmt_ee (code
, VOIDmode
, ix86_compare_emitted
, const0_rtx
);
10280 ix86_compare_emitted
= NULL_RTX
;
10282 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0
)))
10283 ret
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
,
10284 second_test
, bypass_test
);
10286 ret
= ix86_expand_int_compare (code
, op0
, op1
);
10291 /* Return true if the CODE will result in nontrivial jump sequence. */
10293 ix86_fp_jump_nontrivial_p (enum rtx_code code
)
10295 enum rtx_code bypass_code
, first_code
, second_code
;
10298 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
10299 return bypass_code
!= UNKNOWN
|| second_code
!= UNKNOWN
;
10303 ix86_expand_branch (enum rtx_code code
, rtx label
)
10307 /* If we have emitted a compare insn, go straight to simple.
10308 ix86_expand_compare won't emit anything if ix86_compare_emitted
10310 if (ix86_compare_emitted
)
10313 switch (GET_MODE (ix86_compare_op0
))
10319 tmp
= ix86_expand_compare (code
, NULL
, NULL
);
10320 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
10321 gen_rtx_LABEL_REF (VOIDmode
, label
),
10323 emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
10332 enum rtx_code bypass_code
, first_code
, second_code
;
10334 code
= ix86_prepare_fp_compare_args (code
, &ix86_compare_op0
,
10335 &ix86_compare_op1
);
10337 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
10339 /* Check whether we will use the natural sequence with one jump. If
10340 so, we can expand jump early. Otherwise delay expansion by
10341 creating compound insn to not confuse optimizers. */
10342 if (bypass_code
== UNKNOWN
&& second_code
== UNKNOWN
10345 ix86_split_fp_branch (code
, ix86_compare_op0
, ix86_compare_op1
,
10346 gen_rtx_LABEL_REF (VOIDmode
, label
),
10347 pc_rtx
, NULL_RTX
, NULL_RTX
);
10351 tmp
= gen_rtx_fmt_ee (code
, VOIDmode
,
10352 ix86_compare_op0
, ix86_compare_op1
);
10353 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
10354 gen_rtx_LABEL_REF (VOIDmode
, label
),
10356 tmp
= gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
);
10358 use_fcomi
= ix86_use_fcomi_compare (code
);
10359 vec
= rtvec_alloc (3 + !use_fcomi
);
10360 RTVEC_ELT (vec
, 0) = tmp
;
10362 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCFPmode
, 18));
10364 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCFPmode
, 17));
10367 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (HImode
));
10369 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode
, vec
));
10378 /* Expand DImode branch into multiple compare+branch. */
10380 rtx lo
[2], hi
[2], label2
;
10381 enum rtx_code code1
, code2
, code3
;
10382 enum machine_mode submode
;
10384 if (CONSTANT_P (ix86_compare_op0
) && ! CONSTANT_P (ix86_compare_op1
))
10386 tmp
= ix86_compare_op0
;
10387 ix86_compare_op0
= ix86_compare_op1
;
10388 ix86_compare_op1
= tmp
;
10389 code
= swap_condition (code
);
10391 if (GET_MODE (ix86_compare_op0
) == DImode
)
10393 split_di (&ix86_compare_op0
, 1, lo
+0, hi
+0);
10394 split_di (&ix86_compare_op1
, 1, lo
+1, hi
+1);
10399 split_ti (&ix86_compare_op0
, 1, lo
+0, hi
+0);
10400 split_ti (&ix86_compare_op1
, 1, lo
+1, hi
+1);
10404 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
10405 avoid two branches. This costs one extra insn, so disable when
10406 optimizing for size. */
10408 if ((code
== EQ
|| code
== NE
)
10410 || hi
[1] == const0_rtx
|| lo
[1] == const0_rtx
))
10415 if (hi
[1] != const0_rtx
)
10416 xor1
= expand_binop (submode
, xor_optab
, xor1
, hi
[1],
10417 NULL_RTX
, 0, OPTAB_WIDEN
);
10420 if (lo
[1] != const0_rtx
)
10421 xor0
= expand_binop (submode
, xor_optab
, xor0
, lo
[1],
10422 NULL_RTX
, 0, OPTAB_WIDEN
);
10424 tmp
= expand_binop (submode
, ior_optab
, xor1
, xor0
,
10425 NULL_RTX
, 0, OPTAB_WIDEN
);
10427 ix86_compare_op0
= tmp
;
10428 ix86_compare_op1
= const0_rtx
;
10429 ix86_expand_branch (code
, label
);
10433 /* Otherwise, if we are doing less-than or greater-or-equal-than,
10434 op1 is a constant and the low word is zero, then we can just
10435 examine the high word. */
10437 if (GET_CODE (hi
[1]) == CONST_INT
&& lo
[1] == const0_rtx
)
10440 case LT
: case LTU
: case GE
: case GEU
:
10441 ix86_compare_op0
= hi
[0];
10442 ix86_compare_op1
= hi
[1];
10443 ix86_expand_branch (code
, label
);
10449 /* Otherwise, we need two or three jumps. */
10451 label2
= gen_label_rtx ();
10454 code2
= swap_condition (code
);
10455 code3
= unsigned_condition (code
);
10459 case LT
: case GT
: case LTU
: case GTU
:
10462 case LE
: code1
= LT
; code2
= GT
; break;
10463 case GE
: code1
= GT
; code2
= LT
; break;
10464 case LEU
: code1
= LTU
; code2
= GTU
; break;
10465 case GEU
: code1
= GTU
; code2
= LTU
; break;
10467 case EQ
: code1
= UNKNOWN
; code2
= NE
; break;
10468 case NE
: code2
= UNKNOWN
; break;
10471 gcc_unreachable ();
10476 * if (hi(a) < hi(b)) goto true;
10477 * if (hi(a) > hi(b)) goto false;
10478 * if (lo(a) < lo(b)) goto true;
10482 ix86_compare_op0
= hi
[0];
10483 ix86_compare_op1
= hi
[1];
10485 if (code1
!= UNKNOWN
)
10486 ix86_expand_branch (code1
, label
);
10487 if (code2
!= UNKNOWN
)
10488 ix86_expand_branch (code2
, label2
);
10490 ix86_compare_op0
= lo
[0];
10491 ix86_compare_op1
= lo
[1];
10492 ix86_expand_branch (code3
, label
);
10494 if (code2
!= UNKNOWN
)
10495 emit_label (label2
);
10500 gcc_unreachable ();
10504 /* Split branch based on floating point condition. */
10506 ix86_split_fp_branch (enum rtx_code code
, rtx op1
, rtx op2
,
10507 rtx target1
, rtx target2
, rtx tmp
, rtx pushed
)
10509 rtx second
, bypass
;
10510 rtx label
= NULL_RTX
;
10512 int bypass_probability
= -1, second_probability
= -1, probability
= -1;
10515 if (target2
!= pc_rtx
)
10518 code
= reverse_condition_maybe_unordered (code
);
10523 condition
= ix86_expand_fp_compare (code
, op1
, op2
,
10524 tmp
, &second
, &bypass
);
10526 /* Remove pushed operand from stack. */
10528 ix86_free_from_memory (GET_MODE (pushed
));
10530 if (split_branch_probability
>= 0)
10532 /* Distribute the probabilities across the jumps.
10533 Assume the BYPASS and SECOND to be always test
10535 probability
= split_branch_probability
;
10537 /* Value of 1 is low enough to make no need for probability
10538 to be updated. Later we may run some experiments and see
10539 if unordered values are more frequent in practice. */
10541 bypass_probability
= 1;
10543 second_probability
= 1;
10545 if (bypass
!= NULL_RTX
)
10547 label
= gen_label_rtx ();
10548 i
= emit_jump_insn (gen_rtx_SET
10550 gen_rtx_IF_THEN_ELSE (VOIDmode
,
10552 gen_rtx_LABEL_REF (VOIDmode
,
10555 if (bypass_probability
>= 0)
10557 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
10558 GEN_INT (bypass_probability
),
10561 i
= emit_jump_insn (gen_rtx_SET
10563 gen_rtx_IF_THEN_ELSE (VOIDmode
,
10564 condition
, target1
, target2
)));
10565 if (probability
>= 0)
10567 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
10568 GEN_INT (probability
),
10570 if (second
!= NULL_RTX
)
10572 i
= emit_jump_insn (gen_rtx_SET
10574 gen_rtx_IF_THEN_ELSE (VOIDmode
, second
, target1
,
10576 if (second_probability
>= 0)
10578 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
10579 GEN_INT (second_probability
),
10582 if (label
!= NULL_RTX
)
10583 emit_label (label
);
10587 ix86_expand_setcc (enum rtx_code code
, rtx dest
)
10589 rtx ret
, tmp
, tmpreg
, equiv
;
10590 rtx second_test
, bypass_test
;
10592 if (GET_MODE (ix86_compare_op0
) == (TARGET_64BIT
? TImode
: DImode
))
10593 return 0; /* FAIL */
10595 gcc_assert (GET_MODE (dest
) == QImode
);
10597 ret
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
10598 PUT_MODE (ret
, QImode
);
10603 emit_insn (gen_rtx_SET (VOIDmode
, tmp
, ret
));
10604 if (bypass_test
|| second_test
)
10606 rtx test
= second_test
;
10608 rtx tmp2
= gen_reg_rtx (QImode
);
10611 gcc_assert (!second_test
);
10612 test
= bypass_test
;
10614 PUT_CODE (test
, reverse_condition_maybe_unordered (GET_CODE (test
)));
10616 PUT_MODE (test
, QImode
);
10617 emit_insn (gen_rtx_SET (VOIDmode
, tmp2
, test
));
10620 emit_insn (gen_andqi3 (tmp
, tmpreg
, tmp2
));
10622 emit_insn (gen_iorqi3 (tmp
, tmpreg
, tmp2
));
10625 /* Attach a REG_EQUAL note describing the comparison result. */
10626 if (ix86_compare_op0
&& ix86_compare_op1
)
10628 equiv
= simplify_gen_relational (code
, QImode
,
10629 GET_MODE (ix86_compare_op0
),
10630 ix86_compare_op0
, ix86_compare_op1
);
10631 set_unique_reg_note (get_last_insn (), REG_EQUAL
, equiv
);
10634 return 1; /* DONE */
10637 /* Expand comparison setting or clearing carry flag. Return true when
10638 successful and set pop for the operation. */
10640 ix86_expand_carry_flag_compare (enum rtx_code code
, rtx op0
, rtx op1
, rtx
*pop
)
10642 enum machine_mode mode
=
10643 GET_MODE (op0
) != VOIDmode
? GET_MODE (op0
) : GET_MODE (op1
);
10645 /* Do not handle DImode compares that go through special path. Also we can't
10646 deal with FP compares yet. This is possible to add. */
10647 if (mode
== (TARGET_64BIT
? TImode
: DImode
))
10649 if (FLOAT_MODE_P (mode
))
10651 rtx second_test
= NULL
, bypass_test
= NULL
;
10652 rtx compare_op
, compare_seq
;
10654 /* Shortcut: following common codes never translate into carry flag compares. */
10655 if (code
== EQ
|| code
== NE
|| code
== UNEQ
|| code
== LTGT
10656 || code
== ORDERED
|| code
== UNORDERED
)
10659 /* These comparisons require zero flag; swap operands so they won't. */
10660 if ((code
== GT
|| code
== UNLE
|| code
== LE
|| code
== UNGT
)
10661 && !TARGET_IEEE_FP
)
10666 code
= swap_condition (code
);
10669 /* Try to expand the comparison and verify that we end up with carry flag
10670 based comparison. This is fails to be true only when we decide to expand
10671 comparison using arithmetic that is not too common scenario. */
10673 compare_op
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
,
10674 &second_test
, &bypass_test
);
10675 compare_seq
= get_insns ();
10678 if (second_test
|| bypass_test
)
10680 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
10681 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
10682 code
= ix86_fp_compare_code_to_integer (GET_CODE (compare_op
));
10684 code
= GET_CODE (compare_op
);
10685 if (code
!= LTU
&& code
!= GEU
)
10687 emit_insn (compare_seq
);
10691 if (!INTEGRAL_MODE_P (mode
))
10699 /* Convert a==0 into (unsigned)a<1. */
10702 if (op1
!= const0_rtx
)
10705 code
= (code
== EQ
? LTU
: GEU
);
10708 /* Convert a>b into b<a or a>=b-1. */
10711 if (GET_CODE (op1
) == CONST_INT
)
10713 op1
= gen_int_mode (INTVAL (op1
) + 1, GET_MODE (op0
));
10714 /* Bail out on overflow. We still can swap operands but that
10715 would force loading of the constant into register. */
10716 if (op1
== const0_rtx
10717 || !x86_64_immediate_operand (op1
, GET_MODE (op1
)))
10719 code
= (code
== GTU
? GEU
: LTU
);
10726 code
= (code
== GTU
? LTU
: GEU
);
10730 /* Convert a>=0 into (unsigned)a<0x80000000. */
10733 if (mode
== DImode
|| op1
!= const0_rtx
)
10735 op1
= gen_int_mode (1 << (GET_MODE_BITSIZE (mode
) - 1), mode
);
10736 code
= (code
== LT
? GEU
: LTU
);
10740 if (mode
== DImode
|| op1
!= constm1_rtx
)
10742 op1
= gen_int_mode (1 << (GET_MODE_BITSIZE (mode
) - 1), mode
);
10743 code
= (code
== LE
? GEU
: LTU
);
10749 /* Swapping operands may cause constant to appear as first operand. */
10750 if (!nonimmediate_operand (op0
, VOIDmode
))
10752 if (no_new_pseudos
)
10754 op0
= force_reg (mode
, op0
);
10756 ix86_compare_op0
= op0
;
10757 ix86_compare_op1
= op1
;
10758 *pop
= ix86_expand_compare (code
, NULL
, NULL
);
10759 gcc_assert (GET_CODE (*pop
) == LTU
|| GET_CODE (*pop
) == GEU
);
10764 ix86_expand_int_movcc (rtx operands
[])
10766 enum rtx_code code
= GET_CODE (operands
[1]), compare_code
;
10767 rtx compare_seq
, compare_op
;
10768 rtx second_test
, bypass_test
;
10769 enum machine_mode mode
= GET_MODE (operands
[0]);
10770 bool sign_bit_compare_p
= false;;
10773 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
10774 compare_seq
= get_insns ();
10777 compare_code
= GET_CODE (compare_op
);
10779 if ((ix86_compare_op1
== const0_rtx
&& (code
== GE
|| code
== LT
))
10780 || (ix86_compare_op1
== constm1_rtx
&& (code
== GT
|| code
== LE
)))
10781 sign_bit_compare_p
= true;
10783 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
10784 HImode insns, we'd be swallowed in word prefix ops. */
10786 if ((mode
!= HImode
|| TARGET_FAST_PREFIX
)
10787 && (mode
!= (TARGET_64BIT
? TImode
: DImode
))
10788 && GET_CODE (operands
[2]) == CONST_INT
10789 && GET_CODE (operands
[3]) == CONST_INT
)
10791 rtx out
= operands
[0];
10792 HOST_WIDE_INT ct
= INTVAL (operands
[2]);
10793 HOST_WIDE_INT cf
= INTVAL (operands
[3]);
10794 HOST_WIDE_INT diff
;
10797 /* Sign bit compares are better done using shifts than we do by using
10799 if (sign_bit_compare_p
10800 || ix86_expand_carry_flag_compare (code
, ix86_compare_op0
,
10801 ix86_compare_op1
, &compare_op
))
10803 /* Detect overlap between destination and compare sources. */
10806 if (!sign_bit_compare_p
)
10808 bool fpcmp
= false;
10810 compare_code
= GET_CODE (compare_op
);
10812 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
10813 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
10816 compare_code
= ix86_fp_compare_code_to_integer (compare_code
);
10819 /* To simplify rest of code, restrict to the GEU case. */
10820 if (compare_code
== LTU
)
10822 HOST_WIDE_INT tmp
= ct
;
10825 compare_code
= reverse_condition (compare_code
);
10826 code
= reverse_condition (code
);
10831 PUT_CODE (compare_op
,
10832 reverse_condition_maybe_unordered
10833 (GET_CODE (compare_op
)));
10835 PUT_CODE (compare_op
, reverse_condition (GET_CODE (compare_op
)));
10839 if (reg_overlap_mentioned_p (out
, ix86_compare_op0
)
10840 || reg_overlap_mentioned_p (out
, ix86_compare_op1
))
10841 tmp
= gen_reg_rtx (mode
);
10843 if (mode
== DImode
)
10844 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp
, compare_op
));
10846 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode
, tmp
), compare_op
));
10850 if (code
== GT
|| code
== GE
)
10851 code
= reverse_condition (code
);
10854 HOST_WIDE_INT tmp
= ct
;
10859 tmp
= emit_store_flag (tmp
, code
, ix86_compare_op0
,
10860 ix86_compare_op1
, VOIDmode
, 0, -1);
10873 tmp
= expand_simple_binop (mode
, PLUS
,
10875 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
10886 tmp
= expand_simple_binop (mode
, IOR
,
10888 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
10890 else if (diff
== -1 && ct
)
10900 tmp
= expand_simple_unop (mode
, NOT
, tmp
, copy_rtx (tmp
), 1);
10902 tmp
= expand_simple_binop (mode
, PLUS
,
10903 copy_rtx (tmp
), GEN_INT (cf
),
10904 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
10912 * andl cf - ct, dest
10922 tmp
= expand_simple_unop (mode
, NOT
, tmp
, copy_rtx (tmp
), 1);
10925 tmp
= expand_simple_binop (mode
, AND
,
10927 gen_int_mode (cf
- ct
, mode
),
10928 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
10930 tmp
= expand_simple_binop (mode
, PLUS
,
10931 copy_rtx (tmp
), GEN_INT (ct
),
10932 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
10935 if (!rtx_equal_p (tmp
, out
))
10936 emit_move_insn (copy_rtx (out
), copy_rtx (tmp
));
10938 return 1; /* DONE */
10944 tmp
= ct
, ct
= cf
, cf
= tmp
;
10946 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0
)))
10948 /* We may be reversing unordered compare to normal compare, that
10949 is not valid in general (we may convert non-trapping condition
10950 to trapping one), however on i386 we currently emit all
10951 comparisons unordered. */
10952 compare_code
= reverse_condition_maybe_unordered (compare_code
);
10953 code
= reverse_condition_maybe_unordered (code
);
10957 compare_code
= reverse_condition (compare_code
);
10958 code
= reverse_condition (code
);
10962 compare_code
= UNKNOWN
;
10963 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0
)) == MODE_INT
10964 && GET_CODE (ix86_compare_op1
) == CONST_INT
)
10966 if (ix86_compare_op1
== const0_rtx
10967 && (code
== LT
|| code
== GE
))
10968 compare_code
= code
;
10969 else if (ix86_compare_op1
== constm1_rtx
)
10973 else if (code
== GT
)
10978 /* Optimize dest = (op0 < 0) ? -1 : cf. */
10979 if (compare_code
!= UNKNOWN
10980 && GET_MODE (ix86_compare_op0
) == GET_MODE (out
)
10981 && (cf
== -1 || ct
== -1))
10983 /* If lea code below could be used, only optimize
10984 if it results in a 2 insn sequence. */
10986 if (! (diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
10987 || diff
== 3 || diff
== 5 || diff
== 9)
10988 || (compare_code
== LT
&& ct
== -1)
10989 || (compare_code
== GE
&& cf
== -1))
10992 * notl op1 (if necessary)
11000 code
= reverse_condition (code
);
11003 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
11004 ix86_compare_op1
, VOIDmode
, 0, -1);
11006 out
= expand_simple_binop (mode
, IOR
,
11008 out
, 1, OPTAB_DIRECT
);
11009 if (out
!= operands
[0])
11010 emit_move_insn (operands
[0], out
);
11012 return 1; /* DONE */
11017 if ((diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
11018 || diff
== 3 || diff
== 5 || diff
== 9)
11019 && ((mode
!= QImode
&& mode
!= HImode
) || !TARGET_PARTIAL_REG_STALL
)
11021 || x86_64_immediate_operand (GEN_INT (cf
), VOIDmode
)))
11027 * lea cf(dest*(ct-cf)),dest
11031 * This also catches the degenerate setcc-only case.
11037 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
11038 ix86_compare_op1
, VOIDmode
, 0, 1);
11041 /* On x86_64 the lea instruction operates on Pmode, so we need
11042 to get arithmetics done in proper mode to match. */
11044 tmp
= copy_rtx (out
);
11048 out1
= copy_rtx (out
);
11049 tmp
= gen_rtx_MULT (mode
, out1
, GEN_INT (diff
& ~1));
11053 tmp
= gen_rtx_PLUS (mode
, tmp
, out1
);
11059 tmp
= gen_rtx_PLUS (mode
, tmp
, GEN_INT (cf
));
11062 if (!rtx_equal_p (tmp
, out
))
11065 out
= force_operand (tmp
, copy_rtx (out
));
11067 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (out
), copy_rtx (tmp
)));
11069 if (!rtx_equal_p (out
, operands
[0]))
11070 emit_move_insn (operands
[0], copy_rtx (out
));
11072 return 1; /* DONE */
11076 * General case: Jumpful:
11077 * xorl dest,dest cmpl op1, op2
11078 * cmpl op1, op2 movl ct, dest
11079 * setcc dest jcc 1f
11080 * decl dest movl cf, dest
11081 * andl (cf-ct),dest 1:
11084 * Size 20. Size 14.
11086 * This is reasonably steep, but branch mispredict costs are
11087 * high on modern cpus, so consider failing only if optimizing
11091 if ((!TARGET_CMOVE
|| (mode
== QImode
&& TARGET_PARTIAL_REG_STALL
))
11092 && BRANCH_COST
>= 2)
11098 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0
)))
11099 /* We may be reversing unordered compare to normal compare,
11100 that is not valid in general (we may convert non-trapping
11101 condition to trapping one), however on i386 we currently
11102 emit all comparisons unordered. */
11103 code
= reverse_condition_maybe_unordered (code
);
11106 code
= reverse_condition (code
);
11107 if (compare_code
!= UNKNOWN
)
11108 compare_code
= reverse_condition (compare_code
);
11112 if (compare_code
!= UNKNOWN
)
11114 /* notl op1 (if needed)
11119 For x < 0 (resp. x <= -1) there will be no notl,
11120 so if possible swap the constants to get rid of the
11122 True/false will be -1/0 while code below (store flag
11123 followed by decrement) is 0/-1, so the constants need
11124 to be exchanged once more. */
11126 if (compare_code
== GE
|| !cf
)
11128 code
= reverse_condition (code
);
11133 HOST_WIDE_INT tmp
= cf
;
11138 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
11139 ix86_compare_op1
, VOIDmode
, 0, -1);
11143 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
11144 ix86_compare_op1
, VOIDmode
, 0, 1);
11146 out
= expand_simple_binop (mode
, PLUS
, copy_rtx (out
), constm1_rtx
,
11147 copy_rtx (out
), 1, OPTAB_DIRECT
);
11150 out
= expand_simple_binop (mode
, AND
, copy_rtx (out
),
11151 gen_int_mode (cf
- ct
, mode
),
11152 copy_rtx (out
), 1, OPTAB_DIRECT
);
11154 out
= expand_simple_binop (mode
, PLUS
, copy_rtx (out
), GEN_INT (ct
),
11155 copy_rtx (out
), 1, OPTAB_DIRECT
);
11156 if (!rtx_equal_p (out
, operands
[0]))
11157 emit_move_insn (operands
[0], copy_rtx (out
));
11159 return 1; /* DONE */
11163 if (!TARGET_CMOVE
|| (mode
== QImode
&& TARGET_PARTIAL_REG_STALL
))
11165 /* Try a few things more with specific constants and a variable. */
11168 rtx var
, orig_out
, out
, tmp
;
11170 if (BRANCH_COST
<= 2)
11171 return 0; /* FAIL */
11173 /* If one of the two operands is an interesting constant, load a
11174 constant with the above and mask it in with a logical operation. */
11176 if (GET_CODE (operands
[2]) == CONST_INT
)
11179 if (INTVAL (operands
[2]) == 0 && operands
[3] != constm1_rtx
)
11180 operands
[3] = constm1_rtx
, op
= and_optab
;
11181 else if (INTVAL (operands
[2]) == -1 && operands
[3] != const0_rtx
)
11182 operands
[3] = const0_rtx
, op
= ior_optab
;
11184 return 0; /* FAIL */
11186 else if (GET_CODE (operands
[3]) == CONST_INT
)
11189 if (INTVAL (operands
[3]) == 0 && operands
[2] != constm1_rtx
)
11190 operands
[2] = constm1_rtx
, op
= and_optab
;
11191 else if (INTVAL (operands
[3]) == -1 && operands
[3] != const0_rtx
)
11192 operands
[2] = const0_rtx
, op
= ior_optab
;
11194 return 0; /* FAIL */
11197 return 0; /* FAIL */
11199 orig_out
= operands
[0];
11200 tmp
= gen_reg_rtx (mode
);
11203 /* Recurse to get the constant loaded. */
11204 if (ix86_expand_int_movcc (operands
) == 0)
11205 return 0; /* FAIL */
11207 /* Mask in the interesting variable. */
11208 out
= expand_binop (mode
, op
, var
, tmp
, orig_out
, 0,
11210 if (!rtx_equal_p (out
, orig_out
))
11211 emit_move_insn (copy_rtx (orig_out
), copy_rtx (out
));
11213 return 1; /* DONE */
11217 * For comparison with above,
11227 if (! nonimmediate_operand (operands
[2], mode
))
11228 operands
[2] = force_reg (mode
, operands
[2]);
11229 if (! nonimmediate_operand (operands
[3], mode
))
11230 operands
[3] = force_reg (mode
, operands
[3]);
11232 if (bypass_test
&& reg_overlap_mentioned_p (operands
[0], operands
[3]))
11234 rtx tmp
= gen_reg_rtx (mode
);
11235 emit_move_insn (tmp
, operands
[3]);
11238 if (second_test
&& reg_overlap_mentioned_p (operands
[0], operands
[2]))
11240 rtx tmp
= gen_reg_rtx (mode
);
11241 emit_move_insn (tmp
, operands
[2]);
11245 if (! register_operand (operands
[2], VOIDmode
)
11247 || ! register_operand (operands
[3], VOIDmode
)))
11248 operands
[2] = force_reg (mode
, operands
[2]);
11251 && ! register_operand (operands
[3], VOIDmode
))
11252 operands
[3] = force_reg (mode
, operands
[3]);
11254 emit_insn (compare_seq
);
11255 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
11256 gen_rtx_IF_THEN_ELSE (mode
,
11257 compare_op
, operands
[2],
11260 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (operands
[0]),
11261 gen_rtx_IF_THEN_ELSE (mode
,
11263 copy_rtx (operands
[3]),
11264 copy_rtx (operands
[0]))));
11266 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (operands
[0]),
11267 gen_rtx_IF_THEN_ELSE (mode
,
11269 copy_rtx (operands
[2]),
11270 copy_rtx (operands
[0]))));
11272 return 1; /* DONE */
11275 /* Swap, force into registers, or otherwise massage the two operands
11276 to an sse comparison with a mask result. Thus we differ a bit from
11277 ix86_prepare_fp_compare_args which expects to produce a flags result.
11279 The DEST operand exists to help determine whether to commute commutative
11280 operators. The POP0/POP1 operands are updated in place. The new
11281 comparison code is returned, or UNKNOWN if not implementable. */
11283 static enum rtx_code
11284 ix86_prepare_sse_fp_compare_args (rtx dest
, enum rtx_code code
,
11285 rtx
*pop0
, rtx
*pop1
)
/* NOTE(review): this extraction is lossy — the switch over CODE, its case
   labels, and the operand-swapping statements are missing here; only the
   surviving fragments are annotated.  Verify against the full source. */
11293 /* We have no LTGT as an operator. We could implement it with
11294 NE & ORDERED, but this requires an extra temporary. It's
11295 not clear that it's worth it. */
11302 /* These are supported directly. */
11309 /* For commutative operators, try to canonicalize the destination
11310 operand to be first in the comparison - this helps reload to
11311 avoid extra moves. */
11312 if (!dest
|| !rtx_equal_p (dest
, *pop1
))
11320 /* These are not supported directly. Swap the comparison operands
11321 to transform into something that is supported. */
/* Swapping the operands requires swapping the condition too.  */
11325 code
= swap_condition (code
);
/* Any comparison code not handled above cannot be produced here.  */
11329 gcc_unreachable ();
11335 /* Detect conditional moves that exactly match min/max operational
11336 semantics. Note that this is IEEE safe, as long as we don't
11337 interchange the operands.
11339 Returns FALSE if this conditional move doesn't match a MIN/MAX,
11340 and TRUE if the operation is successful and instructions are emitted. */
11343 ix86_expand_sse_fp_minmax (rtx dest
, enum rtx_code code
, rtx cmp_op0
,
11344 rtx cmp_op1
, rtx if_true
, rtx if_false
)
/* NOTE(review): lossy extraction — the canonicalization of CODE (GT/UNGE
   handling), the is_min/is_max detection branches, and the early returns
   are partially missing below. */
11346 enum machine_mode mode
;
11352 else if (code
== UNGE
)
/* For UNGE the true/false arms are exchanged so the test reduces to
   the LT/UNLT shape handled elsewhere (lines missing here). */
11355 if_true
= if_false
;
/* Match dest = (op0 < op1 ? op0 : op1) as MIN, the swapped pairing as MAX. */
11361 if (rtx_equal_p (cmp_op0
, if_true
) && rtx_equal_p (cmp_op1
, if_false
))
11363 else if (rtx_equal_p (cmp_op1
, if_true
) && rtx_equal_p (cmp_op0
, if_false
))
11368 mode
= GET_MODE (dest
);
11370 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
11371 but MODE may be a vector mode and thus not appropriate. */
11372 if (!flag_finite_math_only
|| !flag_unsafe_math_optimizations
)
/* IEEE-safe path: wrap the operands in an UNSPEC so the optimizers
   cannot commute them (minps/maxps are not commutative wrt NaN/-0.0). */
11374 int u
= is_min
? UNSPEC_IEEE_MIN
: UNSPEC_IEEE_MAX
;
11377 if_true
= force_reg (mode
, if_true
);
11378 v
= gen_rtvec (2, if_true
, if_false
);
11379 tmp
= gen_rtx_UNSPEC (mode
, v
, u
);
/* Fast-math path: a plain SMIN/SMAX rtx is sufficient. */
11383 code
= is_min
? SMIN
: SMAX
;
11384 tmp
= gen_rtx_fmt_ee (code
, mode
, if_true
, if_false
);
11387 emit_insn (gen_rtx_SET (VOIDmode
, dest
, tmp
));
11391 /* Expand an sse vector comparison. Return the register with the result. */
11394 ix86_expand_sse_cmp (rtx dest
, enum rtx_code code
, rtx cmp_op0
, rtx cmp_op1
,
11395 rtx op_true
, rtx op_false
)
/* NOTE(review): lossy extraction — the declaration of X, the condition
   guarding the dest-reuse test, and the final return are missing here. */
11397 enum machine_mode mode
= GET_MODE (dest
);
/* cmpss/cmpps take a register first operand and reg/mem second operand. */
11400 cmp_op0
= force_reg (mode
, cmp_op0
);
11401 if (!nonimmediate_operand (cmp_op1
, mode
))
11402 cmp_op1
= force_reg (mode
, cmp_op1
);
/* Don't clobber DEST while OP_TRUE/OP_FALSE still need its old value;
   fall back to a fresh register for the mask in that case. */
11405 || reg_overlap_mentioned_p (dest
, op_true
)
11406 || reg_overlap_mentioned_p (dest
, op_false
))
11407 dest
= gen_reg_rtx (mode
);
/* Emit dest = (cmp_op0 CODE cmp_op1), producing an all-ones/all-zeros mask. */
11409 x
= gen_rtx_fmt_ee (code
, mode
, cmp_op0
, cmp_op1
);
11410 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
11415 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
11416 operations. This is used for both scalar and vector conditional moves. */
11419 ix86_expand_sse_movcc (rtx dest
, rtx cmp
, rtx op_true
, rtx op_false
)
/* NOTE(review): lossy extraction — local declarations (x, t2, t3) and the
   enclosing braces of the three branches are missing below. */
11421 enum machine_mode mode
= GET_MODE (dest
);
/* Case 1: false arm is zero — dest = cmp & op_true suffices. */
11424 if (op_false
== CONST0_RTX (mode
))
11426 op_true
= force_reg (mode
, op_true
);
11427 x
= gen_rtx_AND (mode
, cmp
, op_true
);
11428 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
/* Case 2: true arm is zero — dest = ~cmp & op_false. */
11430 else if (op_true
== CONST0_RTX (mode
))
11432 op_false
= force_reg (mode
, op_false
);
11433 x
= gen_rtx_NOT (mode
, cmp
);
11434 x
= gen_rtx_AND (mode
, x
, op_false
);
11435 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
/* General case: dest = (op_true & cmp) | (op_false & ~cmp),
   built in two temporaries then IORed together. */
11439 op_true
= force_reg (mode
, op_true
);
11440 op_false
= force_reg (mode
, op_false
);
11442 t2
= gen_reg_rtx (mode
);
11444 t3
= gen_reg_rtx (mode
);
11448 x
= gen_rtx_AND (mode
, op_true
, cmp
);
11449 emit_insn (gen_rtx_SET (VOIDmode
, t2
, x
));
11451 x
= gen_rtx_NOT (mode
, cmp
);
11452 x
= gen_rtx_AND (mode
, x
, op_false
);
11453 emit_insn (gen_rtx_SET (VOIDmode
, t3
, x
));
11455 x
= gen_rtx_IOR (mode
, t3
, t2
);
11456 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
11460 /* Expand a floating-point conditional move. Return true if successful. */
11463 ix86_expand_fp_movcc (rtx operands
[])
/* NOTE(review): lossy extraction — several guards (SSE mode match check,
   early returns, the second_test/bypass_test conditionals) are missing
   between the surviving fragments below. */
11465 enum machine_mode mode
= GET_MODE (operands
[0]);
11466 enum rtx_code code
= GET_CODE (operands
[1]);
11467 rtx tmp
, compare_op
, second_test
, bypass_test
;
/* SSE path: lower the conditional move to a mask compare + logical ops. */
11469 if (TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (mode
))
11471 enum machine_mode cmode
;
11473 /* Since we've no cmove for sse registers, don't force bad register
11474 allocation just to gain access to it. Deny movcc when the
11475 comparison mode doesn't match the move mode. */
11476 cmode
= GET_MODE (ix86_compare_op0
);
11477 if (cmode
== VOIDmode
)
11478 cmode
= GET_MODE (ix86_compare_op1
);
11482 code
= ix86_prepare_sse_fp_compare_args (operands
[0], code
,
11484 &ix86_compare_op1
);
11485 if (code
== UNKNOWN
)
/* Prefer a direct minps/maxps when the movcc matches min/max semantics. */
11488 if (ix86_expand_sse_fp_minmax (operands
[0], code
, ix86_compare_op0
,
11489 ix86_compare_op1
, operands
[2],
11493 tmp
= ix86_expand_sse_cmp (operands
[0], code
, ix86_compare_op0
,
11494 ix86_compare_op1
, operands
[2], operands
[3]);
11495 ix86_expand_sse_movcc (operands
[0], tmp
, operands
[2], operands
[3]);
11499 /* The floating point conditional move instructions don't directly
11500 support conditions resulting from a signed integer comparison. */
11502 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
11504 /* The floating point conditional move instructions don't directly
11505 support signed integer comparisons. */
11507 if (!fcmov_comparison_operator (compare_op
, VOIDmode
))
/* Materialize the condition via setcc into a QImode temp, then redo the
   comparison against zero so fcmov can consume it. */
11509 gcc_assert (!second_test
&& !bypass_test
);
11510 tmp
= gen_reg_rtx (QImode
);
11511 ix86_expand_setcc (code
, tmp
);
11513 ix86_compare_op0
= tmp
;
11514 ix86_compare_op1
= const0_rtx
;
11515 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
/* Copy an arm into a temp when it overlaps the destination, since the
   extra bypass/second fcmov below reads operands[0] after it is written. */
11517 if (bypass_test
&& reg_overlap_mentioned_p (operands
[0], operands
[3]))
11519 tmp
= gen_reg_rtx (mode
);
11520 emit_move_insn (tmp
, operands
[3]);
11523 if (second_test
&& reg_overlap_mentioned_p (operands
[0], operands
[2]))
11525 tmp
= gen_reg_rtx (mode
);
11526 emit_move_insn (tmp
, operands
[2]);
/* Main fcmov, then optional fix-up fcmovs for the unordered cases. */
11530 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
11531 gen_rtx_IF_THEN_ELSE (mode
, compare_op
,
11532 operands
[2], operands
[3])));
11534 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
11535 gen_rtx_IF_THEN_ELSE (mode
, bypass_test
,
11536 operands
[3], operands
[0])));
11538 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
11539 gen_rtx_IF_THEN_ELSE (mode
, second_test
,
11540 operands
[2], operands
[0])));
11545 /* Expand a floating-point vector conditional move; a vcond operation
11546 rather than a movcc operation. */
11549 ix86_expand_fp_vcond (rtx operands
[])
/* NOTE(review): lossy extraction — the declaration of CMP and the
   early returns (true on success, false on UNKNOWN) are missing here. */
11551 enum rtx_code code
= GET_CODE (operands
[3]);
/* Canonicalize/swap comparison operands in place for SSE mask compare. */
11554 code
= ix86_prepare_sse_fp_compare_args (operands
[0], code
,
11555 &operands
[4], &operands
[5]);
11556 if (code
== UNKNOWN
)
/* Try to match min/max directly before falling back to cmp + blend. */
11559 if (ix86_expand_sse_fp_minmax (operands
[0], code
, operands
[4],
11560 operands
[5], operands
[1], operands
[2]))
11563 cmp
= ix86_expand_sse_cmp (operands
[0], code
, operands
[4], operands
[5],
11564 operands
[1], operands
[2]);
11565 ix86_expand_sse_movcc (operands
[0], cmp
, operands
[1], operands
[2]);
11569 /* Expand a signed integral vector conditional move. */
11572 ix86_expand_int_vcond (rtx operands
[])
/* NOTE(review): lossy extraction — the switch over CODE, the NEGATE
   toggles, several case labels, and branch braces are missing below. */
11574 enum machine_mode mode
= GET_MODE (operands
[0]);
11575 enum rtx_code code
= GET_CODE (operands
[3]);
11576 bool negate
= false;
11579 cop0
= operands
[4];
11580 cop1
= operands
[5];
11582 /* Canonicalize the comparison to EQ, GT, GTU. */
11593 code
= reverse_condition (code
);
11599 code
= reverse_condition (code
);
/* LT/LTU become GT/GTU by swapping the two compare operands. */
11605 code
= swap_condition (code
);
11606 x
= cop0
, cop0
= cop1
, cop1
= x
;
11610 gcc_unreachable ();
11613 /* Unsigned parallel compare is not supported by the hardware. Play some
11614 tricks to turn this into a signed comparison against 0. */
11617 cop0
= force_reg (mode
, cop0
);
/* V4SI path (lines selecting it are missing): subtract, then recover the
   unsigned-underflow bit via the original sign bit of op0. */
11625 /* Perform a parallel modulo subtraction. */
11626 t1
= gen_reg_rtx (mode
);
11627 emit_insn (gen_subv4si3 (t1
, cop0
, cop1
));
11629 /* Extract the original sign bit of op0. */
11630 mask
= GEN_INT (-0x80000000);
11631 mask
= gen_rtx_CONST_VECTOR (mode
,
11632 gen_rtvec (4, mask
, mask
, mask
, mask
));
11633 mask
= force_reg (mode
, mask
);
11634 t2
= gen_reg_rtx (mode
);
11635 emit_insn (gen_andv4si3 (t2
, cop0
, mask
));
11637 /* XOR it back into the result of the subtraction. This results
11638 in the sign bit set iff we saw unsigned underflow. */
11639 x
= gen_reg_rtx (mode
);
11640 emit_insn (gen_xorv4si3 (x
, t1
, t2
));
/* Narrow-element path: saturating subtract leaves nonzero iff cop0 > cop1. */
11648 /* Perform a parallel unsigned saturating subtraction. */
11649 x
= gen_reg_rtx (mode
);
11650 emit_insn (gen_rtx_SET (VOIDmode
, x
,
11651 gen_rtx_US_MINUS (mode
, cop0
, cop1
)));
11658 gcc_unreachable ();
/* After the tricks above, compare the transformed value against zero. */
11662 cop1
= CONST0_RTX (mode
);
/* NEGATE swaps the two value arms via the 1+negate / 2-negate indexing. */
11665 x
= ix86_expand_sse_cmp (operands
[0], code
, cop0
, cop1
,
11666 operands
[1+negate
], operands
[2-negate
]);
11668 ix86_expand_sse_movcc (operands
[0], x
, operands
[1+negate
],
11669 operands
[2-negate
]);
11673 /* Expand conditional increment or decrement using adb/sbb instructions.
11674 The default case using setcc followed by the conditional move can be
11675 done by generic code. */
11677 ix86_expand_int_addcc (rtx operands
[])
/* NOTE(review): lossy extraction — the declaration of compare_op, the
   fpcmp conditional structure, switch headers/case labels and the FAIL
   returns are missing between the surviving fragments. */
11679 enum rtx_code code
= GET_CODE (operands
[1]);
11681 rtx val
= const0_rtx
;
11682 bool fpcmp
= false;
11683 enum machine_mode mode
= GET_MODE (operands
[0]);
/* Only +1/-1 increments can be folded into adc/sbb with a zero VAL. */
11685 if (operands
[3] != const1_rtx
11686 && operands
[3] != constm1_rtx
)
11688 if (!ix86_expand_carry_flag_compare (code
, ix86_compare_op0
,
11689 ix86_compare_op1
, &compare_op
))
11691 code
= GET_CODE (compare_op
);
11693 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
11694 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
11697 code
= ix86_fp_compare_code_to_integer (code
);
/* Reverse the condition in place; use the maybe_unordered variant for
   FP flag modes (all i386 FP compares are emitted unordered). */
11704 PUT_CODE (compare_op
,
11705 reverse_condition_maybe_unordered
11706 (GET_CODE (compare_op
)));
11708 PUT_CODE (compare_op
, reverse_condition (GET_CODE (compare_op
)));
11710 PUT_MODE (compare_op
, mode
);
11712 /* Construct either adc or sbb insn. */
11713 if ((code
== LTU
) == (operands
[3] == constm1_rtx
))
/* sbb: dest = op2 - 0 - CF, i.e. conditional decrement. */
11715 switch (GET_MODE (operands
[0]))
11718 emit_insn (gen_subqi3_carry (operands
[0], operands
[2], val
, compare_op
));
11721 emit_insn (gen_subhi3_carry (operands
[0], operands
[2], val
, compare_op
));
11724 emit_insn (gen_subsi3_carry (operands
[0], operands
[2], val
, compare_op
));
11727 emit_insn (gen_subdi3_carry_rex64 (operands
[0], operands
[2], val
, compare_op
));
11730 gcc_unreachable ();
/* adc: dest = op2 + 0 + CF, i.e. conditional increment. */
11735 switch (GET_MODE (operands
[0]))
11738 emit_insn (gen_addqi3_carry (operands
[0], operands
[2], val
, compare_op
));
11741 emit_insn (gen_addhi3_carry (operands
[0], operands
[2], val
, compare_op
));
11744 emit_insn (gen_addsi3_carry (operands
[0], operands
[2], val
, compare_op
));
11747 emit_insn (gen_adddi3_carry_rex64 (operands
[0], operands
[2], val
, compare_op
));
11750 gcc_unreachable ();
11753 return 1; /* DONE */
11757 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
11758 works for floating pointer parameters and nonoffsetable memories.
11759 For pushes, it returns just stack offsets; the values will be saved
11760 in the right order. Maximally three parts are generated. */
11763 ix86_split_to_parts (rtx operand
, rtx
*parts
, enum machine_mode mode
)
/* NOTE(review): lossy extraction — the TARGET_64BIT / !TARGET_64BIT
   branch structure, several if/else arms, the `return size` and local
   declarations are missing between the surviving fragments. */
/* Number of word-sized parts: XFmode needs 3 on 32-bit; otherwise the
   mode size divided by the word size of the respective branch. */
11768 size
= mode
==XFmode
? 3 : GET_MODE_SIZE (mode
) / 4;
11770 size
= (GET_MODE_SIZE (mode
) + 4) / 8;
11772 gcc_assert (GET_CODE (operand
) != REG
|| !MMX_REGNO_P (REGNO (operand
)));
11773 gcc_assert (size
>= 2 && size
<= 3);
11775 /* Optimize constant pool reference to immediates. This is used by fp
11776 moves, that force all constants to memory to allow combining. */
11777 if (GET_CODE (operand
) == MEM
&& MEM_READONLY_P (operand
))
11779 rtx tmp
= maybe_get_pool_constant (operand
);
11784 if (GET_CODE (operand
) == MEM
&& !offsettable_memref_p (operand
))
11786 /* The only non-offsetable memories we handle are pushes. */
11787 int ok
= push_operand (operand
, VOIDmode
);
/* For a push all parts share the same (pre-dec) address rtx. */
11791 operand
= copy_rtx (operand
);
11792 PUT_MODE (operand
, Pmode
);
11793 parts
[0] = parts
[1] = parts
[2] = operand
;
11797 if (GET_CODE (operand
) == CONST_VECTOR
)
11799 enum machine_mode imode
= int_mode_for_mode (mode
);
11800 /* Caution: if we looked through a constant pool memory above,
11801 the operand may actually have a different mode now. That's
11802 ok, since we want to pun this all the way back to an integer. */
11803 operand
= simplify_subreg (imode
, operand
, GET_MODE (operand
), 0);
11804 gcc_assert (operand
!= NULL
);
/* 32-bit branch (enclosing condition missing from extraction). */
11810 if (mode
== DImode
)
11811 split_di (&operand
, 1, &parts
[0], &parts
[1]);
11814 if (REG_P (operand
))
/* Consecutive hard registers are only meaningful after reload. */
11816 gcc_assert (reload_completed
);
11817 parts
[0] = gen_rtx_REG (SImode
, REGNO (operand
) + 0);
11818 parts
[1] = gen_rtx_REG (SImode
, REGNO (operand
) + 1);
11820 parts
[2] = gen_rtx_REG (SImode
, REGNO (operand
) + 2);
11822 else if (offsettable_memref_p (operand
))
11824 operand
= adjust_address (operand
, SImode
, 0);
11825 parts
[0] = operand
;
11826 parts
[1] = adjust_address (operand
, SImode
, 4);
11828 parts
[2] = adjust_address (operand
, SImode
, 8);
11830 else if (GET_CODE (operand
) == CONST_DOUBLE
)
/* Decompose an FP constant into its target word images. */
11835 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
11839 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r
, l
);
11840 parts
[2] = gen_int_mode (l
[2], SImode
);
11843 REAL_VALUE_TO_TARGET_DOUBLE (r
, l
);
11846 gcc_unreachable ();
11848 parts
[1] = gen_int_mode (l
[1], SImode
);
11849 parts
[0] = gen_int_mode (l
[0], SImode
);
11852 gcc_unreachable ();
/* 64-bit branch (enclosing condition missing from extraction). */
11857 if (mode
== TImode
)
11858 split_ti (&operand
, 1, &parts
[0], &parts
[1]);
11859 if (mode
== XFmode
|| mode
== TFmode
)
/* On 64-bit the upper part of XFmode is only 32 bits wide. */
11861 enum machine_mode upper_mode
= mode
==XFmode
? SImode
: DImode
;
11862 if (REG_P (operand
))
11864 gcc_assert (reload_completed
);
11865 parts
[0] = gen_rtx_REG (DImode
, REGNO (operand
) + 0);
11866 parts
[1] = gen_rtx_REG (upper_mode
, REGNO (operand
) + 1);
11868 else if (offsettable_memref_p (operand
))
11870 operand
= adjust_address (operand
, DImode
, 0);
11871 parts
[0] = operand
;
11872 parts
[1] = adjust_address (operand
, upper_mode
, 8);
11874 else if (GET_CODE (operand
) == CONST_DOUBLE
)
11879 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
11880 real_to_target (l
, &r
, mode
);
11882 /* Do not use shift by 32 to avoid warning on 32bit systems. */
11883 if (HOST_BITS_PER_WIDE_INT
>= 64)
/* Reassemble two 32-bit target words into one HOST_WIDE_INT;
   the (2 << 31) - 1 mask and double shift avoid a 32-bit shift-by-32. */
11886 ((l
[0] & (((HOST_WIDE_INT
) 2 << 31) - 1))
11887 + ((((HOST_WIDE_INT
) l
[1]) << 31) << 1),
11890 parts
[0] = immed_double_const (l
[0], l
[1], DImode
);
11892 if (upper_mode
== SImode
)
11893 parts
[1] = gen_int_mode (l
[2], SImode
);
11894 else if (HOST_BITS_PER_WIDE_INT
>= 64)
11897 ((l
[2] & (((HOST_WIDE_INT
) 2 << 31) - 1))
11898 + ((((HOST_WIDE_INT
) l
[3]) << 31) << 1),
11901 parts
[1] = immed_double_const (l
[2], l
[3], DImode
);
11904 gcc_unreachable ();
11911 /* Emit insns to perform a move or push of DI, DF, and XF values.
11912 Return false when normal moves are needed; true when all required
11913 insns have been emitted. Operands 2-4 contain the input values
11914 int the correct order; operands 5-7 contain the output values. */
11917 ix86_split_long_move (rtx operands
[])
/* NOTE(review): lossy extraction — declarations of part[][]/nparts/push,
   many braces, the push-emission loop and several conditionals are
   missing between the surviving fragments below. */
11922 int collisions
= 0;
11923 enum machine_mode mode
= GET_MODE (operands
[0]);
11925 /* The DFmode expanders may ask us to move double.
11926 For 64bit target this is single move. By hiding the fact
11927 here we simplify i386.md splitters. */
11928 if (GET_MODE_SIZE (GET_MODE (operands
[0])) == 8 && TARGET_64BIT
)
11930 /* Optimize constant pool reference to immediates. This is used by
11931 fp moves, that force all constants to memory to allow combining. */
11933 if (GET_CODE (operands
[1]) == MEM
11934 && GET_CODE (XEXP (operands
[1], 0)) == SYMBOL_REF
11935 && CONSTANT_POOL_ADDRESS_P (XEXP (operands
[1], 0)))
11936 operands
[1] = get_pool_constant (XEXP (operands
[1], 0));
11937 if (push_operand (operands
[0], VOIDmode
))
11939 operands
[0] = copy_rtx (operands
[0]);
11940 PUT_MODE (operands
[0], Pmode
);
/* 8-byte move on 64-bit: a single DImode move handles it. */
11943 operands
[0] = gen_lowpart (DImode
, operands
[0]);
11944 operands
[1] = gen_lowpart (DImode
, operands
[1]);
11945 emit_move_insn (operands
[0], operands
[1]);
11949 /* The only non-offsettable memory we handle is push. */
11950 if (push_operand (operands
[0], VOIDmode
))
11953 gcc_assert (GET_CODE (operands
[0]) != MEM
11954 || offsettable_memref_p (operands
[0]));
11956 nparts
= ix86_split_to_parts (operands
[1], part
[1], GET_MODE (operands
[0]));
11957 ix86_split_to_parts (operands
[0], part
[0], GET_MODE (operands
[0]));
11959 /* When emitting push, take care for source operands on the stack. */
11960 if (push
&& GET_CODE (operands
[1]) == MEM
11961 && reg_overlap_mentioned_p (stack_pointer_rtx
, operands
[1]))
/* Rewrite lower source parts to use the higher parts' addresses, since
   pushing moves the stack pointer between the individual pushes. */
11964 part
[1][1] = change_address (part
[1][1], GET_MODE (part
[1][1]),
11965 XEXP (part
[1][2], 0));
11966 part
[1][0] = change_address (part
[1][0], GET_MODE (part
[1][0]),
11967 XEXP (part
[1][1], 0));
11970 /* We need to do copy in the right order in case an address register
11971 of the source overlaps the destination. */
11972 if (REG_P (part
[0][0]) && GET_CODE (part
[1][0]) == MEM
)
11974 if (reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0)))
11976 if (reg_overlap_mentioned_p (part
[0][1], XEXP (part
[1][0], 0)))
11979 && reg_overlap_mentioned_p (part
[0][2], XEXP (part
[1][0], 0)))
11982 /* Collision in the middle part can be handled by reordering. */
11983 if (collisions
== 1 && nparts
== 3
11984 && reg_overlap_mentioned_p (part
[0][1], XEXP (part
[1][0], 0)))
/* Swap the middle and top parts on both sides. */
11987 tmp
= part
[0][1]; part
[0][1] = part
[0][2]; part
[0][2] = tmp
;
11988 tmp
= part
[1][1]; part
[1][1] = part
[1][2]; part
[1][2] = tmp
;
11991 /* If there are more collisions, we can't handle it by reordering.
11992 Do an lea to the last part and use only one colliding move. */
11993 else if (collisions
> 1)
11999 base
= part
[0][nparts
- 1];
12001 /* Handle the case when the last part isn't valid for lea.
12002 Happens in 64-bit mode storing the 12-byte XFmode. */
12003 if (GET_MODE (base
) != Pmode
)
12004 base
= gen_rtx_REG (Pmode
, REGNO (base
));
12006 emit_insn (gen_rtx_SET (VOIDmode
, base
, XEXP (part
[1][0], 0)));
12007 part
[1][0] = replace_equiv_address (part
[1][0], base
);
12008 part
[1][1] = replace_equiv_address (part
[1][1],
12009 plus_constant (base
, UNITS_PER_WORD
));
12011 part
[1][2] = replace_equiv_address (part
[1][2],
12012 plus_constant (base
, 8));
/* Push path (selection lines missing): emit parts from high to low. */
12022 if (TARGET_128BIT_LONG_DOUBLE
&& mode
== XFmode
)
12023 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, GEN_INT (-4)));
12024 emit_move_insn (part
[0][2], part
[1][2]);
12029 /* In 64bit mode we don't have 32bit push available. In case this is
12030 register, it is OK - we will just use larger counterpart. We also
12031 retype memory - these comes from attempt to avoid REX prefix on
12032 moving of second half of TFmode value. */
12033 if (GET_MODE (part
[1][1]) == SImode
)
12035 switch (GET_CODE (part
[1][1]))
12038 part
[1][1] = adjust_address (part
[1][1], DImode
, 0);
12042 part
[1][1] = gen_rtx_REG (DImode
, REGNO (part
[1][1]));
12046 gcc_unreachable ();
12049 if (GET_MODE (part
[1][0]) == SImode
)
12050 part
[1][0] = part
[1][1];
12053 emit_move_insn (part
[0][1], part
[1][1]);
12054 emit_move_insn (part
[0][0], part
[1][0]);
12058 /* Choose correct order to not overwrite the source before it is copied. */
12059 if ((REG_P (part
[0][0])
12060 && REG_P (part
[1][1])
12061 && (REGNO (part
[0][0]) == REGNO (part
[1][1])
12063 && REGNO (part
[0][0]) == REGNO (part
[1][2]))))
12065 && reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0))))
/* Descending order: copy top part first. */
12069 operands
[2] = part
[0][2];
12070 operands
[3] = part
[0][1];
12071 operands
[4] = part
[0][0];
12072 operands
[5] = part
[1][2];
12073 operands
[6] = part
[1][1];
12074 operands
[7] = part
[1][0];
12078 operands
[2] = part
[0][1];
12079 operands
[3] = part
[0][0];
12080 operands
[5] = part
[1][1];
12081 operands
[6] = part
[1][0];
/* Ascending order: copy bottom part first. */
12088 operands
[2] = part
[0][0];
12089 operands
[3] = part
[0][1];
12090 operands
[4] = part
[0][2];
12091 operands
[5] = part
[1][0];
12092 operands
[6] = part
[1][1];
12093 operands
[7] = part
[1][2];
12097 operands
[2] = part
[0][0];
12098 operands
[3] = part
[0][1];
12099 operands
[5] = part
[1][0];
12100 operands
[6] = part
[1][1];
12104 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
12107 if (GET_CODE (operands
[5]) == CONST_INT
12108 && operands
[5] != const0_rtx
12109 && REG_P (operands
[2]))
/* Reuse the register already holding the same constant. */
12111 if (GET_CODE (operands
[6]) == CONST_INT
12112 && INTVAL (operands
[6]) == INTVAL (operands
[5]))
12113 operands
[6] = operands
[2];
12116 && GET_CODE (operands
[7]) == CONST_INT
12117 && INTVAL (operands
[7]) == INTVAL (operands
[5]))
12118 operands
[7] = operands
[2];
12122 && GET_CODE (operands
[6]) == CONST_INT
12123 && operands
[6] != const0_rtx
12124 && REG_P (operands
[3])
12125 && GET_CODE (operands
[7]) == CONST_INT
12126 && INTVAL (operands
[7]) == INTVAL (operands
[6]))
12127 operands
[7] = operands
[3];
/* Finally emit the (two or three) part moves in the chosen order. */
12130 emit_move_insn (operands
[2], operands
[5]);
12131 emit_move_insn (operands
[3], operands
[6]);
12133 emit_move_insn (operands
[4], operands
[7]);
12138 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
12139 left shift by a constant, either using a single shift or
12140 a sequence of add instructions. */
12143 ix86_expand_ashl_const (rtx operand
, int count
, enum machine_mode mode
)
/* NOTE(review): lossy extraction — the count==1 test and the gen_addsi3
   alternatives of the conditional gen_* selections are missing here. */
/* Shift by one: a single self-add (lea/add) is cheapest. */
12147 emit_insn ((mode
== DImode
12149 : gen_adddi3
) (operand
, operand
, operand
));
/* Small counts: a run of self-adds when cheaper than one shift-by-const
   (never when optimizing for size). */
12151 else if (!optimize_size
12152 && count
* ix86_cost
->add
<= ix86_cost
->shift_const
)
12155 for (i
=0; i
<count
; i
++)
12157 emit_insn ((mode
== DImode
12159 : gen_adddi3
) (operand
, operand
, operand
));
/* Otherwise emit the plain shift instruction. */
12163 emit_insn ((mode
== DImode
12165 : gen_ashldi3
) (operand
, operand
, GEN_INT (count
)));
12169 ix86_split_ashl (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
/* Split a double-word left shift (DImode on 32-bit, TImode on 64-bit)
   into single-word operations.
   NOTE(review): lossy extraction — count declaration, several braces,
   else arms and the gen_*si3 alternatives are missing below. */
12171 rtx low
[2], high
[2];
/* Width of one half of the double-word value. */
12173 const int single_width
= mode
== DImode
? 32 : 64;
/* Constant shift count: fully resolve at expand time. */
12175 if (GET_CODE (operands
[2]) == CONST_INT
)
12177 (mode
== DImode
? split_di
: split_ti
) (operands
, 2, low
, high
);
12178 count
= INTVAL (operands
[2]) & (single_width
* 2 - 1);
/* count >= word size: low half becomes zero, old low goes to high. */
12180 if (count
>= single_width
)
12182 emit_move_insn (high
[0], low
[1]);
12183 emit_move_insn (low
[0], const0_rtx
);
12185 if (count
> single_width
)
12186 ix86_expand_ashl_const (high
[0], count
- single_width
, mode
);
/* count < word size: shld feeds low bits into high, then shift low. */
12190 if (!rtx_equal_p (operands
[0], operands
[1]))
12191 emit_move_insn (operands
[0], operands
[1]);
12192 emit_insn ((mode
== DImode
12194 : gen_x86_64_shld
) (high
[0], low
[0], GEN_INT (count
)));
12195 ix86_expand_ashl_const (low
[0], count
, mode
);
/* Variable shift count from here on. */
12200 (mode
== DImode
? split_di
: split_ti
) (operands
, 1, low
, high
);
12202 if (operands
[1] == const1_rtx
)
12204 /* Assuming we've chosen a QImode capable registers, then 1 << N
12205 can be done with two 32/64-bit shifts, no branches, no cmoves. */
12206 if (ANY_QI_REG_P (low
[0]) && ANY_QI_REG_P (high
[0]))
12208 rtx s
, d
, flags
= gen_rtx_REG (CCZmode
, FLAGS_REG
)
;
12210 ix86_expand_clear (low
[0]);
12211 ix86_expand_clear (high
[0]);
12212 emit_insn (gen_testqi_ccz_1 (operands
[2], GEN_INT (single_width
)));
/* setcc the ZF result into the low byte of each half:
   low = (N < single_width), high = (N >= single_width). */
12214 d
= gen_lowpart (QImode
, low
[0]);
12215 d
= gen_rtx_STRICT_LOW_PART (VOIDmode
, d
);
12216 s
= gen_rtx_EQ (QImode
, flags
, const0_rtx
);
12217 emit_insn (gen_rtx_SET (VOIDmode
, d
, s
));
12219 d
= gen_lowpart (QImode
, high
[0]);
12220 d
= gen_rtx_STRICT_LOW_PART (VOIDmode
, d
);
12221 s
= gen_rtx_NE (QImode
, flags
, const0_rtx
);
12222 emit_insn (gen_rtx_SET (VOIDmode
, d
, s
));
12225 /* Otherwise, we can get the same results by manually performing
12226 a bit extract operation on bit 5/6, and then performing the two
12227 shifts. The two methods of getting 0/1 into low/high are exactly
12228 the same size. Avoiding the shift in the bit extract case helps
12229 pentium4 a bit; no one else seems to care much either way. */
/* Copy the count into high[0]; zero-extend to dodge partial-reg stalls. */
12234 if (TARGET_PARTIAL_REG_STALL
&& !optimize_size
)
12235 x
= gen_rtx_ZERO_EXTEND (mode
== DImode
? SImode
: DImode
, operands
[2]);
12237 x
= gen_lowpart (mode
== DImode
? SImode
: DImode
, operands
[2]);
12238 emit_insn (gen_rtx_SET (VOIDmode
, high
[0], x
));
/* Extract bit 5 (DImode) / 6 (TImode) of the count — the half selector. */
12240 emit_insn ((mode
== DImode
12242 : gen_lshrdi3
) (high
[0], high
[0], GEN_INT (mode
== DImode
? 5 : 6)));
12243 emit_insn ((mode
== DImode
12245 : gen_anddi3
) (high
[0], high
[0], GEN_INT (1)));
12246 emit_move_insn (low
[0], high
[0]);
12247 emit_insn ((mode
== DImode
12249 : gen_xordi3
) (low
[0], low
[0], GEN_INT (1)));
/* Now shift both 0/1 seeds left by the count. */
12252 emit_insn ((mode
== DImode
12254 : gen_ashldi3
) (low
[0], low
[0], operands
[2]));
12255 emit_insn ((mode
== DImode
12257 : gen_ashldi3
) (high
[0], high
[0], operands
[2]));
12261 if (operands
[1] == constm1_rtx
)
12263 /* For -1 << N, we can avoid the shld instruction, because we
12264 know that we're shifting 0...31/63 ones into a -1. */
12265 emit_move_insn (low
[0], constm1_rtx
);
12267 emit_move_insn (high
[0], low
[0]);
12269 emit_move_insn (high
[0], constm1_rtx
);
/* General variable-count case: shld + shift, then runtime adjustment. */
12273 if (!rtx_equal_p (operands
[0], operands
[1]))
12274 emit_move_insn (operands
[0], operands
[1]);
12276 (mode
== DImode
? split_di
: split_ti
) (operands
, 1, low
, high
);
12277 emit_insn ((mode
== DImode
12279 : gen_x86_64_shld
) (high
[0], low
[0], operands
[2]));
12282 emit_insn ((mode
== DImode
? gen_ashlsi3
: gen_ashldi3
) (low
[0], low
[0], operands
[2]));
/* Fix up the count >= single_width case: with cmov use the scratch
   register, otherwise fall back to a branchy adjustment pattern. */
12284 if (TARGET_CMOVE
&& scratch
)
12286 ix86_expand_clear (scratch
);
12287 emit_insn ((mode
== DImode
12288 ? gen_x86_shift_adj_1
12289 : gen_x86_64_shift_adj
) (high
[0], low
[0], operands
[2], scratch
));
12292 emit_insn (gen_x86_shift_adj_2 (high
[0], low
[0], operands
[2]));
12296 ix86_split_ashr (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
/* Split a double-word arithmetic right shift into single-word operations.
   NOTE(review): lossy extraction — count declaration, braces, else arms
   and the gen_*si3 alternatives are missing below. */
12298 rtx low
[2], high
[2];
12300 const int single_width
= mode
== DImode
? 32 : 64;
/* Constant count: resolve the split at expand time. */
12302 if (GET_CODE (operands
[2]) == CONST_INT
)
12304 (mode
== DImode
? split_di
: split_ti
) (operands
, 2, low
, high
);
12305 count
= INTVAL (operands
[2]) & (single_width
* 2 - 1);
/* Maximum count: both halves become the sign-bit broadcast. */
12307 if (count
== single_width
* 2 - 1)
12309 emit_move_insn (high
[0], high
[1]);
12310 emit_insn ((mode
== DImode
12312 : gen_ashrdi3
) (high
[0], high
[0],
12313 GEN_INT (single_width
- 1)));
12314 emit_move_insn (low
[0], high
[0]);
/* count >= word size: low gets the shifted old high, high gets sign fill. */
12317 else if (count
>= single_width
)
12319 emit_move_insn (low
[0], high
[1]);
12320 emit_move_insn (high
[0], low
[0]);
12321 emit_insn ((mode
== DImode
12323 : gen_ashrdi3
) (high
[0], high
[0],
12324 GEN_INT (single_width
- 1)));
12325 if (count
> single_width
)
12326 emit_insn ((mode
== DImode
12328 : gen_ashrdi3
) (low
[0], low
[0],
12329 GEN_INT (count
- single_width
)));
/* count < word size: shrd feeds high bits into low, then sar high. */
12333 if (!rtx_equal_p (operands
[0], operands
[1]))
12334 emit_move_insn (operands
[0], operands
[1]);
12335 emit_insn ((mode
== DImode
12337 : gen_x86_64_shrd
) (low
[0], high
[0], GEN_INT (count
)));
12338 emit_insn ((mode
== DImode
12340 : gen_ashrdi3
) (high
[0], high
[0], GEN_INT (count
)));
/* Variable count: shrd + sar, then runtime fix-up for count >= width. */
12345 if (!rtx_equal_p (operands
[0], operands
[1]))
12346 emit_move_insn (operands
[0], operands
[1]);
12348 (mode
== DImode
? split_di
: split_ti
) (operands
, 1, low
, high
);
12350 emit_insn ((mode
== DImode
12352 : gen_x86_64_shrd
) (low
[0], high
[0], operands
[2]));
12353 emit_insn ((mode
== DImode
12355 : gen_ashrdi3
) (high
[0], high
[0], operands
[2]));
/* With cmov available, precompute the sign fill in SCRATCH and let the
   shift-adjust pattern select it; otherwise use the branchy pattern. */
12357 if (TARGET_CMOVE
&& scratch
)
12359 emit_move_insn (scratch
, high
[0]);
12360 emit_insn ((mode
== DImode
12362 : gen_ashrdi3
) (scratch
, scratch
,
12363 GEN_INT (single_width
- 1)));
12364 emit_insn ((mode
== DImode
12365 ? gen_x86_shift_adj_1
12366 : gen_x86_64_shift_adj
) (low
[0], high
[0], operands
[2],
12370 emit_insn (gen_x86_shift_adj_3 (low
[0], high
[0], operands
[2]));
12375 ix86_split_lshr (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
/* Split a double-word logical right shift into single-word operations.
   NOTE(review): lossy extraction — count declaration, braces, else arms
   and the gen_*si3 alternatives are missing below. */
12377 rtx low
[2], high
[2];
12379 const int single_width
= mode
== DImode
? 32 : 64;
/* Constant count: resolve the split at expand time. */
12381 if (GET_CODE (operands
[2]) == CONST_INT
)
12383 (mode
== DImode
? split_di
: split_ti
) (operands
, 2, low
, high
);
12384 count
= INTVAL (operands
[2]) & (single_width
* 2 - 1);
/* count >= word size: low gets the old high, high becomes zero. */
12386 if (count
>= single_width
)
12388 emit_move_insn (low
[0], high
[1]);
12389 ix86_expand_clear (high
[0]);
12391 if (count
> single_width
)
12392 emit_insn ((mode
== DImode
12394 : gen_lshrdi3
) (low
[0], low
[0],
12395 GEN_INT (count
- single_width
)));
/* count < word size: shrd feeds high bits into low, then shr high. */
12399 if (!rtx_equal_p (operands
[0], operands
[1]))
12400 emit_move_insn (operands
[0], operands
[1]);
12401 emit_insn ((mode
== DImode
12403 : gen_x86_64_shrd
) (low
[0], high
[0], GEN_INT (count
)));
12404 emit_insn ((mode
== DImode
12406 : gen_lshrdi3
) (high
[0], high
[0], GEN_INT (count
)));
/* Variable count: shrd + shr, then runtime fix-up for count >= width. */
12411 if (!rtx_equal_p (operands
[0], operands
[1]))
12412 emit_move_insn (operands
[0], operands
[1]);
12414 (mode
== DImode
? split_di
: split_ti
) (operands
, 1, low
, high
);
12416 emit_insn ((mode
== DImode
12418 : gen_x86_64_shrd
) (low
[0], high
[0], operands
[2]));
12419 emit_insn ((mode
== DImode
12421 : gen_lshrdi3
) (high
[0], high
[0], operands
[2]));
12423 /* Heh. By reversing the arguments, we can reuse this pattern. */
12424 if (TARGET_CMOVE
&& scratch
)
12426 ix86_expand_clear (scratch
);
12427 emit_insn ((mode
== DImode
12428 ? gen_x86_shift_adj_1
12429 : gen_x86_64_shift_adj
) (low
[0], high
[0], operands
[2],
/* No cmov: branchy adjustment pattern with swapped halves. */
12433 emit_insn (gen_x86_shift_adj_2 (low
[0], high
[0], operands
[2]));
12437 /* Helper function for the string operations below. Dest VARIABLE whether
12438 it is aligned to VALUE bytes. If true, jump to the label. */
12440 ix86_expand_aligntest (rtx variable
, int value
)
12442 rtx label
= gen_label_rtx ();
12443 rtx tmpcount
= gen_reg_rtx (GET_MODE (variable
));
12444 if (GET_MODE (variable
) == DImode
)
12445 emit_insn (gen_anddi3 (tmpcount
, variable
, GEN_INT (value
)));
12447 emit_insn (gen_andsi3 (tmpcount
, variable
, GEN_INT (value
)));
12448 emit_cmp_and_jump_insns (tmpcount
, const0_rtx
, EQ
, 0, GET_MODE (variable
),
12453 /* Adjust COUNTER by the VALUE. */
12455 ix86_adjust_counter (rtx countreg
, HOST_WIDE_INT value
)
12457 if (GET_MODE (countreg
) == DImode
)
12458 emit_insn (gen_adddi3 (countreg
, countreg
, GEN_INT (-value
)));
12460 emit_insn (gen_addsi3 (countreg
, countreg
, GEN_INT (-value
)));
12463 /* Zero extend possibly SImode EXP to Pmode register. */
12465 ix86_zero_extend_to_Pmode (rtx exp
)
12468 if (GET_MODE (exp
) == VOIDmode
)
12469 return force_reg (Pmode
, exp
);
12470 if (GET_MODE (exp
) == Pmode
)
12471 return copy_to_mode_reg (Pmode
, exp
);
12472 r
= gen_reg_rtx (Pmode
);
12473 emit_insn (gen_zero_extendsidi2 (r
, exp
));
12477 /* Expand string move (memcpy) operation. Use i386 string operations when
12478 profitable. expand_clrmem contains similar code. */
12480 ix86_expand_movmem (rtx dst
, rtx src
, rtx count_exp
, rtx align_exp
)
12482 rtx srcreg
, destreg
, countreg
, srcexp
, destexp
;
12483 enum machine_mode counter_mode
;
12484 HOST_WIDE_INT align
= 0;
12485 unsigned HOST_WIDE_INT count
= 0;
12487 if (GET_CODE (align_exp
) == CONST_INT
)
12488 align
= INTVAL (align_exp
);
12490 /* Can't use any of this if the user has appropriated esi or edi. */
12491 if (global_regs
[4] || global_regs
[5])
12494 /* This simple hack avoids all inlining code and simplifies code below. */
12495 if (!TARGET_ALIGN_STRINGOPS
)
12498 if (GET_CODE (count_exp
) == CONST_INT
)
12500 count
= INTVAL (count_exp
);
12501 if (!TARGET_INLINE_ALL_STRINGOPS
&& count
> 64)
12505 /* Figure out proper mode for counter. For 32bits it is always SImode,
12506 for 64bits use SImode when possible, otherwise DImode.
12507 Set count to number of bytes copied when known at compile time. */
12509 || GET_MODE (count_exp
) == SImode
12510 || x86_64_zext_immediate_operand (count_exp
, VOIDmode
))
12511 counter_mode
= SImode
;
12513 counter_mode
= DImode
;
12515 gcc_assert (counter_mode
== SImode
|| counter_mode
== DImode
);
12517 destreg
= copy_to_mode_reg (Pmode
, XEXP (dst
, 0));
12518 if (destreg
!= XEXP (dst
, 0))
12519 dst
= replace_equiv_address_nv (dst
, destreg
);
12520 srcreg
= copy_to_mode_reg (Pmode
, XEXP (src
, 0));
12521 if (srcreg
!= XEXP (src
, 0))
12522 src
= replace_equiv_address_nv (src
, srcreg
);
12524 /* When optimizing for size emit simple rep ; movsb instruction for
12525 counts not divisible by 4, except when (movsl;)*(movsw;)?(movsb;)?
12526 sequence is shorter than mov{b,l} $count, %{ecx,cl}; rep; movsb.
12527 Sice of (movsl;)*(movsw;)?(movsb;)? sequence is
12528 count / 4 + (count & 3), the other sequence is either 4 or 7 bytes,
12529 but we don't know whether upper 24 (resp. 56) bits of %ecx will be
12530 known to be zero or not. The rep; movsb sequence causes higher
12531 register pressure though, so take that into account. */
12533 if ((!optimize
|| optimize_size
)
12538 || (count
& 3) + count
/ 4 > 6))))
12540 emit_insn (gen_cld ());
12541 countreg
= ix86_zero_extend_to_Pmode (count_exp
);
12542 destexp
= gen_rtx_PLUS (Pmode
, destreg
, countreg
);
12543 srcexp
= gen_rtx_PLUS (Pmode
, srcreg
, countreg
);
12544 emit_insn (gen_rep_mov (destreg
, dst
, srcreg
, src
, countreg
,
12548 /* For constant aligned (or small unaligned) copies use rep movsl
12549 followed by code copying the rest. For PentiumPro ensure 8 byte
12550 alignment to allow rep movsl acceleration. */
12552 else if (count
!= 0
12554 || (!TARGET_PENTIUMPRO
&& !TARGET_64BIT
&& align
>= 4)
12555 || optimize_size
|| count
< (unsigned int) 64))
12557 unsigned HOST_WIDE_INT offset
= 0;
12558 int size
= TARGET_64BIT
&& !optimize_size
? 8 : 4;
12559 rtx srcmem
, dstmem
;
12561 emit_insn (gen_cld ());
12562 if (count
& ~(size
- 1))
12564 if ((TARGET_SINGLE_STRINGOP
|| optimize_size
) && count
< 5 * 4)
12566 enum machine_mode movs_mode
= size
== 4 ? SImode
: DImode
;
12568 while (offset
< (count
& ~(size
- 1)))
12570 srcmem
= adjust_automodify_address_nv (src
, movs_mode
,
12572 dstmem
= adjust_automodify_address_nv (dst
, movs_mode
,
12574 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
12580 countreg
= GEN_INT ((count
>> (size
== 4 ? 2 : 3))
12581 & (TARGET_64BIT
? -1 : 0x3fffffff));
12582 countreg
= copy_to_mode_reg (counter_mode
, countreg
);
12583 countreg
= ix86_zero_extend_to_Pmode (countreg
);
12585 destexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
12586 GEN_INT (size
== 4 ? 2 : 3));
12587 srcexp
= gen_rtx_PLUS (Pmode
, destexp
, srcreg
);
12588 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destreg
);
12590 emit_insn (gen_rep_mov (destreg
, dst
, srcreg
, src
,
12591 countreg
, destexp
, srcexp
));
12592 offset
= count
& ~(size
- 1);
12595 if (size
== 8 && (count
& 0x04))
12597 srcmem
= adjust_automodify_address_nv (src
, SImode
, srcreg
,
12599 dstmem
= adjust_automodify_address_nv (dst
, SImode
, destreg
,
12601 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
12606 srcmem
= adjust_automodify_address_nv (src
, HImode
, srcreg
,
12608 dstmem
= adjust_automodify_address_nv (dst
, HImode
, destreg
,
12610 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
12615 srcmem
= adjust_automodify_address_nv (src
, QImode
, srcreg
,
12617 dstmem
= adjust_automodify_address_nv (dst
, QImode
, destreg
,
12619 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
12622 /* The generic code based on the glibc implementation:
12623 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
12624 allowing accelerated copying there)
12625 - copy the data using rep movsl
12626 - copy the rest. */
12631 rtx srcmem
, dstmem
;
12632 int desired_alignment
= (TARGET_PENTIUMPRO
12633 && (count
== 0 || count
>= (unsigned int) 260)
12634 ? 8 : UNITS_PER_WORD
);
12635 /* Get rid of MEM_OFFSETs, they won't be accurate. */
12636 dst
= change_address (dst
, BLKmode
, destreg
);
12637 src
= change_address (src
, BLKmode
, srcreg
);
12639 /* In case we don't know anything about the alignment, default to
12640 library version, since it is usually equally fast and result in
12643 Also emit call when we know that the count is large and call overhead
12644 will not be important. */
12645 if (!TARGET_INLINE_ALL_STRINGOPS
12646 && (align
< UNITS_PER_WORD
|| !TARGET_REP_MOVL_OPTIMAL
))
12649 if (TARGET_SINGLE_STRINGOP
)
12650 emit_insn (gen_cld ());
12652 countreg2
= gen_reg_rtx (Pmode
);
12653 countreg
= copy_to_mode_reg (counter_mode
, count_exp
);
12655 /* We don't use loops to align destination and to copy parts smaller
12656 than 4 bytes, because gcc is able to optimize such code better (in
12657 the case the destination or the count really is aligned, gcc is often
12658 able to predict the branches) and also it is friendlier to the
12659 hardware branch prediction.
12661 Using loops is beneficial for generic case, because we can
12662 handle small counts using the loops. Many CPUs (such as Athlon)
12663 have large REP prefix setup costs.
12665 This is quite costly. Maybe we can revisit this decision later or
12666 add some customizability to this code. */
12668 if (count
== 0 && align
< desired_alignment
)
12670 label
= gen_label_rtx ();
12671 emit_cmp_and_jump_insns (countreg
, GEN_INT (desired_alignment
- 1),
12672 LEU
, 0, counter_mode
, 1, label
);
12676 rtx label
= ix86_expand_aligntest (destreg
, 1);
12677 srcmem
= change_address (src
, QImode
, srcreg
);
12678 dstmem
= change_address (dst
, QImode
, destreg
);
12679 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
12680 ix86_adjust_counter (countreg
, 1);
12681 emit_label (label
);
12682 LABEL_NUSES (label
) = 1;
12686 rtx label
= ix86_expand_aligntest (destreg
, 2);
12687 srcmem
= change_address (src
, HImode
, srcreg
);
12688 dstmem
= change_address (dst
, HImode
, destreg
);
12689 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
12690 ix86_adjust_counter (countreg
, 2);
12691 emit_label (label
);
12692 LABEL_NUSES (label
) = 1;
12694 if (align
<= 4 && desired_alignment
> 4)
12696 rtx label
= ix86_expand_aligntest (destreg
, 4);
12697 srcmem
= change_address (src
, SImode
, srcreg
);
12698 dstmem
= change_address (dst
, SImode
, destreg
);
12699 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
12700 ix86_adjust_counter (countreg
, 4);
12701 emit_label (label
);
12702 LABEL_NUSES (label
) = 1;
12705 if (label
&& desired_alignment
> 4 && !TARGET_64BIT
)
12707 emit_label (label
);
12708 LABEL_NUSES (label
) = 1;
12711 if (!TARGET_SINGLE_STRINGOP
)
12712 emit_insn (gen_cld ());
12715 emit_insn (gen_lshrdi3 (countreg2
, ix86_zero_extend_to_Pmode (countreg
),
12717 destexp
= gen_rtx_ASHIFT (Pmode
, countreg2
, GEN_INT (3));
12721 emit_insn (gen_lshrsi3 (countreg2
, countreg
, const2_rtx
));
12722 destexp
= gen_rtx_ASHIFT (Pmode
, countreg2
, const2_rtx
);
12724 srcexp
= gen_rtx_PLUS (Pmode
, destexp
, srcreg
);
12725 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destreg
);
12726 emit_insn (gen_rep_mov (destreg
, dst
, srcreg
, src
,
12727 countreg2
, destexp
, srcexp
));
12731 emit_label (label
);
12732 LABEL_NUSES (label
) = 1;
12734 if (TARGET_64BIT
&& align
> 4 && count
!= 0 && (count
& 4))
12736 srcmem
= change_address (src
, SImode
, srcreg
);
12737 dstmem
= change_address (dst
, SImode
, destreg
);
12738 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
12740 if ((align
<= 4 || count
== 0) && TARGET_64BIT
)
12742 rtx label
= ix86_expand_aligntest (countreg
, 4);
12743 srcmem
= change_address (src
, SImode
, srcreg
);
12744 dstmem
= change_address (dst
, SImode
, destreg
);
12745 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
12746 emit_label (label
);
12747 LABEL_NUSES (label
) = 1;
12749 if (align
> 2 && count
!= 0 && (count
& 2))
12751 srcmem
= change_address (src
, HImode
, srcreg
);
12752 dstmem
= change_address (dst
, HImode
, destreg
);
12753 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
12755 if (align
<= 2 || count
== 0)
12757 rtx label
= ix86_expand_aligntest (countreg
, 2);
12758 srcmem
= change_address (src
, HImode
, srcreg
);
12759 dstmem
= change_address (dst
, HImode
, destreg
);
12760 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
12761 emit_label (label
);
12762 LABEL_NUSES (label
) = 1;
12764 if (align
> 1 && count
!= 0 && (count
& 1))
12766 srcmem
= change_address (src
, QImode
, srcreg
);
12767 dstmem
= change_address (dst
, QImode
, destreg
);
12768 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
12770 if (align
<= 1 || count
== 0)
12772 rtx label
= ix86_expand_aligntest (countreg
, 1);
12773 srcmem
= change_address (src
, QImode
, srcreg
);
12774 dstmem
= change_address (dst
, QImode
, destreg
);
12775 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
12776 emit_label (label
);
12777 LABEL_NUSES (label
) = 1;
12784 /* Expand string clear operation (bzero). Use i386 string operations when
12785 profitable. expand_movmem contains similar code. */
12787 ix86_expand_clrmem (rtx dst
, rtx count_exp
, rtx align_exp
)
12789 rtx destreg
, zeroreg
, countreg
, destexp
;
12790 enum machine_mode counter_mode
;
12791 HOST_WIDE_INT align
= 0;
12792 unsigned HOST_WIDE_INT count
= 0;
12794 if (GET_CODE (align_exp
) == CONST_INT
)
12795 align
= INTVAL (align_exp
);
12797 /* Can't use any of this if the user has appropriated esi. */
12798 if (global_regs
[4])
12801 /* This simple hack avoids all inlining code and simplifies code below. */
12802 if (!TARGET_ALIGN_STRINGOPS
)
12805 if (GET_CODE (count_exp
) == CONST_INT
)
12807 count
= INTVAL (count_exp
);
12808 if (!TARGET_INLINE_ALL_STRINGOPS
&& count
> 64)
12811 /* Figure out proper mode for counter. For 32bits it is always SImode,
12812 for 64bits use SImode when possible, otherwise DImode.
12813 Set count to number of bytes copied when known at compile time. */
12815 || GET_MODE (count_exp
) == SImode
12816 || x86_64_zext_immediate_operand (count_exp
, VOIDmode
))
12817 counter_mode
= SImode
;
12819 counter_mode
= DImode
;
12821 destreg
= copy_to_mode_reg (Pmode
, XEXP (dst
, 0));
12822 if (destreg
!= XEXP (dst
, 0))
12823 dst
= replace_equiv_address_nv (dst
, destreg
);
12826 /* When optimizing for size emit simple rep ; movsb instruction for
12827 counts not divisible by 4. The movl $N, %ecx; rep; stosb
12828 sequence is 7 bytes long, so if optimizing for size and count is
12829 small enough that some stosl, stosw and stosb instructions without
12830 rep are shorter, fall back into the next if. */
12832 if ((!optimize
|| optimize_size
)
12835 && (!optimize_size
|| (count
& 0x03) + (count
>> 2) > 7))))
12837 emit_insn (gen_cld ());
12839 countreg
= ix86_zero_extend_to_Pmode (count_exp
);
12840 zeroreg
= copy_to_mode_reg (QImode
, const0_rtx
);
12841 destexp
= gen_rtx_PLUS (Pmode
, destreg
, countreg
);
12842 emit_insn (gen_rep_stos (destreg
, countreg
, dst
, zeroreg
, destexp
));
12844 else if (count
!= 0
12846 || (!TARGET_PENTIUMPRO
&& !TARGET_64BIT
&& align
>= 4)
12847 || optimize_size
|| count
< (unsigned int) 64))
12849 int size
= TARGET_64BIT
&& !optimize_size
? 8 : 4;
12850 unsigned HOST_WIDE_INT offset
= 0;
12852 emit_insn (gen_cld ());
12854 zeroreg
= copy_to_mode_reg (size
== 4 ? SImode
: DImode
, const0_rtx
);
12855 if (count
& ~(size
- 1))
12857 unsigned HOST_WIDE_INT repcount
;
12858 unsigned int max_nonrep
;
12860 repcount
= count
>> (size
== 4 ? 2 : 3);
12862 repcount
&= 0x3fffffff;
12864 /* movl $N, %ecx; rep; stosl is 7 bytes, while N x stosl is N bytes.
12865 movl $N, %ecx; rep; stosq is 8 bytes, while N x stosq is 2xN
12866 bytes. In both cases the latter seems to be faster for small
12868 max_nonrep
= size
== 4 ? 7 : 4;
12869 if (!optimize_size
)
12872 case PROCESSOR_PENTIUM4
:
12873 case PROCESSOR_NOCONA
:
12880 if (repcount
<= max_nonrep
)
12881 while (repcount
-- > 0)
12883 rtx mem
= adjust_automodify_address_nv (dst
,
12884 GET_MODE (zeroreg
),
12886 emit_insn (gen_strset (destreg
, mem
, zeroreg
));
12891 countreg
= copy_to_mode_reg (counter_mode
, GEN_INT (repcount
));
12892 countreg
= ix86_zero_extend_to_Pmode (countreg
);
12893 destexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
12894 GEN_INT (size
== 4 ? 2 : 3));
12895 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destreg
);
12896 emit_insn (gen_rep_stos (destreg
, countreg
, dst
, zeroreg
,
12898 offset
= count
& ~(size
- 1);
12901 if (size
== 8 && (count
& 0x04))
12903 rtx mem
= adjust_automodify_address_nv (dst
, SImode
, destreg
,
12905 emit_insn (gen_strset (destreg
, mem
,
12906 gen_rtx_SUBREG (SImode
, zeroreg
, 0)));
12911 rtx mem
= adjust_automodify_address_nv (dst
, HImode
, destreg
,
12913 emit_insn (gen_strset (destreg
, mem
,
12914 gen_rtx_SUBREG (HImode
, zeroreg
, 0)));
12919 rtx mem
= adjust_automodify_address_nv (dst
, QImode
, destreg
,
12921 emit_insn (gen_strset (destreg
, mem
,
12922 gen_rtx_SUBREG (QImode
, zeroreg
, 0)));
12929 /* Compute desired alignment of the string operation. */
12930 int desired_alignment
= (TARGET_PENTIUMPRO
12931 && (count
== 0 || count
>= (unsigned int) 260)
12932 ? 8 : UNITS_PER_WORD
);
12934 /* In case we don't know anything about the alignment, default to
12935 library version, since it is usually equally fast and result in
12938 Also emit call when we know that the count is large and call overhead
12939 will not be important. */
12940 if (!TARGET_INLINE_ALL_STRINGOPS
12941 && (align
< UNITS_PER_WORD
|| !TARGET_REP_MOVL_OPTIMAL
))
12944 if (TARGET_SINGLE_STRINGOP
)
12945 emit_insn (gen_cld ());
12947 countreg2
= gen_reg_rtx (Pmode
);
12948 countreg
= copy_to_mode_reg (counter_mode
, count_exp
);
12949 zeroreg
= copy_to_mode_reg (Pmode
, const0_rtx
);
12950 /* Get rid of MEM_OFFSET, it won't be accurate. */
12951 dst
= change_address (dst
, BLKmode
, destreg
);
12953 if (count
== 0 && align
< desired_alignment
)
12955 label
= gen_label_rtx ();
12956 emit_cmp_and_jump_insns (countreg
, GEN_INT (desired_alignment
- 1),
12957 LEU
, 0, counter_mode
, 1, label
);
12961 rtx label
= ix86_expand_aligntest (destreg
, 1);
12962 emit_insn (gen_strset (destreg
, dst
,
12963 gen_rtx_SUBREG (QImode
, zeroreg
, 0)));
12964 ix86_adjust_counter (countreg
, 1);
12965 emit_label (label
);
12966 LABEL_NUSES (label
) = 1;
12970 rtx label
= ix86_expand_aligntest (destreg
, 2);
12971 emit_insn (gen_strset (destreg
, dst
,
12972 gen_rtx_SUBREG (HImode
, zeroreg
, 0)));
12973 ix86_adjust_counter (countreg
, 2);
12974 emit_label (label
);
12975 LABEL_NUSES (label
) = 1;
12977 if (align
<= 4 && desired_alignment
> 4)
12979 rtx label
= ix86_expand_aligntest (destreg
, 4);
12980 emit_insn (gen_strset (destreg
, dst
,
12982 ? gen_rtx_SUBREG (SImode
, zeroreg
, 0)
12984 ix86_adjust_counter (countreg
, 4);
12985 emit_label (label
);
12986 LABEL_NUSES (label
) = 1;
12989 if (label
&& desired_alignment
> 4 && !TARGET_64BIT
)
12991 emit_label (label
);
12992 LABEL_NUSES (label
) = 1;
12996 if (!TARGET_SINGLE_STRINGOP
)
12997 emit_insn (gen_cld ());
13000 emit_insn (gen_lshrdi3 (countreg2
, ix86_zero_extend_to_Pmode (countreg
),
13002 destexp
= gen_rtx_ASHIFT (Pmode
, countreg2
, GEN_INT (3));
13006 emit_insn (gen_lshrsi3 (countreg2
, countreg
, const2_rtx
));
13007 destexp
= gen_rtx_ASHIFT (Pmode
, countreg2
, const2_rtx
);
13009 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destreg
);
13010 emit_insn (gen_rep_stos (destreg
, countreg2
, dst
, zeroreg
, destexp
));
13014 emit_label (label
);
13015 LABEL_NUSES (label
) = 1;
13018 if (TARGET_64BIT
&& align
> 4 && count
!= 0 && (count
& 4))
13019 emit_insn (gen_strset (destreg
, dst
,
13020 gen_rtx_SUBREG (SImode
, zeroreg
, 0)));
13021 if (TARGET_64BIT
&& (align
<= 4 || count
== 0))
13023 rtx label
= ix86_expand_aligntest (countreg
, 4);
13024 emit_insn (gen_strset (destreg
, dst
,
13025 gen_rtx_SUBREG (SImode
, zeroreg
, 0)));
13026 emit_label (label
);
13027 LABEL_NUSES (label
) = 1;
13029 if (align
> 2 && count
!= 0 && (count
& 2))
13030 emit_insn (gen_strset (destreg
, dst
,
13031 gen_rtx_SUBREG (HImode
, zeroreg
, 0)));
13032 if (align
<= 2 || count
== 0)
13034 rtx label
= ix86_expand_aligntest (countreg
, 2);
13035 emit_insn (gen_strset (destreg
, dst
,
13036 gen_rtx_SUBREG (HImode
, zeroreg
, 0)));
13037 emit_label (label
);
13038 LABEL_NUSES (label
) = 1;
13040 if (align
> 1 && count
!= 0 && (count
& 1))
13041 emit_insn (gen_strset (destreg
, dst
,
13042 gen_rtx_SUBREG (QImode
, zeroreg
, 0)));
13043 if (align
<= 1 || count
== 0)
13045 rtx label
= ix86_expand_aligntest (countreg
, 1);
13046 emit_insn (gen_strset (destreg
, dst
,
13047 gen_rtx_SUBREG (QImode
, zeroreg
, 0)));
13048 emit_label (label
);
13049 LABEL_NUSES (label
) = 1;
13055 /* Expand strlen. */
13057 ix86_expand_strlen (rtx out
, rtx src
, rtx eoschar
, rtx align
)
13059 rtx addr
, scratch1
, scratch2
, scratch3
, scratch4
;
13061 /* The generic case of strlen expander is long. Avoid it's
13062 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
13064 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
13065 && !TARGET_INLINE_ALL_STRINGOPS
13067 && (GET_CODE (align
) != CONST_INT
|| INTVAL (align
) < 4))
13070 addr
= force_reg (Pmode
, XEXP (src
, 0));
13071 scratch1
= gen_reg_rtx (Pmode
);
13073 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
13076 /* Well it seems that some optimizer does not combine a call like
13077 foo(strlen(bar), strlen(bar));
13078 when the move and the subtraction is done here. It does calculate
13079 the length just once when these instructions are done inside of
13080 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
13081 often used and I use one fewer register for the lifetime of
13082 output_strlen_unroll() this is better. */
13084 emit_move_insn (out
, addr
);
13086 ix86_expand_strlensi_unroll_1 (out
, src
, align
);
13088 /* strlensi_unroll_1 returns the address of the zero at the end of
13089 the string, like memchr(), so compute the length by subtracting
13090 the start address. */
13092 emit_insn (gen_subdi3 (out
, out
, addr
));
13094 emit_insn (gen_subsi3 (out
, out
, addr
));
13099 scratch2
= gen_reg_rtx (Pmode
);
13100 scratch3
= gen_reg_rtx (Pmode
);
13101 scratch4
= force_reg (Pmode
, constm1_rtx
);
13103 emit_move_insn (scratch3
, addr
);
13104 eoschar
= force_reg (QImode
, eoschar
);
13106 emit_insn (gen_cld ());
13107 src
= replace_equiv_address_nv (src
, scratch3
);
13109 /* If .md starts supporting :P, this can be done in .md. */
13110 unspec
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (4, src
, eoschar
, align
,
13111 scratch4
), UNSPEC_SCAS
);
13112 emit_insn (gen_strlenqi_1 (scratch1
, scratch3
, unspec
));
13115 emit_insn (gen_one_cmpldi2 (scratch2
, scratch1
));
13116 emit_insn (gen_adddi3 (out
, scratch2
, constm1_rtx
));
13120 emit_insn (gen_one_cmplsi2 (scratch2
, scratch1
));
13121 emit_insn (gen_addsi3 (out
, scratch2
, constm1_rtx
));
13127 /* Expand the appropriate insns for doing strlen if not just doing
13130 out = result, initialized with the start address
13131 align_rtx = alignment of the address.
13132 scratch = scratch register, initialized with the startaddress when
13133 not aligned, otherwise undefined
13135 This is just the body. It needs the initializations mentioned above and
13136 some address computing at the end. These things are done in i386.md. */
13139 ix86_expand_strlensi_unroll_1 (rtx out
, rtx src
, rtx align_rtx
)
13143 rtx align_2_label
= NULL_RTX
;
13144 rtx align_3_label
= NULL_RTX
;
13145 rtx align_4_label
= gen_label_rtx ();
13146 rtx end_0_label
= gen_label_rtx ();
13148 rtx tmpreg
= gen_reg_rtx (SImode
);
13149 rtx scratch
= gen_reg_rtx (SImode
);
13153 if (GET_CODE (align_rtx
) == CONST_INT
)
13154 align
= INTVAL (align_rtx
);
13156 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
13158 /* Is there a known alignment and is it less than 4? */
13161 rtx scratch1
= gen_reg_rtx (Pmode
);
13162 emit_move_insn (scratch1
, out
);
13163 /* Is there a known alignment and is it not 2? */
13166 align_3_label
= gen_label_rtx (); /* Label when aligned to 3-byte */
13167 align_2_label
= gen_label_rtx (); /* Label when aligned to 2-byte */
13169 /* Leave just the 3 lower bits. */
13170 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, GEN_INT (3),
13171 NULL_RTX
, 0, OPTAB_WIDEN
);
13173 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
13174 Pmode
, 1, align_4_label
);
13175 emit_cmp_and_jump_insns (align_rtx
, const2_rtx
, EQ
, NULL
,
13176 Pmode
, 1, align_2_label
);
13177 emit_cmp_and_jump_insns (align_rtx
, const2_rtx
, GTU
, NULL
,
13178 Pmode
, 1, align_3_label
);
13182 /* Since the alignment is 2, we have to check 2 or 0 bytes;
13183 check if is aligned to 4 - byte. */
13185 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, const2_rtx
,
13186 NULL_RTX
, 0, OPTAB_WIDEN
);
13188 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
13189 Pmode
, 1, align_4_label
);
13192 mem
= change_address (src
, QImode
, out
);
13194 /* Now compare the bytes. */
13196 /* Compare the first n unaligned byte on a byte per byte basis. */
13197 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
,
13198 QImode
, 1, end_0_label
);
13200 /* Increment the address. */
13202 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
13204 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
13206 /* Not needed with an alignment of 2 */
13209 emit_label (align_2_label
);
13211 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
13215 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
13217 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
13219 emit_label (align_3_label
);
13222 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
13226 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
13228 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
13231 /* Generate loop to check 4 bytes at a time. It is not a good idea to
13232 align this loop. It gives only huge programs, but does not help to
13234 emit_label (align_4_label
);
13236 mem
= change_address (src
, SImode
, out
);
13237 emit_move_insn (scratch
, mem
);
13239 emit_insn (gen_adddi3 (out
, out
, GEN_INT (4)));
13241 emit_insn (gen_addsi3 (out
, out
, GEN_INT (4)));
13243 /* This formula yields a nonzero result iff one of the bytes is zero.
13244 This saves three branches inside loop and many cycles. */
13246 emit_insn (gen_addsi3 (tmpreg
, scratch
, GEN_INT (-0x01010101)));
13247 emit_insn (gen_one_cmplsi2 (scratch
, scratch
));
13248 emit_insn (gen_andsi3 (tmpreg
, tmpreg
, scratch
));
13249 emit_insn (gen_andsi3 (tmpreg
, tmpreg
,
13250 gen_int_mode (0x80808080, SImode
)));
13251 emit_cmp_and_jump_insns (tmpreg
, const0_rtx
, EQ
, 0, SImode
, 1,
13256 rtx reg
= gen_reg_rtx (SImode
);
13257 rtx reg2
= gen_reg_rtx (Pmode
);
13258 emit_move_insn (reg
, tmpreg
);
13259 emit_insn (gen_lshrsi3 (reg
, reg
, GEN_INT (16)));
13261 /* If zero is not in the first two bytes, move two bytes forward. */
13262 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
13263 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
13264 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
13265 emit_insn (gen_rtx_SET (VOIDmode
, tmpreg
,
13266 gen_rtx_IF_THEN_ELSE (SImode
, tmp
,
13269 /* Emit lea manually to avoid clobbering of flags. */
13270 emit_insn (gen_rtx_SET (SImode
, reg2
,
13271 gen_rtx_PLUS (Pmode
, out
, const2_rtx
)));
13273 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
13274 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
13275 emit_insn (gen_rtx_SET (VOIDmode
, out
,
13276 gen_rtx_IF_THEN_ELSE (Pmode
, tmp
,
13283 rtx end_2_label
= gen_label_rtx ();
13284 /* Is zero in the first two bytes? */
13286 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
13287 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
13288 tmp
= gen_rtx_NE (VOIDmode
, tmp
, const0_rtx
);
13289 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
13290 gen_rtx_LABEL_REF (VOIDmode
, end_2_label
),
13292 tmp
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
13293 JUMP_LABEL (tmp
) = end_2_label
;
13295 /* Not in the first two. Move two bytes forward. */
13296 emit_insn (gen_lshrsi3 (tmpreg
, tmpreg
, GEN_INT (16)));
13298 emit_insn (gen_adddi3 (out
, out
, const2_rtx
));
13300 emit_insn (gen_addsi3 (out
, out
, const2_rtx
));
13302 emit_label (end_2_label
);
13306 /* Avoid branch in fixing the byte. */
13307 tmpreg
= gen_lowpart (QImode
, tmpreg
);
13308 emit_insn (gen_addqi3_cc (tmpreg
, tmpreg
, tmpreg
));
13309 cmp
= gen_rtx_LTU (Pmode
, gen_rtx_REG (CCmode
, 17), const0_rtx
);
13311 emit_insn (gen_subdi3_carry_rex64 (out
, out
, GEN_INT (3), cmp
));
13313 emit_insn (gen_subsi3_carry (out
, out
, GEN_INT (3), cmp
));
13315 emit_label (end_0_label
);
13319 ix86_expand_call (rtx retval
, rtx fnaddr
, rtx callarg1
,
13320 rtx callarg2 ATTRIBUTE_UNUSED
,
13321 rtx pop
, int sibcall
)
13323 rtx use
= NULL
, call
;
13325 if (pop
== const0_rtx
)
13327 gcc_assert (!TARGET_64BIT
|| !pop
);
13330 if (flag_pic
&& GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
)
13331 fnaddr
= machopic_indirect_call_target (fnaddr
);
13333 /* Static functions and indirect calls don't need the pic register. */
13334 if (! TARGET_64BIT
&& flag_pic
13335 && GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
13336 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr
, 0)))
13337 use_reg (&use
, pic_offset_table_rtx
);
13339 if (TARGET_64BIT
&& INTVAL (callarg2
) >= 0)
13341 rtx al
= gen_rtx_REG (QImode
, 0);
13342 emit_move_insn (al
, callarg2
);
13343 use_reg (&use
, al
);
13345 #endif /* TARGET_MACHO */
13347 if (! call_insn_operand (XEXP (fnaddr
, 0), Pmode
))
13349 fnaddr
= copy_to_mode_reg (Pmode
, XEXP (fnaddr
, 0));
13350 fnaddr
= gen_rtx_MEM (QImode
, fnaddr
);
13352 if (sibcall
&& TARGET_64BIT
13353 && !constant_call_address_operand (XEXP (fnaddr
, 0), Pmode
))
13356 addr
= copy_to_mode_reg (Pmode
, XEXP (fnaddr
, 0));
13357 fnaddr
= gen_rtx_REG (Pmode
, FIRST_REX_INT_REG
+ 3 /* R11 */);
13358 emit_move_insn (fnaddr
, addr
);
13359 fnaddr
= gen_rtx_MEM (QImode
, fnaddr
);
13362 call
= gen_rtx_CALL (VOIDmode
, fnaddr
, callarg1
);
13364 call
= gen_rtx_SET (VOIDmode
, retval
, call
);
13367 pop
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, pop
);
13368 pop
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, pop
);
13369 call
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, call
, pop
));
13372 call
= emit_call_insn (call
);
13374 CALL_INSN_FUNCTION_USAGE (call
) = use
;
13378 /* Clear stack slot assignments remembered from previous functions.
13379 This is called from INIT_EXPANDERS once before RTL is emitted for each
13382 static struct machine_function
*
13383 ix86_init_machine_status (void)
13385 struct machine_function
*f
;
13387 f
= ggc_alloc_cleared (sizeof (struct machine_function
));
13388 f
->use_fast_prologue_epilogue_nregs
= -1;
13389 f
->tls_descriptor_call_expanded_p
= 0;
13394 /* Return a MEM corresponding to a stack slot with mode MODE.
13395 Allocate a new slot if necessary.
13397 The RTL for a function can have several slots available: N is
13398 which slot to use. */
13401 assign_386_stack_local (enum machine_mode mode
, enum ix86_stack_slot n
)
13403 struct stack_local_entry
*s
;
13405 gcc_assert (n
< MAX_386_STACK_LOCALS
);
13407 for (s
= ix86_stack_locals
; s
; s
= s
->next
)
13408 if (s
->mode
== mode
&& s
->n
== n
)
13411 s
= (struct stack_local_entry
*)
13412 ggc_alloc (sizeof (struct stack_local_entry
));
13415 s
->rtl
= assign_stack_local (mode
, GET_MODE_SIZE (mode
), 0);
13417 s
->next
= ix86_stack_locals
;
13418 ix86_stack_locals
= s
;
13422 /* Construct the SYMBOL_REF for the tls_get_addr function. */
13424 static GTY(()) rtx ix86_tls_symbol
;
13426 ix86_tls_get_addr (void)
13429 if (!ix86_tls_symbol
)
13431 ix86_tls_symbol
= gen_rtx_SYMBOL_REF (Pmode
,
13432 (TARGET_ANY_GNU_TLS
13434 ? "___tls_get_addr"
13435 : "__tls_get_addr");
13438 return ix86_tls_symbol
;
13441 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
13443 static GTY(()) rtx ix86_tls_module_base_symbol
;
13445 ix86_tls_module_base (void)
13448 if (!ix86_tls_module_base_symbol
)
13450 ix86_tls_module_base_symbol
= gen_rtx_SYMBOL_REF (Pmode
,
13451 "_TLS_MODULE_BASE_");
13452 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol
)
13453 |= TLS_MODEL_GLOBAL_DYNAMIC
<< SYMBOL_FLAG_TLS_SHIFT
;
13456 return ix86_tls_module_base_symbol
;
13459 /* Calculate the length of the memory address in the instruction
13460 encoding. Does not include the one-byte modrm, opcode, or prefix. */
/* Return the encoded length in bytes of the memory address ADDR, not
   counting the one-byte ModRM, the opcode, or any prefixes.
   NOTE(review): the extraction has dropped the statements that
   accumulate and return the length (the `len` updates and `return`
   lines); only the classification conditions survive below.  Confirm
   the returned byte counts against the complete source.  */
13463 memory_address_length (rtx addr
)
13465 struct ix86_address parts
;
13466 rtx base
, index
, disp
;
/* Auto-modify addresses encode no separate displacement/SIB bytes.  */
13470 if (GET_CODE (addr
) == PRE_DEC
13471 || GET_CODE (addr
) == POST_INC
13472 || GET_CODE (addr
) == PRE_MODIFY
13473 || GET_CODE (addr
) == POST_MODIFY
)
13476 ok
= ix86_decompose_address (addr
, &parts
);
/* Strip SUBREGs so the register identity checks below see hard regs.  */
13479 if (parts
.base
&& GET_CODE (parts
.base
) == SUBREG
)
13480 parts
.base
= SUBREG_REG (parts
.base
);
13481 if (parts
.index
&& GET_CODE (parts
.index
) == SUBREG
)
13482 parts
.index
= SUBREG_REG (parts
.index
);
13485 index
= parts
.index
;
13490 - esp as the base always wants an index,
13491 - ebp as the base always wants a displacement. */
13493 /* Register Indirect. */
13494 if (base
&& !index
&& !disp
)
13496 /* esp (for its index) and ebp (for its displacement) need
13497 the two-byte modrm form. */
13498 if (addr
== stack_pointer_rtx
13499 || addr
== arg_pointer_rtx
13500 || addr
== frame_pointer_rtx
13501 || addr
== hard_frame_pointer_rtx
)
13505 /* Direct Addressing. */
13506 else if (disp
&& !base
&& !index
)
13511 /* Find the length of the displacement constant. */
/* satisfies_constraint_K means the displacement fits in a signed byte.  */
13514 if (base
&& satisfies_constraint_K (disp
))
13519 /* ebp always wants a displacement. */
13520 else if (base
== hard_frame_pointer_rtx
)
13523 /* An index requires the two-byte modrm form.... */
13525 /* ...like esp, which always wants an index. */
13526 || base
== stack_pointer_rtx
13527 || base
== arg_pointer_rtx
13528 || base
== frame_pointer_rtx
)
13535 /* Compute default value for "length_immediate" attribute. When SHORTFORM
13536 is set, expect that insn have 8bit immediate alternative. */
/* Compute the default value of the "length_immediate" insn attribute:
   the byte length of the constant operand of INSN.  When SHORTFORM is
   nonzero the insn is expected to have an 8-bit-immediate alternative
   (checked via constraint K).  NOTE(review): the per-mode return
   values of the switch were dropped by the extraction; confirm them
   against the complete source.  */
13538 ix86_attr_length_immediate_default (rtx insn
, int shortform
)
13542 extract_insn_cached (insn
);
/* Scan operands for a constant; its size determines the result.  */
13543 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
13544 if (CONSTANT_P (recog_data
.operand
[i
]))
/* A signed-8-bit immediate uses the short encoding.  */
13547 if (shortform
&& satisfies_constraint_K (recog_data
.operand
[i
]))
13551 switch (get_attr_mode (insn
))
13562 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
13567 fatal_insn ("unknown insn mode", insn
);
13573 /* Compute default value for "length_address" attribute. */
/* Compute the default value of the "length_address" insn attribute:
   the encoded length of INSN's memory address, if any.  For LEA the
   address is the SET_SRC of the pattern; otherwise the first MEM
   operand is measured.  */
13575 ix86_attr_length_address_default (rtx insn
)
13579 if (get_attr_type (insn
) == TYPE_LEA
)
13581 rtx set
= PATTERN (insn
);
/* Some LEA patterns are PARALLELs whose first element is the SET.  */
13583 if (GET_CODE (set
) == PARALLEL
)
13584 set
= XVECEXP (set
, 0, 0);
13586 gcc_assert (GET_CODE (set
) == SET
);
13588 return memory_address_length (SET_SRC (set
));
/* Not an LEA: look for a MEM among the recognized operands.  */
13591 extract_insn_cached (insn
);
13592 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
13593 if (GET_CODE (recog_data
.operand
[i
]) == MEM
)
13595 return memory_address_length (XEXP (recog_data
.operand
[i
], 0));
13601 /* Return the maximum number of instructions a cpu can issue. */
/* Return the maximum number of instructions the tuned-for CPU can issue
   per cycle (scheduler issue rate), switching on ix86_tune.
   NOTE(review): the per-case return values and the default case were
   dropped by the extraction; confirm them against the complete
   source.  */
13604 ix86_issue_rate (void)
13608 case PROCESSOR_PENTIUM
:
13612 case PROCESSOR_PENTIUMPRO
:
13613 case PROCESSOR_PENTIUM4
:
13614 case PROCESSOR_ATHLON
:
13616 case PROCESSOR_NOCONA
:
13617 case PROCESSOR_GENERIC32
:
13618 case PROCESSOR_GENERIC64
:
13626 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
13627 by DEP_INSN and nothing set by DEP_INSN. */
13630 ix86_flags_dependent (rtx insn
, rtx dep_insn
, enum attr_type insn_type
)
13634 /* Simplify the test for uninteresting insns. */
13635 if (insn_type
!= TYPE_SETCC
13636 && insn_type
!= TYPE_ICMOV
13637 && insn_type
!= TYPE_FCMOV
13638 && insn_type
!= TYPE_IBR
)
13641 if ((set
= single_set (dep_insn
)) != 0)
13643 set
= SET_DEST (set
);
13646 else if (GET_CODE (PATTERN (dep_insn
)) == PARALLEL
13647 && XVECLEN (PATTERN (dep_insn
), 0) == 2
13648 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 0)) == SET
13649 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 1)) == SET
)
13651 set
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
13652 set2
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
13657 if (GET_CODE (set
) != REG
|| REGNO (set
) != FLAGS_REG
)
13660 /* This test is true if the dependent insn reads the flags but
13661 not any other potentially set register. */
13662 if (!reg_overlap_mentioned_p (set
, PATTERN (insn
)))
13665 if (set2
&& reg_overlap_mentioned_p (set2
, PATTERN (insn
)))
13671 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
13672 address with operands set by DEP_INSN. */
/* A subroutine of ix86_adjust_cost: return nonzero iff INSN has a
   memory address whose operands are set by DEP_INSN (an address
   generation interlock).  For LEA the "address" is the SET_SRC of the
   pattern; otherwise the first MEM operand's address is used.  The
   final answer is whether DEP_INSN modifies anything that address
   mentions.  */
13675 ix86_agi_dependent (rtx insn
, rtx dep_insn
, enum attr_type insn_type
)
13679 if (insn_type
== TYPE_LEA
/* LEA: the address to test is the source of the (possibly PARALLEL-
   wrapped) SET.  */
13682 addr
= PATTERN (insn
);
13684 if (GET_CODE (addr
) == PARALLEL
)
13685 addr
= XVECEXP (addr
, 0, 0);
13687 gcc_assert (GET_CODE (addr
) == SET
);
13689 addr
= SET_SRC (addr
);
/* Otherwise scan the recognized operands for a MEM and take its
   address.  */
13694 extract_insn_cached (insn
);
13695 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
13696 if (GET_CODE (recog_data
.operand
[i
]) == MEM
)
13698 addr
= XEXP (recog_data
.operand
[i
], 0);
/* Interlock exists iff DEP_INSN writes something the address reads.  */
13705 return modified_in_p (addr
, dep_insn
);
/* Scheduler hook (TARGET_SCHED_ADJUST_COST): adjust COST, the latency
   of the dependence LINK between DEP_INSN (producer) and INSN
   (consumer), according to the CPU selected by ix86_tune.
   NOTE(review): many cost-assignment and return statements were
   dropped by the extraction; only the classification conditions
   survive.  Confirm the concrete cost values against the complete
   source.  */
13709 ix86_adjust_cost (rtx insn
, rtx link
, rtx dep_insn
, int cost
)
13711 enum attr_type insn_type
, dep_insn_type
;
13712 enum attr_memory memory
;
13714 int dep_insn_code_number
;
13716 /* Anti and output dependencies have zero cost on all CPUs. */
13717 if (REG_NOTE_KIND (link
) != 0)
13720 dep_insn_code_number
= recog_memoized (dep_insn
);
13722 /* If we can't recognize the insns, we can't really do anything. */
13723 if (dep_insn_code_number
< 0 || recog_memoized (insn
) < 0)
13726 insn_type
= get_attr_type (insn
);
13727 dep_insn_type
= get_attr_type (dep_insn
);
13731 case PROCESSOR_PENTIUM
:
13732 /* Address Generation Interlock adds a cycle of latency. */
13733 if (ix86_agi_dependent (insn
, dep_insn
, insn_type
))
13736 /* ??? Compares pair with jump/setcc. */
13737 if (ix86_flags_dependent (insn
, dep_insn
, insn_type
))
13740 /* Floating point stores require value to be ready one cycle earlier. */
13741 if (insn_type
== TYPE_FMOV
13742 && get_attr_memory (insn
) == MEMORY_STORE
13743 && !ix86_agi_dependent (insn
, dep_insn
, insn_type
))
13747 case PROCESSOR_PENTIUMPRO
:
13748 memory
= get_attr_memory (insn
);
13750 /* INT->FP conversion is expensive. */
13751 if (get_attr_fp_int_src (dep_insn
))
13754 /* There is one cycle extra latency between an FP op and a store. */
13755 if (insn_type
== TYPE_FMOV
13756 && (set
= single_set (dep_insn
)) != NULL_RTX
13757 && (set2
= single_set (insn
)) != NULL_RTX
13758 && rtx_equal_p (SET_DEST (set
), SET_SRC (set2
))
13759 && GET_CODE (SET_DEST (set2
)) == MEM
)
13762 /* Show ability of reorder buffer to hide latency of load by executing
13763 in parallel with previous instruction in case
13764 previous instruction is not needed to compute the address. */
13765 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
13766 && !ix86_agi_dependent (insn
, dep_insn
, insn_type
))
13768 /* Claim moves to take one cycle, as core can issue one load
13769 at time and the next load can start cycle later. */
13770 if (dep_insn_type
== TYPE_IMOV
13771 || dep_insn_type
== TYPE_FMOV
)
/* NOTE(review): a processor case label (presumably PROCESSOR_K6) was
   dropped here by the extraction -- confirm.  */
13779 memory
= get_attr_memory (insn
);
13781 /* The esp dependency is resolved before the instruction is really
13783 if ((insn_type
== TYPE_PUSH
|| insn_type
== TYPE_POP
)
13784 && (dep_insn_type
== TYPE_PUSH
|| dep_insn_type
== TYPE_POP
))
13787 /* INT->FP conversion is expensive. */
13788 if (get_attr_fp_int_src (dep_insn
))
13791 /* Show ability of reorder buffer to hide latency of load by executing
13792 in parallel with previous instruction in case
13793 previous instruction is not needed to compute the address. */
13794 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
13795 && !ix86_agi_dependent (insn
, dep_insn
, insn_type
))
13797 /* Claim moves to take one cycle, as core can issue one load
13798 at time and the next load can start cycle later. */
13799 if (dep_insn_type
== TYPE_IMOV
13800 || dep_insn_type
== TYPE_FMOV
)
13809 case PROCESSOR_ATHLON
:
13811 case PROCESSOR_GENERIC32
:
13812 case PROCESSOR_GENERIC64
:
13813 memory
= get_attr_memory (insn
);
13815 /* Show ability of reorder buffer to hide latency of load by executing
13816 in parallel with previous instruction in case
13817 previous instruction is not needed to compute the address. */
13818 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
13819 && !ix86_agi_dependent (insn
, dep_insn
, insn_type
))
13821 enum attr_unit unit
= get_attr_unit (insn
);
13824 /* Because of the difference between the length of integer and
13825 floating unit pipeline preparation stages, the memory operands
13826 for floating point are cheaper.
13828 ??? For Athlon it the difference is most probably 2. */
13829 if (unit
== UNIT_INTEGER
|| unit
== UNIT_UNKNOWN
)
13832 loadcost
= TARGET_ATHLON
? 2 : 0;
13834 if (cost
>= loadcost
)
13847 /* How many alternative schedules to try. This should be as wide as the
13848 scheduling freedom in the DFA, but no wider. Making this value too
13849 large results extra work for the scheduler. */
/* Scheduler hook: return how many alternative schedules to try, sized
   to the scheduling freedom of the tuned-for CPU's DFA.  NOTE(review):
   the return values for each case were dropped by the extraction;
   confirm against the complete source.  */
13852 ia32_multipass_dfa_lookahead (void)
13854 if (ix86_tune
== PROCESSOR_PENTIUM
)
13857 if (ix86_tune
== PROCESSOR_PENTIUMPRO
13858 || ix86_tune
== PROCESSOR_K6
)
13866 /* Compute the alignment given to a constant that is being placed in memory.
13867 EXP is the constant and ALIGN is the alignment that the object would
13869 The value of this function is used instead of that alignment to align
/* Compute the alignment (in bits) given to constant EXP when placed in
   memory; ALIGN is the alignment it would ordinarily get.  Doubles are
   raised toward 64 bits, 128-bit-mode constants toward 128, and long
   string constants toward BITS_PER_WORD when not optimizing for
   size.  NOTE(review): the return statements for the REAL_CST branches
   were dropped by the extraction.  */
13873 ix86_constant_alignment (tree exp
, int align
)
13875 if (TREE_CODE (exp
) == REAL_CST
)
13877 if (TYPE_MODE (TREE_TYPE (exp
)) == DFmode
&& align
< 64)
13879 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp
))) && align
< 128)
/* Long strings benefit from word alignment (cheaper block ops),
   but only when not optimizing for size.  */
13882 else if (!optimize_size
&& TREE_CODE (exp
) == STRING_CST
13883 && TREE_STRING_LENGTH (exp
) >= 31 && align
< BITS_PER_WORD
)
13884 return BITS_PER_WORD
;
13889 /* Compute the alignment for a static variable.
13890 TYPE is the data type, and ALIGN is the alignment that
13891 the object would ordinarily have. The value of this function is used
13892 instead of that alignment to align the object. */
/* Compute the alignment (in bits) for a static variable of TYPE; ALIGN
   is the alignment it would ordinarily get.  Large aggregates are
   raised to max_align (capped at BITS_PER_WORD when optimizing for
   size), and DFmode / 128-bit-mode data are raised toward 64/128 bits.
   NOTE(review): the return statements were dropped by the extraction;
   confirm the returned values against the complete source.  */
13895 ix86_data_alignment (tree type
, int align
)
13897 int max_align
= optimize_size
? BITS_PER_WORD
: 256;
/* Raise big aggregates to max_align.  TREE_INT_CST_HIGH nonzero means
   the size exceeds the low word, i.e. is certainly large enough.  */
13899 if (AGGREGATE_TYPE_P (type
)
13900 && TYPE_SIZE (type
)
13901 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
13902 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= (unsigned) max_align
13903 || TREE_INT_CST_HIGH (TYPE_SIZE (type
)))
13904 && align
< max_align
)
13907 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
13908 to 16byte boundary. */
13911 if (AGGREGATE_TYPE_P (type
)
13912 && TYPE_SIZE (type
)
13913 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
13914 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 128
13915 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
/* Per-type-category refinement based on the (element) machine mode.  */
13919 if (TREE_CODE (type
) == ARRAY_TYPE
)
13921 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
13923 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
13926 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
13929 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
13931 if (TYPE_MODE (type
) == XCmode
&& align
< 128)
13934 else if ((TREE_CODE (type
) == RECORD_TYPE
13935 || TREE_CODE (type
) == UNION_TYPE
13936 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
13937 && TYPE_FIELDS (type
))
/* For records/unions, key off the mode of the first field.  */
13939 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
13941 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
13944 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
13945 || TREE_CODE (type
) == INTEGER_TYPE
)
13947 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
13949 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
13956 /* Compute the alignment for a local variable.
13957 TYPE is the data type, and ALIGN is the alignment that
13958 the object would ordinarily have. The value of this macro is used
13959 instead of that alignment to align the object. */
/* Compute the alignment (in bits) for a local variable of TYPE; ALIGN
   is the alignment it would ordinarily get.  Structure mirrors
   ix86_data_alignment but with a lower (16-byte) aggregate threshold.
   NOTE(review): the return statements were dropped by the extraction;
   confirm the returned values against the complete source.  */
13962 ix86_local_alignment (tree type
, int align
)
13964 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
13965 to 16byte boundary. */
13968 if (AGGREGATE_TYPE_P (type
)
13969 && TYPE_SIZE (type
)
13970 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
13971 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 16
13972 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
/* Per-type-category refinement, as in ix86_data_alignment.  */
13975 if (TREE_CODE (type
) == ARRAY_TYPE
)
13977 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
13979 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
13982 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
13984 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
13986 if (TYPE_MODE (type
) == XCmode
&& align
< 128)
13989 else if ((TREE_CODE (type
) == RECORD_TYPE
13990 || TREE_CODE (type
) == UNION_TYPE
13991 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
13992 && TYPE_FIELDS (type
))
/* For records/unions, key off the mode of the first field.  */
13994 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
13996 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
13999 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
14000 || TREE_CODE (type
) == INTEGER_TYPE
)
14003 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
14005 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
14011 /* Emit RTL insns to initialize the variable parts of a trampoline.
14012 FNADDR is an RTX for the address of the function's pure code.
14013 CXT is an RTX for the static chain value for the function. */
/* Emit RTL to fill in the variable parts of a trampoline at TRAMP.
   FNADDR is the address of the target function's code; CXT is the
   static chain value.  The 32-bit form emits "movl $cxt, %ecx" (0xb9)
   followed by "jmp rel32" (0xe9); the 64-bit form builds
   mov-immediate into %r11/%r10 (REX-prefixed 0xbb/0xba) and an
   indirect "jmp *%r11" (0x49 0xff 0xe3).  NOTE(review): the extraction
   dropped the TARGET_64BIT branch structure, the `offset` updates, and
   one operand of two DImode stores; confirm layout/offsets against the
   complete source.  */
14015 x86_initialize_trampoline (rtx tramp
, rtx fnaddr
, rtx cxt
)
14019 /* Compute offset from the end of the jmp to the target function. */
14020 rtx disp
= expand_binop (SImode
, sub_optab
, fnaddr
,
14021 plus_constant (tramp
, 10),
14022 NULL_RTX
, 1, OPTAB_DIRECT
);
/* 0xb9 = movl immediate into %ecx (static chain).  */
14023 emit_move_insn (gen_rtx_MEM (QImode
, tramp
),
14024 gen_int_mode (0xb9, QImode
));
14025 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, 1)), cxt
);
/* 0xe9 = jmp rel32 to the target function.  */
14026 emit_move_insn (gen_rtx_MEM (QImode
, plus_constant (tramp
, 5)),
14027 gen_int_mode (0xe9, QImode
));
14028 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, 6)), disp
);
14033 /* Try to load address using shorter movl instead of movabs.
14034 We may want to support movq for kernel mode, but kernel does not use
14035 trampolines at the moment. */
14036 if (x86_64_zext_immediate_operand (fnaddr
, VOIDmode
))
14038 fnaddr
= copy_to_mode_reg (DImode
, fnaddr
);
/* 0x41 0xbb = movl imm32 into %r11d (zero-extends into %r11).  */
14039 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
14040 gen_int_mode (0xbb41, HImode
));
14041 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, offset
+ 2)),
14042 gen_lowpart (SImode
, fnaddr
));
/* 0x49 0xbb = movabs imm64 into %r11 (full-width address).  */
14047 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
14048 gen_int_mode (0xbb49, HImode
));
14049 emit_move_insn (gen_rtx_MEM (DImode
, plus_constant (tramp
, offset
+ 2)),
14053 /* Load static chain using movabs to r10. */
14054 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
14055 gen_int_mode (0xba49, HImode
));
14056 emit_move_insn (gen_rtx_MEM (DImode
, plus_constant (tramp
, offset
+ 2)),
14059 /* Jump to the r11 */
14060 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
14061 gen_int_mode (0xff49, HImode
));
14062 emit_move_insn (gen_rtx_MEM (QImode
, plus_constant (tramp
, offset
+2)),
14063 gen_int_mode (0xe3, QImode
));
/* The emitted code must fit the fixed trampoline size.  */
14065 gcc_assert (offset
<= TRAMPOLINE_SIZE
);
14068 #ifdef ENABLE_EXECUTE_STACK
/* Some platforms need an explicit call to make the stack executable.  */
14069 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, "__enable_execute_stack"),
14070 LCT_NORMAL
, VOIDmode
, 1, tramp
, Pmode
);
14074 /* Codes for all the SSE/MMX builtins. */
14077 IX86_BUILTIN_ADDPS
,
14078 IX86_BUILTIN_ADDSS
,
14079 IX86_BUILTIN_DIVPS
,
14080 IX86_BUILTIN_DIVSS
,
14081 IX86_BUILTIN_MULPS
,
14082 IX86_BUILTIN_MULSS
,
14083 IX86_BUILTIN_SUBPS
,
14084 IX86_BUILTIN_SUBSS
,
14086 IX86_BUILTIN_CMPEQPS
,
14087 IX86_BUILTIN_CMPLTPS
,
14088 IX86_BUILTIN_CMPLEPS
,
14089 IX86_BUILTIN_CMPGTPS
,
14090 IX86_BUILTIN_CMPGEPS
,
14091 IX86_BUILTIN_CMPNEQPS
,
14092 IX86_BUILTIN_CMPNLTPS
,
14093 IX86_BUILTIN_CMPNLEPS
,
14094 IX86_BUILTIN_CMPNGTPS
,
14095 IX86_BUILTIN_CMPNGEPS
,
14096 IX86_BUILTIN_CMPORDPS
,
14097 IX86_BUILTIN_CMPUNORDPS
,
14098 IX86_BUILTIN_CMPEQSS
,
14099 IX86_BUILTIN_CMPLTSS
,
14100 IX86_BUILTIN_CMPLESS
,
14101 IX86_BUILTIN_CMPNEQSS
,
14102 IX86_BUILTIN_CMPNLTSS
,
14103 IX86_BUILTIN_CMPNLESS
,
14104 IX86_BUILTIN_CMPNGTSS
,
14105 IX86_BUILTIN_CMPNGESS
,
14106 IX86_BUILTIN_CMPORDSS
,
14107 IX86_BUILTIN_CMPUNORDSS
,
14109 IX86_BUILTIN_COMIEQSS
,
14110 IX86_BUILTIN_COMILTSS
,
14111 IX86_BUILTIN_COMILESS
,
14112 IX86_BUILTIN_COMIGTSS
,
14113 IX86_BUILTIN_COMIGESS
,
14114 IX86_BUILTIN_COMINEQSS
,
14115 IX86_BUILTIN_UCOMIEQSS
,
14116 IX86_BUILTIN_UCOMILTSS
,
14117 IX86_BUILTIN_UCOMILESS
,
14118 IX86_BUILTIN_UCOMIGTSS
,
14119 IX86_BUILTIN_UCOMIGESS
,
14120 IX86_BUILTIN_UCOMINEQSS
,
14122 IX86_BUILTIN_CVTPI2PS
,
14123 IX86_BUILTIN_CVTPS2PI
,
14124 IX86_BUILTIN_CVTSI2SS
,
14125 IX86_BUILTIN_CVTSI642SS
,
14126 IX86_BUILTIN_CVTSS2SI
,
14127 IX86_BUILTIN_CVTSS2SI64
,
14128 IX86_BUILTIN_CVTTPS2PI
,
14129 IX86_BUILTIN_CVTTSS2SI
,
14130 IX86_BUILTIN_CVTTSS2SI64
,
14132 IX86_BUILTIN_MAXPS
,
14133 IX86_BUILTIN_MAXSS
,
14134 IX86_BUILTIN_MINPS
,
14135 IX86_BUILTIN_MINSS
,
14137 IX86_BUILTIN_LOADUPS
,
14138 IX86_BUILTIN_STOREUPS
,
14139 IX86_BUILTIN_MOVSS
,
14141 IX86_BUILTIN_MOVHLPS
,
14142 IX86_BUILTIN_MOVLHPS
,
14143 IX86_BUILTIN_LOADHPS
,
14144 IX86_BUILTIN_LOADLPS
,
14145 IX86_BUILTIN_STOREHPS
,
14146 IX86_BUILTIN_STORELPS
,
14148 IX86_BUILTIN_MASKMOVQ
,
14149 IX86_BUILTIN_MOVMSKPS
,
14150 IX86_BUILTIN_PMOVMSKB
,
14152 IX86_BUILTIN_MOVNTPS
,
14153 IX86_BUILTIN_MOVNTQ
,
14155 IX86_BUILTIN_LOADDQU
,
14156 IX86_BUILTIN_STOREDQU
,
14158 IX86_BUILTIN_PACKSSWB
,
14159 IX86_BUILTIN_PACKSSDW
,
14160 IX86_BUILTIN_PACKUSWB
,
14162 IX86_BUILTIN_PADDB
,
14163 IX86_BUILTIN_PADDW
,
14164 IX86_BUILTIN_PADDD
,
14165 IX86_BUILTIN_PADDQ
,
14166 IX86_BUILTIN_PADDSB
,
14167 IX86_BUILTIN_PADDSW
,
14168 IX86_BUILTIN_PADDUSB
,
14169 IX86_BUILTIN_PADDUSW
,
14170 IX86_BUILTIN_PSUBB
,
14171 IX86_BUILTIN_PSUBW
,
14172 IX86_BUILTIN_PSUBD
,
14173 IX86_BUILTIN_PSUBQ
,
14174 IX86_BUILTIN_PSUBSB
,
14175 IX86_BUILTIN_PSUBSW
,
14176 IX86_BUILTIN_PSUBUSB
,
14177 IX86_BUILTIN_PSUBUSW
,
14180 IX86_BUILTIN_PANDN
,
14184 IX86_BUILTIN_PAVGB
,
14185 IX86_BUILTIN_PAVGW
,
14187 IX86_BUILTIN_PCMPEQB
,
14188 IX86_BUILTIN_PCMPEQW
,
14189 IX86_BUILTIN_PCMPEQD
,
14190 IX86_BUILTIN_PCMPGTB
,
14191 IX86_BUILTIN_PCMPGTW
,
14192 IX86_BUILTIN_PCMPGTD
,
14194 IX86_BUILTIN_PMADDWD
,
14196 IX86_BUILTIN_PMAXSW
,
14197 IX86_BUILTIN_PMAXUB
,
14198 IX86_BUILTIN_PMINSW
,
14199 IX86_BUILTIN_PMINUB
,
14201 IX86_BUILTIN_PMULHUW
,
14202 IX86_BUILTIN_PMULHW
,
14203 IX86_BUILTIN_PMULLW
,
14205 IX86_BUILTIN_PSADBW
,
14206 IX86_BUILTIN_PSHUFW
,
14208 IX86_BUILTIN_PSLLW
,
14209 IX86_BUILTIN_PSLLD
,
14210 IX86_BUILTIN_PSLLQ
,
14211 IX86_BUILTIN_PSRAW
,
14212 IX86_BUILTIN_PSRAD
,
14213 IX86_BUILTIN_PSRLW
,
14214 IX86_BUILTIN_PSRLD
,
14215 IX86_BUILTIN_PSRLQ
,
14216 IX86_BUILTIN_PSLLWI
,
14217 IX86_BUILTIN_PSLLDI
,
14218 IX86_BUILTIN_PSLLQI
,
14219 IX86_BUILTIN_PSRAWI
,
14220 IX86_BUILTIN_PSRADI
,
14221 IX86_BUILTIN_PSRLWI
,
14222 IX86_BUILTIN_PSRLDI
,
14223 IX86_BUILTIN_PSRLQI
,
14225 IX86_BUILTIN_PUNPCKHBW
,
14226 IX86_BUILTIN_PUNPCKHWD
,
14227 IX86_BUILTIN_PUNPCKHDQ
,
14228 IX86_BUILTIN_PUNPCKLBW
,
14229 IX86_BUILTIN_PUNPCKLWD
,
14230 IX86_BUILTIN_PUNPCKLDQ
,
14232 IX86_BUILTIN_SHUFPS
,
14234 IX86_BUILTIN_RCPPS
,
14235 IX86_BUILTIN_RCPSS
,
14236 IX86_BUILTIN_RSQRTPS
,
14237 IX86_BUILTIN_RSQRTSS
,
14238 IX86_BUILTIN_SQRTPS
,
14239 IX86_BUILTIN_SQRTSS
,
14241 IX86_BUILTIN_UNPCKHPS
,
14242 IX86_BUILTIN_UNPCKLPS
,
14244 IX86_BUILTIN_ANDPS
,
14245 IX86_BUILTIN_ANDNPS
,
14247 IX86_BUILTIN_XORPS
,
14250 IX86_BUILTIN_LDMXCSR
,
14251 IX86_BUILTIN_STMXCSR
,
14252 IX86_BUILTIN_SFENCE
,
14254 /* 3DNow! Original */
14255 IX86_BUILTIN_FEMMS
,
14256 IX86_BUILTIN_PAVGUSB
,
14257 IX86_BUILTIN_PF2ID
,
14258 IX86_BUILTIN_PFACC
,
14259 IX86_BUILTIN_PFADD
,
14260 IX86_BUILTIN_PFCMPEQ
,
14261 IX86_BUILTIN_PFCMPGE
,
14262 IX86_BUILTIN_PFCMPGT
,
14263 IX86_BUILTIN_PFMAX
,
14264 IX86_BUILTIN_PFMIN
,
14265 IX86_BUILTIN_PFMUL
,
14266 IX86_BUILTIN_PFRCP
,
14267 IX86_BUILTIN_PFRCPIT1
,
14268 IX86_BUILTIN_PFRCPIT2
,
14269 IX86_BUILTIN_PFRSQIT1
,
14270 IX86_BUILTIN_PFRSQRT
,
14271 IX86_BUILTIN_PFSUB
,
14272 IX86_BUILTIN_PFSUBR
,
14273 IX86_BUILTIN_PI2FD
,
14274 IX86_BUILTIN_PMULHRW
,
14276 /* 3DNow! Athlon Extensions */
14277 IX86_BUILTIN_PF2IW
,
14278 IX86_BUILTIN_PFNACC
,
14279 IX86_BUILTIN_PFPNACC
,
14280 IX86_BUILTIN_PI2FW
,
14281 IX86_BUILTIN_PSWAPDSI
,
14282 IX86_BUILTIN_PSWAPDSF
,
14285 IX86_BUILTIN_ADDPD
,
14286 IX86_BUILTIN_ADDSD
,
14287 IX86_BUILTIN_DIVPD
,
14288 IX86_BUILTIN_DIVSD
,
14289 IX86_BUILTIN_MULPD
,
14290 IX86_BUILTIN_MULSD
,
14291 IX86_BUILTIN_SUBPD
,
14292 IX86_BUILTIN_SUBSD
,
14294 IX86_BUILTIN_CMPEQPD
,
14295 IX86_BUILTIN_CMPLTPD
,
14296 IX86_BUILTIN_CMPLEPD
,
14297 IX86_BUILTIN_CMPGTPD
,
14298 IX86_BUILTIN_CMPGEPD
,
14299 IX86_BUILTIN_CMPNEQPD
,
14300 IX86_BUILTIN_CMPNLTPD
,
14301 IX86_BUILTIN_CMPNLEPD
,
14302 IX86_BUILTIN_CMPNGTPD
,
14303 IX86_BUILTIN_CMPNGEPD
,
14304 IX86_BUILTIN_CMPORDPD
,
14305 IX86_BUILTIN_CMPUNORDPD
,
14306 IX86_BUILTIN_CMPNEPD
,
14307 IX86_BUILTIN_CMPEQSD
,
14308 IX86_BUILTIN_CMPLTSD
,
14309 IX86_BUILTIN_CMPLESD
,
14310 IX86_BUILTIN_CMPNEQSD
,
14311 IX86_BUILTIN_CMPNLTSD
,
14312 IX86_BUILTIN_CMPNLESD
,
14313 IX86_BUILTIN_CMPORDSD
,
14314 IX86_BUILTIN_CMPUNORDSD
,
14315 IX86_BUILTIN_CMPNESD
,
14317 IX86_BUILTIN_COMIEQSD
,
14318 IX86_BUILTIN_COMILTSD
,
14319 IX86_BUILTIN_COMILESD
,
14320 IX86_BUILTIN_COMIGTSD
,
14321 IX86_BUILTIN_COMIGESD
,
14322 IX86_BUILTIN_COMINEQSD
,
14323 IX86_BUILTIN_UCOMIEQSD
,
14324 IX86_BUILTIN_UCOMILTSD
,
14325 IX86_BUILTIN_UCOMILESD
,
14326 IX86_BUILTIN_UCOMIGTSD
,
14327 IX86_BUILTIN_UCOMIGESD
,
14328 IX86_BUILTIN_UCOMINEQSD
,
14330 IX86_BUILTIN_MAXPD
,
14331 IX86_BUILTIN_MAXSD
,
14332 IX86_BUILTIN_MINPD
,
14333 IX86_BUILTIN_MINSD
,
14335 IX86_BUILTIN_ANDPD
,
14336 IX86_BUILTIN_ANDNPD
,
14338 IX86_BUILTIN_XORPD
,
14340 IX86_BUILTIN_SQRTPD
,
14341 IX86_BUILTIN_SQRTSD
,
14343 IX86_BUILTIN_UNPCKHPD
,
14344 IX86_BUILTIN_UNPCKLPD
,
14346 IX86_BUILTIN_SHUFPD
,
14348 IX86_BUILTIN_LOADUPD
,
14349 IX86_BUILTIN_STOREUPD
,
14350 IX86_BUILTIN_MOVSD
,
14352 IX86_BUILTIN_LOADHPD
,
14353 IX86_BUILTIN_LOADLPD
,
14355 IX86_BUILTIN_CVTDQ2PD
,
14356 IX86_BUILTIN_CVTDQ2PS
,
14358 IX86_BUILTIN_CVTPD2DQ
,
14359 IX86_BUILTIN_CVTPD2PI
,
14360 IX86_BUILTIN_CVTPD2PS
,
14361 IX86_BUILTIN_CVTTPD2DQ
,
14362 IX86_BUILTIN_CVTTPD2PI
,
14364 IX86_BUILTIN_CVTPI2PD
,
14365 IX86_BUILTIN_CVTSI2SD
,
14366 IX86_BUILTIN_CVTSI642SD
,
14368 IX86_BUILTIN_CVTSD2SI
,
14369 IX86_BUILTIN_CVTSD2SI64
,
14370 IX86_BUILTIN_CVTSD2SS
,
14371 IX86_BUILTIN_CVTSS2SD
,
14372 IX86_BUILTIN_CVTTSD2SI
,
14373 IX86_BUILTIN_CVTTSD2SI64
,
14375 IX86_BUILTIN_CVTPS2DQ
,
14376 IX86_BUILTIN_CVTPS2PD
,
14377 IX86_BUILTIN_CVTTPS2DQ
,
14379 IX86_BUILTIN_MOVNTI
,
14380 IX86_BUILTIN_MOVNTPD
,
14381 IX86_BUILTIN_MOVNTDQ
,
14384 IX86_BUILTIN_MASKMOVDQU
,
14385 IX86_BUILTIN_MOVMSKPD
,
14386 IX86_BUILTIN_PMOVMSKB128
,
14388 IX86_BUILTIN_PACKSSWB128
,
14389 IX86_BUILTIN_PACKSSDW128
,
14390 IX86_BUILTIN_PACKUSWB128
,
14392 IX86_BUILTIN_PADDB128
,
14393 IX86_BUILTIN_PADDW128
,
14394 IX86_BUILTIN_PADDD128
,
14395 IX86_BUILTIN_PADDQ128
,
14396 IX86_BUILTIN_PADDSB128
,
14397 IX86_BUILTIN_PADDSW128
,
14398 IX86_BUILTIN_PADDUSB128
,
14399 IX86_BUILTIN_PADDUSW128
,
14400 IX86_BUILTIN_PSUBB128
,
14401 IX86_BUILTIN_PSUBW128
,
14402 IX86_BUILTIN_PSUBD128
,
14403 IX86_BUILTIN_PSUBQ128
,
14404 IX86_BUILTIN_PSUBSB128
,
14405 IX86_BUILTIN_PSUBSW128
,
14406 IX86_BUILTIN_PSUBUSB128
,
14407 IX86_BUILTIN_PSUBUSW128
,
14409 IX86_BUILTIN_PAND128
,
14410 IX86_BUILTIN_PANDN128
,
14411 IX86_BUILTIN_POR128
,
14412 IX86_BUILTIN_PXOR128
,
14414 IX86_BUILTIN_PAVGB128
,
14415 IX86_BUILTIN_PAVGW128
,
14417 IX86_BUILTIN_PCMPEQB128
,
14418 IX86_BUILTIN_PCMPEQW128
,
14419 IX86_BUILTIN_PCMPEQD128
,
14420 IX86_BUILTIN_PCMPGTB128
,
14421 IX86_BUILTIN_PCMPGTW128
,
14422 IX86_BUILTIN_PCMPGTD128
,
14424 IX86_BUILTIN_PMADDWD128
,
14426 IX86_BUILTIN_PMAXSW128
,
14427 IX86_BUILTIN_PMAXUB128
,
14428 IX86_BUILTIN_PMINSW128
,
14429 IX86_BUILTIN_PMINUB128
,
14431 IX86_BUILTIN_PMULUDQ
,
14432 IX86_BUILTIN_PMULUDQ128
,
14433 IX86_BUILTIN_PMULHUW128
,
14434 IX86_BUILTIN_PMULHW128
,
14435 IX86_BUILTIN_PMULLW128
,
14437 IX86_BUILTIN_PSADBW128
,
14438 IX86_BUILTIN_PSHUFHW
,
14439 IX86_BUILTIN_PSHUFLW
,
14440 IX86_BUILTIN_PSHUFD
,
14442 IX86_BUILTIN_PSLLW128
,
14443 IX86_BUILTIN_PSLLD128
,
14444 IX86_BUILTIN_PSLLQ128
,
14445 IX86_BUILTIN_PSRAW128
,
14446 IX86_BUILTIN_PSRAD128
,
14447 IX86_BUILTIN_PSRLW128
,
14448 IX86_BUILTIN_PSRLD128
,
14449 IX86_BUILTIN_PSRLQ128
,
14450 IX86_BUILTIN_PSLLDQI128
,
14451 IX86_BUILTIN_PSLLWI128
,
14452 IX86_BUILTIN_PSLLDI128
,
14453 IX86_BUILTIN_PSLLQI128
,
14454 IX86_BUILTIN_PSRAWI128
,
14455 IX86_BUILTIN_PSRADI128
,
14456 IX86_BUILTIN_PSRLDQI128
,
14457 IX86_BUILTIN_PSRLWI128
,
14458 IX86_BUILTIN_PSRLDI128
,
14459 IX86_BUILTIN_PSRLQI128
,
14461 IX86_BUILTIN_PUNPCKHBW128
,
14462 IX86_BUILTIN_PUNPCKHWD128
,
14463 IX86_BUILTIN_PUNPCKHDQ128
,
14464 IX86_BUILTIN_PUNPCKHQDQ128
,
14465 IX86_BUILTIN_PUNPCKLBW128
,
14466 IX86_BUILTIN_PUNPCKLWD128
,
14467 IX86_BUILTIN_PUNPCKLDQ128
,
14468 IX86_BUILTIN_PUNPCKLQDQ128
,
14470 IX86_BUILTIN_CLFLUSH
,
14471 IX86_BUILTIN_MFENCE
,
14472 IX86_BUILTIN_LFENCE
,
14474 /* Prescott New Instructions. */
14475 IX86_BUILTIN_ADDSUBPS
,
14476 IX86_BUILTIN_HADDPS
,
14477 IX86_BUILTIN_HSUBPS
,
14478 IX86_BUILTIN_MOVSHDUP
,
14479 IX86_BUILTIN_MOVSLDUP
,
14480 IX86_BUILTIN_ADDSUBPD
,
14481 IX86_BUILTIN_HADDPD
,
14482 IX86_BUILTIN_HSUBPD
,
14483 IX86_BUILTIN_LDDQU
,
14485 IX86_BUILTIN_MONITOR
,
14486 IX86_BUILTIN_MWAIT
,
14488 IX86_BUILTIN_VEC_INIT_V2SI
,
14489 IX86_BUILTIN_VEC_INIT_V4HI
,
14490 IX86_BUILTIN_VEC_INIT_V8QI
,
14491 IX86_BUILTIN_VEC_EXT_V2DF
,
14492 IX86_BUILTIN_VEC_EXT_V2DI
,
14493 IX86_BUILTIN_VEC_EXT_V4SF
,
14494 IX86_BUILTIN_VEC_EXT_V4SI
,
14495 IX86_BUILTIN_VEC_EXT_V8HI
,
14496 IX86_BUILTIN_VEC_EXT_V2SI
,
14497 IX86_BUILTIN_VEC_EXT_V4HI
,
14498 IX86_BUILTIN_VEC_SET_V8HI
,
14499 IX86_BUILTIN_VEC_SET_V4HI
,
/* Register builtin NAME of type TYPE under code CODE, but only when
   every target flag in MASK is enabled and, for 64-bit-only builtins
   (MASK contains MASK_64BIT), only when compiling for 64-bit.
   NOTE(review): the extraction dropped part of this macro's
   do/while-style wrapper lines; no comments are inserted between the
   backslash-continued lines to keep the macro intact.  */
14504 #define def_builtin(MASK, NAME, TYPE, CODE) \
14506 if ((MASK) & target_flags \
14507 && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \
14508 lang_hooks.builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
14509 NULL, NULL_TREE); \
14512 /* Bits for builtin_description.flag. */
14514 /* Set when we don't support the comparison natively, and should
14515 swap_comparison in order to support it. */
14516 #define BUILTIN_DESC_SWAP_OPERANDS 1
/* Table-entry descriptor used by the bdesc_* arrays below to declare
   and expand ia32 builtins.  */
14518 struct builtin_description
/* Target-flag mask that must be enabled for this builtin (MASK_SSE etc.).  */
14520 const unsigned int mask
;
/* Insn pattern used to expand the builtin.  */
14521 const enum insn_code icode
;
/* The user-visible "__builtin_ia32_*" name.  */
14522 const char *const name
;
/* The IX86_BUILTIN_* function code.  */
14523 const enum ix86_builtins code
;
/* Comparison code for compare-style builtins, 0 otherwise.  */
14524 const enum rtx_code comparison
;
/* Flag bits, e.g. BUILTIN_DESC_SWAP_OPERANDS.  */
14525 const unsigned int flag
;
14528 static const struct builtin_description bdesc_comi
[] =
14530 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS
, UNEQ
, 0 },
14531 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS
, UNLT
, 0 },
14532 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS
, UNLE
, 0 },
14533 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS
, GT
, 0 },
14534 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS
, GE
, 0 },
14535 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS
, LTGT
, 0 },
14536 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS
, UNEQ
, 0 },
14537 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS
, UNLT
, 0 },
14538 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS
, UNLE
, 0 },
14539 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS
, GT
, 0 },
14540 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS
, GE
, 0 },
14541 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS
, LTGT
, 0 },
14542 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD
, UNEQ
, 0 },
14543 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD
, UNLT
, 0 },
14544 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD
, UNLE
, 0 },
14545 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD
, GT
, 0 },
14546 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD
, GE
, 0 },
14547 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD
, LTGT
, 0 },
14548 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD
, UNEQ
, 0 },
14549 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD
, UNLT
, 0 },
14550 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD
, UNLE
, 0 },
14551 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD
, GT
, 0 },
14552 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD
, GE
, 0 },
14553 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD
, LTGT
, 0 },
14556 static const struct builtin_description bdesc_2arg
[] =
14559 { MASK_SSE
, CODE_FOR_addv4sf3
, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS
, 0, 0 },
14560 { MASK_SSE
, CODE_FOR_subv4sf3
, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS
, 0, 0 },
14561 { MASK_SSE
, CODE_FOR_mulv4sf3
, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS
, 0, 0 },
14562 { MASK_SSE
, CODE_FOR_divv4sf3
, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS
, 0, 0 },
14563 { MASK_SSE
, CODE_FOR_sse_vmaddv4sf3
, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS
, 0, 0 },
14564 { MASK_SSE
, CODE_FOR_sse_vmsubv4sf3
, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS
, 0, 0 },
14565 { MASK_SSE
, CODE_FOR_sse_vmmulv4sf3
, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS
, 0, 0 },
14566 { MASK_SSE
, CODE_FOR_sse_vmdivv4sf3
, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS
, 0, 0 },
14568 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS
, EQ
, 0 },
14569 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS
, LT
, 0 },
14570 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS
, LE
, 0 },
14571 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS
, LT
,
14572 BUILTIN_DESC_SWAP_OPERANDS
},
14573 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS
, LE
,
14574 BUILTIN_DESC_SWAP_OPERANDS
},
14575 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS
, UNORDERED
, 0 },
14576 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS
, NE
, 0 },
14577 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS
, UNGE
, 0 },
14578 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS
, UNGT
, 0 },
14579 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS
, UNGE
,
14580 BUILTIN_DESC_SWAP_OPERANDS
},
14581 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS
, UNGT
,
14582 BUILTIN_DESC_SWAP_OPERANDS
},
14583 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS
, ORDERED
, 0 },
14584 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS
, EQ
, 0 },
14585 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS
, LT
, 0 },
14586 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS
, LE
, 0 },
14587 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS
, UNORDERED
, 0 },
14588 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS
, NE
, 0 },
14589 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS
, UNGE
, 0 },
14590 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS
, UNGT
, 0 },
14591 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS
, UNGE
,
14592 BUILTIN_DESC_SWAP_OPERANDS
},
14593 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS
, UNGT
,
14594 BUILTIN_DESC_SWAP_OPERANDS
},
14595 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS
, UNORDERED
, 0 },
14597 { MASK_SSE
, CODE_FOR_sminv4sf3
, "__builtin_ia32_minps", IX86_BUILTIN_MINPS
, 0, 0 },
14598 { MASK_SSE
, CODE_FOR_smaxv4sf3
, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS
, 0, 0 },
14599 { MASK_SSE
, CODE_FOR_sse_vmsminv4sf3
, "__builtin_ia32_minss", IX86_BUILTIN_MINSS
, 0, 0 },
14600 { MASK_SSE
, CODE_FOR_sse_vmsmaxv4sf3
, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS
, 0, 0 },
14602 { MASK_SSE
, CODE_FOR_andv4sf3
, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS
, 0, 0 },
14603 { MASK_SSE
, CODE_FOR_sse_nandv4sf3
, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS
, 0, 0 },
14604 { MASK_SSE
, CODE_FOR_iorv4sf3
, "__builtin_ia32_orps", IX86_BUILTIN_ORPS
, 0, 0 },
14605 { MASK_SSE
, CODE_FOR_xorv4sf3
, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS
, 0, 0 },
14607 { MASK_SSE
, CODE_FOR_sse_movss
, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS
, 0, 0 },
14608 { MASK_SSE
, CODE_FOR_sse_movhlps
, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS
, 0, 0 },
14609 { MASK_SSE
, CODE_FOR_sse_movlhps
, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS
, 0, 0 },
14610 { MASK_SSE
, CODE_FOR_sse_unpckhps
, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS
, 0, 0 },
14611 { MASK_SSE
, CODE_FOR_sse_unpcklps
, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS
, 0, 0 },
14614 { MASK_MMX
, CODE_FOR_mmx_addv8qi3
, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB
, 0, 0 },
14615 { MASK_MMX
, CODE_FOR_mmx_addv4hi3
, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW
, 0, 0 },
14616 { MASK_MMX
, CODE_FOR_mmx_addv2si3
, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD
, 0, 0 },
14617 { MASK_SSE2
, CODE_FOR_mmx_adddi3
, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ
, 0, 0 },
14618 { MASK_MMX
, CODE_FOR_mmx_subv8qi3
, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB
, 0, 0 },
14619 { MASK_MMX
, CODE_FOR_mmx_subv4hi3
, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW
, 0, 0 },
14620 { MASK_MMX
, CODE_FOR_mmx_subv2si3
, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD
, 0, 0 },
14621 { MASK_SSE2
, CODE_FOR_mmx_subdi3
, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ
, 0, 0 },
14623 { MASK_MMX
, CODE_FOR_mmx_ssaddv8qi3
, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB
, 0, 0 },
14624 { MASK_MMX
, CODE_FOR_mmx_ssaddv4hi3
, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW
, 0, 0 },
14625 { MASK_MMX
, CODE_FOR_mmx_sssubv8qi3
, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB
, 0, 0 },
14626 { MASK_MMX
, CODE_FOR_mmx_sssubv4hi3
, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW
, 0, 0 },
14627 { MASK_MMX
, CODE_FOR_mmx_usaddv8qi3
, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB
, 0, 0 },
14628 { MASK_MMX
, CODE_FOR_mmx_usaddv4hi3
, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW
, 0, 0 },
14629 { MASK_MMX
, CODE_FOR_mmx_ussubv8qi3
, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB
, 0, 0 },
14630 { MASK_MMX
, CODE_FOR_mmx_ussubv4hi3
, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW
, 0, 0 },
14632 { MASK_MMX
, CODE_FOR_mmx_mulv4hi3
, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW
, 0, 0 },
14633 { MASK_MMX
, CODE_FOR_mmx_smulv4hi3_highpart
, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW
, 0, 0 },
14634 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_umulv4hi3_highpart
, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW
, 0, 0 },
14636 { MASK_MMX
, CODE_FOR_mmx_andv2si3
, "__builtin_ia32_pand", IX86_BUILTIN_PAND
, 0, 0 },
14637 { MASK_MMX
, CODE_FOR_mmx_nandv2si3
, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN
, 0, 0 },
14638 { MASK_MMX
, CODE_FOR_mmx_iorv2si3
, "__builtin_ia32_por", IX86_BUILTIN_POR
, 0, 0 },
14639 { MASK_MMX
, CODE_FOR_mmx_xorv2si3
, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR
, 0, 0 },
14641 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_uavgv8qi3
, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB
, 0, 0 },
14642 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_uavgv4hi3
, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW
, 0, 0 },
14644 { MASK_MMX
, CODE_FOR_mmx_eqv8qi3
, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB
, 0, 0 },
14645 { MASK_MMX
, CODE_FOR_mmx_eqv4hi3
, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW
, 0, 0 },
14646 { MASK_MMX
, CODE_FOR_mmx_eqv2si3
, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD
, 0, 0 },
14647 { MASK_MMX
, CODE_FOR_mmx_gtv8qi3
, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB
, 0, 0 },
14648 { MASK_MMX
, CODE_FOR_mmx_gtv4hi3
, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW
, 0, 0 },
14649 { MASK_MMX
, CODE_FOR_mmx_gtv2si3
, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD
, 0, 0 },
14651 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_umaxv8qi3
, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB
, 0, 0 },
14652 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_smaxv4hi3
, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW
, 0, 0 },
14653 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_uminv8qi3
, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB
, 0, 0 },
14654 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_sminv4hi3
, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW
, 0, 0 },
14656 { MASK_MMX
, CODE_FOR_mmx_punpckhbw
, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW
, 0, 0 },
14657 { MASK_MMX
, CODE_FOR_mmx_punpckhwd
, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD
, 0, 0 },
14658 { MASK_MMX
, CODE_FOR_mmx_punpckhdq
, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ
, 0, 0 },
14659 { MASK_MMX
, CODE_FOR_mmx_punpcklbw
, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW
, 0, 0 },
14660 { MASK_MMX
, CODE_FOR_mmx_punpcklwd
, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD
, 0, 0 },
14661 { MASK_MMX
, CODE_FOR_mmx_punpckldq
, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ
, 0, 0 },
14664 { MASK_MMX
, CODE_FOR_mmx_packsswb
, 0, IX86_BUILTIN_PACKSSWB
, 0, 0 },
14665 { MASK_MMX
, CODE_FOR_mmx_packssdw
, 0, IX86_BUILTIN_PACKSSDW
, 0, 0 },
14666 { MASK_MMX
, CODE_FOR_mmx_packuswb
, 0, IX86_BUILTIN_PACKUSWB
, 0, 0 },
14668 { MASK_SSE
, CODE_FOR_sse_cvtpi2ps
, 0, IX86_BUILTIN_CVTPI2PS
, 0, 0 },
14669 { MASK_SSE
, CODE_FOR_sse_cvtsi2ss
, 0, IX86_BUILTIN_CVTSI2SS
, 0, 0 },
14670 { MASK_SSE
| MASK_64BIT
, CODE_FOR_sse_cvtsi2ssq
, 0, IX86_BUILTIN_CVTSI642SS
, 0, 0 },
14672 { MASK_MMX
, CODE_FOR_mmx_ashlv4hi3
, 0, IX86_BUILTIN_PSLLW
, 0, 0 },
14673 { MASK_MMX
, CODE_FOR_mmx_ashlv4hi3
, 0, IX86_BUILTIN_PSLLWI
, 0, 0 },
14674 { MASK_MMX
, CODE_FOR_mmx_ashlv2si3
, 0, IX86_BUILTIN_PSLLD
, 0, 0 },
14675 { MASK_MMX
, CODE_FOR_mmx_ashlv2si3
, 0, IX86_BUILTIN_PSLLDI
, 0, 0 },
14676 { MASK_MMX
, CODE_FOR_mmx_ashldi3
, 0, IX86_BUILTIN_PSLLQ
, 0, 0 },
14677 { MASK_MMX
, CODE_FOR_mmx_ashldi3
, 0, IX86_BUILTIN_PSLLQI
, 0, 0 },
14679 { MASK_MMX
, CODE_FOR_mmx_lshrv4hi3
, 0, IX86_BUILTIN_PSRLW
, 0, 0 },
14680 { MASK_MMX
, CODE_FOR_mmx_lshrv4hi3
, 0, IX86_BUILTIN_PSRLWI
, 0, 0 },
14681 { MASK_MMX
, CODE_FOR_mmx_lshrv2si3
, 0, IX86_BUILTIN_PSRLD
, 0, 0 },
14682 { MASK_MMX
, CODE_FOR_mmx_lshrv2si3
, 0, IX86_BUILTIN_PSRLDI
, 0, 0 },
14683 { MASK_MMX
, CODE_FOR_mmx_lshrdi3
, 0, IX86_BUILTIN_PSRLQ
, 0, 0 },
14684 { MASK_MMX
, CODE_FOR_mmx_lshrdi3
, 0, IX86_BUILTIN_PSRLQI
, 0, 0 },
14686 { MASK_MMX
, CODE_FOR_mmx_ashrv4hi3
, 0, IX86_BUILTIN_PSRAW
, 0, 0 },
14687 { MASK_MMX
, CODE_FOR_mmx_ashrv4hi3
, 0, IX86_BUILTIN_PSRAWI
, 0, 0 },
14688 { MASK_MMX
, CODE_FOR_mmx_ashrv2si3
, 0, IX86_BUILTIN_PSRAD
, 0, 0 },
14689 { MASK_MMX
, CODE_FOR_mmx_ashrv2si3
, 0, IX86_BUILTIN_PSRADI
, 0, 0 },
14691 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_psadbw
, 0, IX86_BUILTIN_PSADBW
, 0, 0 },
14692 { MASK_MMX
, CODE_FOR_mmx_pmaddwd
, 0, IX86_BUILTIN_PMADDWD
, 0, 0 },
14695 { MASK_SSE2
, CODE_FOR_addv2df3
, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD
, 0, 0 },
14696 { MASK_SSE2
, CODE_FOR_subv2df3
, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD
, 0, 0 },
14697 { MASK_SSE2
, CODE_FOR_mulv2df3
, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD
, 0, 0 },
14698 { MASK_SSE2
, CODE_FOR_divv2df3
, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD
, 0, 0 },
14699 { MASK_SSE2
, CODE_FOR_sse2_vmaddv2df3
, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD
, 0, 0 },
14700 { MASK_SSE2
, CODE_FOR_sse2_vmsubv2df3
, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD
, 0, 0 },
14701 { MASK_SSE2
, CODE_FOR_sse2_vmmulv2df3
, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD
, 0, 0 },
14702 { MASK_SSE2
, CODE_FOR_sse2_vmdivv2df3
, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD
, 0, 0 },
14704 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD
, EQ
, 0 },
14705 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD
, LT
, 0 },
14706 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD
, LE
, 0 },
14707 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD
, LT
,
14708 BUILTIN_DESC_SWAP_OPERANDS
},
14709 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD
, LE
,
14710 BUILTIN_DESC_SWAP_OPERANDS
},
14711 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD
, UNORDERED
, 0 },
14712 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD
, NE
, 0 },
14713 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD
, UNGE
, 0 },
14714 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD
, UNGT
, 0 },
14715 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD
, UNGE
,
14716 BUILTIN_DESC_SWAP_OPERANDS
},
14717 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD
, UNGT
,
14718 BUILTIN_DESC_SWAP_OPERANDS
},
14719 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD
, ORDERED
, 0 },
14720 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD
, EQ
, 0 },
14721 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD
, LT
, 0 },
14722 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD
, LE
, 0 },
14723 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD
, UNORDERED
, 0 },
14724 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD
, NE
, 0 },
14725 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD
, UNGE
, 0 },
14726 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD
, UNGT
, 0 },
14727 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD
, ORDERED
, 0 },
14729 { MASK_SSE2
, CODE_FOR_sminv2df3
, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD
, 0, 0 },
14730 { MASK_SSE2
, CODE_FOR_smaxv2df3
, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD
, 0, 0 },
14731 { MASK_SSE2
, CODE_FOR_sse2_vmsminv2df3
, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD
, 0, 0 },
14732 { MASK_SSE2
, CODE_FOR_sse2_vmsmaxv2df3
, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD
, 0, 0 },
14734 { MASK_SSE2
, CODE_FOR_andv2df3
, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD
, 0, 0 },
14735 { MASK_SSE2
, CODE_FOR_sse2_nandv2df3
, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD
, 0, 0 },
14736 { MASK_SSE2
, CODE_FOR_iorv2df3
, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD
, 0, 0 },
14737 { MASK_SSE2
, CODE_FOR_xorv2df3
, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD
, 0, 0 },
14739 { MASK_SSE2
, CODE_FOR_sse2_movsd
, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD
, 0, 0 },
14740 { MASK_SSE2
, CODE_FOR_sse2_unpckhpd
, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD
, 0, 0 },
14741 { MASK_SSE2
, CODE_FOR_sse2_unpcklpd
, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD
, 0, 0 },
14744 { MASK_SSE2
, CODE_FOR_addv16qi3
, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128
, 0, 0 },
14745 { MASK_SSE2
, CODE_FOR_addv8hi3
, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128
, 0, 0 },
14746 { MASK_SSE2
, CODE_FOR_addv4si3
, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128
, 0, 0 },
14747 { MASK_SSE2
, CODE_FOR_addv2di3
, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128
, 0, 0 },
14748 { MASK_SSE2
, CODE_FOR_subv16qi3
, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128
, 0, 0 },
14749 { MASK_SSE2
, CODE_FOR_subv8hi3
, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128
, 0, 0 },
14750 { MASK_SSE2
, CODE_FOR_subv4si3
, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128
, 0, 0 },
14751 { MASK_SSE2
, CODE_FOR_subv2di3
, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128
, 0, 0 },
14753 { MASK_MMX
, CODE_FOR_sse2_ssaddv16qi3
, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128
, 0, 0 },
14754 { MASK_MMX
, CODE_FOR_sse2_ssaddv8hi3
, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128
, 0, 0 },
14755 { MASK_MMX
, CODE_FOR_sse2_sssubv16qi3
, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128
, 0, 0 },
14756 { MASK_MMX
, CODE_FOR_sse2_sssubv8hi3
, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128
, 0, 0 },
14757 { MASK_MMX
, CODE_FOR_sse2_usaddv16qi3
, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128
, 0, 0 },
14758 { MASK_MMX
, CODE_FOR_sse2_usaddv8hi3
, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128
, 0, 0 },
14759 { MASK_MMX
, CODE_FOR_sse2_ussubv16qi3
, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128
, 0, 0 },
14760 { MASK_MMX
, CODE_FOR_sse2_ussubv8hi3
, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128
, 0, 0 },
14762 { MASK_SSE2
, CODE_FOR_mulv8hi3
, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128
, 0, 0 },
14763 { MASK_SSE2
, CODE_FOR_sse2_smulv8hi3_highpart
, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128
, 0, 0 },
14765 { MASK_SSE2
, CODE_FOR_andv2di3
, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128
, 0, 0 },
14766 { MASK_SSE2
, CODE_FOR_sse2_nandv2di3
, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128
, 0, 0 },
14767 { MASK_SSE2
, CODE_FOR_iorv2di3
, "__builtin_ia32_por128", IX86_BUILTIN_POR128
, 0, 0 },
14768 { MASK_SSE2
, CODE_FOR_xorv2di3
, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128
, 0, 0 },
14770 { MASK_SSE2
, CODE_FOR_sse2_uavgv16qi3
, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128
, 0, 0 },
14771 { MASK_SSE2
, CODE_FOR_sse2_uavgv8hi3
, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128
, 0, 0 },
14773 { MASK_SSE2
, CODE_FOR_sse2_eqv16qi3
, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128
, 0, 0 },
14774 { MASK_SSE2
, CODE_FOR_sse2_eqv8hi3
, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128
, 0, 0 },
14775 { MASK_SSE2
, CODE_FOR_sse2_eqv4si3
, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128
, 0, 0 },
14776 { MASK_SSE2
, CODE_FOR_sse2_gtv16qi3
, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128
, 0, 0 },
14777 { MASK_SSE2
, CODE_FOR_sse2_gtv8hi3
, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128
, 0, 0 },
14778 { MASK_SSE2
, CODE_FOR_sse2_gtv4si3
, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128
, 0, 0 },
14780 { MASK_SSE2
, CODE_FOR_umaxv16qi3
, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128
, 0, 0 },
14781 { MASK_SSE2
, CODE_FOR_smaxv8hi3
, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128
, 0, 0 },
14782 { MASK_SSE2
, CODE_FOR_uminv16qi3
, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128
, 0, 0 },
14783 { MASK_SSE2
, CODE_FOR_sminv8hi3
, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128
, 0, 0 },
14785 { MASK_SSE2
, CODE_FOR_sse2_punpckhbw
, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128
, 0, 0 },
14786 { MASK_SSE2
, CODE_FOR_sse2_punpckhwd
, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128
, 0, 0 },
14787 { MASK_SSE2
, CODE_FOR_sse2_punpckhdq
, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128
, 0, 0 },
14788 { MASK_SSE2
, CODE_FOR_sse2_punpckhqdq
, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128
, 0, 0 },
14789 { MASK_SSE2
, CODE_FOR_sse2_punpcklbw
, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128
, 0, 0 },
14790 { MASK_SSE2
, CODE_FOR_sse2_punpcklwd
, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128
, 0, 0 },
14791 { MASK_SSE2
, CODE_FOR_sse2_punpckldq
, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128
, 0, 0 },
14792 { MASK_SSE2
, CODE_FOR_sse2_punpcklqdq
, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128
, 0, 0 },
14794 { MASK_SSE2
, CODE_FOR_sse2_packsswb
, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128
, 0, 0 },
14795 { MASK_SSE2
, CODE_FOR_sse2_packssdw
, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128
, 0, 0 },
14796 { MASK_SSE2
, CODE_FOR_sse2_packuswb
, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128
, 0, 0 },
14798 { MASK_SSE2
, CODE_FOR_sse2_umulv8hi3_highpart
, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128
, 0, 0 },
14799 { MASK_SSE2
, CODE_FOR_sse2_psadbw
, 0, IX86_BUILTIN_PSADBW128
, 0, 0 },
14801 { MASK_SSE2
, CODE_FOR_sse2_umulsidi3
, 0, IX86_BUILTIN_PMULUDQ
, 0, 0 },
14802 { MASK_SSE2
, CODE_FOR_sse2_umulv2siv2di3
, 0, IX86_BUILTIN_PMULUDQ128
, 0, 0 },
14804 { MASK_SSE2
, CODE_FOR_ashlv8hi3
, 0, IX86_BUILTIN_PSLLWI128
, 0, 0 },
14805 { MASK_SSE2
, CODE_FOR_ashlv4si3
, 0, IX86_BUILTIN_PSLLDI128
, 0, 0 },
14806 { MASK_SSE2
, CODE_FOR_ashlv2di3
, 0, IX86_BUILTIN_PSLLQI128
, 0, 0 },
14808 { MASK_SSE2
, CODE_FOR_lshrv8hi3
, 0, IX86_BUILTIN_PSRLWI128
, 0, 0 },
14809 { MASK_SSE2
, CODE_FOR_lshrv4si3
, 0, IX86_BUILTIN_PSRLDI128
, 0, 0 },
14810 { MASK_SSE2
, CODE_FOR_lshrv2di3
, 0, IX86_BUILTIN_PSRLQI128
, 0, 0 },
14812 { MASK_SSE2
, CODE_FOR_ashrv8hi3
, 0, IX86_BUILTIN_PSRAWI128
, 0, 0 },
14813 { MASK_SSE2
, CODE_FOR_ashrv4si3
, 0, IX86_BUILTIN_PSRADI128
, 0, 0 },
14815 { MASK_SSE2
, CODE_FOR_sse2_pmaddwd
, 0, IX86_BUILTIN_PMADDWD128
, 0, 0 },
14817 { MASK_SSE2
, CODE_FOR_sse2_cvtsi2sd
, 0, IX86_BUILTIN_CVTSI2SD
, 0, 0 },
14818 { MASK_SSE2
| MASK_64BIT
, CODE_FOR_sse2_cvtsi2sdq
, 0, IX86_BUILTIN_CVTSI642SD
, 0, 0 },
14819 { MASK_SSE2
, CODE_FOR_sse2_cvtsd2ss
, 0, IX86_BUILTIN_CVTSD2SS
, 0, 0 },
14820 { MASK_SSE2
, CODE_FOR_sse2_cvtss2sd
, 0, IX86_BUILTIN_CVTSS2SD
, 0, 0 },
14823 { MASK_SSE3
, CODE_FOR_sse3_addsubv4sf3
, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS
, 0, 0 },
14824 { MASK_SSE3
, CODE_FOR_sse3_addsubv2df3
, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD
, 0, 0 },
14825 { MASK_SSE3
, CODE_FOR_sse3_haddv4sf3
, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS
, 0, 0 },
14826 { MASK_SSE3
, CODE_FOR_sse3_haddv2df3
, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD
, 0, 0 },
14827 { MASK_SSE3
, CODE_FOR_sse3_hsubv4sf3
, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS
, 0, 0 },
14828 { MASK_SSE3
, CODE_FOR_sse3_hsubv2df3
, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD
, 0, 0 }
14831 static const struct builtin_description bdesc_1arg
[] =
14833 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_pmovmskb
, 0, IX86_BUILTIN_PMOVMSKB
, 0, 0 },
14834 { MASK_SSE
, CODE_FOR_sse_movmskps
, 0, IX86_BUILTIN_MOVMSKPS
, 0, 0 },
14836 { MASK_SSE
, CODE_FOR_sqrtv4sf2
, 0, IX86_BUILTIN_SQRTPS
, 0, 0 },
14837 { MASK_SSE
, CODE_FOR_sse_rsqrtv4sf2
, 0, IX86_BUILTIN_RSQRTPS
, 0, 0 },
14838 { MASK_SSE
, CODE_FOR_sse_rcpv4sf2
, 0, IX86_BUILTIN_RCPPS
, 0, 0 },
14840 { MASK_SSE
, CODE_FOR_sse_cvtps2pi
, 0, IX86_BUILTIN_CVTPS2PI
, 0, 0 },
14841 { MASK_SSE
, CODE_FOR_sse_cvtss2si
, 0, IX86_BUILTIN_CVTSS2SI
, 0, 0 },
14842 { MASK_SSE
| MASK_64BIT
, CODE_FOR_sse_cvtss2siq
, 0, IX86_BUILTIN_CVTSS2SI64
, 0, 0 },
14843 { MASK_SSE
, CODE_FOR_sse_cvttps2pi
, 0, IX86_BUILTIN_CVTTPS2PI
, 0, 0 },
14844 { MASK_SSE
, CODE_FOR_sse_cvttss2si
, 0, IX86_BUILTIN_CVTTSS2SI
, 0, 0 },
14845 { MASK_SSE
| MASK_64BIT
, CODE_FOR_sse_cvttss2siq
, 0, IX86_BUILTIN_CVTTSS2SI64
, 0, 0 },
14847 { MASK_SSE2
, CODE_FOR_sse2_pmovmskb
, 0, IX86_BUILTIN_PMOVMSKB128
, 0, 0 },
14848 { MASK_SSE2
, CODE_FOR_sse2_movmskpd
, 0, IX86_BUILTIN_MOVMSKPD
, 0, 0 },
14850 { MASK_SSE2
, CODE_FOR_sqrtv2df2
, 0, IX86_BUILTIN_SQRTPD
, 0, 0 },
14852 { MASK_SSE2
, CODE_FOR_sse2_cvtdq2pd
, 0, IX86_BUILTIN_CVTDQ2PD
, 0, 0 },
14853 { MASK_SSE2
, CODE_FOR_sse2_cvtdq2ps
, 0, IX86_BUILTIN_CVTDQ2PS
, 0, 0 },
14855 { MASK_SSE2
, CODE_FOR_sse2_cvtpd2dq
, 0, IX86_BUILTIN_CVTPD2DQ
, 0, 0 },
14856 { MASK_SSE2
, CODE_FOR_sse2_cvtpd2pi
, 0, IX86_BUILTIN_CVTPD2PI
, 0, 0 },
14857 { MASK_SSE2
, CODE_FOR_sse2_cvtpd2ps
, 0, IX86_BUILTIN_CVTPD2PS
, 0, 0 },
14858 { MASK_SSE2
, CODE_FOR_sse2_cvttpd2dq
, 0, IX86_BUILTIN_CVTTPD2DQ
, 0, 0 },
14859 { MASK_SSE2
, CODE_FOR_sse2_cvttpd2pi
, 0, IX86_BUILTIN_CVTTPD2PI
, 0, 0 },
14861 { MASK_SSE2
, CODE_FOR_sse2_cvtpi2pd
, 0, IX86_BUILTIN_CVTPI2PD
, 0, 0 },
14863 { MASK_SSE2
, CODE_FOR_sse2_cvtsd2si
, 0, IX86_BUILTIN_CVTSD2SI
, 0, 0 },
14864 { MASK_SSE2
, CODE_FOR_sse2_cvttsd2si
, 0, IX86_BUILTIN_CVTTSD2SI
, 0, 0 },
14865 { MASK_SSE2
| MASK_64BIT
, CODE_FOR_sse2_cvtsd2siq
, 0, IX86_BUILTIN_CVTSD2SI64
, 0, 0 },
14866 { MASK_SSE2
| MASK_64BIT
, CODE_FOR_sse2_cvttsd2siq
, 0, IX86_BUILTIN_CVTTSD2SI64
, 0, 0 },
14868 { MASK_SSE2
, CODE_FOR_sse2_cvtps2dq
, 0, IX86_BUILTIN_CVTPS2DQ
, 0, 0 },
14869 { MASK_SSE2
, CODE_FOR_sse2_cvtps2pd
, 0, IX86_BUILTIN_CVTPS2PD
, 0, 0 },
14870 { MASK_SSE2
, CODE_FOR_sse2_cvttps2dq
, 0, IX86_BUILTIN_CVTTPS2DQ
, 0, 0 },
14873 { MASK_SSE3
, CODE_FOR_sse3_movshdup
, 0, IX86_BUILTIN_MOVSHDUP
, 0, 0 },
14874 { MASK_SSE3
, CODE_FOR_sse3_movsldup
, 0, IX86_BUILTIN_MOVSLDUP
, 0, 0 },
14878 ix86_init_builtins (void)
14881 ix86_init_mmx_sse_builtins ();
14884 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
14885 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
14888 ix86_init_mmx_sse_builtins (void)
14890 const struct builtin_description
* d
;
14893 tree V16QI_type_node
= build_vector_type_for_mode (intQI_type_node
, V16QImode
);
14894 tree V2SI_type_node
= build_vector_type_for_mode (intSI_type_node
, V2SImode
);
14895 tree V2SF_type_node
= build_vector_type_for_mode (float_type_node
, V2SFmode
);
14896 tree V2DI_type_node
14897 = build_vector_type_for_mode (long_long_integer_type_node
, V2DImode
);
14898 tree V2DF_type_node
= build_vector_type_for_mode (double_type_node
, V2DFmode
);
14899 tree V4SF_type_node
= build_vector_type_for_mode (float_type_node
, V4SFmode
);
14900 tree V4SI_type_node
= build_vector_type_for_mode (intSI_type_node
, V4SImode
);
14901 tree V4HI_type_node
= build_vector_type_for_mode (intHI_type_node
, V4HImode
);
14902 tree V8QI_type_node
= build_vector_type_for_mode (intQI_type_node
, V8QImode
);
14903 tree V8HI_type_node
= build_vector_type_for_mode (intHI_type_node
, V8HImode
);
14905 tree pchar_type_node
= build_pointer_type (char_type_node
);
14906 tree pcchar_type_node
= build_pointer_type (
14907 build_type_variant (char_type_node
, 1, 0));
14908 tree pfloat_type_node
= build_pointer_type (float_type_node
);
14909 tree pcfloat_type_node
= build_pointer_type (
14910 build_type_variant (float_type_node
, 1, 0));
14911 tree pv2si_type_node
= build_pointer_type (V2SI_type_node
);
14912 tree pv2di_type_node
= build_pointer_type (V2DI_type_node
);
14913 tree pdi_type_node
= build_pointer_type (long_long_unsigned_type_node
);
14916 tree int_ftype_v4sf_v4sf
14917 = build_function_type_list (integer_type_node
,
14918 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
14919 tree v4si_ftype_v4sf_v4sf
14920 = build_function_type_list (V4SI_type_node
,
14921 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
14922 /* MMX/SSE/integer conversions. */
14923 tree int_ftype_v4sf
14924 = build_function_type_list (integer_type_node
,
14925 V4SF_type_node
, NULL_TREE
);
14926 tree int64_ftype_v4sf
14927 = build_function_type_list (long_long_integer_type_node
,
14928 V4SF_type_node
, NULL_TREE
);
14929 tree int_ftype_v8qi
14930 = build_function_type_list (integer_type_node
, V8QI_type_node
, NULL_TREE
);
14931 tree v4sf_ftype_v4sf_int
14932 = build_function_type_list (V4SF_type_node
,
14933 V4SF_type_node
, integer_type_node
, NULL_TREE
);
14934 tree v4sf_ftype_v4sf_int64
14935 = build_function_type_list (V4SF_type_node
,
14936 V4SF_type_node
, long_long_integer_type_node
,
14938 tree v4sf_ftype_v4sf_v2si
14939 = build_function_type_list (V4SF_type_node
,
14940 V4SF_type_node
, V2SI_type_node
, NULL_TREE
);
14942 /* Miscellaneous. */
14943 tree v8qi_ftype_v4hi_v4hi
14944 = build_function_type_list (V8QI_type_node
,
14945 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
14946 tree v4hi_ftype_v2si_v2si
14947 = build_function_type_list (V4HI_type_node
,
14948 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
14949 tree v4sf_ftype_v4sf_v4sf_int
14950 = build_function_type_list (V4SF_type_node
,
14951 V4SF_type_node
, V4SF_type_node
,
14952 integer_type_node
, NULL_TREE
);
14953 tree v2si_ftype_v4hi_v4hi
14954 = build_function_type_list (V2SI_type_node
,
14955 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
14956 tree v4hi_ftype_v4hi_int
14957 = build_function_type_list (V4HI_type_node
,
14958 V4HI_type_node
, integer_type_node
, NULL_TREE
);
14959 tree v4hi_ftype_v4hi_di
14960 = build_function_type_list (V4HI_type_node
,
14961 V4HI_type_node
, long_long_unsigned_type_node
,
14963 tree v2si_ftype_v2si_di
14964 = build_function_type_list (V2SI_type_node
,
14965 V2SI_type_node
, long_long_unsigned_type_node
,
14967 tree void_ftype_void
14968 = build_function_type (void_type_node
, void_list_node
);
14969 tree void_ftype_unsigned
14970 = build_function_type_list (void_type_node
, unsigned_type_node
, NULL_TREE
);
14971 tree void_ftype_unsigned_unsigned
14972 = build_function_type_list (void_type_node
, unsigned_type_node
,
14973 unsigned_type_node
, NULL_TREE
);
14974 tree void_ftype_pcvoid_unsigned_unsigned
14975 = build_function_type_list (void_type_node
, const_ptr_type_node
,
14976 unsigned_type_node
, unsigned_type_node
,
14978 tree unsigned_ftype_void
14979 = build_function_type (unsigned_type_node
, void_list_node
);
14980 tree v2si_ftype_v4sf
14981 = build_function_type_list (V2SI_type_node
, V4SF_type_node
, NULL_TREE
);
14982 /* Loads/stores. */
14983 tree void_ftype_v8qi_v8qi_pchar
14984 = build_function_type_list (void_type_node
,
14985 V8QI_type_node
, V8QI_type_node
,
14986 pchar_type_node
, NULL_TREE
);
14987 tree v4sf_ftype_pcfloat
14988 = build_function_type_list (V4SF_type_node
, pcfloat_type_node
, NULL_TREE
);
14989 /* @@@ the type is bogus */
14990 tree v4sf_ftype_v4sf_pv2si
14991 = build_function_type_list (V4SF_type_node
,
14992 V4SF_type_node
, pv2si_type_node
, NULL_TREE
);
14993 tree void_ftype_pv2si_v4sf
14994 = build_function_type_list (void_type_node
,
14995 pv2si_type_node
, V4SF_type_node
, NULL_TREE
);
14996 tree void_ftype_pfloat_v4sf
14997 = build_function_type_list (void_type_node
,
14998 pfloat_type_node
, V4SF_type_node
, NULL_TREE
);
14999 tree void_ftype_pdi_di
15000 = build_function_type_list (void_type_node
,
15001 pdi_type_node
, long_long_unsigned_type_node
,
15003 tree void_ftype_pv2di_v2di
15004 = build_function_type_list (void_type_node
,
15005 pv2di_type_node
, V2DI_type_node
, NULL_TREE
);
15006 /* Normal vector unops. */
15007 tree v4sf_ftype_v4sf
15008 = build_function_type_list (V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
15010 /* Normal vector binops. */
15011 tree v4sf_ftype_v4sf_v4sf
15012 = build_function_type_list (V4SF_type_node
,
15013 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
15014 tree v8qi_ftype_v8qi_v8qi
15015 = build_function_type_list (V8QI_type_node
,
15016 V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
15017 tree v4hi_ftype_v4hi_v4hi
15018 = build_function_type_list (V4HI_type_node
,
15019 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
15020 tree v2si_ftype_v2si_v2si
15021 = build_function_type_list (V2SI_type_node
,
15022 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
15023 tree di_ftype_di_di
15024 = build_function_type_list (long_long_unsigned_type_node
,
15025 long_long_unsigned_type_node
,
15026 long_long_unsigned_type_node
, NULL_TREE
);
15028 tree v2si_ftype_v2sf
15029 = build_function_type_list (V2SI_type_node
, V2SF_type_node
, NULL_TREE
);
15030 tree v2sf_ftype_v2si
15031 = build_function_type_list (V2SF_type_node
, V2SI_type_node
, NULL_TREE
);
15032 tree v2si_ftype_v2si
15033 = build_function_type_list (V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
15034 tree v2sf_ftype_v2sf
15035 = build_function_type_list (V2SF_type_node
, V2SF_type_node
, NULL_TREE
);
15036 tree v2sf_ftype_v2sf_v2sf
15037 = build_function_type_list (V2SF_type_node
,
15038 V2SF_type_node
, V2SF_type_node
, NULL_TREE
);
15039 tree v2si_ftype_v2sf_v2sf
15040 = build_function_type_list (V2SI_type_node
,
15041 V2SF_type_node
, V2SF_type_node
, NULL_TREE
);
15042 tree pint_type_node
= build_pointer_type (integer_type_node
);
15043 tree pdouble_type_node
= build_pointer_type (double_type_node
);
15044 tree pcdouble_type_node
= build_pointer_type (
15045 build_type_variant (double_type_node
, 1, 0));
15046 tree int_ftype_v2df_v2df
15047 = build_function_type_list (integer_type_node
,
15048 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
15050 tree void_ftype_pcvoid
15051 = build_function_type_list (void_type_node
, const_ptr_type_node
, NULL_TREE
);
15052 tree v4sf_ftype_v4si
15053 = build_function_type_list (V4SF_type_node
, V4SI_type_node
, NULL_TREE
);
15054 tree v4si_ftype_v4sf
15055 = build_function_type_list (V4SI_type_node
, V4SF_type_node
, NULL_TREE
);
15056 tree v2df_ftype_v4si
15057 = build_function_type_list (V2DF_type_node
, V4SI_type_node
, NULL_TREE
);
15058 tree v4si_ftype_v2df
15059 = build_function_type_list (V4SI_type_node
, V2DF_type_node
, NULL_TREE
);
15060 tree v2si_ftype_v2df
15061 = build_function_type_list (V2SI_type_node
, V2DF_type_node
, NULL_TREE
);
15062 tree v4sf_ftype_v2df
15063 = build_function_type_list (V4SF_type_node
, V2DF_type_node
, NULL_TREE
);
15064 tree v2df_ftype_v2si
15065 = build_function_type_list (V2DF_type_node
, V2SI_type_node
, NULL_TREE
);
15066 tree v2df_ftype_v4sf
15067 = build_function_type_list (V2DF_type_node
, V4SF_type_node
, NULL_TREE
);
15068 tree int_ftype_v2df
15069 = build_function_type_list (integer_type_node
, V2DF_type_node
, NULL_TREE
);
15070 tree int64_ftype_v2df
15071 = build_function_type_list (long_long_integer_type_node
,
15072 V2DF_type_node
, NULL_TREE
);
15073 tree v2df_ftype_v2df_int
15074 = build_function_type_list (V2DF_type_node
,
15075 V2DF_type_node
, integer_type_node
, NULL_TREE
);
15076 tree v2df_ftype_v2df_int64
15077 = build_function_type_list (V2DF_type_node
,
15078 V2DF_type_node
, long_long_integer_type_node
,
15080 tree v4sf_ftype_v4sf_v2df
15081 = build_function_type_list (V4SF_type_node
,
15082 V4SF_type_node
, V2DF_type_node
, NULL_TREE
);
15083 tree v2df_ftype_v2df_v4sf
15084 = build_function_type_list (V2DF_type_node
,
15085 V2DF_type_node
, V4SF_type_node
, NULL_TREE
);
15086 tree v2df_ftype_v2df_v2df_int
15087 = build_function_type_list (V2DF_type_node
,
15088 V2DF_type_node
, V2DF_type_node
,
15091 tree v2df_ftype_v2df_pcdouble
15092 = build_function_type_list (V2DF_type_node
,
15093 V2DF_type_node
, pcdouble_type_node
, NULL_TREE
);
15094 tree void_ftype_pdouble_v2df
15095 = build_function_type_list (void_type_node
,
15096 pdouble_type_node
, V2DF_type_node
, NULL_TREE
);
15097 tree void_ftype_pint_int
15098 = build_function_type_list (void_type_node
,
15099 pint_type_node
, integer_type_node
, NULL_TREE
);
15100 tree void_ftype_v16qi_v16qi_pchar
15101 = build_function_type_list (void_type_node
,
15102 V16QI_type_node
, V16QI_type_node
,
15103 pchar_type_node
, NULL_TREE
);
15104 tree v2df_ftype_pcdouble
15105 = build_function_type_list (V2DF_type_node
, pcdouble_type_node
, NULL_TREE
);
15106 tree v2df_ftype_v2df_v2df
15107 = build_function_type_list (V2DF_type_node
,
15108 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
15109 tree v16qi_ftype_v16qi_v16qi
15110 = build_function_type_list (V16QI_type_node
,
15111 V16QI_type_node
, V16QI_type_node
, NULL_TREE
);
15112 tree v8hi_ftype_v8hi_v8hi
15113 = build_function_type_list (V8HI_type_node
,
15114 V8HI_type_node
, V8HI_type_node
, NULL_TREE
);
15115 tree v4si_ftype_v4si_v4si
15116 = build_function_type_list (V4SI_type_node
,
15117 V4SI_type_node
, V4SI_type_node
, NULL_TREE
);
15118 tree v2di_ftype_v2di_v2di
15119 = build_function_type_list (V2DI_type_node
,
15120 V2DI_type_node
, V2DI_type_node
, NULL_TREE
);
15121 tree v2di_ftype_v2df_v2df
15122 = build_function_type_list (V2DI_type_node
,
15123 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
15124 tree v2df_ftype_v2df
15125 = build_function_type_list (V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
15126 tree v2di_ftype_v2di_int
15127 = build_function_type_list (V2DI_type_node
,
15128 V2DI_type_node
, integer_type_node
, NULL_TREE
);
15129 tree v4si_ftype_v4si_int
15130 = build_function_type_list (V4SI_type_node
,
15131 V4SI_type_node
, integer_type_node
, NULL_TREE
);
15132 tree v8hi_ftype_v8hi_int
15133 = build_function_type_list (V8HI_type_node
,
15134 V8HI_type_node
, integer_type_node
, NULL_TREE
);
15135 tree v8hi_ftype_v8hi_v2di
15136 = build_function_type_list (V8HI_type_node
,
15137 V8HI_type_node
, V2DI_type_node
, NULL_TREE
);
15138 tree v4si_ftype_v4si_v2di
15139 = build_function_type_list (V4SI_type_node
,
15140 V4SI_type_node
, V2DI_type_node
, NULL_TREE
);
15141 tree v4si_ftype_v8hi_v8hi
15142 = build_function_type_list (V4SI_type_node
,
15143 V8HI_type_node
, V8HI_type_node
, NULL_TREE
);
15144 tree di_ftype_v8qi_v8qi
15145 = build_function_type_list (long_long_unsigned_type_node
,
15146 V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
15147 tree di_ftype_v2si_v2si
15148 = build_function_type_list (long_long_unsigned_type_node
,
15149 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
15150 tree v2di_ftype_v16qi_v16qi
15151 = build_function_type_list (V2DI_type_node
,
15152 V16QI_type_node
, V16QI_type_node
, NULL_TREE
);
15153 tree v2di_ftype_v4si_v4si
15154 = build_function_type_list (V2DI_type_node
,
15155 V4SI_type_node
, V4SI_type_node
, NULL_TREE
);
15156 tree int_ftype_v16qi
15157 = build_function_type_list (integer_type_node
, V16QI_type_node
, NULL_TREE
);
15158 tree v16qi_ftype_pcchar
15159 = build_function_type_list (V16QI_type_node
, pcchar_type_node
, NULL_TREE
);
15160 tree void_ftype_pchar_v16qi
15161 = build_function_type_list (void_type_node
,
15162 pchar_type_node
, V16QI_type_node
, NULL_TREE
);
15165 tree float128_type
;
15168 /* The __float80 type. */
15169 if (TYPE_MODE (long_double_type_node
) == XFmode
)
15170 (*lang_hooks
.types
.register_builtin_type
) (long_double_type_node
,
15174 /* The __float80 type. */
15175 float80_type
= make_node (REAL_TYPE
);
15176 TYPE_PRECISION (float80_type
) = 80;
15177 layout_type (float80_type
);
15178 (*lang_hooks
.types
.register_builtin_type
) (float80_type
, "__float80");
15183 float128_type
= make_node (REAL_TYPE
);
15184 TYPE_PRECISION (float128_type
) = 128;
15185 layout_type (float128_type
);
15186 (*lang_hooks
.types
.register_builtin_type
) (float128_type
, "__float128");
15189 /* Add all builtins that are more or less simple operations on two
15191 for (i
= 0, d
= bdesc_2arg
; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
15193 /* Use one of the operands; the target can have a different mode for
15194 mask-generating compares. */
15195 enum machine_mode mode
;
15200 mode
= insn_data
[d
->icode
].operand
[1].mode
;
15205 type
= v16qi_ftype_v16qi_v16qi
;
15208 type
= v8hi_ftype_v8hi_v8hi
;
15211 type
= v4si_ftype_v4si_v4si
;
15214 type
= v2di_ftype_v2di_v2di
;
15217 type
= v2df_ftype_v2df_v2df
;
15220 type
= v4sf_ftype_v4sf_v4sf
;
15223 type
= v8qi_ftype_v8qi_v8qi
;
15226 type
= v4hi_ftype_v4hi_v4hi
;
15229 type
= v2si_ftype_v2si_v2si
;
15232 type
= di_ftype_di_di
;
15236 gcc_unreachable ();
15239 /* Override for comparisons. */
15240 if (d
->icode
== CODE_FOR_sse_maskcmpv4sf3
15241 || d
->icode
== CODE_FOR_sse_vmmaskcmpv4sf3
)
15242 type
= v4si_ftype_v4sf_v4sf
;
15244 if (d
->icode
== CODE_FOR_sse2_maskcmpv2df3
15245 || d
->icode
== CODE_FOR_sse2_vmmaskcmpv2df3
)
15246 type
= v2di_ftype_v2df_v2df
;
15248 def_builtin (d
->mask
, d
->name
, type
, d
->code
);
15251 /* Add the remaining MMX insns with somewhat more complicated types. */
15252 def_builtin (MASK_MMX
, "__builtin_ia32_emms", void_ftype_void
, IX86_BUILTIN_EMMS
);
15253 def_builtin (MASK_MMX
, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSLLW
);
15254 def_builtin (MASK_MMX
, "__builtin_ia32_pslld", v2si_ftype_v2si_di
, IX86_BUILTIN_PSLLD
);
15255 def_builtin (MASK_MMX
, "__builtin_ia32_psllq", di_ftype_di_di
, IX86_BUILTIN_PSLLQ
);
15257 def_builtin (MASK_MMX
, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSRLW
);
15258 def_builtin (MASK_MMX
, "__builtin_ia32_psrld", v2si_ftype_v2si_di
, IX86_BUILTIN_PSRLD
);
15259 def_builtin (MASK_MMX
, "__builtin_ia32_psrlq", di_ftype_di_di
, IX86_BUILTIN_PSRLQ
);
15261 def_builtin (MASK_MMX
, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSRAW
);
15262 def_builtin (MASK_MMX
, "__builtin_ia32_psrad", v2si_ftype_v2si_di
, IX86_BUILTIN_PSRAD
);
15264 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int
, IX86_BUILTIN_PSHUFW
);
15265 def_builtin (MASK_MMX
, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi
, IX86_BUILTIN_PMADDWD
);
15267 /* comi/ucomi insns. */
15268 for (i
= 0, d
= bdesc_comi
; i
< ARRAY_SIZE (bdesc_comi
); i
++, d
++)
15269 if (d
->mask
== MASK_SSE2
)
15270 def_builtin (d
->mask
, d
->name
, int_ftype_v2df_v2df
, d
->code
);
15272 def_builtin (d
->mask
, d
->name
, int_ftype_v4sf_v4sf
, d
->code
);
15274 def_builtin (MASK_MMX
, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi
, IX86_BUILTIN_PACKSSWB
);
15275 def_builtin (MASK_MMX
, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si
, IX86_BUILTIN_PACKSSDW
);
15276 def_builtin (MASK_MMX
, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi
, IX86_BUILTIN_PACKUSWB
);
15278 def_builtin (MASK_SSE
, "__builtin_ia32_ldmxcsr", void_ftype_unsigned
, IX86_BUILTIN_LDMXCSR
);
15279 def_builtin (MASK_SSE
, "__builtin_ia32_stmxcsr", unsigned_ftype_void
, IX86_BUILTIN_STMXCSR
);
15280 def_builtin (MASK_SSE
, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si
, IX86_BUILTIN_CVTPI2PS
);
15281 def_builtin (MASK_SSE
, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf
, IX86_BUILTIN_CVTPS2PI
);
15282 def_builtin (MASK_SSE
, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int
, IX86_BUILTIN_CVTSI2SS
);
15283 def_builtin (MASK_SSE
| MASK_64BIT
, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64
, IX86_BUILTIN_CVTSI642SS
);
15284 def_builtin (MASK_SSE
, "__builtin_ia32_cvtss2si", int_ftype_v4sf
, IX86_BUILTIN_CVTSS2SI
);
15285 def_builtin (MASK_SSE
| MASK_64BIT
, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf
, IX86_BUILTIN_CVTSS2SI64
);
15286 def_builtin (MASK_SSE
, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf
, IX86_BUILTIN_CVTTPS2PI
);
15287 def_builtin (MASK_SSE
, "__builtin_ia32_cvttss2si", int_ftype_v4sf
, IX86_BUILTIN_CVTTSS2SI
);
15288 def_builtin (MASK_SSE
| MASK_64BIT
, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf
, IX86_BUILTIN_CVTTSS2SI64
);
15290 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar
, IX86_BUILTIN_MASKMOVQ
);
15292 def_builtin (MASK_SSE
, "__builtin_ia32_loadups", v4sf_ftype_pcfloat
, IX86_BUILTIN_LOADUPS
);
15293 def_builtin (MASK_SSE
, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf
, IX86_BUILTIN_STOREUPS
);
15295 def_builtin (MASK_SSE
, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si
, IX86_BUILTIN_LOADHPS
);
15296 def_builtin (MASK_SSE
, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si
, IX86_BUILTIN_LOADLPS
);
15297 def_builtin (MASK_SSE
, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf
, IX86_BUILTIN_STOREHPS
);
15298 def_builtin (MASK_SSE
, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf
, IX86_BUILTIN_STORELPS
);
15300 def_builtin (MASK_SSE
, "__builtin_ia32_movmskps", int_ftype_v4sf
, IX86_BUILTIN_MOVMSKPS
);
15301 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_pmovmskb", int_ftype_v8qi
, IX86_BUILTIN_PMOVMSKB
);
15302 def_builtin (MASK_SSE
, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf
, IX86_BUILTIN_MOVNTPS
);
15303 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_movntq", void_ftype_pdi_di
, IX86_BUILTIN_MOVNTQ
);
15305 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_sfence", void_ftype_void
, IX86_BUILTIN_SFENCE
);
15307 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi
, IX86_BUILTIN_PSADBW
);
15309 def_builtin (MASK_SSE
, "__builtin_ia32_rcpps", v4sf_ftype_v4sf
, IX86_BUILTIN_RCPPS
);
15310 def_builtin (MASK_SSE
, "__builtin_ia32_rcpss", v4sf_ftype_v4sf
, IX86_BUILTIN_RCPSS
);
15311 def_builtin (MASK_SSE
, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf
, IX86_BUILTIN_RSQRTPS
);
15312 def_builtin (MASK_SSE
, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf
, IX86_BUILTIN_RSQRTSS
);
15313 def_builtin (MASK_SSE
, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf
, IX86_BUILTIN_SQRTPS
);
15314 def_builtin (MASK_SSE
, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf
, IX86_BUILTIN_SQRTSS
);
15316 def_builtin (MASK_SSE
, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int
, IX86_BUILTIN_SHUFPS
);
15318 /* Original 3DNow! */
15319 def_builtin (MASK_3DNOW
, "__builtin_ia32_femms", void_ftype_void
, IX86_BUILTIN_FEMMS
);
15320 def_builtin (MASK_3DNOW
, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi
, IX86_BUILTIN_PAVGUSB
);
15321 def_builtin (MASK_3DNOW
, "__builtin_ia32_pf2id", v2si_ftype_v2sf
, IX86_BUILTIN_PF2ID
);
15322 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFACC
);
15323 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFADD
);
15324 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPEQ
);
15325 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPGE
);
15326 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPGT
);
15327 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMAX
);
15328 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMIN
);
15329 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMUL
);
15330 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf
, IX86_BUILTIN_PFRCP
);
15331 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRCPIT1
);
15332 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRCPIT2
);
15333 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf
, IX86_BUILTIN_PFRSQRT
);
15334 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRSQIT1
);
15335 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFSUB
);
15336 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFSUBR
);
15337 def_builtin (MASK_3DNOW
, "__builtin_ia32_pi2fd", v2sf_ftype_v2si
, IX86_BUILTIN_PI2FD
);
15338 def_builtin (MASK_3DNOW
, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi
, IX86_BUILTIN_PMULHRW
);
15340 /* 3DNow! extension as used in the Athlon CPU. */
15341 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pf2iw", v2si_ftype_v2sf
, IX86_BUILTIN_PF2IW
);
15342 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFNACC
);
15343 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFPNACC
);
15344 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pi2fw", v2sf_ftype_v2si
, IX86_BUILTIN_PI2FW
);
15345 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf
, IX86_BUILTIN_PSWAPDSF
);
15346 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pswapdsi", v2si_ftype_v2si
, IX86_BUILTIN_PSWAPDSI
);
15349 def_builtin (MASK_SSE2
, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar
, IX86_BUILTIN_MASKMOVDQU
);
15351 def_builtin (MASK_SSE2
, "__builtin_ia32_loadupd", v2df_ftype_pcdouble
, IX86_BUILTIN_LOADUPD
);
15352 def_builtin (MASK_SSE2
, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df
, IX86_BUILTIN_STOREUPD
);
15354 def_builtin (MASK_SSE2
, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble
, IX86_BUILTIN_LOADHPD
);
15355 def_builtin (MASK_SSE2
, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble
, IX86_BUILTIN_LOADLPD
);
15357 def_builtin (MASK_SSE2
, "__builtin_ia32_movmskpd", int_ftype_v2df
, IX86_BUILTIN_MOVMSKPD
);
15358 def_builtin (MASK_SSE2
, "__builtin_ia32_pmovmskb128", int_ftype_v16qi
, IX86_BUILTIN_PMOVMSKB128
);
15359 def_builtin (MASK_SSE2
, "__builtin_ia32_movnti", void_ftype_pint_int
, IX86_BUILTIN_MOVNTI
);
15360 def_builtin (MASK_SSE2
, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df
, IX86_BUILTIN_MOVNTPD
);
15361 def_builtin (MASK_SSE2
, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di
, IX86_BUILTIN_MOVNTDQ
);
15363 def_builtin (MASK_SSE2
, "__builtin_ia32_pshufd", v4si_ftype_v4si_int
, IX86_BUILTIN_PSHUFD
);
15364 def_builtin (MASK_SSE2
, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSHUFLW
);
15365 def_builtin (MASK_SSE2
, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSHUFHW
);
15366 def_builtin (MASK_SSE2
, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi
, IX86_BUILTIN_PSADBW128
);
15368 def_builtin (MASK_SSE2
, "__builtin_ia32_sqrtpd", v2df_ftype_v2df
, IX86_BUILTIN_SQRTPD
);
15369 def_builtin (MASK_SSE2
, "__builtin_ia32_sqrtsd", v2df_ftype_v2df
, IX86_BUILTIN_SQRTSD
);
15371 def_builtin (MASK_SSE2
, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int
, IX86_BUILTIN_SHUFPD
);
15373 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si
, IX86_BUILTIN_CVTDQ2PD
);
15374 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si
, IX86_BUILTIN_CVTDQ2PS
);
15376 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df
, IX86_BUILTIN_CVTPD2DQ
);
15377 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df
, IX86_BUILTIN_CVTPD2PI
);
15378 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df
, IX86_BUILTIN_CVTPD2PS
);
15379 def_builtin (MASK_SSE2
, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df
, IX86_BUILTIN_CVTTPD2DQ
);
15380 def_builtin (MASK_SSE2
, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df
, IX86_BUILTIN_CVTTPD2PI
);
15382 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si
, IX86_BUILTIN_CVTPI2PD
);
15384 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtsd2si", int_ftype_v2df
, IX86_BUILTIN_CVTSD2SI
);
15385 def_builtin (MASK_SSE2
, "__builtin_ia32_cvttsd2si", int_ftype_v2df
, IX86_BUILTIN_CVTTSD2SI
);
15386 def_builtin (MASK_SSE2
| MASK_64BIT
, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df
, IX86_BUILTIN_CVTSD2SI64
);
15387 def_builtin (MASK_SSE2
| MASK_64BIT
, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df
, IX86_BUILTIN_CVTTSD2SI64
);
15389 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf
, IX86_BUILTIN_CVTPS2DQ
);
15390 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf
, IX86_BUILTIN_CVTPS2PD
);
15391 def_builtin (MASK_SSE2
, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf
, IX86_BUILTIN_CVTTPS2DQ
);
15393 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int
, IX86_BUILTIN_CVTSI2SD
);
15394 def_builtin (MASK_SSE2
| MASK_64BIT
, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64
, IX86_BUILTIN_CVTSI642SD
);
15395 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df
, IX86_BUILTIN_CVTSD2SS
);
15396 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf
, IX86_BUILTIN_CVTSS2SD
);
15398 def_builtin (MASK_SSE2
, "__builtin_ia32_clflush", void_ftype_pcvoid
, IX86_BUILTIN_CLFLUSH
);
15399 def_builtin (MASK_SSE2
, "__builtin_ia32_lfence", void_ftype_void
, IX86_BUILTIN_LFENCE
);
15400 def_builtin (MASK_SSE2
, "__builtin_ia32_mfence", void_ftype_void
, IX86_BUILTIN_MFENCE
);
15402 def_builtin (MASK_SSE2
, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar
, IX86_BUILTIN_LOADDQU
);
15403 def_builtin (MASK_SSE2
, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi
, IX86_BUILTIN_STOREDQU
);
15405 def_builtin (MASK_SSE2
, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si
, IX86_BUILTIN_PMULUDQ
);
15406 def_builtin (MASK_SSE2
, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si
, IX86_BUILTIN_PMULUDQ128
);
15408 def_builtin (MASK_SSE2
, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di
, IX86_BUILTIN_PSLLW128
);
15409 def_builtin (MASK_SSE2
, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di
, IX86_BUILTIN_PSLLD128
);
15410 def_builtin (MASK_SSE2
, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di
, IX86_BUILTIN_PSLLQ128
);
15412 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di
, IX86_BUILTIN_PSRLW128
);
15413 def_builtin (MASK_SSE2
, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di
, IX86_BUILTIN_PSRLD128
);
15414 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di
, IX86_BUILTIN_PSRLQ128
);
15416 def_builtin (MASK_SSE2
, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di
, IX86_BUILTIN_PSRAW128
);
15417 def_builtin (MASK_SSE2
, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di
, IX86_BUILTIN_PSRAD128
);
15419 def_builtin (MASK_SSE2
, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSLLDQI128
);
15420 def_builtin (MASK_SSE2
, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSLLWI128
);
15421 def_builtin (MASK_SSE2
, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSLLDI128
);
15422 def_builtin (MASK_SSE2
, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSLLQI128
);
15424 def_builtin (MASK_SSE2
, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSRLDQI128
);
15425 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSRLWI128
);
15426 def_builtin (MASK_SSE2
, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSRLDI128
);
15427 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSRLQI128
);
15429 def_builtin (MASK_SSE2
, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSRAWI128
);
15430 def_builtin (MASK_SSE2
, "__builtin_ia32_psradi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSRADI128
);
15432 def_builtin (MASK_SSE2
, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi
, IX86_BUILTIN_PMADDWD128
);
15434 /* Prescott New Instructions. */
15435 def_builtin (MASK_SSE3
, "__builtin_ia32_monitor",
15436 void_ftype_pcvoid_unsigned_unsigned
,
15437 IX86_BUILTIN_MONITOR
);
15438 def_builtin (MASK_SSE3
, "__builtin_ia32_mwait",
15439 void_ftype_unsigned_unsigned
,
15440 IX86_BUILTIN_MWAIT
);
15441 def_builtin (MASK_SSE3
, "__builtin_ia32_movshdup",
15443 IX86_BUILTIN_MOVSHDUP
);
15444 def_builtin (MASK_SSE3
, "__builtin_ia32_movsldup",
15446 IX86_BUILTIN_MOVSLDUP
);
15447 def_builtin (MASK_SSE3
, "__builtin_ia32_lddqu",
15448 v16qi_ftype_pcchar
, IX86_BUILTIN_LDDQU
);
15450 /* Access to the vec_init patterns. */
15451 ftype
= build_function_type_list (V2SI_type_node
, integer_type_node
,
15452 integer_type_node
, NULL_TREE
);
15453 def_builtin (MASK_MMX
, "__builtin_ia32_vec_init_v2si",
15454 ftype
, IX86_BUILTIN_VEC_INIT_V2SI
);
15456 ftype
= build_function_type_list (V4HI_type_node
, short_integer_type_node
,
15457 short_integer_type_node
,
15458 short_integer_type_node
,
15459 short_integer_type_node
, NULL_TREE
);
15460 def_builtin (MASK_MMX
, "__builtin_ia32_vec_init_v4hi",
15461 ftype
, IX86_BUILTIN_VEC_INIT_V4HI
);
15463 ftype
= build_function_type_list (V8QI_type_node
, char_type_node
,
15464 char_type_node
, char_type_node
,
15465 char_type_node
, char_type_node
,
15466 char_type_node
, char_type_node
,
15467 char_type_node
, NULL_TREE
);
15468 def_builtin (MASK_MMX
, "__builtin_ia32_vec_init_v8qi",
15469 ftype
, IX86_BUILTIN_VEC_INIT_V8QI
);
15471 /* Access to the vec_extract patterns. */
15472 ftype
= build_function_type_list (double_type_node
, V2DF_type_node
,
15473 integer_type_node
, NULL_TREE
);
15474 def_builtin (MASK_SSE
, "__builtin_ia32_vec_ext_v2df",
15475 ftype
, IX86_BUILTIN_VEC_EXT_V2DF
);
15477 ftype
= build_function_type_list (long_long_integer_type_node
,
15478 V2DI_type_node
, integer_type_node
,
15480 def_builtin (MASK_SSE
, "__builtin_ia32_vec_ext_v2di",
15481 ftype
, IX86_BUILTIN_VEC_EXT_V2DI
);
15483 ftype
= build_function_type_list (float_type_node
, V4SF_type_node
,
15484 integer_type_node
, NULL_TREE
);
15485 def_builtin (MASK_SSE
, "__builtin_ia32_vec_ext_v4sf",
15486 ftype
, IX86_BUILTIN_VEC_EXT_V4SF
);
15488 ftype
= build_function_type_list (intSI_type_node
, V4SI_type_node
,
15489 integer_type_node
, NULL_TREE
);
15490 def_builtin (MASK_SSE
, "__builtin_ia32_vec_ext_v4si",
15491 ftype
, IX86_BUILTIN_VEC_EXT_V4SI
);
15493 ftype
= build_function_type_list (intHI_type_node
, V8HI_type_node
,
15494 integer_type_node
, NULL_TREE
);
15495 def_builtin (MASK_SSE
, "__builtin_ia32_vec_ext_v8hi",
15496 ftype
, IX86_BUILTIN_VEC_EXT_V8HI
);
15498 ftype
= build_function_type_list (intHI_type_node
, V4HI_type_node
,
15499 integer_type_node
, NULL_TREE
);
15500 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_vec_ext_v4hi",
15501 ftype
, IX86_BUILTIN_VEC_EXT_V4HI
);
15503 ftype
= build_function_type_list (intSI_type_node
, V2SI_type_node
,
15504 integer_type_node
, NULL_TREE
);
15505 def_builtin (MASK_MMX
, "__builtin_ia32_vec_ext_v2si",
15506 ftype
, IX86_BUILTIN_VEC_EXT_V2SI
);
15508 /* Access to the vec_set patterns. */
15509 ftype
= build_function_type_list (V8HI_type_node
, V8HI_type_node
,
15511 integer_type_node
, NULL_TREE
);
15512 def_builtin (MASK_SSE
, "__builtin_ia32_vec_set_v8hi",
15513 ftype
, IX86_BUILTIN_VEC_SET_V8HI
);
15515 ftype
= build_function_type_list (V4HI_type_node
, V4HI_type_node
,
15517 integer_type_node
, NULL_TREE
);
15518 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_vec_set_v4hi",
15519 ftype
, IX86_BUILTIN_VEC_SET_V4HI
);
/* Errors in the source file can cause expand_expr to return const0_rtx
   where we expect a vector.  To avoid crashing, use one of the vector
   clear instructions.  */

static rtx
safe_vector_operand (rtx x, enum machine_mode mode)
{
  /* Substitute the canonical all-zero vector constant of MODE so that
     downstream expanders always see an operand of the expected vector
     mode instead of a scalar const0_rtx.  */
  if (x == const0_rtx)
    x = CONST0_RTX (mode);
  return x;
}
/* Subroutine of ix86_expand_builtin to take care of binop insns.
   ICODE is the insn pattern to emit, ARGLIST holds the two argument
   trees, and TARGET is a suggested destination (may be NULL or of the
   wrong mode).  Returns the rtx holding the result, or 0 if the
   pattern could not be generated.  */

static rtx
ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
{
  rtx pat, xops[3];
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  /* Modes the insn pattern wants for its result and two inputs.  */
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[icode].operand[2].mode;

  /* Guard against const0_rtx produced by erroneous source; see
     safe_vector_operand.  */
  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  if (optimize || !target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  /* An SImode shift count feeding a TImode operand (SSE2 shift insns):
     load the count into the low element of a V4SI register and
     reinterpret that register as TImode.  */
  if (GET_MODE (op1) == SImode && mode1 == TImode)
    {
      rtx x = gen_reg_rtx (V4SImode);
      emit_insn (gen_sse2_loadd (x, op1));
      op1 = gen_lowpart (TImode, x);
    }

  /* The insn must want input operands in the same modes as the
     result.  */
  gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
	      && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));

  if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  /* ??? Using ix86_fixup_binary_operands is problematic when
     we've got mismatched modes.  Fake it.  */

  xops[0] = target;
  xops[1] = op0;
  xops[2] = op1;

  if (tmode == mode0 && tmode == mode1)
    {
      /* All modes agree: let the common binary-operand fixup
	 canonicalize the operands.  */
      target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
      op0 = xops[1];
      op1 = xops[2];
    }
  else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
    {
      /* Mismatched modes: force everything into registers.  */
      op0 = force_reg (mode0, op0);
      op1 = force_reg (mode1, op1);
      target = gen_reg_rtx (tmode);
    }

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
/* Subroutine of ix86_expand_builtin to take care of stores.
   ICODE is the store pattern; ARGLIST holds (address, value).
   Always returns 0 since a store produces no usable value.  */

static rtx
ix86_expand_store_builtin (enum insn_code icode, tree arglist)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode mode0 = insn_data[icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[icode].operand[1].mode;

  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* ARG0 is the destination address; wrap it in a MEM of the mode the
     pattern stores.  */
  op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
  op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (icode) (op0, op1);
  if (pat)
    emit_insn (pat);
  return 0;
}
/* Subroutine of ix86_expand_builtin to take care of unop insns.
   ICODE is the insn pattern, ARGLIST holds the single argument, TARGET
   is a suggested destination.  If DO_LOAD is nonzero the argument is
   an address and the operand is a memory reference at that address;
   otherwise the argument is the operand value itself.  */

static rtx
ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
			  rtx target, int do_load)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  rtx op0 = expand_normal (arg0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (optimize || !target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);
  if (do_load)
    /* OP0 is an address; dereference it.  */
    op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
  else
    {
      if (VECTOR_MODE_P (mode0))
	op0 = safe_vector_operand (op0, mode0);

      if ((optimize && !register_operand (op0, mode0))
	  || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
    }

  pat = GEN_FCN (icode) (target, op0);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
/* Subroutine of ix86_expand_builtin to take care of three special unop insns:
   sqrtss, rsqrtss, rcpss.  These patterns take two inputs (the second
   supplies the untouched upper elements of the result), so the single
   builtin argument is used for both operands.  */

static rtx
ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  rtx op1, op0 = expand_normal (arg0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (optimize || !target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);

  if ((optimize && !register_operand (op0, mode0))
      || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);

  /* Reuse OP0 as the second input; copy only if the predicate for
     operand 2 rejects it.  */
  op1 = op0;
  if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
    op1 = copy_to_mode_reg (mode0, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
/* Subroutine of ix86_expand_builtin to take care of comparison insns.
   D describes the builtin (icode, comparison code, flags); ARGLIST
   holds the two operands; TARGET is a suggested destination.  Emits a
   mask-generating SSE compare and returns the result rtx.  */

static rtx
ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
			 rtx target)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2;
  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
    {
      /* Copy OP1 first so the swap is safe even when OP0 and OP1 are
	 the same rtx.  */
      rtx tmp = gen_reg_rtx (mode1);
      emit_move_insn (tmp, op1);
      op1 = op0;
      op0 = tmp;
    }

  if (optimize || !target
      || GET_MODE (target) != tmode
      || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if ((optimize && !register_operand (op0, mode0))
      || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  /* Operand 3 of the pattern is the comparison rtx itself.  */
  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
/* Subroutine of ix86_expand_builtin to take care of comi insns.
   These compare two scalars, set EFLAGS, and the result is
   materialized as a 0/1 integer from the flags.  Returns an SImode
   rtx holding the boolean result.  */

static rtx
ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
		      rtx target)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2;
  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
    {
      rtx tmp = op1;
      op1 = op0;
      op0 = tmp;
    }

  /* Zero an SImode pseudo, then set only its low byte from the flags
     via a QImode SUBREG, so the full word holds a clean 0/1.  */
  target = gen_reg_rtx (SImode);
  emit_move_insn (target, const0_rtx);
  target = gen_rtx_SUBREG (QImode, target, 0);

  if ((optimize && !register_operand (op0, mode0))
      || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  /* Set the low byte of TARGET from the comparison of the flags
     produced by the comi insn.  */
  emit_insn (gen_rtx_SET (VOIDmode,
			  gen_rtx_STRICT_LOW_PART (VOIDmode, target),
			  gen_rtx_fmt_ee (comparison, QImode,
					  SET_DEST (pat),
					  const0_rtx)));

  return SUBREG_REG (target);
}
/* Return the integer constant in ARG.  Constrain it to be in the range
   of the subparts of VEC_TYPE; issue an error if not.  */

static unsigned HOST_WIDE_INT
get_element_number (tree vec_type, tree arg)
{
  unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;

  /* ARG must be a non-negative integer constant no larger than the
     highest valid element index.  */
  if (!host_integerp (arg, 1)
      || (elt = tree_low_cst (arg, 1), elt > max))
    {
      error ("selector must be an integer constant in the range 0..%wi", max);
      return 0;
    }

  return elt;
}
/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
   ix86_expand_vector_init.  We DO have language-level syntax for this, in
   the form of (type){ init-list }.  Except that since we can't place emms
   instructions from inside the compiler, we can't allow the use of MMX
   registers unless the user explicitly asks for it.  So we do *not* define
   vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md.  Instead
   we have builtins invoked by mmintrin.h that gives us license to emit
   these sorts of instructions.  */

static rtx
ix86_expand_vec_init_builtin (tree type, tree arglist, rtx target)
{
  enum machine_mode tmode = TYPE_MODE (type);
  enum machine_mode inner_mode = GET_MODE_INNER (tmode);
  int i, n_elt = GET_MODE_NUNITS (tmode);
  rtvec v = rtvec_alloc (n_elt);

  gcc_assert (VECTOR_MODE_P (tmode));

  /* Expand each argument into an element of the PARALLEL, converting
     it to the element mode of the vector.  */
  for (i = 0; i < n_elt; ++i, arglist = TREE_CHAIN (arglist))
    {
      rtx x = expand_normal (TREE_VALUE (arglist));
      RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
    }

  /* The builtin must receive exactly N_ELT arguments.  */
  gcc_assert (arglist == NULL);

  if (!target || !register_operand (target, tmode))
    target = gen_reg_rtx (tmode);

  ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
  return target;
}
/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
   ix86_expand_vector_extract.  They would be redundant (for non-MMX) if we
   had a language-level syntax for referencing vector elements.  */

static rtx
ix86_expand_vec_ext_builtin (tree arglist, rtx target)
{
  enum machine_mode tmode, mode0;
  tree arg0, arg1;
  int elt;
  rtx op0;

  arg0 = TREE_VALUE (arglist);
  arg1 = TREE_VALUE (TREE_CHAIN (arglist));

  op0 = expand_normal (arg0);
  /* ARG1 is the constant element selector; errors if out of range.  */
  elt = get_element_number (TREE_TYPE (arg0), arg1);

  /* TMODE is the element mode, MODE0 the vector mode of ARG0.  */
  tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
  mode0 = TYPE_MODE (TREE_TYPE (arg0));
  gcc_assert (VECTOR_MODE_P (mode0));

  op0 = force_reg (mode0, op0);

  if (optimize || !target || !register_operand (target, tmode))
    target = gen_reg_rtx (tmode);

  ix86_expand_vector_extract (true, target, op0, elt);

  return target;
}
/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
   ix86_expand_vector_set.  They would be redundant (for non-MMX) if we had
   a language-level syntax for referencing vector elements.  */

static rtx
ix86_expand_vec_set_builtin (tree arglist)
{
  enum machine_mode tmode, mode1;
  tree arg0, arg1, arg2;
  int elt;
  rtx op0, op1;

  arg0 = TREE_VALUE (arglist);
  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));

  /* TMODE is the vector mode of ARG0; MODE1 its element mode.  */
  tmode = TYPE_MODE (TREE_TYPE (arg0));
  mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
  gcc_assert (VECTOR_MODE_P (tmode));

  op0 = expand_expr (arg0, NULL_RTX, tmode, 0);
  op1 = expand_expr (arg1, NULL_RTX, mode1, 0);
  /* ARG2 is the constant element selector; errors if out of range.  */
  elt = get_element_number (TREE_TYPE (arg0), arg2);

  if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
    op1 = convert_modes (mode1, GET_MODE (op1), op1, true);

  op0 = force_reg (tmode, op0);
  op1 = force_reg (mode1, op1);

  /* OP0 is modified in place and returned as the result vector.  */
  ix86_expand_vector_set (true, op0, op1, elt);

  return op0;
}
15922 /* Expand an expression EXP that calls a built-in function,
15923 with result going to TARGET if that's convenient
15924 (and in mode MODE if that's convenient).
15925 SUBTARGET may be used as the target for computing one of EXP's operands.
15926 IGNORE is nonzero if the value is to be ignored. */
15929 ix86_expand_builtin (tree exp
, rtx target
, rtx subtarget ATTRIBUTE_UNUSED
,
15930 enum machine_mode mode ATTRIBUTE_UNUSED
,
15931 int ignore ATTRIBUTE_UNUSED
)
15933 const struct builtin_description
*d
;
15935 enum insn_code icode
;
15936 tree fndecl
= TREE_OPERAND (TREE_OPERAND (exp
, 0), 0);
15937 tree arglist
= TREE_OPERAND (exp
, 1);
15938 tree arg0
, arg1
, arg2
;
15939 rtx op0
, op1
, op2
, pat
;
15940 enum machine_mode tmode
, mode0
, mode1
, mode2
;
15941 unsigned int fcode
= DECL_FUNCTION_CODE (fndecl
);
15945 case IX86_BUILTIN_EMMS
:
15946 emit_insn (gen_mmx_emms ());
15949 case IX86_BUILTIN_SFENCE
:
15950 emit_insn (gen_sse_sfence ());
15953 case IX86_BUILTIN_MASKMOVQ
:
15954 case IX86_BUILTIN_MASKMOVDQU
:
15955 icode
= (fcode
== IX86_BUILTIN_MASKMOVQ
15956 ? CODE_FOR_mmx_maskmovq
15957 : CODE_FOR_sse2_maskmovdqu
);
15958 /* Note the arg order is different from the operand order. */
15959 arg1
= TREE_VALUE (arglist
);
15960 arg2
= TREE_VALUE (TREE_CHAIN (arglist
));
15961 arg0
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
15962 op0
= expand_normal (arg0
);
15963 op1
= expand_normal (arg1
);
15964 op2
= expand_normal (arg2
);
15965 mode0
= insn_data
[icode
].operand
[0].mode
;
15966 mode1
= insn_data
[icode
].operand
[1].mode
;
15967 mode2
= insn_data
[icode
].operand
[2].mode
;
15969 op0
= force_reg (Pmode
, op0
);
15970 op0
= gen_rtx_MEM (mode1
, op0
);
15972 if (! (*insn_data
[icode
].operand
[0].predicate
) (op0
, mode0
))
15973 op0
= copy_to_mode_reg (mode0
, op0
);
15974 if (! (*insn_data
[icode
].operand
[1].predicate
) (op1
, mode1
))
15975 op1
= copy_to_mode_reg (mode1
, op1
);
15976 if (! (*insn_data
[icode
].operand
[2].predicate
) (op2
, mode2
))
15977 op2
= copy_to_mode_reg (mode2
, op2
);
15978 pat
= GEN_FCN (icode
) (op0
, op1
, op2
);
15984 case IX86_BUILTIN_SQRTSS
:
15985 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2
, arglist
, target
);
15986 case IX86_BUILTIN_RSQRTSS
:
15987 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2
, arglist
, target
);
15988 case IX86_BUILTIN_RCPSS
:
15989 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2
, arglist
, target
);
15991 case IX86_BUILTIN_LOADUPS
:
15992 return ix86_expand_unop_builtin (CODE_FOR_sse_movups
, arglist
, target
, 1);
15994 case IX86_BUILTIN_STOREUPS
:
15995 return ix86_expand_store_builtin (CODE_FOR_sse_movups
, arglist
);
15997 case IX86_BUILTIN_LOADHPS
:
15998 case IX86_BUILTIN_LOADLPS
:
15999 case IX86_BUILTIN_LOADHPD
:
16000 case IX86_BUILTIN_LOADLPD
:
16001 icode
= (fcode
== IX86_BUILTIN_LOADHPS
? CODE_FOR_sse_loadhps
16002 : fcode
== IX86_BUILTIN_LOADLPS
? CODE_FOR_sse_loadlps
16003 : fcode
== IX86_BUILTIN_LOADHPD
? CODE_FOR_sse2_loadhpd
16004 : CODE_FOR_sse2_loadlpd
);
16005 arg0
= TREE_VALUE (arglist
);
16006 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
16007 op0
= expand_normal (arg0
);
16008 op1
= expand_normal (arg1
);
16009 tmode
= insn_data
[icode
].operand
[0].mode
;
16010 mode0
= insn_data
[icode
].operand
[1].mode
;
16011 mode1
= insn_data
[icode
].operand
[2].mode
;
16013 op0
= force_reg (mode0
, op0
);
16014 op1
= gen_rtx_MEM (mode1
, copy_to_mode_reg (Pmode
, op1
));
16015 if (optimize
|| target
== 0
16016 || GET_MODE (target
) != tmode
16017 || !register_operand (target
, tmode
))
16018 target
= gen_reg_rtx (tmode
);
16019 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
16025 case IX86_BUILTIN_STOREHPS
:
16026 case IX86_BUILTIN_STORELPS
:
16027 icode
= (fcode
== IX86_BUILTIN_STOREHPS
? CODE_FOR_sse_storehps
16028 : CODE_FOR_sse_storelps
);
16029 arg0
= TREE_VALUE (arglist
);
16030 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
16031 op0
= expand_normal (arg0
);
16032 op1
= expand_normal (arg1
);
16033 mode0
= insn_data
[icode
].operand
[0].mode
;
16034 mode1
= insn_data
[icode
].operand
[1].mode
;
16036 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
16037 op1
= force_reg (mode1
, op1
);
16039 pat
= GEN_FCN (icode
) (op0
, op1
);
16045 case IX86_BUILTIN_MOVNTPS
:
16046 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf
, arglist
);
16047 case IX86_BUILTIN_MOVNTQ
:
16048 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi
, arglist
);
16050 case IX86_BUILTIN_LDMXCSR
:
16051 op0
= expand_normal (TREE_VALUE (arglist
));
16052 target
= assign_386_stack_local (SImode
, SLOT_TEMP
);
16053 emit_move_insn (target
, op0
);
16054 emit_insn (gen_sse_ldmxcsr (target
));
16057 case IX86_BUILTIN_STMXCSR
:
16058 target
= assign_386_stack_local (SImode
, SLOT_TEMP
);
16059 emit_insn (gen_sse_stmxcsr (target
));
16060 return copy_to_mode_reg (SImode
, target
);
16062 case IX86_BUILTIN_SHUFPS
:
16063 case IX86_BUILTIN_SHUFPD
:
16064 icode
= (fcode
== IX86_BUILTIN_SHUFPS
16065 ? CODE_FOR_sse_shufps
16066 : CODE_FOR_sse2_shufpd
);
16067 arg0
= TREE_VALUE (arglist
);
16068 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
16069 arg2
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
16070 op0
= expand_normal (arg0
);
16071 op1
= expand_normal (arg1
);
16072 op2
= expand_normal (arg2
);
16073 tmode
= insn_data
[icode
].operand
[0].mode
;
16074 mode0
= insn_data
[icode
].operand
[1].mode
;
16075 mode1
= insn_data
[icode
].operand
[2].mode
;
16076 mode2
= insn_data
[icode
].operand
[3].mode
;
16078 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
16079 op0
= copy_to_mode_reg (mode0
, op0
);
16080 if ((optimize
&& !register_operand (op1
, mode1
))
16081 || !(*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
16082 op1
= copy_to_mode_reg (mode1
, op1
);
16083 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
16085 /* @@@ better error message */
16086 error ("mask must be an immediate");
16087 return gen_reg_rtx (tmode
);
16089 if (optimize
|| target
== 0
16090 || GET_MODE (target
) != tmode
16091 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
16092 target
= gen_reg_rtx (tmode
);
16093 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
16099 case IX86_BUILTIN_PSHUFW
:
16100 case IX86_BUILTIN_PSHUFD
:
16101 case IX86_BUILTIN_PSHUFHW
:
16102 case IX86_BUILTIN_PSHUFLW
:
16103 icode
= ( fcode
== IX86_BUILTIN_PSHUFHW
? CODE_FOR_sse2_pshufhw
16104 : fcode
== IX86_BUILTIN_PSHUFLW
? CODE_FOR_sse2_pshuflw
16105 : fcode
== IX86_BUILTIN_PSHUFD
? CODE_FOR_sse2_pshufd
16106 : CODE_FOR_mmx_pshufw
);
16107 arg0
= TREE_VALUE (arglist
);
16108 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
16109 op0
= expand_normal (arg0
);
16110 op1
= expand_normal (arg1
);
16111 tmode
= insn_data
[icode
].operand
[0].mode
;
16112 mode1
= insn_data
[icode
].operand
[1].mode
;
16113 mode2
= insn_data
[icode
].operand
[2].mode
;
16115 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
16116 op0
= copy_to_mode_reg (mode1
, op0
);
16117 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
16119 /* @@@ better error message */
16120 error ("mask must be an immediate");
16124 || GET_MODE (target
) != tmode
16125 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
16126 target
= gen_reg_rtx (tmode
);
16127 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
16133 case IX86_BUILTIN_PSLLDQI128
:
16134 case IX86_BUILTIN_PSRLDQI128
:
16135 icode
= ( fcode
== IX86_BUILTIN_PSLLDQI128
? CODE_FOR_sse2_ashlti3
16136 : CODE_FOR_sse2_lshrti3
);
16137 arg0
= TREE_VALUE (arglist
);
16138 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
16139 op0
= expand_normal (arg0
);
16140 op1
= expand_normal (arg1
);
16141 tmode
= insn_data
[icode
].operand
[0].mode
;
16142 mode1
= insn_data
[icode
].operand
[1].mode
;
16143 mode2
= insn_data
[icode
].operand
[2].mode
;
16145 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
16147 op0
= copy_to_reg (op0
);
16148 op0
= simplify_gen_subreg (mode1
, op0
, GET_MODE (op0
), 0);
16150 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
16152 error ("shift must be an immediate");
16155 target
= gen_reg_rtx (V2DImode
);
16156 pat
= GEN_FCN (icode
) (simplify_gen_subreg (tmode
, target
, V2DImode
, 0), op0
, op1
);
16162 case IX86_BUILTIN_FEMMS
:
16163 emit_insn (gen_mmx_femms ());
16166 case IX86_BUILTIN_PAVGUSB
:
16167 return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3
, arglist
, target
);
16169 case IX86_BUILTIN_PF2ID
:
16170 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id
, arglist
, target
, 0);
16172 case IX86_BUILTIN_PFACC
:
16173 return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3
, arglist
, target
);
16175 case IX86_BUILTIN_PFADD
:
16176 return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3
, arglist
, target
);
16178 case IX86_BUILTIN_PFCMPEQ
:
16179 return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3
, arglist
, target
);
16181 case IX86_BUILTIN_PFCMPGE
:
16182 return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3
, arglist
, target
);
16184 case IX86_BUILTIN_PFCMPGT
:
16185 return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3
, arglist
, target
);
16187 case IX86_BUILTIN_PFMAX
:
16188 return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3
, arglist
, target
);
16190 case IX86_BUILTIN_PFMIN
:
16191 return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3
, arglist
, target
);
16193 case IX86_BUILTIN_PFMUL
:
16194 return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3
, arglist
, target
);
16196 case IX86_BUILTIN_PFRCP
:
16197 return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2
, arglist
, target
, 0);
16199 case IX86_BUILTIN_PFRCPIT1
:
16200 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3
, arglist
, target
);
16202 case IX86_BUILTIN_PFRCPIT2
:
16203 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3
, arglist
, target
);
16205 case IX86_BUILTIN_PFRSQIT1
:
16206 return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3
, arglist
, target
);
16208 case IX86_BUILTIN_PFRSQRT
:
16209 return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2
, arglist
, target
, 0);
16211 case IX86_BUILTIN_PFSUB
:
16212 return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3
, arglist
, target
);
16214 case IX86_BUILTIN_PFSUBR
:
16215 return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3
, arglist
, target
);
16217 case IX86_BUILTIN_PI2FD
:
16218 return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2
, arglist
, target
, 0);
16220 case IX86_BUILTIN_PMULHRW
:
16221 return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3
, arglist
, target
);
16223 case IX86_BUILTIN_PF2IW
:
16224 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw
, arglist
, target
, 0);
16226 case IX86_BUILTIN_PFNACC
:
16227 return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3
, arglist
, target
);
16229 case IX86_BUILTIN_PFPNACC
:
16230 return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3
, arglist
, target
);
16232 case IX86_BUILTIN_PI2FW
:
16233 return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw
, arglist
, target
, 0);
16235 case IX86_BUILTIN_PSWAPDSI
:
16236 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2
, arglist
, target
, 0);
16238 case IX86_BUILTIN_PSWAPDSF
:
16239 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2
, arglist
, target
, 0);
16241 case IX86_BUILTIN_SQRTSD
:
16242 return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2
, arglist
, target
);
16243 case IX86_BUILTIN_LOADUPD
:
16244 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd
, arglist
, target
, 1);
16245 case IX86_BUILTIN_STOREUPD
:
16246 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd
, arglist
);
16248 case IX86_BUILTIN_MFENCE
:
16249 emit_insn (gen_sse2_mfence ());
16251 case IX86_BUILTIN_LFENCE
:
16252 emit_insn (gen_sse2_lfence ());
16255 case IX86_BUILTIN_CLFLUSH
:
16256 arg0
= TREE_VALUE (arglist
);
16257 op0
= expand_normal (arg0
);
16258 icode
= CODE_FOR_sse2_clflush
;
16259 if (! (*insn_data
[icode
].operand
[0].predicate
) (op0
, Pmode
))
16260 op0
= copy_to_mode_reg (Pmode
, op0
);
16262 emit_insn (gen_sse2_clflush (op0
));
16265 case IX86_BUILTIN_MOVNTPD
:
16266 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df
, arglist
);
16267 case IX86_BUILTIN_MOVNTDQ
:
16268 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di
, arglist
);
16269 case IX86_BUILTIN_MOVNTI
:
16270 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi
, arglist
);
16272 case IX86_BUILTIN_LOADDQU
:
16273 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu
, arglist
, target
, 1);
16274 case IX86_BUILTIN_STOREDQU
:
16275 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu
, arglist
);
16277 case IX86_BUILTIN_MONITOR
:
16278 arg0
= TREE_VALUE (arglist
);
16279 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
16280 arg2
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
16281 op0
= expand_normal (arg0
);
16282 op1
= expand_normal (arg1
);
16283 op2
= expand_normal (arg2
);
16285 op0
= copy_to_mode_reg (Pmode
, op0
);
16287 op1
= copy_to_mode_reg (SImode
, op1
);
16289 op2
= copy_to_mode_reg (SImode
, op2
);
16291 emit_insn (gen_sse3_monitor (op0
, op1
, op2
));
16293 emit_insn (gen_sse3_monitor64 (op0
, op1
, op2
));
16296 case IX86_BUILTIN_MWAIT
:
16297 arg0
= TREE_VALUE (arglist
);
16298 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
16299 op0
= expand_normal (arg0
);
16300 op1
= expand_normal (arg1
);
16302 op0
= copy_to_mode_reg (SImode
, op0
);
16304 op1
= copy_to_mode_reg (SImode
, op1
);
16305 emit_insn (gen_sse3_mwait (op0
, op1
));
16308 case IX86_BUILTIN_LDDQU
:
16309 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu
, arglist
,
16312 case IX86_BUILTIN_VEC_INIT_V2SI
:
16313 case IX86_BUILTIN_VEC_INIT_V4HI
:
16314 case IX86_BUILTIN_VEC_INIT_V8QI
:
16315 return ix86_expand_vec_init_builtin (TREE_TYPE (exp
), arglist
, target
);
16317 case IX86_BUILTIN_VEC_EXT_V2DF
:
16318 case IX86_BUILTIN_VEC_EXT_V2DI
:
16319 case IX86_BUILTIN_VEC_EXT_V4SF
:
16320 case IX86_BUILTIN_VEC_EXT_V4SI
:
16321 case IX86_BUILTIN_VEC_EXT_V8HI
:
16322 case IX86_BUILTIN_VEC_EXT_V2SI
:
16323 case IX86_BUILTIN_VEC_EXT_V4HI
:
16324 return ix86_expand_vec_ext_builtin (arglist
, target
);
16326 case IX86_BUILTIN_VEC_SET_V8HI
:
16327 case IX86_BUILTIN_VEC_SET_V4HI
:
16328 return ix86_expand_vec_set_builtin (arglist
);
16334 for (i
= 0, d
= bdesc_2arg
; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
16335 if (d
->code
== fcode
)
16337 /* Compares are treated specially. */
16338 if (d
->icode
== CODE_FOR_sse_maskcmpv4sf3
16339 || d
->icode
== CODE_FOR_sse_vmmaskcmpv4sf3
16340 || d
->icode
== CODE_FOR_sse2_maskcmpv2df3
16341 || d
->icode
== CODE_FOR_sse2_vmmaskcmpv2df3
)
16342 return ix86_expand_sse_compare (d
, arglist
, target
);
16344 return ix86_expand_binop_builtin (d
->icode
, arglist
, target
);
16347 for (i
= 0, d
= bdesc_1arg
; i
< ARRAY_SIZE (bdesc_1arg
); i
++, d
++)
16348 if (d
->code
== fcode
)
16349 return ix86_expand_unop_builtin (d
->icode
, arglist
, target
, 0);
16351 for (i
= 0, d
= bdesc_comi
; i
< ARRAY_SIZE (bdesc_comi
); i
++, d
++)
16352 if (d
->code
== fcode
)
16353 return ix86_expand_sse_comi (d
, arglist
, target
);
16355 gcc_unreachable ();
16358 /* Store OPERAND to the memory after reload is completed. This means
16359 that we can't easily use assign_stack_local. */
16361 ix86_force_to_memory (enum machine_mode mode
, rtx operand
)
16365 gcc_assert (reload_completed
);
16366 if (TARGET_RED_ZONE
)
16368 result
= gen_rtx_MEM (mode
,
16369 gen_rtx_PLUS (Pmode
,
16371 GEN_INT (-RED_ZONE_SIZE
)));
16372 emit_move_insn (result
, operand
);
16374 else if (!TARGET_RED_ZONE
&& TARGET_64BIT
)
16380 operand
= gen_lowpart (DImode
, operand
);
16384 gen_rtx_SET (VOIDmode
,
16385 gen_rtx_MEM (DImode
,
16386 gen_rtx_PRE_DEC (DImode
,
16387 stack_pointer_rtx
)),
16391 gcc_unreachable ();
16393 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
16402 split_di (&operand
, 1, operands
, operands
+ 1);
16404 gen_rtx_SET (VOIDmode
,
16405 gen_rtx_MEM (SImode
,
16406 gen_rtx_PRE_DEC (Pmode
,
16407 stack_pointer_rtx
)),
16410 gen_rtx_SET (VOIDmode
,
16411 gen_rtx_MEM (SImode
,
16412 gen_rtx_PRE_DEC (Pmode
,
16413 stack_pointer_rtx
)),
16418 /* Store HImodes as SImodes. */
16419 operand
= gen_lowpart (SImode
, operand
);
16423 gen_rtx_SET (VOIDmode
,
16424 gen_rtx_MEM (GET_MODE (operand
),
16425 gen_rtx_PRE_DEC (SImode
,
16426 stack_pointer_rtx
)),
16430 gcc_unreachable ();
16432 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
16437 /* Free operand from the memory. */
16439 ix86_free_from_memory (enum machine_mode mode
)
16441 if (!TARGET_RED_ZONE
)
16445 if (mode
== DImode
|| TARGET_64BIT
)
16449 /* Use LEA to deallocate stack space. In peephole2 it will be converted
16450 to pop or add instruction if registers are available. */
16451 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
16452 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
16457 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
16458 QImode must go into class Q_REGS.
16459 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
16460 movdf to do mem-to-mem moves through integer regs. */
16462 ix86_preferred_reload_class (rtx x
, enum reg_class
class)
16464 enum machine_mode mode
= GET_MODE (x
);
16466 /* We're only allowed to return a subclass of CLASS. Many of the
16467 following checks fail for NO_REGS, so eliminate that early. */
16468 if (class == NO_REGS
)
16471 /* All classes can load zeros. */
16472 if (x
== CONST0_RTX (mode
))
16475 /* Force constants into memory if we are loading a (nonzero) constant into
16476 an MMX or SSE register. This is because there are no MMX/SSE instructions
16477 to load from a constant. */
16479 && (MAYBE_MMX_CLASS_P (class) || MAYBE_SSE_CLASS_P (class)))
16482 /* Prefer SSE regs only, if we can use them for math. */
16483 if (TARGET_SSE_MATH
&& !TARGET_MIX_SSE_I387
&& SSE_FLOAT_MODE_P (mode
))
16484 return SSE_CLASS_P (class) ? class : NO_REGS
;
16486 /* Floating-point constants need more complex checks. */
16487 if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) != VOIDmode
)
16489 /* General regs can load everything. */
16490 if (reg_class_subset_p (class, GENERAL_REGS
))
16493 /* Floats can load 0 and 1 plus some others. Note that we eliminated
16494 zero above. We only want to wind up preferring 80387 registers if
16495 we plan on doing computation with them. */
16497 && standard_80387_constant_p (x
))
16499 /* Limit class to non-sse. */
16500 if (class == FLOAT_SSE_REGS
)
16502 if (class == FP_TOP_SSE_REGS
)
16504 if (class == FP_SECOND_SSE_REGS
)
16505 return FP_SECOND_REG
;
16506 if (class == FLOAT_INT_REGS
|| class == FLOAT_REGS
)
16513 /* Generally when we see PLUS here, it's the function invariant
16514 (plus soft-fp const_int). Which can only be computed into general
16516 if (GET_CODE (x
) == PLUS
)
16517 return reg_class_subset_p (class, GENERAL_REGS
) ? class : NO_REGS
;
16519 /* QImode constants are easy to load, but non-constant QImode data
16520 must go into Q_REGS. */
16521 if (GET_MODE (x
) == QImode
&& !CONSTANT_P (x
))
16523 if (reg_class_subset_p (class, Q_REGS
))
16525 if (reg_class_subset_p (Q_REGS
, class))
16533 /* Discourage putting floating-point values in SSE registers unless
16534 SSE math is being used, and likewise for the 387 registers. */
16536 ix86_preferred_output_reload_class (rtx x
, enum reg_class
class)
16538 enum machine_mode mode
= GET_MODE (x
);
16540 /* Restrict the output reload class to the register bank that we are doing
16541 math on. If we would like not to return a subset of CLASS, reject this
16542 alternative: if reload cannot do this, it will still use its choice. */
16543 mode
= GET_MODE (x
);
16544 if (TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (mode
))
16545 return MAYBE_SSE_CLASS_P (class) ? SSE_REGS
: NO_REGS
;
16547 if (TARGET_80387
&& SCALAR_FLOAT_MODE_P (mode
))
16549 if (class == FP_TOP_SSE_REGS
)
16551 else if (class == FP_SECOND_SSE_REGS
)
16552 return FP_SECOND_REG
;
16554 return FLOAT_CLASS_P (class) ? class : NO_REGS
;
16560 /* If we are copying between general and FP registers, we need a memory
16561 location. The same is true for SSE and MMX registers.
16563 The macro can't work reliably when one of the CLASSES is class containing
16564 registers from multiple units (SSE, MMX, integer). We avoid this by never
16565 combining those units in single alternative in the machine description.
16566 Ensure that this constraint holds to avoid unexpected surprises.
16568 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
16569 enforce these sanity checks. */
16572 ix86_secondary_memory_needed (enum reg_class class1
, enum reg_class class2
,
16573 enum machine_mode mode
, int strict
)
16575 if (MAYBE_FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class1
)
16576 || MAYBE_FLOAT_CLASS_P (class2
) != FLOAT_CLASS_P (class2
)
16577 || MAYBE_SSE_CLASS_P (class1
) != SSE_CLASS_P (class1
)
16578 || MAYBE_SSE_CLASS_P (class2
) != SSE_CLASS_P (class2
)
16579 || MAYBE_MMX_CLASS_P (class1
) != MMX_CLASS_P (class1
)
16580 || MAYBE_MMX_CLASS_P (class2
) != MMX_CLASS_P (class2
))
16582 gcc_assert (!strict
);
16586 if (FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class2
))
16589 /* ??? This is a lie. We do have moves between mmx/general, and for
16590 mmx/sse2. But by saying we need secondary memory we discourage the
16591 register allocator from using the mmx registers unless needed. */
16592 if (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
))
16595 if (SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
))
16597 /* SSE1 doesn't have any direct moves from other classes. */
16601 /* If the target says that inter-unit moves are more expensive
16602 than moving through memory, then don't generate them. */
16603 if (!TARGET_INTER_UNIT_MOVES
&& !optimize_size
)
16606 /* Between SSE and general, we have moves no larger than word size. */
16607 if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
16610 /* ??? For the cost of one register reformat penalty, we could use
16611 the same instructions to move SFmode and DFmode data, but the
16612 relevant move patterns don't support those alternatives. */
16613 if (mode
== SFmode
|| mode
== DFmode
)
16620 /* Return true if the registers in CLASS cannot represent the change from
16621 modes FROM to TO. */
16624 ix86_cannot_change_mode_class (enum machine_mode from
, enum machine_mode to
,
16625 enum reg_class
class)
16630 /* x87 registers can't do subreg at all, as all values are reformatted
16631 to extended precision. */
16632 if (MAYBE_FLOAT_CLASS_P (class))
16635 if (MAYBE_SSE_CLASS_P (class) || MAYBE_MMX_CLASS_P (class))
16637 /* Vector registers do not support QI or HImode loads. If we don't
16638 disallow a change to these modes, reload will assume it's ok to
16639 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
16640 the vec_dupv4hi pattern. */
16641 if (GET_MODE_SIZE (from
) < 4)
16644 /* Vector registers do not support subreg with nonzero offsets, which
16645 are otherwise valid for integer registers. Since we can't see
16646 whether we have a nonzero offset from here, prohibit all
16647 nonparadoxical subregs changing size. */
16648 if (GET_MODE_SIZE (to
) < GET_MODE_SIZE (from
))
16655 /* Return the cost of moving data from a register in class CLASS1 to
16656 one in class CLASS2.
16658 It is not required that the cost always equal 2 when FROM is the same as TO;
16659 on some machines it is expensive to move between registers if they are not
16660 general registers. */
16663 ix86_register_move_cost (enum machine_mode mode
, enum reg_class class1
,
16664 enum reg_class class2
)
16666 /* In case we require secondary memory, compute cost of the store followed
16667 by load. In order to avoid bad register allocation choices, we need
16668 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
16670 if (ix86_secondary_memory_needed (class1
, class2
, mode
, 0))
16674 cost
+= MAX (MEMORY_MOVE_COST (mode
, class1
, 0),
16675 MEMORY_MOVE_COST (mode
, class1
, 1));
16676 cost
+= MAX (MEMORY_MOVE_COST (mode
, class2
, 0),
16677 MEMORY_MOVE_COST (mode
, class2
, 1));
16679 /* In case of copying from general_purpose_register we may emit multiple
16680 stores followed by single load causing memory size mismatch stall.
16681 Count this as arbitrarily high cost of 20. */
16682 if (CLASS_MAX_NREGS (class1
, mode
) > CLASS_MAX_NREGS (class2
, mode
))
16685 /* In the case of FP/MMX moves, the registers actually overlap, and we
16686 have to switch modes in order to treat them differently. */
16687 if ((MMX_CLASS_P (class1
) && MAYBE_FLOAT_CLASS_P (class2
))
16688 || (MMX_CLASS_P (class2
) && MAYBE_FLOAT_CLASS_P (class1
)))
16694 /* Moves between SSE/MMX and integer unit are expensive. */
16695 if (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
)
16696 || SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
))
16697 return ix86_cost
->mmxsse_to_integer
;
16698 if (MAYBE_FLOAT_CLASS_P (class1
))
16699 return ix86_cost
->fp_move
;
16700 if (MAYBE_SSE_CLASS_P (class1
))
16701 return ix86_cost
->sse_move
;
16702 if (MAYBE_MMX_CLASS_P (class1
))
16703 return ix86_cost
->mmx_move
;
16707 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
16710 ix86_hard_regno_mode_ok (int regno
, enum machine_mode mode
)
16712 /* Flags and only flags can only hold CCmode values. */
16713 if (CC_REGNO_P (regno
))
16714 return GET_MODE_CLASS (mode
) == MODE_CC
;
16715 if (GET_MODE_CLASS (mode
) == MODE_CC
16716 || GET_MODE_CLASS (mode
) == MODE_RANDOM
16717 || GET_MODE_CLASS (mode
) == MODE_PARTIAL_INT
)
16719 if (FP_REGNO_P (regno
))
16720 return VALID_FP_MODE_P (mode
);
16721 if (SSE_REGNO_P (regno
))
16723 /* We implement the move patterns for all vector modes into and
16724 out of SSE registers, even when no operation instructions
16726 return (VALID_SSE_REG_MODE (mode
)
16727 || VALID_SSE2_REG_MODE (mode
)
16728 || VALID_MMX_REG_MODE (mode
)
16729 || VALID_MMX_REG_MODE_3DNOW (mode
));
16731 if (MMX_REGNO_P (regno
))
16733 /* We implement the move patterns for 3DNOW modes even in MMX mode,
16734 so if the register is available at all, then we can move data of
16735 the given mode into or out of it. */
16736 return (VALID_MMX_REG_MODE (mode
)
16737 || VALID_MMX_REG_MODE_3DNOW (mode
));
16740 if (mode
== QImode
)
16742 /* Take care for QImode values - they can be in non-QI regs,
16743 but then they do cause partial register stalls. */
16744 if (regno
< 4 || TARGET_64BIT
)
16746 if (!TARGET_PARTIAL_REG_STALL
)
16748 return reload_in_progress
|| reload_completed
;
16750 /* We handle both integer and floats in the general purpose registers. */
16751 else if (VALID_INT_MODE_P (mode
))
16753 else if (VALID_FP_MODE_P (mode
))
16755 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
16756 on to use that value in smaller contexts, this can easily force a
16757 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
16758 supporting DImode, allow it. */
16759 else if (VALID_MMX_REG_MODE_3DNOW (mode
) || VALID_MMX_REG_MODE (mode
))
16765 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
16766 tieable integer mode. */
16769 ix86_tieable_integer_mode_p (enum machine_mode mode
)
16778 return TARGET_64BIT
|| !TARGET_PARTIAL_REG_STALL
;
16781 return TARGET_64BIT
;
16788 /* Return true if MODE1 is accessible in a register that can hold MODE2
16789 without copying. That is, all register classes that can hold MODE2
16790 can also hold MODE1. */
16793 ix86_modes_tieable_p (enum machine_mode mode1
, enum machine_mode mode2
)
16795 if (mode1
== mode2
)
16798 if (ix86_tieable_integer_mode_p (mode1
)
16799 && ix86_tieable_integer_mode_p (mode2
))
16802 /* MODE2 being XFmode implies fp stack or general regs, which means we
16803 can tie any smaller floating point modes to it. Note that we do not
16804 tie this with TFmode. */
16805 if (mode2
== XFmode
)
16806 return mode1
== SFmode
|| mode1
== DFmode
;
16808 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
16809 that we can tie it with SFmode. */
16810 if (mode2
== DFmode
)
16811 return mode1
== SFmode
;
16813 /* If MODE2 is only appropriate for an SSE register, then tie with
16814 any other mode acceptable to SSE registers. */
16815 if (GET_MODE_SIZE (mode2
) >= 8
16816 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode2
))
16817 return ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode1
);
16819 /* If MODE2 is appropriate for an MMX (or SSE) register, then tie
16820 with any other mode acceptable to MMX registers. */
16821 if (GET_MODE_SIZE (mode2
) == 8
16822 && ix86_hard_regno_mode_ok (FIRST_MMX_REG
, mode2
))
16823 return ix86_hard_regno_mode_ok (FIRST_MMX_REG
, mode1
);
16828 /* Return the cost of moving data of mode M between a
16829 register and memory. A value of 2 is the default; this cost is
16830 relative to those in `REGISTER_MOVE_COST'.
16832 If moving between registers and memory is more expensive than
16833 between two registers, you should define this macro to express the
16836 Model also increased moving costs of QImode registers in non
16840 ix86_memory_move_cost (enum machine_mode mode
, enum reg_class
class, int in
)
16842 if (FLOAT_CLASS_P (class))
16859 return in
? ix86_cost
->fp_load
[index
] : ix86_cost
->fp_store
[index
];
16861 if (SSE_CLASS_P (class))
16864 switch (GET_MODE_SIZE (mode
))
16878 return in
? ix86_cost
->sse_load
[index
] : ix86_cost
->sse_store
[index
];
16880 if (MMX_CLASS_P (class))
16883 switch (GET_MODE_SIZE (mode
))
16894 return in
? ix86_cost
->mmx_load
[index
] : ix86_cost
->mmx_store
[index
];
16896 switch (GET_MODE_SIZE (mode
))
16900 return (Q_CLASS_P (class) ? ix86_cost
->int_load
[0]
16901 : ix86_cost
->movzbl_load
);
16903 return (Q_CLASS_P (class) ? ix86_cost
->int_store
[0]
16904 : ix86_cost
->int_store
[0] + 4);
16907 return in
? ix86_cost
->int_load
[1] : ix86_cost
->int_store
[1];
16909 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
16910 if (mode
== TFmode
)
16912 return ((in
? ix86_cost
->int_load
[2] : ix86_cost
->int_store
[2])
16913 * (((int) GET_MODE_SIZE (mode
)
16914 + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
));
16918 /* Compute a (partial) cost for rtx X. Return true if the complete
16919 cost has been computed, and false if subexpressions should be
16920 scanned. In either case, *TOTAL contains the cost result. */
16923 ix86_rtx_costs (rtx x
, int code
, int outer_code
, int *total
)
16925 enum machine_mode mode
= GET_MODE (x
);
16933 if (TARGET_64BIT
&& !x86_64_immediate_operand (x
, VOIDmode
))
16935 else if (TARGET_64BIT
&& !x86_64_zext_immediate_operand (x
, VOIDmode
))
16937 else if (flag_pic
&& SYMBOLIC_CONST (x
)
16939 || (!GET_CODE (x
) != LABEL_REF
16940 && (GET_CODE (x
) != SYMBOL_REF
16941 || !SYMBOL_REF_LOCAL_P (x
)))))
16948 if (mode
== VOIDmode
)
16951 switch (standard_80387_constant_p (x
))
16956 default: /* Other constants */
16961 /* Start with (MEM (SYMBOL_REF)), since that's where
16962 it'll probably end up. Add a penalty for size. */
16963 *total
= (COSTS_N_INSNS (1)
16964 + (flag_pic
!= 0 && !TARGET_64BIT
)
16965 + (mode
== SFmode
? 0 : mode
== DFmode
? 1 : 2));
16971 /* The zero extensions is often completely free on x86_64, so make
16972 it as cheap as possible. */
16973 if (TARGET_64BIT
&& mode
== DImode
16974 && GET_MODE (XEXP (x
, 0)) == SImode
)
16976 else if (TARGET_ZERO_EXTEND_WITH_AND
)
16977 *total
= ix86_cost
->add
;
16979 *total
= ix86_cost
->movzx
;
16983 *total
= ix86_cost
->movsx
;
16987 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
16988 && (GET_MODE (XEXP (x
, 0)) != DImode
|| TARGET_64BIT
))
16990 HOST_WIDE_INT value
= INTVAL (XEXP (x
, 1));
16993 *total
= ix86_cost
->add
;
16996 if ((value
== 2 || value
== 3)
16997 && ix86_cost
->lea
<= ix86_cost
->shift_const
)
16999 *total
= ix86_cost
->lea
;
17009 if (!TARGET_64BIT
&& GET_MODE (XEXP (x
, 0)) == DImode
)
17011 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
17013 if (INTVAL (XEXP (x
, 1)) > 32)
17014 *total
= ix86_cost
->shift_const
+ COSTS_N_INSNS (2);
17016 *total
= ix86_cost
->shift_const
* 2;
17020 if (GET_CODE (XEXP (x
, 1)) == AND
)
17021 *total
= ix86_cost
->shift_var
* 2;
17023 *total
= ix86_cost
->shift_var
* 6 + COSTS_N_INSNS (2);
17028 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
17029 *total
= ix86_cost
->shift_const
;
17031 *total
= ix86_cost
->shift_var
;
17036 if (FLOAT_MODE_P (mode
))
17038 *total
= ix86_cost
->fmul
;
17043 rtx op0
= XEXP (x
, 0);
17044 rtx op1
= XEXP (x
, 1);
17046 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
17048 unsigned HOST_WIDE_INT value
= INTVAL (XEXP (x
, 1));
17049 for (nbits
= 0; value
!= 0; value
&= value
- 1)
17053 /* This is arbitrary. */
17056 /* Compute costs correctly for widening multiplication. */
17057 if ((GET_CODE (op0
) == SIGN_EXTEND
|| GET_CODE (op1
) == ZERO_EXTEND
)
17058 && GET_MODE_SIZE (GET_MODE (XEXP (op0
, 0))) * 2
17059 == GET_MODE_SIZE (mode
))
17061 int is_mulwiden
= 0;
17062 enum machine_mode inner_mode
= GET_MODE (op0
);
17064 if (GET_CODE (op0
) == GET_CODE (op1
))
17065 is_mulwiden
= 1, op1
= XEXP (op1
, 0);
17066 else if (GET_CODE (op1
) == CONST_INT
)
17068 if (GET_CODE (op0
) == SIGN_EXTEND
)
17069 is_mulwiden
= trunc_int_for_mode (INTVAL (op1
), inner_mode
)
17072 is_mulwiden
= !(INTVAL (op1
) & ~GET_MODE_MASK (inner_mode
));
17076 op0
= XEXP (op0
, 0), mode
= GET_MODE (op0
);
17079 *total
= (ix86_cost
->mult_init
[MODE_INDEX (mode
)]
17080 + nbits
* ix86_cost
->mult_bit
17081 + rtx_cost (op0
, outer_code
) + rtx_cost (op1
, outer_code
));
17090 if (FLOAT_MODE_P (mode
))
17091 *total
= ix86_cost
->fdiv
;
17093 *total
= ix86_cost
->divide
[MODE_INDEX (mode
)];
17097 if (FLOAT_MODE_P (mode
))
17098 *total
= ix86_cost
->fadd
;
17099 else if (GET_MODE_CLASS (mode
) == MODE_INT
17100 && GET_MODE_BITSIZE (mode
) <= GET_MODE_BITSIZE (Pmode
))
17102 if (GET_CODE (XEXP (x
, 0)) == PLUS
17103 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
17104 && GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1)) == CONST_INT
17105 && CONSTANT_P (XEXP (x
, 1)))
17107 HOST_WIDE_INT val
= INTVAL (XEXP (XEXP (XEXP (x
, 0), 0), 1));
17108 if (val
== 2 || val
== 4 || val
== 8)
17110 *total
= ix86_cost
->lea
;
17111 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 1), outer_code
);
17112 *total
+= rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0),
17114 *total
+= rtx_cost (XEXP (x
, 1), outer_code
);
17118 else if (GET_CODE (XEXP (x
, 0)) == MULT
17119 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == CONST_INT
)
17121 HOST_WIDE_INT val
= INTVAL (XEXP (XEXP (x
, 0), 1));
17122 if (val
== 2 || val
== 4 || val
== 8)
17124 *total
= ix86_cost
->lea
;
17125 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
);
17126 *total
+= rtx_cost (XEXP (x
, 1), outer_code
);
17130 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
17132 *total
= ix86_cost
->lea
;
17133 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
);
17134 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 1), outer_code
);
17135 *total
+= rtx_cost (XEXP (x
, 1), outer_code
);
17142 if (FLOAT_MODE_P (mode
))
17144 *total
= ix86_cost
->fadd
;
17152 if (!TARGET_64BIT
&& mode
== DImode
)
17154 *total
= (ix86_cost
->add
* 2
17155 + (rtx_cost (XEXP (x
, 0), outer_code
)
17156 << (GET_MODE (XEXP (x
, 0)) != DImode
))
17157 + (rtx_cost (XEXP (x
, 1), outer_code
)
17158 << (GET_MODE (XEXP (x
, 1)) != DImode
)));
17164 if (FLOAT_MODE_P (mode
))
17166 *total
= ix86_cost
->fchs
;
17172 if (!TARGET_64BIT
&& mode
== DImode
)
17173 *total
= ix86_cost
->add
* 2;
17175 *total
= ix86_cost
->add
;
17179 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTRACT
17180 && XEXP (XEXP (x
, 0), 1) == const1_rtx
17181 && GET_CODE (XEXP (XEXP (x
, 0), 2)) == CONST_INT
17182 && XEXP (x
, 1) == const0_rtx
)
17184 /* This kind of construct is implemented using test[bwl].
17185 Treat it as if we had an AND. */
17186 *total
= (ix86_cost
->add
17187 + rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
)
17188 + rtx_cost (const1_rtx
, outer_code
));
17194 if (!TARGET_SSE_MATH
17196 || (mode
== DFmode
&& !TARGET_SSE2
))
17197 /* For standard 80387 constants, raise the cost to prevent
17198 compress_float_constant() to generate load from memory. */
17199 switch (standard_80387_constant_p (XEXP (x
, 0)))
17209 *total
= (x86_ext_80387_constants
& TUNEMASK
17216 if (FLOAT_MODE_P (mode
))
17217 *total
= ix86_cost
->fabs
;
17221 if (FLOAT_MODE_P (mode
))
17222 *total
= ix86_cost
->fsqrt
;
17226 if (XINT (x
, 1) == UNSPEC_TP
)
17237 static int current_machopic_label_num
;
17239 /* Given a symbol name and its associated stub, write out the
17240 definition of the stub. */
17243 machopic_output_stub (FILE *file
, const char *symb
, const char *stub
)
17245 unsigned int length
;
17246 char *binder_name
, *symbol_name
, lazy_ptr_name
[32];
17247 int label
= ++current_machopic_label_num
;
17249 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
17250 symb
= (*targetm
.strip_name_encoding
) (symb
);
17252 length
= strlen (stub
);
17253 binder_name
= alloca (length
+ 32);
17254 GEN_BINDER_NAME_FOR_STUB (binder_name
, stub
, length
);
17256 length
= strlen (symb
);
17257 symbol_name
= alloca (length
+ 32);
17258 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name
, symb
, length
);
17260 sprintf (lazy_ptr_name
, "L%d$lz", label
);
17263 switch_to_section (darwin_sections
[machopic_picsymbol_stub_section
]);
17265 switch_to_section (darwin_sections
[machopic_symbol_stub_section
]);
17267 fprintf (file
, "%s:\n", stub
);
17268 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
17272 fprintf (file
, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label
, label
);
17273 fprintf (file
, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name
, label
);
17274 fprintf (file
, "\tjmp\t*%%edx\n");
17277 fprintf (file
, "\tjmp\t*%s\n", lazy_ptr_name
);
17279 fprintf (file
, "%s:\n", binder_name
);
17283 fprintf (file
, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name
, label
);
17284 fprintf (file
, "\tpushl\t%%eax\n");
17287 fprintf (file
, "\tpushl\t$%s\n", lazy_ptr_name
);
17289 fprintf (file
, "\tjmp\tdyld_stub_binding_helper\n");
17291 switch_to_section (darwin_sections
[machopic_lazy_symbol_ptr_section
]);
17292 fprintf (file
, "%s:\n", lazy_ptr_name
);
17293 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
17294 fprintf (file
, "\t.long %s\n", binder_name
);
17298 darwin_x86_file_end (void)
17300 darwin_file_end ();
17303 #endif /* TARGET_MACHO */
17305 /* Order the registers for register allocator. */
17308 x86_order_regs_for_local_alloc (void)
17313 /* First allocate the local general purpose registers. */
17314 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
17315 if (GENERAL_REGNO_P (i
) && call_used_regs
[i
])
17316 reg_alloc_order
[pos
++] = i
;
17318 /* Global general purpose registers. */
17319 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
17320 if (GENERAL_REGNO_P (i
) && !call_used_regs
[i
])
17321 reg_alloc_order
[pos
++] = i
;
17323 /* x87 registers come first in case we are doing FP math
17325 if (!TARGET_SSE_MATH
)
17326 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
17327 reg_alloc_order
[pos
++] = i
;
17329 /* SSE registers. */
17330 for (i
= FIRST_SSE_REG
; i
<= LAST_SSE_REG
; i
++)
17331 reg_alloc_order
[pos
++] = i
;
17332 for (i
= FIRST_REX_SSE_REG
; i
<= LAST_REX_SSE_REG
; i
++)
17333 reg_alloc_order
[pos
++] = i
;
17335 /* x87 registers. */
17336 if (TARGET_SSE_MATH
)
17337 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
17338 reg_alloc_order
[pos
++] = i
;
17340 for (i
= FIRST_MMX_REG
; i
<= LAST_MMX_REG
; i
++)
17341 reg_alloc_order
[pos
++] = i
;
17343 /* Initialize the rest of array as we do not allocate some registers
17345 while (pos
< FIRST_PSEUDO_REGISTER
)
17346 reg_alloc_order
[pos
++] = 0;
17349 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
17350 struct attribute_spec.handler. */
17352 ix86_handle_struct_attribute (tree
*node
, tree name
,
17353 tree args ATTRIBUTE_UNUSED
,
17354 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
17357 if (DECL_P (*node
))
17359 if (TREE_CODE (*node
) == TYPE_DECL
)
17360 type
= &TREE_TYPE (*node
);
17365 if (!(type
&& (TREE_CODE (*type
) == RECORD_TYPE
17366 || TREE_CODE (*type
) == UNION_TYPE
)))
17368 warning (OPT_Wattributes
, "%qs attribute ignored",
17369 IDENTIFIER_POINTER (name
));
17370 *no_add_attrs
= true;
17373 else if ((is_attribute_p ("ms_struct", name
)
17374 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type
)))
17375 || ((is_attribute_p ("gcc_struct", name
)
17376 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type
)))))
17378 warning (OPT_Wattributes
, "%qs incompatible attribute ignored",
17379 IDENTIFIER_POINTER (name
));
17380 *no_add_attrs
= true;
17387 ix86_ms_bitfield_layout_p (tree record_type
)
17389 return (TARGET_MS_BITFIELD_LAYOUT
&&
17390 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type
)))
17391 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type
));
17394 /* Returns an expression indicating where the this parameter is
17395 located on entry to the FUNCTION. */
17398 x86_this_parameter (tree function
)
17400 tree type
= TREE_TYPE (function
);
17404 int n
= aggregate_value_p (TREE_TYPE (type
), type
) != 0;
17405 return gen_rtx_REG (DImode
, x86_64_int_parameter_registers
[n
]);
17408 if (ix86_function_regparm (type
, function
) > 0)
17412 parm
= TYPE_ARG_TYPES (type
);
17413 /* Figure out whether or not the function has a variable number of
17415 for (; parm
; parm
= TREE_CHAIN (parm
))
17416 if (TREE_VALUE (parm
) == void_type_node
)
17418 /* If not, the this parameter is in the first argument. */
17422 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type
)))
17424 return gen_rtx_REG (SImode
, regno
);
17428 if (aggregate_value_p (TREE_TYPE (type
), type
))
17429 return gen_rtx_MEM (SImode
, plus_constant (stack_pointer_rtx
, 8));
17431 return gen_rtx_MEM (SImode
, plus_constant (stack_pointer_rtx
, 4));
17434 /* Determine whether x86_output_mi_thunk can succeed. */
17437 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED
,
17438 HOST_WIDE_INT delta ATTRIBUTE_UNUSED
,
17439 HOST_WIDE_INT vcall_offset
, tree function
)
17441 /* 64-bit can handle anything. */
17445 /* For 32-bit, everything's fine if we have one free register. */
17446 if (ix86_function_regparm (TREE_TYPE (function
), function
) < 3)
17449 /* Need a free register for vcall_offset. */
17453 /* Need a free register for GOT references. */
17454 if (flag_pic
&& !(*targetm
.binds_local_p
) (function
))
17457 /* Otherwise ok. */
17461 /* Output the assembler code for a thunk function. THUNK_DECL is the
17462 declaration for the thunk function itself, FUNCTION is the decl for
17463 the target function. DELTA is an immediate constant offset to be
17464 added to THIS. If VCALL_OFFSET is nonzero, the word at
17465 *(*this + vcall_offset) should be added to THIS. */
17468 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED
,
17469 tree thunk ATTRIBUTE_UNUSED
, HOST_WIDE_INT delta
,
17470 HOST_WIDE_INT vcall_offset
, tree function
)
17473 rtx
this = x86_this_parameter (function
);
17476 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
17477 pull it in now and let DELTA benefit. */
17480 else if (vcall_offset
)
17482 /* Put the this parameter into %eax. */
17484 xops
[1] = this_reg
= gen_rtx_REG (Pmode
, 0);
17485 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops
);
17488 this_reg
= NULL_RTX
;
17490 /* Adjust the this parameter by a fixed constant. */
17493 xops
[0] = GEN_INT (delta
);
17494 xops
[1] = this_reg
? this_reg
: this;
17497 if (!x86_64_general_operand (xops
[0], DImode
))
17499 tmp
= gen_rtx_REG (DImode
, FIRST_REX_INT_REG
+ 2 /* R10 */);
17501 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops
);
17505 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops
);
17508 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops
);
17511 /* Adjust the this parameter by a value stored in the vtable. */
17515 tmp
= gen_rtx_REG (DImode
, FIRST_REX_INT_REG
+ 2 /* R10 */);
17518 int tmp_regno
= 2 /* ECX */;
17519 if (lookup_attribute ("fastcall",
17520 TYPE_ATTRIBUTES (TREE_TYPE (function
))))
17521 tmp_regno
= 0 /* EAX */;
17522 tmp
= gen_rtx_REG (SImode
, tmp_regno
);
17525 xops
[0] = gen_rtx_MEM (Pmode
, this_reg
);
17528 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops
);
17530 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops
);
17532 /* Adjust the this parameter. */
17533 xops
[0] = gen_rtx_MEM (Pmode
, plus_constant (tmp
, vcall_offset
));
17534 if (TARGET_64BIT
&& !memory_operand (xops
[0], Pmode
))
17536 rtx tmp2
= gen_rtx_REG (DImode
, FIRST_REX_INT_REG
+ 3 /* R11 */);
17537 xops
[0] = GEN_INT (vcall_offset
);
17539 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops
);
17540 xops
[0] = gen_rtx_MEM (Pmode
, gen_rtx_PLUS (Pmode
, tmp
, tmp2
));
17542 xops
[1] = this_reg
;
17544 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops
);
17546 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops
);
17549 /* If necessary, drop THIS back to its stack slot. */
17550 if (this_reg
&& this_reg
!= this)
17552 xops
[0] = this_reg
;
17554 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops
);
17557 xops
[0] = XEXP (DECL_RTL (function
), 0);
17560 if (!flag_pic
|| (*targetm
.binds_local_p
) (function
))
17561 output_asm_insn ("jmp\t%P0", xops
);
17564 tmp
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, xops
[0]), UNSPEC_GOTPCREL
);
17565 tmp
= gen_rtx_CONST (Pmode
, tmp
);
17566 tmp
= gen_rtx_MEM (QImode
, tmp
);
17568 output_asm_insn ("jmp\t%A0", xops
);
17573 if (!flag_pic
|| (*targetm
.binds_local_p
) (function
))
17574 output_asm_insn ("jmp\t%P0", xops
);
17579 rtx sym_ref
= XEXP (DECL_RTL (function
), 0);
17580 tmp
= (gen_rtx_SYMBOL_REF
17582 machopic_indirection_name (sym_ref
, /*stub_p=*/true)));
17583 tmp
= gen_rtx_MEM (QImode
, tmp
);
17585 output_asm_insn ("jmp\t%0", xops
);
17588 #endif /* TARGET_MACHO */
17590 tmp
= gen_rtx_REG (SImode
, 2 /* ECX */);
17591 output_set_got (tmp
, NULL_RTX
);
17594 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops
);
17595 output_asm_insn ("jmp\t{*}%1", xops
);
17601 x86_file_start (void)
17603 default_file_start ();
17605 darwin_file_start ();
17607 if (X86_FILE_START_VERSION_DIRECTIVE
)
17608 fputs ("\t.version\t\"01.01\"\n", asm_out_file
);
17609 if (X86_FILE_START_FLTUSED
)
17610 fputs ("\t.global\t__fltused\n", asm_out_file
);
17611 if (ix86_asm_dialect
== ASM_INTEL
)
17612 fputs ("\t.intel_syntax\n", asm_out_file
);
17616 x86_field_alignment (tree field
, int computed
)
17618 enum machine_mode mode
;
17619 tree type
= TREE_TYPE (field
);
17621 if (TARGET_64BIT
|| TARGET_ALIGN_DOUBLE
)
17623 mode
= TYPE_MODE (TREE_CODE (type
) == ARRAY_TYPE
17624 ? get_inner_array_type (type
) : type
);
17625 if (mode
== DFmode
|| mode
== DCmode
17626 || GET_MODE_CLASS (mode
) == MODE_INT
17627 || GET_MODE_CLASS (mode
) == MODE_COMPLEX_INT
)
17628 return MIN (32, computed
);
17632 /* Output assembler code to FILE to increment profiler label # LABELNO
17633 for profiling a function entry. */
17635 x86_function_profiler (FILE *file
, int labelno ATTRIBUTE_UNUSED
)
17640 #ifndef NO_PROFILE_COUNTERS
17641 fprintf (file
, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX
, labelno
);
17643 fprintf (file
, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME
);
17647 #ifndef NO_PROFILE_COUNTERS
17648 fprintf (file
, "\tmovq\t$%sP%d,%%r11\n", LPREFIX
, labelno
);
17650 fprintf (file
, "\tcall\t%s\n", MCOUNT_NAME
);
17654 #ifndef NO_PROFILE_COUNTERS
17655 fprintf (file
, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
17656 LPREFIX
, labelno
, PROFILE_COUNT_REGISTER
);
17658 fprintf (file
, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME
);
17662 #ifndef NO_PROFILE_COUNTERS
17663 fprintf (file
, "\tmovl\t$%sP%d,%%%s\n", LPREFIX
, labelno
,
17664 PROFILE_COUNT_REGISTER
);
17666 fprintf (file
, "\tcall\t%s\n", MCOUNT_NAME
);
17670 /* We don't have exact information about the insn sizes, but we may assume
17671 quite safely that we are informed about all 1 byte insns and memory
17672 address sizes. This is enough to eliminate unnecessary padding in
17676 min_insn_size (rtx insn
)
17680 if (!INSN_P (insn
) || !active_insn_p (insn
))
17683 /* Discard alignments we've emit and jump instructions. */
17684 if (GET_CODE (PATTERN (insn
)) == UNSPEC_VOLATILE
17685 && XINT (PATTERN (insn
), 1) == UNSPECV_ALIGN
)
17687 if (GET_CODE (insn
) == JUMP_INSN
17688 && (GET_CODE (PATTERN (insn
)) == ADDR_VEC
17689 || GET_CODE (PATTERN (insn
)) == ADDR_DIFF_VEC
))
17692 /* Important case - calls are always 5 bytes.
17693 It is common to have many calls in the row. */
17694 if (GET_CODE (insn
) == CALL_INSN
17695 && symbolic_reference_mentioned_p (PATTERN (insn
))
17696 && !SIBLING_CALL_P (insn
))
17698 if (get_attr_length (insn
) <= 1)
17701 /* For normal instructions we may rely on the sizes of addresses
17702 and the presence of symbol to require 4 bytes of encoding.
17703 This is not the case for jumps where references are PC relative. */
17704 if (GET_CODE (insn
) != JUMP_INSN
)
17706 l
= get_attr_length_address (insn
);
17707 if (l
< 4 && symbolic_reference_mentioned_p (PATTERN (insn
)))
17716 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
17720 ix86_avoid_jump_misspredicts (void)
17722 rtx insn
, start
= get_insns ();
17723 int nbytes
= 0, njumps
= 0;
17726 /* Look for all minimal intervals of instructions containing 4 jumps.
17727 The intervals are bounded by START and INSN. NBYTES is the total
17728 size of instructions in the interval including INSN and not including
17729 START. When the NBYTES is smaller than 16 bytes, it is possible
17730 that the end of START and INSN ends up in the same 16byte page.
17732 The smallest offset in the page INSN can start is the case where START
17733 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
17734 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
17736 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
17739 nbytes
+= min_insn_size (insn
);
17741 fprintf(dump_file
, "Insn %i estimated to %i bytes\n",
17742 INSN_UID (insn
), min_insn_size (insn
));
17743 if ((GET_CODE (insn
) == JUMP_INSN
17744 && GET_CODE (PATTERN (insn
)) != ADDR_VEC
17745 && GET_CODE (PATTERN (insn
)) != ADDR_DIFF_VEC
)
17746 || GET_CODE (insn
) == CALL_INSN
)
17753 start
= NEXT_INSN (start
);
17754 if ((GET_CODE (start
) == JUMP_INSN
17755 && GET_CODE (PATTERN (start
)) != ADDR_VEC
17756 && GET_CODE (PATTERN (start
)) != ADDR_DIFF_VEC
)
17757 || GET_CODE (start
) == CALL_INSN
)
17758 njumps
--, isjump
= 1;
17761 nbytes
-= min_insn_size (start
);
17763 gcc_assert (njumps
>= 0);
17765 fprintf (dump_file
, "Interval %i to %i has %i bytes\n",
17766 INSN_UID (start
), INSN_UID (insn
), nbytes
);
17768 if (njumps
== 3 && isjump
&& nbytes
< 16)
17770 int padsize
= 15 - nbytes
+ min_insn_size (insn
);
17773 fprintf (dump_file
, "Padding insn %i by %i bytes!\n",
17774 INSN_UID (insn
), padsize
);
17775 emit_insn_before (gen_align (GEN_INT (padsize
)), insn
);
17780 /* AMD Athlon works faster
17781 when RET is not destination of conditional jump or directly preceded
17782 by other jump instruction. We avoid the penalty by inserting NOP just
17783 before the RET instructions in such cases. */
17785 ix86_pad_returns (void)
17790 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR
->preds
)
17792 basic_block bb
= e
->src
;
17793 rtx ret
= BB_END (bb
);
17795 bool replace
= false;
17797 if (GET_CODE (ret
) != JUMP_INSN
|| GET_CODE (PATTERN (ret
)) != RETURN
17798 || !maybe_hot_bb_p (bb
))
17800 for (prev
= PREV_INSN (ret
); prev
; prev
= PREV_INSN (prev
))
17801 if (active_insn_p (prev
) || GET_CODE (prev
) == CODE_LABEL
)
17803 if (prev
&& GET_CODE (prev
) == CODE_LABEL
)
17808 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
17809 if (EDGE_FREQUENCY (e
) && e
->src
->index
>= 0
17810 && !(e
->flags
& EDGE_FALLTHRU
))
17815 prev
= prev_active_insn (ret
);
17817 && ((GET_CODE (prev
) == JUMP_INSN
&& any_condjump_p (prev
))
17818 || GET_CODE (prev
) == CALL_INSN
))
17820 /* Empty functions get branch mispredict even when the jump destination
17821 is not visible to us. */
17822 if (!prev
&& cfun
->function_frequency
> FUNCTION_FREQUENCY_UNLIKELY_EXECUTED
)
17827 emit_insn_before (gen_return_internal_long (), ret
);
17833 /* Implement machine specific optimizations. We implement padding of returns
17834 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
17838 if (TARGET_PAD_RETURNS
&& optimize
&& !optimize_size
)
17839 ix86_pad_returns ();
17840 if (TARGET_FOUR_JUMP_LIMIT
&& optimize
&& !optimize_size
)
17841 ix86_avoid_jump_misspredicts ();
17844 /* Return nonzero when QImode register that must be represented via REX prefix
17847 x86_extended_QIreg_mentioned_p (rtx insn
)
17850 extract_insn_cached (insn
);
17851 for (i
= 0; i
< recog_data
.n_operands
; i
++)
17852 if (REG_P (recog_data
.operand
[i
])
17853 && REGNO (recog_data
.operand
[i
]) >= 4)
17858 /* Return nonzero when P points to register encoded via REX prefix.
17859 Called via for_each_rtx. */
17861 extended_reg_mentioned_1 (rtx
*p
, void *data ATTRIBUTE_UNUSED
)
17863 unsigned int regno
;
17866 regno
= REGNO (*p
);
17867 return REX_INT_REGNO_P (regno
) || REX_SSE_REGNO_P (regno
);
17870 /* Return true when INSN mentions register that must be encoded using REX
17873 x86_extended_reg_mentioned_p (rtx insn
)
17875 return for_each_rtx (&PATTERN (insn
), extended_reg_mentioned_1
, NULL
);
17878 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
17879 optabs would emit if we didn't have TFmode patterns. */
17882 x86_emit_floatuns (rtx operands
[2])
17884 rtx neglab
, donelab
, i0
, i1
, f0
, in
, out
;
17885 enum machine_mode mode
, inmode
;
17887 inmode
= GET_MODE (operands
[1]);
17888 gcc_assert (inmode
== SImode
|| inmode
== DImode
);
17891 in
= force_reg (inmode
, operands
[1]);
17892 mode
= GET_MODE (out
);
17893 neglab
= gen_label_rtx ();
17894 donelab
= gen_label_rtx ();
17895 i1
= gen_reg_rtx (Pmode
);
17896 f0
= gen_reg_rtx (mode
);
17898 emit_cmp_and_jump_insns (in
, const0_rtx
, LT
, const0_rtx
, Pmode
, 0, neglab
);
17900 emit_insn (gen_rtx_SET (VOIDmode
, out
, gen_rtx_FLOAT (mode
, in
)));
17901 emit_jump_insn (gen_jump (donelab
));
17904 emit_label (neglab
);
17906 i0
= expand_simple_binop (Pmode
, LSHIFTRT
, in
, const1_rtx
, NULL
, 1, OPTAB_DIRECT
);
17907 i1
= expand_simple_binop (Pmode
, AND
, in
, const1_rtx
, NULL
, 1, OPTAB_DIRECT
);
17908 i0
= expand_simple_binop (Pmode
, IOR
, i0
, i1
, i0
, 1, OPTAB_DIRECT
);
17909 expand_float (f0
, i0
, 0);
17910 emit_insn (gen_rtx_SET (VOIDmode
, out
, gen_rtx_PLUS (mode
, f0
, f0
)));
17912 emit_label (donelab
);
17915 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
17916 with all elements equal to VAR. Return true if successful. */
17919 ix86_expand_vector_init_duplicate (bool mmx_ok
, enum machine_mode mode
,
17920 rtx target
, rtx val
)
17922 enum machine_mode smode
, wsmode
, wvmode
;
17929 if (!mmx_ok
&& !TARGET_SSE
)
17937 val
= force_reg (GET_MODE_INNER (mode
), val
);
17938 x
= gen_rtx_VEC_DUPLICATE (mode
, val
);
17939 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
17945 if (TARGET_SSE
|| TARGET_3DNOW_A
)
17947 val
= gen_lowpart (SImode
, val
);
17948 x
= gen_rtx_TRUNCATE (HImode
, val
);
17949 x
= gen_rtx_VEC_DUPLICATE (mode
, x
);
17950 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
17972 /* Extend HImode to SImode using a paradoxical SUBREG. */
17973 tmp1
= gen_reg_rtx (SImode
);
17974 emit_move_insn (tmp1
, gen_lowpart (SImode
, val
));
17975 /* Insert the SImode value as low element of V4SImode vector. */
17976 tmp2
= gen_reg_rtx (V4SImode
);
17977 tmp1
= gen_rtx_VEC_MERGE (V4SImode
,
17978 gen_rtx_VEC_DUPLICATE (V4SImode
, tmp1
),
17979 CONST0_RTX (V4SImode
),
17981 emit_insn (gen_rtx_SET (VOIDmode
, tmp2
, tmp1
));
17982 /* Cast the V4SImode vector back to a V8HImode vector. */
17983 tmp1
= gen_reg_rtx (V8HImode
);
17984 emit_move_insn (tmp1
, gen_lowpart (V8HImode
, tmp2
));
17985 /* Duplicate the low short through the whole low SImode word. */
17986 emit_insn (gen_sse2_punpcklwd (tmp1
, tmp1
, tmp1
));
17987 /* Cast the V8HImode vector back to a V4SImode vector. */
17988 tmp2
= gen_reg_rtx (V4SImode
);
17989 emit_move_insn (tmp2
, gen_lowpart (V4SImode
, tmp1
));
17990 /* Replicate the low element of the V4SImode vector. */
17991 emit_insn (gen_sse2_pshufd (tmp2
, tmp2
, const0_rtx
));
17992 /* Cast the V2SImode back to V8HImode, and store in target. */
17993 emit_move_insn (target
, gen_lowpart (V8HImode
, tmp2
));
18004 /* Extend QImode to SImode using a paradoxical SUBREG. */
18005 tmp1
= gen_reg_rtx (SImode
);
18006 emit_move_insn (tmp1
, gen_lowpart (SImode
, val
));
18007 /* Insert the SImode value as low element of V4SImode vector. */
18008 tmp2
= gen_reg_rtx (V4SImode
);
18009 tmp1
= gen_rtx_VEC_MERGE (V4SImode
,
18010 gen_rtx_VEC_DUPLICATE (V4SImode
, tmp1
),
18011 CONST0_RTX (V4SImode
),
18013 emit_insn (gen_rtx_SET (VOIDmode
, tmp2
, tmp1
));
18014 /* Cast the V4SImode vector back to a V16QImode vector. */
18015 tmp1
= gen_reg_rtx (V16QImode
);
18016 emit_move_insn (tmp1
, gen_lowpart (V16QImode
, tmp2
));
18017 /* Duplicate the low byte through the whole low SImode word. */
18018 emit_insn (gen_sse2_punpcklbw (tmp1
, tmp1
, tmp1
));
18019 emit_insn (gen_sse2_punpcklbw (tmp1
, tmp1
, tmp1
));
18020 /* Cast the V16QImode vector back to a V4SImode vector. */
18021 tmp2
= gen_reg_rtx (V4SImode
);
18022 emit_move_insn (tmp2
, gen_lowpart (V4SImode
, tmp1
));
18023 /* Replicate the low element of the V4SImode vector. */
18024 emit_insn (gen_sse2_pshufd (tmp2
, tmp2
, const0_rtx
));
18025 /* Cast the V2SImode back to V16QImode, and store in target. */
18026 emit_move_insn (target
, gen_lowpart (V16QImode
, tmp2
));
18034 /* Replicate the value once into the next wider mode and recurse. */
18035 val
= convert_modes (wsmode
, smode
, val
, true);
18036 x
= expand_simple_binop (wsmode
, ASHIFT
, val
,
18037 GEN_INT (GET_MODE_BITSIZE (smode
)),
18038 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
18039 val
= expand_simple_binop (wsmode
, IOR
, val
, x
, x
, 1, OPTAB_LIB_WIDEN
);
18041 x
= gen_reg_rtx (wvmode
);
18042 if (!ix86_expand_vector_init_duplicate (mmx_ok
, wvmode
, x
, val
))
18043 gcc_unreachable ();
18044 emit_move_insn (target
, gen_lowpart (mode
, x
));
18052 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
18053 whose ONE_VAR element is VAR, and other elements are zero. Return true
18057 ix86_expand_vector_init_one_nonzero (bool mmx_ok
, enum machine_mode mode
,
18058 rtx target
, rtx var
, int one_var
)
18060 enum machine_mode vsimode
;
18068 if (!mmx_ok
&& !TARGET_SSE
)
18076 var
= force_reg (GET_MODE_INNER (mode
), var
);
18077 x
= gen_rtx_VEC_CONCAT (mode
, var
, CONST0_RTX (GET_MODE_INNER (mode
)));
18078 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
18083 if (!REG_P (target
) || REGNO (target
) < FIRST_PSEUDO_REGISTER
)
18084 new_target
= gen_reg_rtx (mode
);
18086 new_target
= target
;
18087 var
= force_reg (GET_MODE_INNER (mode
), var
);
18088 x
= gen_rtx_VEC_DUPLICATE (mode
, var
);
18089 x
= gen_rtx_VEC_MERGE (mode
, x
, CONST0_RTX (mode
), const1_rtx
);
18090 emit_insn (gen_rtx_SET (VOIDmode
, new_target
, x
));
18093 /* We need to shuffle the value to the correct position, so
18094 create a new pseudo to store the intermediate result. */
18096 /* With SSE2, we can use the integer shuffle insns. */
18097 if (mode
!= V4SFmode
&& TARGET_SSE2
)
18099 emit_insn (gen_sse2_pshufd_1 (new_target
, new_target
,
18101 GEN_INT (one_var
== 1 ? 0 : 1),
18102 GEN_INT (one_var
== 2 ? 0 : 1),
18103 GEN_INT (one_var
== 3 ? 0 : 1)));
18104 if (target
!= new_target
)
18105 emit_move_insn (target
, new_target
);
18109 /* Otherwise convert the intermediate result to V4SFmode and
18110 use the SSE1 shuffle instructions. */
18111 if (mode
!= V4SFmode
)
18113 tmp
= gen_reg_rtx (V4SFmode
);
18114 emit_move_insn (tmp
, gen_lowpart (V4SFmode
, new_target
));
18119 emit_insn (gen_sse_shufps_1 (tmp
, tmp
, tmp
,
18121 GEN_INT (one_var
== 1 ? 0 : 1),
18122 GEN_INT (one_var
== 2 ? 0+4 : 1+4),
18123 GEN_INT (one_var
== 3 ? 0+4 : 1+4)));
18125 if (mode
!= V4SFmode
)
18126 emit_move_insn (target
, gen_lowpart (V4SImode
, tmp
));
18127 else if (tmp
!= target
)
18128 emit_move_insn (target
, tmp
);
18130 else if (target
!= new_target
)
18131 emit_move_insn (target
, new_target
);
18136 vsimode
= V4SImode
;
18142 vsimode
= V2SImode
;
18148 /* Zero extend the variable element to SImode and recurse. */
18149 var
= convert_modes (SImode
, GET_MODE_INNER (mode
), var
, true);
18151 x
= gen_reg_rtx (vsimode
);
18152 if (!ix86_expand_vector_init_one_nonzero (mmx_ok
, vsimode
, x
,
18154 gcc_unreachable ();
18156 emit_move_insn (target
, gen_lowpart (mode
, x
));
18164 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
18165 consisting of the values in VALS. It is known that all elements
18166 except ONE_VAR are constants. Return true if successful. */
18169 ix86_expand_vector_init_one_var (bool mmx_ok
, enum machine_mode mode
,
18170 rtx target
, rtx vals
, int one_var
)
18172 rtx var
= XVECEXP (vals
, 0, one_var
);
18173 enum machine_mode wmode
;
18176 const_vec
= copy_rtx (vals
);
18177 XVECEXP (const_vec
, 0, one_var
) = CONST0_RTX (GET_MODE_INNER (mode
));
18178 const_vec
= gen_rtx_CONST_VECTOR (mode
, XVEC (const_vec
, 0));
18186 /* For the two element vectors, it's just as easy to use
18187 the general case. */
18203 /* There's no way to set one QImode entry easily. Combine
18204 the variable value with its adjacent constant value, and
18205 promote to an HImode set. */
18206 x
= XVECEXP (vals
, 0, one_var
^ 1);
18209 var
= convert_modes (HImode
, QImode
, var
, true);
18210 var
= expand_simple_binop (HImode
, ASHIFT
, var
, GEN_INT (8),
18211 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
18212 x
= GEN_INT (INTVAL (x
) & 0xff);
18216 var
= convert_modes (HImode
, QImode
, var
, true);
18217 x
= gen_int_mode (INTVAL (x
) << 8, HImode
);
18219 if (x
!= const0_rtx
)
18220 var
= expand_simple_binop (HImode
, IOR
, var
, x
, var
,
18221 1, OPTAB_LIB_WIDEN
);
18223 x
= gen_reg_rtx (wmode
);
18224 emit_move_insn (x
, gen_lowpart (wmode
, const_vec
));
18225 ix86_expand_vector_set (mmx_ok
, x
, var
, one_var
>> 1);
18227 emit_move_insn (target
, gen_lowpart (mode
, x
));
18234 emit_move_insn (target
, const_vec
);
18235 ix86_expand_vector_set (mmx_ok
, target
, var
, one_var
);
18239 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
18240 all values variable, and none identical. */
18243 ix86_expand_vector_init_general (bool mmx_ok
, enum machine_mode mode
,
18244 rtx target
, rtx vals
)
18246 enum machine_mode half_mode
= GET_MODE_INNER (mode
);
18247 rtx op0
= NULL
, op1
= NULL
;
18248 bool use_vec_concat
= false;
18254 if (!mmx_ok
&& !TARGET_SSE
)
18260 /* For the two element vectors, we always implement VEC_CONCAT. */
18261 op0
= XVECEXP (vals
, 0, 0);
18262 op1
= XVECEXP (vals
, 0, 1);
18263 use_vec_concat
= true;
18267 half_mode
= V2SFmode
;
18270 half_mode
= V2SImode
;
18276 /* For V4SF and V4SI, we implement a concat of two V2 vectors.
18277 Recurse to load the two halves. */
18279 op0
= gen_reg_rtx (half_mode
);
18280 v
= gen_rtvec (2, XVECEXP (vals
, 0, 0), XVECEXP (vals
, 0, 1));
18281 ix86_expand_vector_init (false, op0
, gen_rtx_PARALLEL (half_mode
, v
));
18283 op1
= gen_reg_rtx (half_mode
);
18284 v
= gen_rtvec (2, XVECEXP (vals
, 0, 2), XVECEXP (vals
, 0, 3));
18285 ix86_expand_vector_init (false, op1
, gen_rtx_PARALLEL (half_mode
, v
));
18287 use_vec_concat
= true;
18298 gcc_unreachable ();
18301 if (use_vec_concat
)
18303 if (!register_operand (op0
, half_mode
))
18304 op0
= force_reg (half_mode
, op0
);
18305 if (!register_operand (op1
, half_mode
))
18306 op1
= force_reg (half_mode
, op1
);
18308 emit_insn (gen_rtx_SET (VOIDmode
, target
,
18309 gen_rtx_VEC_CONCAT (mode
, op0
, op1
)));
18313 int i
, j
, n_elts
, n_words
, n_elt_per_word
;
18314 enum machine_mode inner_mode
;
18315 rtx words
[4], shift
;
18317 inner_mode
= GET_MODE_INNER (mode
);
18318 n_elts
= GET_MODE_NUNITS (mode
);
18319 n_words
= GET_MODE_SIZE (mode
) / UNITS_PER_WORD
;
18320 n_elt_per_word
= n_elts
/ n_words
;
18321 shift
= GEN_INT (GET_MODE_BITSIZE (inner_mode
));
18323 for (i
= 0; i
< n_words
; ++i
)
18325 rtx word
= NULL_RTX
;
18327 for (j
= 0; j
< n_elt_per_word
; ++j
)
18329 rtx elt
= XVECEXP (vals
, 0, (i
+1)*n_elt_per_word
- j
- 1);
18330 elt
= convert_modes (word_mode
, inner_mode
, elt
, true);
18336 word
= expand_simple_binop (word_mode
, ASHIFT
, word
, shift
,
18337 word
, 1, OPTAB_LIB_WIDEN
);
18338 word
= expand_simple_binop (word_mode
, IOR
, word
, elt
,
18339 word
, 1, OPTAB_LIB_WIDEN
);
18347 emit_move_insn (target
, gen_lowpart (mode
, words
[0]));
18348 else if (n_words
== 2)
18350 rtx tmp
= gen_reg_rtx (mode
);
18351 emit_insn (gen_rtx_CLOBBER (VOIDmode
, tmp
));
18352 emit_move_insn (gen_lowpart (word_mode
, tmp
), words
[0]);
18353 emit_move_insn (gen_highpart (word_mode
, tmp
), words
[1]);
18354 emit_move_insn (target
, tmp
);
18356 else if (n_words
== 4)
18358 rtx tmp
= gen_reg_rtx (V4SImode
);
18359 vals
= gen_rtx_PARALLEL (V4SImode
, gen_rtvec_v (4, words
));
18360 ix86_expand_vector_init_general (false, V4SImode
, tmp
, vals
);
18361 emit_move_insn (target
, gen_lowpart (mode
, tmp
));
18364 gcc_unreachable ();
18368 /* Initialize vector TARGET via VALS. Suppress the use of MMX
18369 instructions unless MMX_OK is true. */
18372 ix86_expand_vector_init (bool mmx_ok
, rtx target
, rtx vals
)
18374 enum machine_mode mode
= GET_MODE (target
);
18375 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
18376 int n_elts
= GET_MODE_NUNITS (mode
);
18377 int n_var
= 0, one_var
= -1;
18378 bool all_same
= true, all_const_zero
= true;
18382 for (i
= 0; i
< n_elts
; ++i
)
18384 x
= XVECEXP (vals
, 0, i
);
18385 if (!CONSTANT_P (x
))
18386 n_var
++, one_var
= i
;
18387 else if (x
!= CONST0_RTX (inner_mode
))
18388 all_const_zero
= false;
18389 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
18393 /* Constants are best loaded from the constant pool. */
18396 emit_move_insn (target
, gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0)));
18400 /* If all values are identical, broadcast the value. */
18402 && ix86_expand_vector_init_duplicate (mmx_ok
, mode
, target
,
18403 XVECEXP (vals
, 0, 0)))
18406 /* Values where only one field is non-constant are best loaded from
18407 the pool and overwritten via move later. */
18411 && ix86_expand_vector_init_one_nonzero (mmx_ok
, mode
, target
,
18412 XVECEXP (vals
, 0, one_var
),
18416 if (ix86_expand_vector_init_one_var (mmx_ok
, mode
, target
, vals
, one_var
))
18420 ix86_expand_vector_init_general (mmx_ok
, mode
, target
, vals
);
18424 ix86_expand_vector_set (bool mmx_ok
, rtx target
, rtx val
, int elt
)
18426 enum machine_mode mode
= GET_MODE (target
);
18427 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
18428 bool use_vec_merge
= false;
18437 tmp
= gen_reg_rtx (GET_MODE_INNER (mode
));
18438 ix86_expand_vector_extract (true, tmp
, target
, 1 - elt
);
18440 tmp
= gen_rtx_VEC_CONCAT (mode
, tmp
, val
);
18442 tmp
= gen_rtx_VEC_CONCAT (mode
, val
, tmp
);
18443 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
18453 /* For the two element vectors, we implement a VEC_CONCAT with
18454 the extraction of the other element. */
18456 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, GEN_INT (1 - elt
)));
18457 tmp
= gen_rtx_VEC_SELECT (inner_mode
, target
, tmp
);
18460 op0
= val
, op1
= tmp
;
18462 op0
= tmp
, op1
= val
;
18464 tmp
= gen_rtx_VEC_CONCAT (mode
, op0
, op1
);
18465 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
18473 use_vec_merge
= true;
18477 /* tmp = target = A B C D */
18478 tmp
= copy_to_reg (target
);
18479 /* target = A A B B */
18480 emit_insn (gen_sse_unpcklps (target
, target
, target
));
18481 /* target = X A B B */
18482 ix86_expand_vector_set (false, target
, val
, 0);
18483 /* target = A X C D */
18484 emit_insn (gen_sse_shufps_1 (target
, target
, tmp
,
18485 GEN_INT (1), GEN_INT (0),
18486 GEN_INT (2+4), GEN_INT (3+4)));
18490 /* tmp = target = A B C D */
18491 tmp
= copy_to_reg (target
);
18492 /* tmp = X B C D */
18493 ix86_expand_vector_set (false, tmp
, val
, 0);
18494 /* target = A B X D */
18495 emit_insn (gen_sse_shufps_1 (target
, target
, tmp
,
18496 GEN_INT (0), GEN_INT (1),
18497 GEN_INT (0+4), GEN_INT (3+4)));
18501 /* tmp = target = A B C D */
18502 tmp
= copy_to_reg (target
);
18503 /* tmp = X B C D */
18504 ix86_expand_vector_set (false, tmp
, val
, 0);
18505 /* target = A B X D */
18506 emit_insn (gen_sse_shufps_1 (target
, target
, tmp
,
18507 GEN_INT (0), GEN_INT (1),
18508 GEN_INT (2+4), GEN_INT (0+4)));
18512 gcc_unreachable ();
18517 /* Element 0 handled by vec_merge below. */
18520 use_vec_merge
= true;
18526 /* With SSE2, use integer shuffles to swap element 0 and ELT,
18527 store into element 0, then shuffle them back. */
18531 order
[0] = GEN_INT (elt
);
18532 order
[1] = const1_rtx
;
18533 order
[2] = const2_rtx
;
18534 order
[3] = GEN_INT (3);
18535 order
[elt
] = const0_rtx
;
18537 emit_insn (gen_sse2_pshufd_1 (target
, target
, order
[0],
18538 order
[1], order
[2], order
[3]));
18540 ix86_expand_vector_set (false, target
, val
, 0);
18542 emit_insn (gen_sse2_pshufd_1 (target
, target
, order
[0],
18543 order
[1], order
[2], order
[3]));
18547 /* For SSE1, we have to reuse the V4SF code. */
18548 ix86_expand_vector_set (false, gen_lowpart (V4SFmode
, target
),
18549 gen_lowpart (SFmode
, val
), elt
);
18554 use_vec_merge
= TARGET_SSE2
;
18557 use_vec_merge
= mmx_ok
&& (TARGET_SSE
|| TARGET_3DNOW_A
);
18568 tmp
= gen_rtx_VEC_DUPLICATE (mode
, val
);
18569 tmp
= gen_rtx_VEC_MERGE (mode
, tmp
, target
, GEN_INT (1 << elt
));
18570 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
18574 rtx mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
), false);
18576 emit_move_insn (mem
, target
);
18578 tmp
= adjust_address (mem
, inner_mode
, elt
*GET_MODE_SIZE (inner_mode
));
18579 emit_move_insn (tmp
, val
);
18581 emit_move_insn (target
, mem
);
18586 ix86_expand_vector_extract (bool mmx_ok
, rtx target
, rtx vec
, int elt
)
18588 enum machine_mode mode
= GET_MODE (vec
);
18589 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
18590 bool use_vec_extr
= false;
18603 use_vec_extr
= true;
18615 tmp
= gen_reg_rtx (mode
);
18616 emit_insn (gen_sse_shufps_1 (tmp
, vec
, vec
,
18617 GEN_INT (elt
), GEN_INT (elt
),
18618 GEN_INT (elt
+4), GEN_INT (elt
+4)));
18622 tmp
= gen_reg_rtx (mode
);
18623 emit_insn (gen_sse_unpckhps (tmp
, vec
, vec
));
18627 gcc_unreachable ();
18630 use_vec_extr
= true;
18645 tmp
= gen_reg_rtx (mode
);
18646 emit_insn (gen_sse2_pshufd_1 (tmp
, vec
,
18647 GEN_INT (elt
), GEN_INT (elt
),
18648 GEN_INT (elt
), GEN_INT (elt
)));
18652 tmp
= gen_reg_rtx (mode
);
18653 emit_insn (gen_sse2_punpckhdq (tmp
, vec
, vec
));
18657 gcc_unreachable ();
18660 use_vec_extr
= true;
18665 /* For SSE1, we have to reuse the V4SF code. */
18666 ix86_expand_vector_extract (false, gen_lowpart (SFmode
, target
),
18667 gen_lowpart (V4SFmode
, vec
), elt
);
18673 use_vec_extr
= TARGET_SSE2
;
18676 use_vec_extr
= mmx_ok
&& (TARGET_SSE
|| TARGET_3DNOW_A
);
18681 /* ??? Could extract the appropriate HImode element and shift. */
18688 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, GEN_INT (elt
)));
18689 tmp
= gen_rtx_VEC_SELECT (inner_mode
, vec
, tmp
);
18691 /* Let the rtl optimizers know about the zero extension performed. */
18692 if (inner_mode
== HImode
)
18694 tmp
= gen_rtx_ZERO_EXTEND (SImode
, tmp
);
18695 target
= gen_lowpart (SImode
, target
);
18698 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
18702 rtx mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
), false);
18704 emit_move_insn (mem
, vec
);
18706 tmp
= adjust_address (mem
, inner_mode
, elt
*GET_MODE_SIZE (inner_mode
));
18707 emit_move_insn (target
, tmp
);
18711 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
18712 pattern to reduce; DEST is the destination; IN is the input vector. */
18715 ix86_expand_reduc_v4sf (rtx (*fn
) (rtx
, rtx
, rtx
), rtx dest
, rtx in
)
18717 rtx tmp1
, tmp2
, tmp3
;
18719 tmp1
= gen_reg_rtx (V4SFmode
);
18720 tmp2
= gen_reg_rtx (V4SFmode
);
18721 tmp3
= gen_reg_rtx (V4SFmode
);
18723 emit_insn (gen_sse_movhlps (tmp1
, in
, in
));
18724 emit_insn (fn (tmp2
, tmp1
, in
));
18726 emit_insn (gen_sse_shufps_1 (tmp3
, tmp2
, tmp2
,
18727 GEN_INT (1), GEN_INT (1),
18728 GEN_INT (1+4), GEN_INT (1+4)));
18729 emit_insn (fn (dest
, tmp2
, tmp3
));
18732 /* Target hook for scalar_mode_supported_p. */
18734 ix86_scalar_mode_supported_p (enum machine_mode mode
)
18736 if (DECIMAL_FLOAT_MODE_P (mode
))
18739 return default_scalar_mode_supported_p (mode
);
18742 /* Implements target hook vector_mode_supported_p. */
18744 ix86_vector_mode_supported_p (enum machine_mode mode
)
18746 if (TARGET_SSE
&& VALID_SSE_REG_MODE (mode
))
18748 if (TARGET_SSE2
&& VALID_SSE2_REG_MODE (mode
))
18750 if (TARGET_MMX
&& VALID_MMX_REG_MODE (mode
))
18752 if (TARGET_3DNOW
&& VALID_MMX_REG_MODE_3DNOW (mode
))
18757 /* Worker function for TARGET_MD_ASM_CLOBBERS.
18759 We do this in the new i386 backend to maintain source compatibility
18760 with the old cc0-based compiler. */
18763 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED
,
18764 tree inputs ATTRIBUTE_UNUSED
,
18767 clobbers
= tree_cons (NULL_TREE
, build_string (5, "flags"),
18769 clobbers
= tree_cons (NULL_TREE
, build_string (4, "fpsr"),
18771 clobbers
= tree_cons (NULL_TREE
, build_string (7, "dirflag"),
18776 /* Return true if this goes in small data/bss. */
18779 ix86_in_large_data_p (tree exp
)
18781 if (ix86_cmodel
!= CM_MEDIUM
&& ix86_cmodel
!= CM_MEDIUM_PIC
)
18784 /* Functions are never large data. */
18785 if (TREE_CODE (exp
) == FUNCTION_DECL
)
18788 if (TREE_CODE (exp
) == VAR_DECL
&& DECL_SECTION_NAME (exp
))
18790 const char *section
= TREE_STRING_POINTER (DECL_SECTION_NAME (exp
));
18791 if (strcmp (section
, ".ldata") == 0
18792 || strcmp (section
, ".lbss") == 0)
18798 HOST_WIDE_INT size
= int_size_in_bytes (TREE_TYPE (exp
));
18800 /* If this is an incomplete type with size 0, then we can't put it
18801 in data because it might be too big when completed. */
18802 if (!size
|| size
> ix86_section_threshold
)
18809 ix86_encode_section_info (tree decl
, rtx rtl
, int first
)
18811 default_encode_section_info (decl
, rtl
, first
);
18813 if (TREE_CODE (decl
) == VAR_DECL
18814 && (TREE_STATIC (decl
) || DECL_EXTERNAL (decl
))
18815 && ix86_in_large_data_p (decl
))
18816 SYMBOL_REF_FLAGS (XEXP (rtl
, 0)) |= SYMBOL_FLAG_FAR_ADDR
;
18819 /* Worker function for REVERSE_CONDITION. */
18822 ix86_reverse_condition (enum rtx_code code
, enum machine_mode mode
)
18824 return (mode
!= CCFPmode
&& mode
!= CCFPUmode
18825 ? reverse_condition (code
)
18826 : reverse_condition_maybe_unordered (code
));
18829 /* Output code to perform an x87 FP register move, from OPERANDS[1]
18833 output_387_reg_move (rtx insn
, rtx
*operands
)
18835 if (REG_P (operands
[1])
18836 && find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
18838 if (REGNO (operands
[0]) == FIRST_STACK_REG
)
18839 return output_387_ffreep (operands
, 0);
18840 return "fstp\t%y0";
18842 if (STACK_TOP_P (operands
[0]))
18843 return "fld%z1\t%y1";
18847 /* Output code to perform a conditional jump to LABEL, if C2 flag in
18848 FP status register is set. */
18851 ix86_emit_fp_unordered_jump (rtx label
)
18853 rtx reg
= gen_reg_rtx (HImode
);
18856 emit_insn (gen_x86_fnstsw_1 (reg
));
18858 if (TARGET_USE_SAHF
)
18860 emit_insn (gen_x86_sahf_1 (reg
));
18862 temp
= gen_rtx_REG (CCmode
, FLAGS_REG
);
18863 temp
= gen_rtx_UNORDERED (VOIDmode
, temp
, const0_rtx
);
18867 emit_insn (gen_testqi_ext_ccno_0 (reg
, GEN_INT (0x04)));
18869 temp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
18870 temp
= gen_rtx_NE (VOIDmode
, temp
, const0_rtx
);
18873 temp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, temp
,
18874 gen_rtx_LABEL_REF (VOIDmode
, label
),
18876 temp
= gen_rtx_SET (VOIDmode
, pc_rtx
, temp
);
18877 emit_jump_insn (temp
);
18880 /* Output code to perform a log1p XFmode calculation. */
18882 void ix86_emit_i387_log1p (rtx op0
, rtx op1
)
18884 rtx label1
= gen_label_rtx ();
18885 rtx label2
= gen_label_rtx ();
18887 rtx tmp
= gen_reg_rtx (XFmode
);
18888 rtx tmp2
= gen_reg_rtx (XFmode
);
18890 emit_insn (gen_absxf2 (tmp
, op1
));
18891 emit_insn (gen_cmpxf (tmp
,
18892 CONST_DOUBLE_FROM_REAL_VALUE (
18893 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode
),
18895 emit_jump_insn (gen_bge (label1
));
18897 emit_move_insn (tmp2
, standard_80387_constant_rtx (4)); /* fldln2 */
18898 emit_insn (gen_fyl2xp1_xf3 (op0
, tmp2
, op1
));
18899 emit_jump (label2
);
18901 emit_label (label1
);
18902 emit_move_insn (tmp
, CONST1_RTX (XFmode
));
18903 emit_insn (gen_addxf3 (tmp
, op1
, tmp
));
18904 emit_move_insn (tmp2
, standard_80387_constant_rtx (4)); /* fldln2 */
18905 emit_insn (gen_fyl2x_xf3 (op0
, tmp2
, tmp
));
18907 emit_label (label2
);
18910 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
18913 i386_solaris_elf_named_section (const char *name
, unsigned int flags
,
18916 /* With Binutils 2.15, the "@unwind" marker must be specified on
18917 every occurrence of the ".eh_frame" section, not just the first
18920 && strcmp (name
, ".eh_frame") == 0)
18922 fprintf (asm_out_file
, "\t.section\t%s,\"%s\",@unwind\n", name
,
18923 flags
& SECTION_WRITE
? "aw" : "a");
18926 default_elf_asm_named_section (name
, flags
, decl
);
18929 /* Return the mangling of TYPE if it is an extended fundamental type. */
18931 static const char *
18932 ix86_mangle_fundamental_type (tree type
)
18934 switch (TYPE_MODE (type
))
18937 /* __float128 is "g". */
18940 /* "long double" or __float80 is "e". */
18947 /* For 32-bit code we can save PIC register setup by using
18948 __stack_chk_fail_local hidden function instead of calling
18949 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
18950 register, so it is better to call __stack_chk_fail directly. */
18953 ix86_stack_protect_fail (void)
18955 return TARGET_64BIT
18956 ? default_external_stack_protect_fail ()
18957 : default_hidden_stack_protect_fail ();
18960 /* Select a format to encode pointers in exception handling data. CODE
18961 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
18962 true if the symbol may be affected by dynamic relocations.
18964 ??? All x86 object file formats are capable of representing this.
18965 After all, the relocation needed is the same as for the call insn.
18966 Whether or not a particular assembler allows us to enter such, I
18967 guess we'll have to see. */
18969 asm_preferred_eh_data_format (int code
, int global
)
18973 int type
= DW_EH_PE_sdata8
;
18975 || ix86_cmodel
== CM_SMALL_PIC
18976 || (ix86_cmodel
== CM_MEDIUM_PIC
&& (global
|| code
)))
18977 type
= DW_EH_PE_sdata4
;
18978 return (global
? DW_EH_PE_indirect
: 0) | DW_EH_PE_pcrel
| type
;
18980 if (ix86_cmodel
== CM_SMALL
18981 || (ix86_cmodel
== CM_MEDIUM
&& code
))
18982 return DW_EH_PE_udata4
;
18983 return DW_EH_PE_absptr
;
18986 #include "gt-i386.h"