1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20 Boston, MA 02110-1301, USA. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "tm_p.h"
29 #include "regs.h"
30 #include "hard-reg-set.h"
31 #include "real.h"
32 #include "insn-config.h"
33 #include "conditions.h"
34 #include "output.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
37 #include "flags.h"
38 #include "except.h"
39 #include "function.h"
40 #include "recog.h"
41 #include "expr.h"
42 #include "optabs.h"
43 #include "toplev.h"
44 #include "basic-block.h"
45 #include "ggc.h"
46 #include "target.h"
47 #include "target-def.h"
48 #include "langhooks.h"
49 #include "cgraph.h"
50 #include "tree-gimple.h"
51 #include "dwarf2.h"
52 #include "tm-constrs.h"
53
54 #ifndef CHECK_STACK_LIMIT
55 #define CHECK_STACK_LIMIT (-1)
56 #endif
57
58 /* Return index of given mode in mult and division cost tables. */
59 #define MODE_INDEX(mode) \
60 ((mode) == QImode ? 0 \
61 : (mode) == HImode ? 1 \
62 : (mode) == SImode ? 2 \
63 : (mode) == DImode ? 3 \
64 : 4)
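/* Illustrative sketch (not part of the original file): MODE_INDEX selects the
   slot of the per-mode cost arrays in struct processor_costs below, with index
   4 covering "other" modes.  The field name `mult_init' is assumed here purely
   for the sake of the example.  */
#if 0
static int
example_imul_cost (enum machine_mode mode)
{
  /* e.g. QImode -> slot 0, DImode -> slot 3, anything else -> slot 4.  */
  return ix86_cost->mult_init[MODE_INDEX (mode)];
}
#endif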
65
66 /* Processor costs (relative to an add) */
67 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
68 #define COSTS_N_BYTES(N) ((N) * 2)
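/* Worked example of the scales assumed above (an illustration, not original
   text): COSTS_N_INSNS (1) == 1 * 4 == 4 on the cycle scale, while
   COSTS_N_BYTES (2) == 2 * 2 == 4, so a 2-byte add in size_cost carries the
   same weight as a 1-cycle add does in the speed-tuned tables.  */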
69
70 static const
71 struct processor_costs size_cost = { /* costs for tuning for size */
72 COSTS_N_BYTES (2), /* cost of an add instruction */
73 COSTS_N_BYTES (3), /* cost of a lea instruction */
74 COSTS_N_BYTES (2), /* variable shift costs */
75 COSTS_N_BYTES (3), /* constant shift costs */
76 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
77 COSTS_N_BYTES (3), /* HI */
78 COSTS_N_BYTES (3), /* SI */
79 COSTS_N_BYTES (3), /* DI */
80 COSTS_N_BYTES (5)}, /* other */
81 0, /* cost of multiply per each bit set */
82 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
83 COSTS_N_BYTES (3), /* HI */
84 COSTS_N_BYTES (3), /* SI */
85 COSTS_N_BYTES (3), /* DI */
86 COSTS_N_BYTES (5)}, /* other */
87 COSTS_N_BYTES (3), /* cost of movsx */
88 COSTS_N_BYTES (3), /* cost of movzx */
89 0, /* "large" insn */
90 2, /* MOVE_RATIO */
91 2, /* cost for loading QImode using movzbl */
92 {2, 2, 2}, /* cost of loading integer registers
93 in QImode, HImode and SImode.
94 Relative to reg-reg move (2). */
95 {2, 2, 2}, /* cost of storing integer registers */
96 2, /* cost of reg,reg fld/fst */
97 {2, 2, 2}, /* cost of loading fp registers
98 in SFmode, DFmode and XFmode */
99 {2, 2, 2}, /* cost of storing fp registers
100 in SFmode, DFmode and XFmode */
101 3, /* cost of moving MMX register */
102 {3, 3}, /* cost of loading MMX registers
103 in SImode and DImode */
104 {3, 3}, /* cost of storing MMX registers
105 in SImode and DImode */
106 3, /* cost of moving SSE register */
107 {3, 3, 3}, /* cost of loading SSE registers
108 in SImode, DImode and TImode */
109 {3, 3, 3}, /* cost of storing SSE registers
110 in SImode, DImode and TImode */
111 3, /* MMX or SSE register to integer */
112 0, /* size of prefetch block */
113 0, /* number of parallel prefetches */
114 2, /* Branch cost */
115 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
116 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
117 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
118 COSTS_N_BYTES (2), /* cost of FABS instruction. */
119 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
120 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
121 };
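/* Rough sketch of how one of these tables ends up selected (simplified from
   what override_options does later in this file; shown only for orientation):
   -Os picks size_cost, otherwise the table for the CPU being tuned for.  */
#if 0
if (optimize_size)
  ix86_cost = &size_cost;
else
  ix86_cost = processor_target_table[ix86_tune].cost;
#endif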
122
123 /* Processor costs (relative to an add) */
124 static const
125 struct processor_costs i386_cost = { /* 386 specific costs */
126 COSTS_N_INSNS (1), /* cost of an add instruction */
127 COSTS_N_INSNS (1), /* cost of a lea instruction */
128 COSTS_N_INSNS (3), /* variable shift costs */
129 COSTS_N_INSNS (2), /* constant shift costs */
130 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
131 COSTS_N_INSNS (6), /* HI */
132 COSTS_N_INSNS (6), /* SI */
133 COSTS_N_INSNS (6), /* DI */
134 COSTS_N_INSNS (6)}, /* other */
135 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
136 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
137 COSTS_N_INSNS (23), /* HI */
138 COSTS_N_INSNS (23), /* SI */
139 COSTS_N_INSNS (23), /* DI */
140 COSTS_N_INSNS (23)}, /* other */
141 COSTS_N_INSNS (3), /* cost of movsx */
142 COSTS_N_INSNS (2), /* cost of movzx */
143 15, /* "large" insn */
144 3, /* MOVE_RATIO */
145 4, /* cost for loading QImode using movzbl */
146 {2, 4, 2}, /* cost of loading integer registers
147 in QImode, HImode and SImode.
148 Relative to reg-reg move (2). */
149 {2, 4, 2}, /* cost of storing integer registers */
150 2, /* cost of reg,reg fld/fst */
151 {8, 8, 8}, /* cost of loading fp registers
152 in SFmode, DFmode and XFmode */
153 {8, 8, 8}, /* cost of storing fp registers
154 in SFmode, DFmode and XFmode */
155 2, /* cost of moving MMX register */
156 {4, 8}, /* cost of loading MMX registers
157 in SImode and DImode */
158 {4, 8}, /* cost of storing MMX registers
159 in SImode and DImode */
160 2, /* cost of moving SSE register */
161 {4, 8, 16}, /* cost of loading SSE registers
162 in SImode, DImode and TImode */
163 {4, 8, 16}, /* cost of storing SSE registers
164 in SImode, DImode and TImode */
165 3, /* MMX or SSE register to integer */
166 0, /* size of prefetch block */
167 0, /* number of parallel prefetches */
168 1, /* Branch cost */
169 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
170 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
171 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
172 COSTS_N_INSNS (22), /* cost of FABS instruction. */
173 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
174 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
175 };
176
177 static const
178 struct processor_costs i486_cost = { /* 486 specific costs */
179 COSTS_N_INSNS (1), /* cost of an add instruction */
180 COSTS_N_INSNS (1), /* cost of a lea instruction */
181 COSTS_N_INSNS (3), /* variable shift costs */
182 COSTS_N_INSNS (2), /* constant shift costs */
183 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
184 COSTS_N_INSNS (12), /* HI */
185 COSTS_N_INSNS (12), /* SI */
186 COSTS_N_INSNS (12), /* DI */
187 COSTS_N_INSNS (12)}, /* other */
188 1, /* cost of multiply per each bit set */
189 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
190 COSTS_N_INSNS (40), /* HI */
191 COSTS_N_INSNS (40), /* SI */
192 COSTS_N_INSNS (40), /* DI */
193 COSTS_N_INSNS (40)}, /* other */
194 COSTS_N_INSNS (3), /* cost of movsx */
195 COSTS_N_INSNS (2), /* cost of movzx */
196 15, /* "large" insn */
197 3, /* MOVE_RATIO */
198 4, /* cost for loading QImode using movzbl */
199 {2, 4, 2}, /* cost of loading integer registers
200 in QImode, HImode and SImode.
201 Relative to reg-reg move (2). */
202 {2, 4, 2}, /* cost of storing integer registers */
203 2, /* cost of reg,reg fld/fst */
204 {8, 8, 8}, /* cost of loading fp registers
205 in SFmode, DFmode and XFmode */
206 {8, 8, 8}, /* cost of storing fp registers
207 in SFmode, DFmode and XFmode */
208 2, /* cost of moving MMX register */
209 {4, 8}, /* cost of loading MMX registers
210 in SImode and DImode */
211 {4, 8}, /* cost of storing MMX registers
212 in SImode and DImode */
213 2, /* cost of moving SSE register */
214 {4, 8, 16}, /* cost of loading SSE registers
215 in SImode, DImode and TImode */
216 {4, 8, 16}, /* cost of storing SSE registers
217 in SImode, DImode and TImode */
218 3, /* MMX or SSE register to integer */
219 0, /* size of prefetch block */
220 0, /* number of parallel prefetches */
221 1, /* Branch cost */
222 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
223 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
224 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
225 COSTS_N_INSNS (3), /* cost of FABS instruction. */
226 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
227 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
228 };
229
230 static const
231 struct processor_costs pentium_cost = {
232 COSTS_N_INSNS (1), /* cost of an add instruction */
233 COSTS_N_INSNS (1), /* cost of a lea instruction */
234 COSTS_N_INSNS (4), /* variable shift costs */
235 COSTS_N_INSNS (1), /* constant shift costs */
236 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
237 COSTS_N_INSNS (11), /* HI */
238 COSTS_N_INSNS (11), /* SI */
239 COSTS_N_INSNS (11), /* DI */
240 COSTS_N_INSNS (11)}, /* other */
241 0, /* cost of multiply per each bit set */
242 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
243 COSTS_N_INSNS (25), /* HI */
244 COSTS_N_INSNS (25), /* SI */
245 COSTS_N_INSNS (25), /* DI */
246 COSTS_N_INSNS (25)}, /* other */
247 COSTS_N_INSNS (3), /* cost of movsx */
248 COSTS_N_INSNS (2), /* cost of movzx */
249 8, /* "large" insn */
250 6, /* MOVE_RATIO */
251 6, /* cost for loading QImode using movzbl */
252 {2, 4, 2}, /* cost of loading integer registers
253 in QImode, HImode and SImode.
254 Relative to reg-reg move (2). */
255 {2, 4, 2}, /* cost of storing integer registers */
256 2, /* cost of reg,reg fld/fst */
257 {2, 2, 6}, /* cost of loading fp registers
258 in SFmode, DFmode and XFmode */
259 {4, 4, 6}, /* cost of storing fp registers
260 in SFmode, DFmode and XFmode */
261 8, /* cost of moving MMX register */
262 {8, 8}, /* cost of loading MMX registers
263 in SImode and DImode */
264 {8, 8}, /* cost of storing MMX registers
265 in SImode and DImode */
266 2, /* cost of moving SSE register */
267 {4, 8, 16}, /* cost of loading SSE registers
268 in SImode, DImode and TImode */
269 {4, 8, 16}, /* cost of storing SSE registers
270 in SImode, DImode and TImode */
271 3, /* MMX or SSE register to integer */
272 0, /* size of prefetch block */
273 0, /* number of parallel prefetches */
274 2, /* Branch cost */
275 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
276 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
277 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
278 COSTS_N_INSNS (1), /* cost of FABS instruction. */
279 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
280 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
281 };
282
283 static const
284 struct processor_costs pentiumpro_cost = {
285 COSTS_N_INSNS (1), /* cost of an add instruction */
286 COSTS_N_INSNS (1), /* cost of a lea instruction */
287 COSTS_N_INSNS (1), /* variable shift costs */
288 COSTS_N_INSNS (1), /* constant shift costs */
289 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
290 COSTS_N_INSNS (4), /* HI */
291 COSTS_N_INSNS (4), /* SI */
292 COSTS_N_INSNS (4), /* DI */
293 COSTS_N_INSNS (4)}, /* other */
294 0, /* cost of multiply per each bit set */
295 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
296 COSTS_N_INSNS (17), /* HI */
297 COSTS_N_INSNS (17), /* SI */
298 COSTS_N_INSNS (17), /* DI */
299 COSTS_N_INSNS (17)}, /* other */
300 COSTS_N_INSNS (1), /* cost of movsx */
301 COSTS_N_INSNS (1), /* cost of movzx */
302 8, /* "large" insn */
303 6, /* MOVE_RATIO */
304 2, /* cost for loading QImode using movzbl */
305 {4, 4, 4}, /* cost of loading integer registers
306 in QImode, HImode and SImode.
307 Relative to reg-reg move (2). */
308 {2, 2, 2}, /* cost of storing integer registers */
309 2, /* cost of reg,reg fld/fst */
310 {2, 2, 6}, /* cost of loading fp registers
311 in SFmode, DFmode and XFmode */
312 {4, 4, 6}, /* cost of storing fp registers
313 in SFmode, DFmode and XFmode */
314 2, /* cost of moving MMX register */
315 {2, 2}, /* cost of loading MMX registers
316 in SImode and DImode */
317 {2, 2}, /* cost of storing MMX registers
318 in SImode and DImode */
319 2, /* cost of moving SSE register */
320 {2, 2, 8}, /* cost of loading SSE registers
321 in SImode, DImode and TImode */
322 {2, 2, 8}, /* cost of storing SSE registers
323 in SImode, DImode and TImode */
324 3, /* MMX or SSE register to integer */
325 32, /* size of prefetch block */
326 6, /* number of parallel prefetches */
327 2, /* Branch cost */
328 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
329 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
330 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
331 COSTS_N_INSNS (2), /* cost of FABS instruction. */
332 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
333 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
334 };
335
336 static const
337 struct processor_costs k6_cost = {
338 COSTS_N_INSNS (1), /* cost of an add instruction */
339 COSTS_N_INSNS (2), /* cost of a lea instruction */
340 COSTS_N_INSNS (1), /* variable shift costs */
341 COSTS_N_INSNS (1), /* constant shift costs */
342 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
343 COSTS_N_INSNS (3), /* HI */
344 COSTS_N_INSNS (3), /* SI */
345 COSTS_N_INSNS (3), /* DI */
346 COSTS_N_INSNS (3)}, /* other */
347 0, /* cost of multiply per each bit set */
348 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
349 COSTS_N_INSNS (18), /* HI */
350 COSTS_N_INSNS (18), /* SI */
351 COSTS_N_INSNS (18), /* DI */
352 COSTS_N_INSNS (18)}, /* other */
353 COSTS_N_INSNS (2), /* cost of movsx */
354 COSTS_N_INSNS (2), /* cost of movzx */
355 8, /* "large" insn */
356 4, /* MOVE_RATIO */
357 3, /* cost for loading QImode using movzbl */
358 {4, 5, 4}, /* cost of loading integer registers
359 in QImode, HImode and SImode.
360 Relative to reg-reg move (2). */
361 {2, 3, 2}, /* cost of storing integer registers */
362 4, /* cost of reg,reg fld/fst */
363 {6, 6, 6}, /* cost of loading fp registers
364 in SFmode, DFmode and XFmode */
365 {4, 4, 4}, /* cost of storing fp registers
366 in SFmode, DFmode and XFmode */
367 2, /* cost of moving MMX register */
368 {2, 2}, /* cost of loading MMX registers
369 in SImode and DImode */
370 {2, 2}, /* cost of storing MMX registers
371 in SImode and DImode */
372 2, /* cost of moving SSE register */
373 {2, 2, 8}, /* cost of loading SSE registers
374 in SImode, DImode and TImode */
375 {2, 2, 8}, /* cost of storing SSE registers
376 in SImode, DImode and TImode */
377 6, /* MMX or SSE register to integer */
378 32, /* size of prefetch block */
379 1, /* number of parallel prefetches */
380 1, /* Branch cost */
381 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
382 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
383 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
384 COSTS_N_INSNS (2), /* cost of FABS instruction. */
385 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
386 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
387 };
388
389 static const
390 struct processor_costs athlon_cost = {
391 COSTS_N_INSNS (1), /* cost of an add instruction */
392 COSTS_N_INSNS (2), /* cost of a lea instruction */
393 COSTS_N_INSNS (1), /* variable shift costs */
394 COSTS_N_INSNS (1), /* constant shift costs */
395 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
396 COSTS_N_INSNS (5), /* HI */
397 COSTS_N_INSNS (5), /* SI */
398 COSTS_N_INSNS (5), /* DI */
399 COSTS_N_INSNS (5)}, /* other */
400 0, /* cost of multiply per each bit set */
401 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
402 COSTS_N_INSNS (26), /* HI */
403 COSTS_N_INSNS (42), /* SI */
404 COSTS_N_INSNS (74), /* DI */
405 COSTS_N_INSNS (74)}, /* other */
406 COSTS_N_INSNS (1), /* cost of movsx */
407 COSTS_N_INSNS (1), /* cost of movzx */
408 8, /* "large" insn */
409 9, /* MOVE_RATIO */
410 4, /* cost for loading QImode using movzbl */
411 {3, 4, 3}, /* cost of loading integer registers
412 in QImode, HImode and SImode.
413 Relative to reg-reg move (2). */
414 {3, 4, 3}, /* cost of storing integer registers */
415 4, /* cost of reg,reg fld/fst */
416 {4, 4, 12}, /* cost of loading fp registers
417 in SFmode, DFmode and XFmode */
418 {6, 6, 8}, /* cost of storing fp registers
419 in SFmode, DFmode and XFmode */
420 2, /* cost of moving MMX register */
421 {4, 4}, /* cost of loading MMX registers
422 in SImode and DImode */
423 {4, 4}, /* cost of storing MMX registers
424 in SImode and DImode */
425 2, /* cost of moving SSE register */
426 {4, 4, 6}, /* cost of loading SSE registers
427 in SImode, DImode and TImode */
428 {4, 4, 5}, /* cost of storing SSE registers
429 in SImode, DImode and TImode */
430 5, /* MMX or SSE register to integer */
431 64, /* size of prefetch block */
432 6, /* number of parallel prefetches */
433 5, /* Branch cost */
434 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
435 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
436 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
437 COSTS_N_INSNS (2), /* cost of FABS instruction. */
438 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
439 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
440 };
441
442 static const
443 struct processor_costs k8_cost = {
444 COSTS_N_INSNS (1), /* cost of an add instruction */
445 COSTS_N_INSNS (2), /* cost of a lea instruction */
446 COSTS_N_INSNS (1), /* variable shift costs */
447 COSTS_N_INSNS (1), /* constant shift costs */
448 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
449 COSTS_N_INSNS (4), /* HI */
450 COSTS_N_INSNS (3), /* SI */
451 COSTS_N_INSNS (4), /* DI */
452 COSTS_N_INSNS (5)}, /* other */
453 0, /* cost of multiply per each bit set */
454 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
455 COSTS_N_INSNS (26), /* HI */
456 COSTS_N_INSNS (42), /* SI */
457 COSTS_N_INSNS (74), /* DI */
458 COSTS_N_INSNS (74)}, /* other */
459 COSTS_N_INSNS (1), /* cost of movsx */
460 COSTS_N_INSNS (1), /* cost of movzx */
461 8, /* "large" insn */
462 9, /* MOVE_RATIO */
463 4, /* cost for loading QImode using movzbl */
464 {3, 4, 3}, /* cost of loading integer registers
465 in QImode, HImode and SImode.
466 Relative to reg-reg move (2). */
467 {3, 4, 3}, /* cost of storing integer registers */
468 4, /* cost of reg,reg fld/fst */
469 {4, 4, 12}, /* cost of loading fp registers
470 in SFmode, DFmode and XFmode */
471 {6, 6, 8}, /* cost of storing fp registers
472 in SFmode, DFmode and XFmode */
473 2, /* cost of moving MMX register */
474 {3, 3}, /* cost of loading MMX registers
475 in SImode and DImode */
476 {4, 4}, /* cost of storing MMX registers
477 in SImode and DImode */
478 2, /* cost of moving SSE register */
479 {4, 3, 6}, /* cost of loading SSE registers
480 in SImode, DImode and TImode */
481 {4, 4, 5}, /* cost of storing SSE registers
482 in SImode, DImode and TImode */
483 5, /* MMX or SSE register to integer */
484 64, /* size of prefetch block */
485 6, /* number of parallel prefetches */
486 5, /* Branch cost */
487 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
488 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
489 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
490 COSTS_N_INSNS (2), /* cost of FABS instruction. */
491 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
492 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
493 };
494
495 static const
496 struct processor_costs pentium4_cost = {
497 COSTS_N_INSNS (1), /* cost of an add instruction */
498 COSTS_N_INSNS (3), /* cost of a lea instruction */
499 COSTS_N_INSNS (4), /* variable shift costs */
500 COSTS_N_INSNS (4), /* constant shift costs */
501 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
502 COSTS_N_INSNS (15), /* HI */
503 COSTS_N_INSNS (15), /* SI */
504 COSTS_N_INSNS (15), /* DI */
505 COSTS_N_INSNS (15)}, /* other */
506 0, /* cost of multiply per each bit set */
507 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
508 COSTS_N_INSNS (56), /* HI */
509 COSTS_N_INSNS (56), /* SI */
510 COSTS_N_INSNS (56), /* DI */
511 COSTS_N_INSNS (56)}, /* other */
512 COSTS_N_INSNS (1), /* cost of movsx */
513 COSTS_N_INSNS (1), /* cost of movzx */
514 16, /* "large" insn */
515 6, /* MOVE_RATIO */
516 2, /* cost for loading QImode using movzbl */
517 {4, 5, 4}, /* cost of loading integer registers
518 in QImode, HImode and SImode.
519 Relative to reg-reg move (2). */
520 {2, 3, 2}, /* cost of storing integer registers */
521 2, /* cost of reg,reg fld/fst */
522 {2, 2, 6}, /* cost of loading fp registers
523 in SFmode, DFmode and XFmode */
524 {4, 4, 6}, /* cost of storing fp registers
525 in SFmode, DFmode and XFmode */
526 2, /* cost of moving MMX register */
527 {2, 2}, /* cost of loading MMX registers
528 in SImode and DImode */
529 {2, 2}, /* cost of storing MMX registers
530 in SImode and DImode */
531 12, /* cost of moving SSE register */
532 {12, 12, 12}, /* cost of loading SSE registers
533 in SImode, DImode and TImode */
534 {2, 2, 8}, /* cost of storing SSE registers
535 in SImode, DImode and TImode */
536 10, /* MMX or SSE register to integer */
537 64, /* size of prefetch block */
538 6, /* number of parallel prefetches */
539 2, /* Branch cost */
540 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
541 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
542 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
543 COSTS_N_INSNS (2), /* cost of FABS instruction. */
544 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
545 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
546 };
547
548 static const
549 struct processor_costs nocona_cost = {
550 COSTS_N_INSNS (1), /* cost of an add instruction */
551 COSTS_N_INSNS (1), /* cost of a lea instruction */
552 COSTS_N_INSNS (1), /* variable shift costs */
553 COSTS_N_INSNS (1), /* constant shift costs */
554 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
555 COSTS_N_INSNS (10), /* HI */
556 COSTS_N_INSNS (10), /* SI */
557 COSTS_N_INSNS (10), /* DI */
558 COSTS_N_INSNS (10)}, /* other */
559 0, /* cost of multiply per each bit set */
560 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
561 COSTS_N_INSNS (66), /* HI */
562 COSTS_N_INSNS (66), /* SI */
563 COSTS_N_INSNS (66), /* DI */
564 COSTS_N_INSNS (66)}, /* other */
565 COSTS_N_INSNS (1), /* cost of movsx */
566 COSTS_N_INSNS (1), /* cost of movzx */
567 16, /* "large" insn */
568 17, /* MOVE_RATIO */
569 4, /* cost for loading QImode using movzbl */
570 {4, 4, 4}, /* cost of loading integer registers
571 in QImode, HImode and SImode.
572 Relative to reg-reg move (2). */
573 {4, 4, 4}, /* cost of storing integer registers */
574 3, /* cost of reg,reg fld/fst */
575 {12, 12, 12}, /* cost of loading fp registers
576 in SFmode, DFmode and XFmode */
577 {4, 4, 4}, /* cost of storing fp registers
578 in SFmode, DFmode and XFmode */
579 6, /* cost of moving MMX register */
580 {12, 12}, /* cost of loading MMX registers
581 in SImode and DImode */
582 {12, 12}, /* cost of storing MMX registers
583 in SImode and DImode */
584 6, /* cost of moving SSE register */
585 {12, 12, 12}, /* cost of loading SSE registers
586 in SImode, DImode and TImode */
587 {12, 12, 12}, /* cost of storing SSE registers
588 in SImode, DImode and TImode */
589 8, /* MMX or SSE register to integer */
590 128, /* size of prefetch block */
591 8, /* number of parallel prefetches */
592 1, /* Branch cost */
593 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
594 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
595 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
596 COSTS_N_INSNS (3), /* cost of FABS instruction. */
597 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
598 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
599 };
600
601 /* Generic64 should produce code tuned for Nocona and K8. */
602 static const
603 struct processor_costs generic64_cost = {
604 COSTS_N_INSNS (1), /* cost of an add instruction */
605 /* On all chips taken into consideration, lea takes 2 cycles or more. With
606 this cost, however, our current implementation of synth_mult results in the
607 use of unnecessary temporary registers, causing regressions on several
608 SPECfp benchmarks. */
609 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
610 COSTS_N_INSNS (1), /* variable shift costs */
611 COSTS_N_INSNS (1), /* constant shift costs */
612 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
613 COSTS_N_INSNS (4), /* HI */
614 COSTS_N_INSNS (3), /* SI */
615 COSTS_N_INSNS (4), /* DI */
616 COSTS_N_INSNS (2)}, /* other */
617 0, /* cost of multiply per each bit set */
618 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
619 COSTS_N_INSNS (26), /* HI */
620 COSTS_N_INSNS (42), /* SI */
621 COSTS_N_INSNS (74), /* DI */
622 COSTS_N_INSNS (74)}, /* other */
623 COSTS_N_INSNS (1), /* cost of movsx */
624 COSTS_N_INSNS (1), /* cost of movzx */
625 8, /* "large" insn */
626 17, /* MOVE_RATIO */
627 4, /* cost for loading QImode using movzbl */
628 {4, 4, 4}, /* cost of loading integer registers
629 in QImode, HImode and SImode.
630 Relative to reg-reg move (2). */
631 {4, 4, 4}, /* cost of storing integer registers */
632 4, /* cost of reg,reg fld/fst */
633 {12, 12, 12}, /* cost of loading fp registers
634 in SFmode, DFmode and XFmode */
635 {6, 6, 8}, /* cost of storing fp registers
636 in SFmode, DFmode and XFmode */
637 2, /* cost of moving MMX register */
638 {8, 8}, /* cost of loading MMX registers
639 in SImode and DImode */
640 {8, 8}, /* cost of storing MMX registers
641 in SImode and DImode */
642 2, /* cost of moving SSE register */
643 {8, 8, 8}, /* cost of loading SSE registers
644 in SImode, DImode and TImode */
645 {8, 8, 8}, /* cost of storing SSE registers
646 in SImode, DImode and TImode */
647 5, /* MMX or SSE register to integer */
648 64, /* size of prefetch block */
649 6, /* number of parallel prefetches */
650 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this value
651 is increased to the perhaps more appropriate value of 5. */
652 3, /* Branch cost */
653 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
654 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
655 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
656 COSTS_N_INSNS (8), /* cost of FABS instruction. */
657 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
658 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
659 };
660
661 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
662 static const
663 struct processor_costs generic32_cost = {
664 COSTS_N_INSNS (1), /* cost of an add instruction */
665 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
666 COSTS_N_INSNS (1), /* variable shift costs */
667 COSTS_N_INSNS (1), /* constant shift costs */
668 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
669 COSTS_N_INSNS (4), /* HI */
670 COSTS_N_INSNS (3), /* SI */
671 COSTS_N_INSNS (4), /* DI */
672 COSTS_N_INSNS (2)}, /* other */
673 0, /* cost of multiply per each bit set */
674 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
675 COSTS_N_INSNS (26), /* HI */
676 COSTS_N_INSNS (42), /* SI */
677 COSTS_N_INSNS (74), /* DI */
678 COSTS_N_INSNS (74)}, /* other */
679 COSTS_N_INSNS (1), /* cost of movsx */
680 COSTS_N_INSNS (1), /* cost of movzx */
681 8, /* "large" insn */
682 17, /* MOVE_RATIO */
683 4, /* cost for loading QImode using movzbl */
684 {4, 4, 4}, /* cost of loading integer registers
685 in QImode, HImode and SImode.
686 Relative to reg-reg move (2). */
687 {4, 4, 4}, /* cost of storing integer registers */
688 4, /* cost of reg,reg fld/fst */
689 {12, 12, 12}, /* cost of loading fp registers
690 in SFmode, DFmode and XFmode */
691 {6, 6, 8}, /* cost of storing fp registers
692 in SFmode, DFmode and XFmode */
693 2, /* cost of moving MMX register */
694 {8, 8}, /* cost of loading MMX registers
695 in SImode and DImode */
696 {8, 8}, /* cost of storing MMX registers
697 in SImode and DImode */
698 2, /* cost of moving SSE register */
699 {8, 8, 8}, /* cost of loading SSE registers
700 in SImode, DImode and TImode */
701 {8, 8, 8}, /* cost of storing SSE registers
702 in SImode, DImode and TImode */
703 5, /* MMX or SSE register to integer */
704 64, /* size of prefetch block */
705 6, /* number of parallel prefetches */
706 3, /* Branch cost */
707 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
708 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
709 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
710 COSTS_N_INSNS (8), /* cost of FABS instruction. */
711 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
712 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
713 };
714
715 const struct processor_costs *ix86_cost = &pentium_cost;
716
717 /* Processor feature/optimization bitmasks. */
718 #define m_386 (1<<PROCESSOR_I386)
719 #define m_486 (1<<PROCESSOR_I486)
720 #define m_PENT (1<<PROCESSOR_PENTIUM)
721 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
722 #define m_K6 (1<<PROCESSOR_K6)
723 #define m_ATHLON (1<<PROCESSOR_ATHLON)
724 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
725 #define m_K8 (1<<PROCESSOR_K8)
726 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
727 #define m_NOCONA (1<<PROCESSOR_NOCONA)
728 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
729 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
730 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
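/* Illustrative sketch of how these masks are consumed (the real macros live in
   i386.h; the names here follow that convention but are reproduced from memory):
   each x86_* tuning variable below is a bitmask over processors and is tested
   against the currently selected tuning target.  */
#if 0
#define TUNEMASK (1 << ix86_tune)
#define TARGET_USE_LEAVE (x86_use_leave & TUNEMASK)
#endif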
731
732 /* Generic instruction choice should be a common subset of the supported CPUs
733 (PPro/PENT4/NOCONA/Athlon/K8). */
734
735 /* Leave does not affect Nocona SPEC2000 results negatively, so enabling it for
736 Generic64 seems like a good code-size tradeoff. We can't enable it for 32-bit
737 generic because it does not work well with PPro-based chips. */
738 const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8 | m_GENERIC64;
739 const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
740 const int x86_zero_extend_with_and = m_486 | m_PENT;
741 const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA | m_GENERIC /* m_386 | m_K6 */;
742 const int x86_double_with_add = ~m_386;
743 const int x86_use_bit_test = m_386;
744 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6 | m_GENERIC;
745 const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
746 const int x86_fisttp = m_NOCONA;
747 const int x86_3dnow_a = m_ATHLON_K8;
748 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
749 /* Branch hints were put in the P4 based on simulation results. But
750 after the P4 was made, no performance benefit was observed from
751 branch hints. They also increase the code size. As a result,
752 icc never generates branch hints. */
753 const int x86_branch_hints = 0;
754 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4 | m_NOCONA | m_GENERIC32; /*m_GENERIC | m_ATHLON_K8 ? */
755 /* We probably ought to watch for partial register stalls on the Generic32
756 compilation setting as well. However, in the current implementation
757 partial register stalls are not eliminated very well - they can
758 be introduced via subregs synthesized by combine and can happen
759 in caller/callee saving sequences.
760 Because this option pays back little on PPro-based chips and is in conflict
761 with the partial reg. dependencies used by Athlon/P4-based chips, it is better
762 to leave it off for generic32 for now. */
763 const int x86_partial_reg_stall = m_PPRO;
764 const int x86_use_himode_fiop = m_386 | m_486 | m_K6;
765 const int x86_use_simode_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT | m_GENERIC);
766 const int x86_use_mov0 = m_K6;
767 const int x86_use_cltd = ~(m_PENT | m_K6 | m_GENERIC);
768 const int x86_read_modify_write = ~m_PENT;
769 const int x86_read_modify = ~(m_PENT | m_PPRO);
770 const int x86_split_long_moves = m_PPRO;
771 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8 | m_GENERIC; /* m_PENT4 ? */
772 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
773 const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
774 const int x86_qimode_math = ~(0);
775 const int x86_promote_qi_regs = 0;
776 /* On PPro this flag is meant to avoid partial register stalls. Just like
777 x86_partial_reg_stall, this option might be considered for Generic32
778 if our scheme for avoiding partial stalls were more effective. */
779 const int x86_himode_math = ~(m_PPRO);
780 const int x86_promote_hi_regs = m_PPRO;
781 const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA | m_GENERIC;
782 const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4 | m_NOCONA | m_GENERIC;
783 const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4 | m_NOCONA | m_GENERIC;
784 const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4 | m_NOCONA | m_GENERIC;
785 const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC);
786 const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
787 const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
788 const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC;
789 const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO | m_GENERIC;
790 const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO | m_GENERIC;
791 const int x86_shift1 = ~m_486;
792 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
793 /* In the Generic model we have a conflict here between PPro/Pentium4-based chips,
794 which treat 128-bit SSE registers as single units, and K8-based chips, which
795 divide SSE registers into two 64-bit halves.
796 x86_sse_partial_reg_dependency promotes all store destinations to 128 bits
797 to allow register renaming on 128-bit SSE units, but usually results in one
798 extra micro-op on 64-bit SSE units. Experimental results show that disabling
799 this option on the P4 brings over a 20% SPECfp regression, while enabling it on
800 the K8 brings roughly a 2.4% regression that can be partly masked by careful scheduling
801 of moves. */
802 const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC;
803 /* Set for machines where the type and dependencies are resolved on SSE
804 register parts instead of whole registers, so we may maintain just the
805 lower part of scalar values in the proper format, leaving the upper part
806 undefined. */
807 const int x86_sse_split_regs = m_ATHLON_K8;
808 const int x86_sse_typeless_stores = m_ATHLON_K8;
809 const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
810 const int x86_use_ffreep = m_ATHLON_K8;
811 const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
812 const int x86_use_incdec = ~(m_PENT4 | m_NOCONA | m_GENERIC);
813
814 /* ??? Allowing interunit moves makes it all too easy for the compiler to put
815 integer data in xmm registers, which results in pretty abysmal code. */
816 const int x86_inter_unit_moves = 0 /* ~(m_ATHLON_K8) */;
817
818 const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC32;
819 /* Some CPU cores are not able to predict more than 4 branch instructions in
820 the 16 byte window. */
821 const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
822 const int x86_schedule = m_PPRO | m_ATHLON_K8 | m_K6 | m_PENT | m_GENERIC;
823 const int x86_use_bt = m_ATHLON_K8;
824 /* Compare and exchange was added for 80486. */
825 const int x86_cmpxchg = ~m_386;
826 /* Compare and exchange 8 bytes was added for the Pentium. */
827 const int x86_cmpxchg8b = ~(m_386 | m_486);
828 /* Compare and exchange 16 bytes was added for Nocona. */
829 const int x86_cmpxchg16b = m_NOCONA;
830 /* Exchange and add was added for 80486. */
831 const int x86_xadd = ~m_386;
832 const int x86_pad_returns = m_ATHLON_K8 | m_GENERIC;
833
834 /* In case the average insn count for a single function invocation is
835 lower than this constant, emit fast (but longer) prologue and
836 epilogue code. */
837 #define FAST_PROLOGUE_INSN_COUNT 20
838
839 /* Names for the 8-bit (low), 8-bit (high), and 16-bit registers, respectively. */
840 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
841 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
842 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
843
844 /* Array of the smallest class containing reg number REGNO, indexed by
845 REGNO. Used by REGNO_REG_CLASS in i386.h. */
846
847 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
848 {
849 /* ax, dx, cx, bx */
850 AREG, DREG, CREG, BREG,
851 /* si, di, bp, sp */
852 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
853 /* FP registers */
854 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
855 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
856 /* arg pointer */
857 NON_Q_REGS,
858 /* flags, fpsr, dirflag, frame */
859 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
860 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
861 SSE_REGS, SSE_REGS,
862 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
863 MMX_REGS, MMX_REGS,
864 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
865 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
866 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
867 SSE_REGS, SSE_REGS,
868 };
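/* Illustrative sketch of how this table is consumed (the actual macro is in
   i386.h): REGNO_REG_CLASS simply indexes regclass_map, so e.g. register 0
   (%eax) yields AREG and the stack pointer yields NON_Q_REGS.  */
#if 0
#define REGNO_REG_CLASS(REGNO) (regclass_map[(REGNO)])
#endif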
869
870 /* The "default" register map used in 32bit mode. */
871
872 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
873 {
874 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
875 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
876 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
877 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
878 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
879 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
880 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
881 };
882
883 static int const x86_64_int_parameter_registers[6] =
884 {
885 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
886 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
887 };
888
889 static int const x86_64_int_return_registers[4] =
890 {
891 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
892 };
893
894 /* The "default" register map used in 64bit mode. */
895 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
896 {
897 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
898 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
899 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
900 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
901 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
902 8,9,10,11,12,13,14,15, /* extended integer registers */
903 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
904 };
905
906 /* Define the register numbers to be used in Dwarf debugging information.
907 The SVR4 reference port C compiler uses the following register numbers
908 in its Dwarf output code:
909 0 for %eax (gcc regno = 0)
910 1 for %ecx (gcc regno = 2)
911 2 for %edx (gcc regno = 1)
912 3 for %ebx (gcc regno = 3)
913 4 for %esp (gcc regno = 7)
914 5 for %ebp (gcc regno = 6)
915 6 for %esi (gcc regno = 4)
916 7 for %edi (gcc regno = 5)
917 The following three DWARF register numbers are never generated by
918 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
919 believes these numbers have these meanings.
920 8 for %eip (no gcc equivalent)
921 9 for %eflags (gcc regno = 17)
922 10 for %trapno (no gcc equivalent)
923 It is not at all clear how we should number the FP stack registers
924 for the x86 architecture. If the version of SDB on x86/svr4 were
925 a bit less brain dead with respect to floating-point then we would
926 have a precedent to follow with respect to DWARF register numbers
927 for x86 FP registers, but the SDB on x86/svr4 is so completely
928 broken with respect to FP registers that it is hardly worth thinking
929 of it as something to strive for compatibility with.
930 The version of x86/svr4 SDB I have at the moment does (partially)
931 seem to believe that DWARF register number 11 is associated with
932 the x86 register %st(0), but that's about all. Higher DWARF
933 register numbers don't seem to be associated with anything in
934 particular, and even for DWARF regno 11, SDB only seems to under-
935 stand that it should say that a variable lives in %st(0) (when
936 asked via an `=' command) if we said it was in DWARF regno 11,
937 but SDB still prints garbage when asked for the value of the
938 variable in question (via a `/' command).
939 (Also note that the labels SDB prints for various FP stack regs
940 when doing an `x' command are all wrong.)
941 Note that these problems generally don't affect the native SVR4
942 C compiler because it doesn't allow the use of -O with -g and
943 because when it is *not* optimizing, it allocates a memory
944 location for each floating-point variable, and the memory
945 location is what gets described in the DWARF AT_location
946 attribute for the variable in question.
947 Regardless of the severe mental illness of the x86/svr4 SDB, we
948 do something sensible here and we use the following DWARF
949 register numbers. Note that these are all stack-top-relative
950 numbers.
951 11 for %st(0) (gcc regno = 8)
952 12 for %st(1) (gcc regno = 9)
953 13 for %st(2) (gcc regno = 10)
954 14 for %st(3) (gcc regno = 11)
955 15 for %st(4) (gcc regno = 12)
956 16 for %st(5) (gcc regno = 13)
957 17 for %st(6) (gcc regno = 14)
958 18 for %st(7) (gcc regno = 15)
959 */
960 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
961 {
962 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
963 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
964 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
965 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
966 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
967 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
968 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
969 };
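/* Worked example of the table above, matching the numbering described in the
   preceding comment (illustration only, not original code).  */
#if 0
int dwarf_ecx = svr4_dbx_register_map[2];	/* %ecx -> DWARF 1 */
int dwarf_esp = svr4_dbx_register_map[7];	/* %esp -> DWARF 4 */
int dwarf_st0 = svr4_dbx_register_map[8];	/* %st(0) -> DWARF 11 */
#endif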
970
971 /* Test and compare insns in i386.md store the information needed to
972 generate branch and scc insns here. */
973
974 rtx ix86_compare_op0 = NULL_RTX;
975 rtx ix86_compare_op1 = NULL_RTX;
976 rtx ix86_compare_emitted = NULL_RTX;
977
978 /* Size of the register save area. */
979 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
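/* Worked example (assuming the usual 64-bit values REGPARM_MAX == 6,
   UNITS_PER_WORD == 8 and SSE_REGPARM_MAX == 8, which are defined elsewhere):
   the register save area is 6 * 8 + 8 * 16 == 176 bytes -- six integer
   argument registers followed by eight 16-byte SSE slots.  */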
980
981 /* Define the structure for the machine field in struct function. */
982
983 struct stack_local_entry GTY(())
984 {
985 unsigned short mode;
986 unsigned short n;
987 rtx rtl;
988 struct stack_local_entry *next;
989 };
990
991 /* Structure describing stack frame layout.
992 Stack grows downward:
993
994 [arguments]
995 <- ARG_POINTER
996 saved pc
997
998 saved frame pointer if frame_pointer_needed
999 <- HARD_FRAME_POINTER
1000 [saved regs]
1001
1002 [padding1] \
1003 )
1004 [va_arg registers] (
1005 > to_allocate <- FRAME_POINTER
1006 [frame] (
1007 )
1008 [padding2] /
1009 */
1010 struct ix86_frame
1011 {
1012 int nregs;
1013 int padding1;
1014 int va_arg_size;
1015 HOST_WIDE_INT frame;
1016 int padding2;
1017 int outgoing_arguments_size;
1018 int red_zone_size;
1019
1020 HOST_WIDE_INT to_allocate;
1021 /* The offsets relative to ARG_POINTER. */
1022 HOST_WIDE_INT frame_pointer_offset;
1023 HOST_WIDE_INT hard_frame_pointer_offset;
1024 HOST_WIDE_INT stack_pointer_offset;
1025
1026 /* When save_regs_using_mov is set, emit prologue using
1027 move instead of push instructions. */
1028 bool save_regs_using_mov;
1029 };
1030
1031 /* Code model option. */
1032 enum cmodel ix86_cmodel;
1033 /* Asm dialect. */
1034 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1035 /* TLS dialects. */
1036 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1037
1038 /* Which unit we are generating floating point math for. */
1039 enum fpmath_unit ix86_fpmath;
1040
1041 /* Which CPU we are scheduling for. */
1042 enum processor_type ix86_tune;
1043 /* Which instruction set architecture to use. */
1044 enum processor_type ix86_arch;
1045
1046 /* True if the SSE prefetch instruction is not a NOOP. */
1047 int x86_prefetch_sse;
1048
1049 /* ix86_regparm_string as a number */
1050 static int ix86_regparm;
1051
1052 /* -mstackrealign option */
1053 extern int ix86_force_align_arg_pointer;
1054 static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer";
1055
1056 /* Preferred alignment for stack boundary in bits. */
1057 unsigned int ix86_preferred_stack_boundary;
1058
1059 /* Values 1-5: see jump.c */
1060 int ix86_branch_cost;
1061
1062 /* Variables which are this size or smaller are put in the data/bss
1063 or ldata/lbss sections. */
1064
1065 int ix86_section_threshold = 65536;
1066
1067 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1068 char internal_label_prefix[16];
1069 int internal_label_prefix_len;
1070 \f
1071 static bool ix86_handle_option (size_t, const char *, int);
1072 static void output_pic_addr_const (FILE *, rtx, int);
1073 static void put_condition_code (enum rtx_code, enum machine_mode,
1074 int, int, FILE *);
1075 static const char *get_some_local_dynamic_name (void);
1076 static int get_some_local_dynamic_name_1 (rtx *, void *);
1077 static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
1078 static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
1079 rtx *);
1080 static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
1081 static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
1082 enum machine_mode);
1083 static rtx get_thread_pointer (int);
1084 static rtx legitimize_tls_address (rtx, enum tls_model, int);
1085 static void get_pc_thunk_name (char [32], unsigned int);
1086 static rtx gen_push (rtx);
1087 static int ix86_flags_dependent (rtx, rtx, enum attr_type);
1088 static int ix86_agi_dependent (rtx, rtx, enum attr_type);
1089 static struct machine_function * ix86_init_machine_status (void);
1090 static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
1091 static int ix86_nsaved_regs (void);
1092 static void ix86_emit_save_regs (void);
1093 static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
1094 static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
1095 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
1096 static HOST_WIDE_INT ix86_GOT_alias_set (void);
1097 static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
1098 static rtx ix86_expand_aligntest (rtx, int);
1099 static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
1100 static int ix86_issue_rate (void);
1101 static int ix86_adjust_cost (rtx, rtx, rtx, int);
1102 static int ia32_multipass_dfa_lookahead (void);
1103 static void ix86_init_mmx_sse_builtins (void);
1104 static rtx x86_this_parameter (tree);
1105 static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
1106 HOST_WIDE_INT, tree);
1107 static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
1108 static void x86_file_start (void);
1109 static void ix86_reorg (void);
1110 static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
1111 static tree ix86_build_builtin_va_list (void);
1112 static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
1113 tree, int *, int);
1114 static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *);
1115 static bool ix86_scalar_mode_supported_p (enum machine_mode);
1116 static bool ix86_vector_mode_supported_p (enum machine_mode);
1117
1118 static int ix86_address_cost (rtx);
1119 static bool ix86_cannot_force_const_mem (rtx);
1120 static rtx ix86_delegitimize_address (rtx);
1121
1122 static void i386_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
1123
1124 struct builtin_description;
1125 static rtx ix86_expand_sse_comi (const struct builtin_description *,
1126 tree, rtx);
1127 static rtx ix86_expand_sse_compare (const struct builtin_description *,
1128 tree, rtx);
1129 static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
1130 static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
1131 static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
1132 static rtx ix86_expand_store_builtin (enum insn_code, tree);
1133 static rtx safe_vector_operand (rtx, enum machine_mode);
1134 static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
1135 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
1136 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
1137 static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
1138 static int ix86_fp_comparison_cost (enum rtx_code code);
1139 static unsigned int ix86_select_alt_pic_regnum (void);
1140 static int ix86_save_reg (unsigned int, int);
1141 static void ix86_compute_frame_layout (struct ix86_frame *);
1142 static int ix86_comp_type_attributes (tree, tree);
1143 static int ix86_function_regparm (tree, tree);
1144 const struct attribute_spec ix86_attribute_table[];
1145 static bool ix86_function_ok_for_sibcall (tree, tree);
1146 static tree ix86_handle_cconv_attribute (tree *, tree, tree, int, bool *);
1147 static int ix86_value_regno (enum machine_mode, tree, tree);
1148 static bool contains_128bit_aligned_vector_p (tree);
1149 static rtx ix86_struct_value_rtx (tree, int);
1150 static bool ix86_ms_bitfield_layout_p (tree);
1151 static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
1152 static int extended_reg_mentioned_1 (rtx *, void *);
1153 static bool ix86_rtx_costs (rtx, int, int, int *);
1154 static int min_insn_size (rtx);
1155 static tree ix86_md_asm_clobbers (tree outputs, tree inputs, tree clobbers);
1156 static bool ix86_must_pass_in_stack (enum machine_mode mode, tree type);
1157 static bool ix86_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
1158 tree, bool);
1159 static void ix86_init_builtins (void);
1160 static rtx ix86_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
1161 static const char *ix86_mangle_fundamental_type (tree);
1162 static tree ix86_stack_protect_fail (void);
1163 static rtx ix86_internal_arg_pointer (void);
1164 static void ix86_dwarf_handle_frame_unspec (const char *, rtx, int);
1165
1166 /* This function is only used on Solaris. */
1167 static void i386_solaris_elf_named_section (const char *, unsigned int, tree)
1168 ATTRIBUTE_UNUSED;
1169
1170 /* Register classes used for passing a given 64-bit part of the argument.
1171 These represent classes as documented by the PS ABI, with the exception
1172 of the SSESF and SSEDF classes, which are basically the SSE class; gcc will
1173 just use an SF or DFmode move instead of DImode to avoid reformatting penalties.
1174
1175 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1176 whenever possible (the upper half does contain padding).
1177 */
1178 enum x86_64_reg_class
1179 {
1180 X86_64_NO_CLASS,
1181 X86_64_INTEGER_CLASS,
1182 X86_64_INTEGERSI_CLASS,
1183 X86_64_SSE_CLASS,
1184 X86_64_SSESF_CLASS,
1185 X86_64_SSEDF_CLASS,
1186 X86_64_SSEUP_CLASS,
1187 X86_64_X87_CLASS,
1188 X86_64_X87UP_CLASS,
1189 X86_64_COMPLEX_X87_CLASS,
1190 X86_64_MEMORY_CLASS
1191 };
1192 static const char * const x86_64_reg_class_name[] = {
1193 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1194 "sseup", "x87", "x87up", "cplx87", "no"
1195 };
1196
1197 #define MAX_CLASSES 4
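/* Worked example of the classification above (an illustration; the actual
   logic lives in classify_argument further down in this file): a structure
   such as

     struct s { double d; int i; };

   occupies two eightbytes.  The first classifies as X86_64_SSEDF_CLASS (the
   SSE class, moved in DFmode) and is passed in an SSE register; the second
   classifies as X86_64_INTEGERSI_CLASS, since only its low half is meaningful,
   and is passed in a general-purpose register.  */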
1198
1199 /* Table of constants used by fldpi, fldln2, etc.... */
1200 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1201 static bool ext_80387_constants_init = 0;
1202 static void init_ext_80387_constants (void);
1203 static bool ix86_in_large_data_p (tree) ATTRIBUTE_UNUSED;
1204 static void ix86_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
1205 static void x86_64_elf_unique_section (tree decl, int reloc) ATTRIBUTE_UNUSED;
1206 static section *x86_64_elf_select_section (tree decl, int reloc,
1207 unsigned HOST_WIDE_INT align)
1208 ATTRIBUTE_UNUSED;
1209 \f
1210 /* Initialize the GCC target structure. */
1211 #undef TARGET_ATTRIBUTE_TABLE
1212 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
1213 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1214 # undef TARGET_MERGE_DECL_ATTRIBUTES
1215 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
1216 #endif
1217
1218 #undef TARGET_COMP_TYPE_ATTRIBUTES
1219 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
1220
1221 #undef TARGET_INIT_BUILTINS
1222 #define TARGET_INIT_BUILTINS ix86_init_builtins
1223 #undef TARGET_EXPAND_BUILTIN
1224 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
1225
1226 #undef TARGET_ASM_FUNCTION_EPILOGUE
1227 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
1228
1229 #undef TARGET_ENCODE_SECTION_INFO
1230 #ifndef SUBTARGET_ENCODE_SECTION_INFO
1231 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
1232 #else
1233 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
1234 #endif
1235
1236 #undef TARGET_ASM_OPEN_PAREN
1237 #define TARGET_ASM_OPEN_PAREN ""
1238 #undef TARGET_ASM_CLOSE_PAREN
1239 #define TARGET_ASM_CLOSE_PAREN ""
1240
1241 #undef TARGET_ASM_ALIGNED_HI_OP
1242 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
1243 #undef TARGET_ASM_ALIGNED_SI_OP
1244 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
1245 #ifdef ASM_QUAD
1246 #undef TARGET_ASM_ALIGNED_DI_OP
1247 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
1248 #endif
1249
1250 #undef TARGET_ASM_UNALIGNED_HI_OP
1251 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
1252 #undef TARGET_ASM_UNALIGNED_SI_OP
1253 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
1254 #undef TARGET_ASM_UNALIGNED_DI_OP
1255 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
1256
1257 #undef TARGET_SCHED_ADJUST_COST
1258 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
1259 #undef TARGET_SCHED_ISSUE_RATE
1260 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
1261 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1262 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
1263 ia32_multipass_dfa_lookahead
1264
1265 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1266 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
1267
1268 #ifdef HAVE_AS_TLS
1269 #undef TARGET_HAVE_TLS
1270 #define TARGET_HAVE_TLS true
1271 #endif
1272 #undef TARGET_CANNOT_FORCE_CONST_MEM
1273 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1274 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1275 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_rtx_true
1276
1277 #undef TARGET_DELEGITIMIZE_ADDRESS
1278 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1279
1280 #undef TARGET_MS_BITFIELD_LAYOUT_P
1281 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1282
1283 #if TARGET_MACHO
1284 #undef TARGET_BINDS_LOCAL_P
1285 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1286 #endif
1287
1288 #undef TARGET_ASM_OUTPUT_MI_THUNK
1289 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1290 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1291 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1292
1293 #undef TARGET_ASM_FILE_START
1294 #define TARGET_ASM_FILE_START x86_file_start
1295
1296 #undef TARGET_DEFAULT_TARGET_FLAGS
1297 #define TARGET_DEFAULT_TARGET_FLAGS \
1298 (TARGET_DEFAULT \
1299 | TARGET_64BIT_DEFAULT \
1300 | TARGET_SUBTARGET_DEFAULT \
1301 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
1302
1303 #undef TARGET_HANDLE_OPTION
1304 #define TARGET_HANDLE_OPTION ix86_handle_option
1305
1306 #undef TARGET_RTX_COSTS
1307 #define TARGET_RTX_COSTS ix86_rtx_costs
1308 #undef TARGET_ADDRESS_COST
1309 #define TARGET_ADDRESS_COST ix86_address_cost
1310
1311 #undef TARGET_FIXED_CONDITION_CODE_REGS
1312 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1313 #undef TARGET_CC_MODES_COMPATIBLE
1314 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1315
1316 #undef TARGET_MACHINE_DEPENDENT_REORG
1317 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1318
1319 #undef TARGET_BUILD_BUILTIN_VA_LIST
1320 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1321
1322 #undef TARGET_MD_ASM_CLOBBERS
1323 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1324
1325 #undef TARGET_PROMOTE_PROTOTYPES
1326 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1327 #undef TARGET_STRUCT_VALUE_RTX
1328 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
1329 #undef TARGET_SETUP_INCOMING_VARARGS
1330 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1331 #undef TARGET_MUST_PASS_IN_STACK
1332 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
1333 #undef TARGET_PASS_BY_REFERENCE
1334 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
1335 #undef TARGET_INTERNAL_ARG_POINTER
1336 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
1337 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
1338 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
1339
1340 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1341 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
1342
1343 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1344 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
1345
1346 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1347 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
1348
1349 #ifdef HAVE_AS_TLS
1350 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1351 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
1352 #endif
1353
1354 #ifdef SUBTARGET_INSERT_ATTRIBUTES
1355 #undef TARGET_INSERT_ATTRIBUTES
1356 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
1357 #endif
1358
1359 #undef TARGET_MANGLE_FUNDAMENTAL_TYPE
1360 #define TARGET_MANGLE_FUNDAMENTAL_TYPE ix86_mangle_fundamental_type
1361
1362 #undef TARGET_STACK_PROTECT_FAIL
1363 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
1364
1365 #undef TARGET_FUNCTION_VALUE
1366 #define TARGET_FUNCTION_VALUE ix86_function_value
1367
1368 struct gcc_target targetm = TARGET_INITIALIZER;
1369
1370 \f
1371 /* The svr4 ABI for the i386 says that records and unions are returned
1372 in memory. */
1373 #ifndef DEFAULT_PCC_STRUCT_RETURN
1374 #define DEFAULT_PCC_STRUCT_RETURN 1
1375 #endif
1376
1377 /* Implement TARGET_HANDLE_OPTION. */
1378
1379 static bool
1380 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
1381 {
1382 switch (code)
1383 {
1384 case OPT_m3dnow:
1385 if (!value)
1386 {
1387 target_flags &= ~MASK_3DNOW_A;
1388 target_flags_explicit |= MASK_3DNOW_A;
1389 }
1390 return true;
1391
1392 case OPT_mmmx:
1393 if (!value)
1394 {
1395 target_flags &= ~(MASK_3DNOW | MASK_3DNOW_A);
1396 target_flags_explicit |= MASK_3DNOW | MASK_3DNOW_A;
1397 }
1398 return true;
1399
1400 case OPT_msse:
1401 if (!value)
1402 {
1403 target_flags &= ~(MASK_SSE2 | MASK_SSE3);
1404 target_flags_explicit |= MASK_SSE2 | MASK_SSE3;
1405 }
1406 return true;
1407
1408 case OPT_msse2:
1409 if (!value)
1410 {
1411 target_flags &= ~MASK_SSE3;
1412 target_flags_explicit |= MASK_SSE3;
1413 }
1414 return true;
1415
1416 default:
1417 return true;
1418 }
1419 }
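/* Illustrative note, added for clarity (not part of the original source):
   the handler above only clears the masks of dependent extensions when a
   feature is explicitly disabled, so the switches behave like a small
   dependency chain.  A hedged sketch of the net effect on a command line:

       gcc -msse3 -mno-sse2 foo.c    -mno-sse2 also turns off SSE3
       gcc -msse2 -mno-sse  foo.c    -mno-sse  also turns off SSE2 and SSE3
       gcc -m3dnow -mno-mmx foo.c    -mno-mmx  also turns off the 3DNow! bits

   The option's own mask is handled elsewhere; only the dependent MASK_*
   bits are adjusted here.  */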
1420
1421 /* Sometimes certain combinations of command options do not make
1422 sense on a particular target machine. You can define a macro
1423 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1424 defined, is executed once just after all the command options have
1425 been parsed.
1426
1427 Don't use this macro to turn on various extra optimizations for
1428 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1429
1430 void
1431 override_options (void)
1432 {
1433 int i;
1434 int ix86_tune_defaulted = 0;
1435
1436 /* Comes from final.c -- no real reason to change it. */
1437 #define MAX_CODE_ALIGN 16
1438
1439 static struct ptt
1440 {
1441 const struct processor_costs *cost; /* Processor costs */
1442 const int target_enable; /* Target flags to enable. */
1443 const int target_disable; /* Target flags to disable. */
1444 const int align_loop; /* Default alignments. */
1445 const int align_loop_max_skip;
1446 const int align_jump;
1447 const int align_jump_max_skip;
1448 const int align_func;
1449 }
1450 const processor_target_table[PROCESSOR_max] =
1451 {
1452 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1453 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1454 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1455 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1456 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1457 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1458 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1459 {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
1460 {&nocona_cost, 0, 0, 0, 0, 0, 0, 0},
1461 {&generic32_cost, 0, 0, 16, 7, 16, 7, 16},
1462 {&generic64_cost, 0, 0, 16, 7, 16, 7, 16}
1463 };
1464
1465 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1466 static struct pta
1467 {
1468 const char *const name; /* processor name or nickname. */
1469 const enum processor_type processor;
1470 const enum pta_flags
1471 {
1472 PTA_SSE = 1,
1473 PTA_SSE2 = 2,
1474 PTA_SSE3 = 4,
1475 PTA_MMX = 8,
1476 PTA_PREFETCH_SSE = 16,
1477 PTA_3DNOW = 32,
1478 PTA_3DNOW_A = 64,
1479 PTA_64BIT = 128
1480 } flags;
1481 }
1482 const processor_alias_table[] =
1483 {
1484 {"i386", PROCESSOR_I386, 0},
1485 {"i486", PROCESSOR_I486, 0},
1486 {"i586", PROCESSOR_PENTIUM, 0},
1487 {"pentium", PROCESSOR_PENTIUM, 0},
1488 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1489 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1490 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1491 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1492 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1493 {"i686", PROCESSOR_PENTIUMPRO, 0},
1494 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1495 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1496 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1497 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1498 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
1499 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1500 | PTA_MMX | PTA_PREFETCH_SSE},
1501 {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1502 | PTA_MMX | PTA_PREFETCH_SSE},
1503 {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
1504 | PTA_MMX | PTA_PREFETCH_SSE},
1505 {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
1506 | PTA_MMX | PTA_PREFETCH_SSE},
1507 {"k6", PROCESSOR_K6, PTA_MMX},
1508 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1509 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1510 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1511 | PTA_3DNOW_A},
1512 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1513 | PTA_3DNOW | PTA_3DNOW_A},
1514 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1515 | PTA_3DNOW_A | PTA_SSE},
1516 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1517 | PTA_3DNOW_A | PTA_SSE},
1518 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1519 | PTA_3DNOW_A | PTA_SSE},
1520 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1521 | PTA_SSE | PTA_SSE2 },
1522 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1523 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1524 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1525 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1526 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1527 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1528 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1529 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1530 {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch. */ },
1531 {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ },
1532 };
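/* Illustrative note (added, not original): each row above ties an -march=
   or -mtune= name to a processor enum plus the ISA bits it implies.  For
   example, the "pentium3" entry means that -march=pentium3 makes the loop
   further below turn on MASK_MMX and MASK_SSE (and SSE prefetch), unless
   the user explicitly disabled those flags on the command line.  */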
1533
1534 int const pta_size = ARRAY_SIZE (processor_alias_table);
1535
1536 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1537 SUBTARGET_OVERRIDE_OPTIONS;
1538 #endif
1539
1540 /* Set the default values for switches whose default depends on TARGET_64BIT
1541 in case they weren't overwritten by command line options. */
1542 if (TARGET_64BIT)
1543 {
1544 if (flag_omit_frame_pointer == 2)
1545 flag_omit_frame_pointer = 1;
1546 if (flag_asynchronous_unwind_tables == 2)
1547 flag_asynchronous_unwind_tables = 1;
1548 if (flag_pcc_struct_return == 2)
1549 flag_pcc_struct_return = 0;
1550 }
1551 else
1552 {
1553 if (flag_omit_frame_pointer == 2)
1554 flag_omit_frame_pointer = 0;
1555 if (flag_asynchronous_unwind_tables == 2)
1556 flag_asynchronous_unwind_tables = 0;
1557 if (flag_pcc_struct_return == 2)
1558 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1559 }
1560
1561 /* Need to check -mtune=generic first. */
1562 if (ix86_tune_string)
1563 {
1564 if (!strcmp (ix86_tune_string, "generic")
1565 || !strcmp (ix86_tune_string, "i686")
1566 /* As special support for cross compilers we read -mtune=native
1567 as -mtune=generic. With native compilers we won't see the
1568 -mtune=native, as it was changed by the driver. */
1569 || !strcmp (ix86_tune_string, "native"))
1570 {
1571 if (TARGET_64BIT)
1572 ix86_tune_string = "generic64";
1573 else
1574 ix86_tune_string = "generic32";
1575 }
1576 else if (!strncmp (ix86_tune_string, "generic", 7))
1577 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1578 }
1579 else
1580 {
1581 if (ix86_arch_string)
1582 ix86_tune_string = ix86_arch_string;
1583 if (!ix86_tune_string)
1584 {
1585 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1586 ix86_tune_defaulted = 1;
1587 }
1588
1589 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
1590 need to use a sensible tune option. */
1591 if (!strcmp (ix86_tune_string, "generic")
1592 || !strcmp (ix86_tune_string, "x86-64")
1593 || !strcmp (ix86_tune_string, "i686"))
1594 {
1595 if (TARGET_64BIT)
1596 ix86_tune_string = "generic64";
1597 else
1598 ix86_tune_string = "generic32";
1599 }
1600 }
1601 if (!strcmp (ix86_tune_string, "x86-64"))
1602 warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
1603 "-mtune=generic instead as appropriate.");
1604
1605 if (!ix86_arch_string)
1606 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
1607 if (!strcmp (ix86_arch_string, "generic"))
1608 error ("generic CPU can be used only for -mtune= switch");
1609 if (!strncmp (ix86_arch_string, "generic", 7))
1610 error ("bad value (%s) for -march= switch", ix86_arch_string);
1611
1612 if (ix86_cmodel_string != 0)
1613 {
1614 if (!strcmp (ix86_cmodel_string, "small"))
1615 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1616 else if (!strcmp (ix86_cmodel_string, "medium"))
1617 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
1618 else if (flag_pic)
1619 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1620 else if (!strcmp (ix86_cmodel_string, "32"))
1621 ix86_cmodel = CM_32;
1622 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1623 ix86_cmodel = CM_KERNEL;
1624 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1625 ix86_cmodel = CM_LARGE;
1626 else
1627 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1628 }
1629 else
1630 {
1631 ix86_cmodel = CM_32;
1632 if (TARGET_64BIT)
1633 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1634 }
1635 if (ix86_asm_string != 0)
1636 {
1637 if (! TARGET_MACHO
1638 && !strcmp (ix86_asm_string, "intel"))
1639 ix86_asm_dialect = ASM_INTEL;
1640 else if (!strcmp (ix86_asm_string, "att"))
1641 ix86_asm_dialect = ASM_ATT;
1642 else
1643 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1644 }
1645 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1646 error ("code model %qs not supported in the %s bit mode",
1647 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1648 if (ix86_cmodel == CM_LARGE)
1649 sorry ("code model %<large%> not supported yet");
1650 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1651 sorry ("%i-bit mode not compiled in",
1652 (target_flags & MASK_64BIT) ? 64 : 32);
1653
1654 for (i = 0; i < pta_size; i++)
1655 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1656 {
1657 ix86_arch = processor_alias_table[i].processor;
1658 /* Default cpu tuning to the architecture. */
1659 ix86_tune = ix86_arch;
1660 if (processor_alias_table[i].flags & PTA_MMX
1661 && !(target_flags_explicit & MASK_MMX))
1662 target_flags |= MASK_MMX;
1663 if (processor_alias_table[i].flags & PTA_3DNOW
1664 && !(target_flags_explicit & MASK_3DNOW))
1665 target_flags |= MASK_3DNOW;
1666 if (processor_alias_table[i].flags & PTA_3DNOW_A
1667 && !(target_flags_explicit & MASK_3DNOW_A))
1668 target_flags |= MASK_3DNOW_A;
1669 if (processor_alias_table[i].flags & PTA_SSE
1670 && !(target_flags_explicit & MASK_SSE))
1671 target_flags |= MASK_SSE;
1672 if (processor_alias_table[i].flags & PTA_SSE2
1673 && !(target_flags_explicit & MASK_SSE2))
1674 target_flags |= MASK_SSE2;
1675 if (processor_alias_table[i].flags & PTA_SSE3
1676 && !(target_flags_explicit & MASK_SSE3))
1677 target_flags |= MASK_SSE3;
1678 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1679 x86_prefetch_sse = true;
1680 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1681 error ("CPU you selected does not support x86-64 "
1682 "instruction set");
1683 break;
1684 }
1685
1686 if (i == pta_size)
1687 error ("bad value (%s) for -march= switch", ix86_arch_string);
1688
1689 for (i = 0; i < pta_size; i++)
1690 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
1691 {
1692 ix86_tune = processor_alias_table[i].processor;
1693 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1694 {
1695 if (ix86_tune_defaulted)
1696 {
1697 ix86_tune_string = "x86-64";
1698 for (i = 0; i < pta_size; i++)
1699 if (! strcmp (ix86_tune_string,
1700 processor_alias_table[i].name))
1701 break;
1702 ix86_tune = processor_alias_table[i].processor;
1703 }
1704 else
1705 error ("CPU you selected does not support x86-64 "
1706 "instruction set");
1707 }
1708 /* Intel CPUs have always interpreted SSE prefetch instructions as
1709 NOPs; so, we can enable SSE prefetch instructions even when
1710 -mtune (rather than -march) points us to a processor that has them.
1711 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
1712 higher processors. */
1713 if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
1714 x86_prefetch_sse = true;
1715 break;
1716 }
1717 if (i == pta_size)
1718 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1719
1720 if (optimize_size)
1721 ix86_cost = &size_cost;
1722 else
1723 ix86_cost = processor_target_table[ix86_tune].cost;
1724 target_flags |= processor_target_table[ix86_tune].target_enable;
1725 target_flags &= ~processor_target_table[ix86_tune].target_disable;
1726
1727 /* Arrange to set up i386_stack_locals for all functions. */
1728 init_machine_status = ix86_init_machine_status;
1729
1730 /* Validate -mregparm= value. */
1731 if (ix86_regparm_string)
1732 {
1733 i = atoi (ix86_regparm_string);
1734 if (i < 0 || i > REGPARM_MAX)
1735 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1736 else
1737 ix86_regparm = i;
1738 }
1739 else
1740 if (TARGET_64BIT)
1741 ix86_regparm = REGPARM_MAX;
1742
1743 /* If the user has provided any of the -malign-* options,
1744 warn and use that value only if -falign-* is not set.
1745 Remove this code in GCC 3.2 or later. */
1746 if (ix86_align_loops_string)
1747 {
1748 warning (0, "-malign-loops is obsolete, use -falign-loops");
1749 if (align_loops == 0)
1750 {
1751 i = atoi (ix86_align_loops_string);
1752 if (i < 0 || i > MAX_CODE_ALIGN)
1753 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1754 else
1755 align_loops = 1 << i;
1756 }
1757 }
1758
1759 if (ix86_align_jumps_string)
1760 {
1761 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
1762 if (align_jumps == 0)
1763 {
1764 i = atoi (ix86_align_jumps_string);
1765 if (i < 0 || i > MAX_CODE_ALIGN)
1766 error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1767 else
1768 align_jumps = 1 << i;
1769 }
1770 }
1771
1772 if (ix86_align_funcs_string)
1773 {
1774 warning (0, "-malign-functions is obsolete, use -falign-functions");
1775 if (align_functions == 0)
1776 {
1777 i = atoi (ix86_align_funcs_string);
1778 if (i < 0 || i > MAX_CODE_ALIGN)
1779 error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1780 else
1781 align_functions = 1 << i;
1782 }
1783 }
1784
1785 /* Default align_* from the processor table. */
1786 if (align_loops == 0)
1787 {
1788 align_loops = processor_target_table[ix86_tune].align_loop;
1789 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
1790 }
1791 if (align_jumps == 0)
1792 {
1793 align_jumps = processor_target_table[ix86_tune].align_jump;
1794 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
1795 }
1796 if (align_functions == 0)
1797 {
1798 align_functions = processor_target_table[ix86_tune].align_func;
1799 }
1800
1801 /* Validate -mpreferred-stack-boundary= value, or provide default.
1802 The default of 128 bits is for Pentium III's SSE __m128, but we
1803 don't want additional code to keep the stack aligned when
1804 optimizing for code size. */
1805 ix86_preferred_stack_boundary = ((TARGET_64BIT || TARGET_MACHO || !optimize_size)
1806 ? 128 : 32);
1807 if (ix86_preferred_stack_boundary_string)
1808 {
1809 i = atoi (ix86_preferred_stack_boundary_string);
1810 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
1811 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1812 TARGET_64BIT ? 4 : 2);
1813 else
1814 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
1815 }
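/* Worked example, added for clarity (not in the original source):
   -mpreferred-stack-boundary=4 yields (1 << 4) * BITS_PER_UNIT
   = 16 * 8 = 128 bits, i.e. a 16-byte aligned stack; this matches the
   default chosen above except when optimizing 32-bit (non-Darwin) code
   for size.  */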
1816
1817 /* Validate -mbranch-cost= value, or provide default. */
1818 ix86_branch_cost = ix86_cost->branch_cost;
1819 if (ix86_branch_cost_string)
1820 {
1821 i = atoi (ix86_branch_cost_string);
1822 if (i < 0 || i > 5)
1823 error ("-mbranch-cost=%d is not between 0 and 5", i);
1824 else
1825 ix86_branch_cost = i;
1826 }
1827 if (ix86_section_threshold_string)
1828 {
1829 i = atoi (ix86_section_threshold_string);
1830 if (i < 0)
1831 error ("-mlarge-data-threshold=%d is negative", i);
1832 else
1833 ix86_section_threshold = i;
1834 }
1835
1836 if (ix86_tls_dialect_string)
1837 {
1838 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1839 ix86_tls_dialect = TLS_DIALECT_GNU;
1840 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
1841 ix86_tls_dialect = TLS_DIALECT_GNU2;
1842 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1843 ix86_tls_dialect = TLS_DIALECT_SUN;
1844 else
1845 error ("bad value (%s) for -mtls-dialect= switch",
1846 ix86_tls_dialect_string);
1847 }
1848
1849 /* Keep nonleaf frame pointers. */
1850 if (flag_omit_frame_pointer)
1851 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
1852 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
1853 flag_omit_frame_pointer = 1;
1854
1855 /* If we're doing fast math, we don't care about comparison order
1856 wrt NaNs. This lets us use a shorter comparison sequence. */
1857 if (flag_unsafe_math_optimizations)
1858 target_flags &= ~MASK_IEEE_FP;
1859
1860 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1861 since the insns won't need emulation. */
1862 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1863 target_flags &= ~MASK_NO_FANCY_MATH_387;
1864
1865 /* Likewise, if the target doesn't have a 387, or we've specified
1866 software floating point, don't use 387 inline intrinsics. */
1867 if (!TARGET_80387)
1868 target_flags |= MASK_NO_FANCY_MATH_387;
1869
1870 /* Turn on SSE2 builtins for -msse3. */
1871 if (TARGET_SSE3)
1872 target_flags |= MASK_SSE2;
1873
1874 /* Turn on SSE builtins for -msse2. */
1875 if (TARGET_SSE2)
1876 target_flags |= MASK_SSE;
1877
1878 /* Turn on MMX builtins for -msse. */
1879 if (TARGET_SSE)
1880 {
1881 target_flags |= MASK_MMX & ~target_flags_explicit;
1882 x86_prefetch_sse = true;
1883 }
1884
1885 /* Turn on MMX builtins for 3Dnow. */
1886 if (TARGET_3DNOW)
1887 target_flags |= MASK_MMX;
1888
1889 if (TARGET_64BIT)
1890 {
1891 if (TARGET_ALIGN_DOUBLE)
1892 error ("-malign-double makes no sense in the 64bit mode");
1893 if (TARGET_RTD)
1894 error ("-mrtd calling convention not supported in the 64bit mode");
1895
1896 /* Enable by default the SSE and MMX builtins. Do allow the user to
1897 explicitly disable any of these. In particular, disabling SSE and
1898 MMX for kernel code is extremely useful. */
1899 target_flags
1900 |= ((MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE)
1901 & ~target_flags_explicit);
1902 }
1903 else
1904 {
1905 /* The i386 ABI does not specify a red zone. It still makes sense to use
1906 one when the programmer takes care to keep the stack from being destroyed. */
1907 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
1908 target_flags |= MASK_NO_RED_ZONE;
1909 }
1910
1911 /* Accept -msseregparm only if at least SSE support is enabled. */
1912 if (TARGET_SSEREGPARM
1913 && ! TARGET_SSE)
1914 error ("-msseregparm used without SSE enabled");
1915
1916 ix86_fpmath = TARGET_FPMATH_DEFAULT;
1917
1918 if (ix86_fpmath_string != 0)
1919 {
1920 if (! strcmp (ix86_fpmath_string, "387"))
1921 ix86_fpmath = FPMATH_387;
1922 else if (! strcmp (ix86_fpmath_string, "sse"))
1923 {
1924 if (!TARGET_SSE)
1925 {
1926 warning (0, "SSE instruction set disabled, using 387 arithmetics");
1927 ix86_fpmath = FPMATH_387;
1928 }
1929 else
1930 ix86_fpmath = FPMATH_SSE;
1931 }
1932 else if (! strcmp (ix86_fpmath_string, "387,sse")
1933 || ! strcmp (ix86_fpmath_string, "sse,387"))
1934 {
1935 if (!TARGET_SSE)
1936 {
1937 warning (0, "SSE instruction set disabled, using 387 arithmetics");
1938 ix86_fpmath = FPMATH_387;
1939 }
1940 else if (!TARGET_80387)
1941 {
1942 warning (0, "387 instruction set disabled, using SSE arithmetics");
1943 ix86_fpmath = FPMATH_SSE;
1944 }
1945 else
1946 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1947 }
1948 else
1949 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1950 }
1951
1952 /* If the i387 is disabled, then do not return values in it. */
1953 if (!TARGET_80387)
1954 target_flags &= ~MASK_FLOAT_RETURNS;
1955
1956 if ((x86_accumulate_outgoing_args & TUNEMASK)
1957 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1958 && !optimize_size)
1959 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1960
1961 /* ??? Unwind info is not correct around the CFG unless either a frame
1962 pointer is present or M_A_O_A is set. Fixing this requires rewriting
1963 unwind info generation to be aware of the CFG and propagating states
1964 around edges. */
1965 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
1966 || flag_exceptions || flag_non_call_exceptions)
1967 && flag_omit_frame_pointer
1968 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
1969 {
1970 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1971 warning (0, "unwind tables currently require either a frame pointer "
1972 "or -maccumulate-outgoing-args for correctness");
1973 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1974 }
1975
1976 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1977 {
1978 char *p;
1979 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1980 p = strchr (internal_label_prefix, 'X');
1981 internal_label_prefix_len = p - internal_label_prefix;
1982 *p = '\0';
1983 }
1984
1985 /* When scheduling description is not available, disable scheduler pass
1986 so it won't slow down the compilation and make x87 code slower. */
1987 if (!TARGET_SCHEDULE)
1988 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
1989 }
1990 \f
1991 /* Switch to the appropriate section for output of DECL.
1992 DECL is either a `VAR_DECL' node or a constant of some sort.
1993 RELOC indicates whether forming the initial value of DECL requires
1994 link-time relocations. */
1995
1996 static section *
1997 x86_64_elf_select_section (tree decl, int reloc,
1998 unsigned HOST_WIDE_INT align)
1999 {
2000 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2001 && ix86_in_large_data_p (decl))
2002 {
2003 const char *sname = NULL;
2004 unsigned int flags = SECTION_WRITE;
2005 switch (categorize_decl_for_section (decl, reloc, flag_pic))
2006 {
2007 case SECCAT_DATA:
2008 sname = ".ldata";
2009 break;
2010 case SECCAT_DATA_REL:
2011 sname = ".ldata.rel";
2012 break;
2013 case SECCAT_DATA_REL_LOCAL:
2014 sname = ".ldata.rel.local";
2015 break;
2016 case SECCAT_DATA_REL_RO:
2017 sname = ".ldata.rel.ro";
2018 break;
2019 case SECCAT_DATA_REL_RO_LOCAL:
2020 sname = ".ldata.rel.ro.local";
2021 break;
2022 case SECCAT_BSS:
2023 sname = ".lbss";
2024 flags |= SECTION_BSS;
2025 break;
2026 case SECCAT_RODATA:
2027 case SECCAT_RODATA_MERGE_STR:
2028 case SECCAT_RODATA_MERGE_STR_INIT:
2029 case SECCAT_RODATA_MERGE_CONST:
2030 sname = ".lrodata";
2031 flags = 0;
2032 break;
2033 case SECCAT_SRODATA:
2034 case SECCAT_SDATA:
2035 case SECCAT_SBSS:
2036 gcc_unreachable ();
2037 case SECCAT_TEXT:
2038 case SECCAT_TDATA:
2039 case SECCAT_TBSS:
2040 /* We don't split these for the medium model. Place them into
2041 default sections and hope for the best. */
2042 break;
2043 }
2044 if (sname)
2045 {
2046 /* We might get called with string constants, but get_named_section
2047 doesn't like them as they are not DECLs. Also, we need to set
2048 flags in that case. */
2049 if (!DECL_P (decl))
2050 return get_section (sname, flags, NULL);
2051 return get_named_section (decl, sname, reloc);
2052 }
2053 }
2054 return default_elf_select_section (decl, reloc, align);
2055 }
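/* Illustrative note (assumption-flagged, not from the original source):
   with -mcmodel=medium, an initialized global larger than the
   -mlarge-data-threshold value is directed by the function above into
   ".ldata" (or an ".ldata.rel*" variant when relocations are involved)
   and an uninitialized one into ".lbss"; only these large-data sections
   then need to be reachable outside the 2 GB small-model addressing range.  */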
2056
2057 /* Build up a unique section name, expressed as a
2058 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2059 RELOC indicates whether the initial value of EXP requires
2060 link-time relocations. */
2061
2062 static void
2063 x86_64_elf_unique_section (tree decl, int reloc)
2064 {
2065 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2066 && ix86_in_large_data_p (decl))
2067 {
2068 const char *prefix = NULL;
2069 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2070 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
2071
2072 switch (categorize_decl_for_section (decl, reloc, flag_pic))
2073 {
2074 case SECCAT_DATA:
2075 case SECCAT_DATA_REL:
2076 case SECCAT_DATA_REL_LOCAL:
2077 case SECCAT_DATA_REL_RO:
2078 case SECCAT_DATA_REL_RO_LOCAL:
2079 prefix = one_only ? ".gnu.linkonce.ld." : ".ldata.";
2080 break;
2081 case SECCAT_BSS:
2082 prefix = one_only ? ".gnu.linkonce.lb." : ".lbss.";
2083 break;
2084 case SECCAT_RODATA:
2085 case SECCAT_RODATA_MERGE_STR:
2086 case SECCAT_RODATA_MERGE_STR_INIT:
2087 case SECCAT_RODATA_MERGE_CONST:
2088 prefix = one_only ? ".gnu.linkonce.lr." : ".lrodata.";
2089 break;
2090 case SECCAT_SRODATA:
2091 case SECCAT_SDATA:
2092 case SECCAT_SBSS:
2093 gcc_unreachable ();
2094 case SECCAT_TEXT:
2095 case SECCAT_TDATA:
2096 case SECCAT_TBSS:
2097 /* We don't split these for the medium model. Place them into
2098 default sections and hope for the best. */
2099 break;
2100 }
2101 if (prefix)
2102 {
2103 const char *name;
2104 size_t nlen, plen;
2105 char *string;
2106 plen = strlen (prefix);
2107
2108 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
2109 name = targetm.strip_name_encoding (name);
2110 nlen = strlen (name);
2111
2112 string = alloca (nlen + plen + 1);
2113 memcpy (string, prefix, plen);
2114 memcpy (string + plen, name, nlen + 1);
2115
2116 DECL_SECTION_NAME (decl) = build_string (nlen + plen, string);
2117 return;
2118 }
2119 }
2120 default_unique_section (decl, reloc);
2121 }
2122
2123 #ifdef COMMON_ASM_OP
2124 /* This says how to output assembler code to declare an
2125 uninitialized external linkage data object.
2126
2127 For medium model x86-64 we need to use .largecomm opcode for
2128 large objects. */
2129 void
2130 x86_elf_aligned_common (FILE *file,
2131 const char *name, unsigned HOST_WIDE_INT size,
2132 int align)
2133 {
2134 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2135 && size > (unsigned int)ix86_section_threshold)
2136 fprintf (file, ".largecomm\t");
2137 else
2138 fprintf (file, "%s", COMMON_ASM_OP);
2139 assemble_name (file, name);
2140 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
2141 size, align / BITS_PER_UNIT);
2142 }
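/* Illustrative sketch of the output (hedged, not taken from this file):
   for a 4 MB common object "buf" aligned to 32 bytes under
   -mcmodel=medium with a smaller -mlarge-data-threshold, the function
   above would emit roughly

       .largecomm	buf,4194304,32

   while small objects keep using the normal COMMON_ASM_OP directive.  */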
2143
2144 /* Utility function for targets to use in implementing
2145 ASM_OUTPUT_ALIGNED_BSS. */
2146
2147 void
2148 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
2149 const char *name, unsigned HOST_WIDE_INT size,
2150 int align)
2151 {
2152 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2153 && size > (unsigned int)ix86_section_threshold)
2154 switch_to_section (get_named_section (decl, ".lbss", 0));
2155 else
2156 switch_to_section (bss_section);
2157 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
2158 #ifdef ASM_DECLARE_OBJECT_NAME
2159 last_assemble_variable_decl = decl;
2160 ASM_DECLARE_OBJECT_NAME (file, name, decl);
2161 #else
2162 /* Standard thing is just output label for the object. */
2163 ASM_OUTPUT_LABEL (file, name);
2164 #endif /* ASM_DECLARE_OBJECT_NAME */
2165 ASM_OUTPUT_SKIP (file, size ? size : 1);
2166 }
2167 #endif
2168 \f
2169 void
2170 optimization_options (int level, int size ATTRIBUTE_UNUSED)
2171 {
2172 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
2173 make the problem with not enough registers even worse. */
2174 #ifdef INSN_SCHEDULING
2175 if (level > 1)
2176 flag_schedule_insns = 0;
2177 #endif
2178
2179 if (TARGET_MACHO)
2180 /* The Darwin libraries never set errno, so we might as well
2181 avoid calling them when that's the only reason we would. */
2182 flag_errno_math = 0;
2183
2184 /* The default values of these switches depend on TARGET_64BIT,
2185 which is not known at this moment. Mark these values with 2 and
2186 let the user override them. In case there is no command line option
2187 specifying them, we will set the defaults in override_options. */
2188 if (optimize >= 1)
2189 flag_omit_frame_pointer = 2;
2190 flag_pcc_struct_return = 2;
2191 flag_asynchronous_unwind_tables = 2;
2192 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
2193 SUBTARGET_OPTIMIZATION_OPTIONS;
2194 #endif
2195 }
2196 \f
2197 /* Table of valid machine attributes. */
2198 const struct attribute_spec ix86_attribute_table[] =
2199 {
2200 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
2201 /* Stdcall attribute says callee is responsible for popping arguments
2202 if they are not variable. */
2203 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2204 /* Fastcall attribute says callee is responsible for popping arguments
2205 if they are not variable. */
2206 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2207 /* Cdecl attribute says the callee is a normal C declaration */
2208 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2209 /* Regparm attribute specifies how many integer arguments are to be
2210 passed in registers. */
2211 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
2212 /* Sseregparm attribute says we are using x86_64 calling conventions
2213 for FP arguments. */
2214 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2215 /* force_align_arg_pointer says this function realigns the stack at entry. */
2216 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
2217 false, true, true, ix86_handle_cconv_attribute },
2218 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2219 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
2220 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
2221 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
2222 #endif
2223 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
2224 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
2225 #ifdef SUBTARGET_ATTRIBUTE_TABLE
2226 SUBTARGET_ATTRIBUTE_TABLE,
2227 #endif
2228 { NULL, 0, 0, false, false, false, NULL }
2229 };
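/* Usage sketch (illustrative only, not part of this file): the calling
   convention attributes listed above are written on function types in
   user code, for example

       int __attribute__ ((stdcall))     f (int a, int b);
       int __attribute__ ((fastcall))    g (int a, int b);
       int __attribute__ ((regparm (3))) h (int a, int b, int c);

   Which of these may be combined, and how they affect argument passing,
   is decided by ix86_handle_cconv_attribute and the argument-passing
   code below.  */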
2230
2231 /* Decide whether we can make a sibling call to a function. DECL is the
2232 declaration of the function being targeted by the call and EXP is the
2233 CALL_EXPR representing the call. */
2234
2235 static bool
2236 ix86_function_ok_for_sibcall (tree decl, tree exp)
2237 {
2238 tree func;
2239 rtx a, b;
2240
2241 /* If we are generating position-independent code, we cannot sibcall
2242 optimize any indirect call, or a direct call to a global function,
2243 as the PLT requires %ebx be live. */
2244 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
2245 return false;
2246
2247 if (decl)
2248 func = decl;
2249 else
2250 {
2251 func = TREE_TYPE (TREE_OPERAND (exp, 0));
2252 if (POINTER_TYPE_P (func))
2253 func = TREE_TYPE (func);
2254 }
2255
2256 /* Check that the return value locations are the same. Like
2257 if we are returning floats on the 80387 register stack, we cannot
2258 make a sibcall from a function that doesn't return a float to a
2259 function that does or, conversely, from a function that does return
2260 a float to a function that doesn't; the necessary stack adjustment
2261 would not be executed. This is also the place we notice
2262 differences in the return value ABI. Note that it is ok for one
2263 of the functions to have void return type as long as the return
2264 value of the other is passed in a register. */
2265 a = ix86_function_value (TREE_TYPE (exp), func, false);
2266 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
2267 cfun->decl, false);
2268 if (STACK_REG_P (a) || STACK_REG_P (b))
2269 {
2270 if (!rtx_equal_p (a, b))
2271 return false;
2272 }
2273 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
2274 ;
2275 else if (!rtx_equal_p (a, b))
2276 return false;
2277
2278 /* If this call is indirect, we'll need to be able to use a call-clobbered
2279 register for the address of the target function. Make sure that all
2280 such registers are not used for passing parameters. */
2281 if (!decl && !TARGET_64BIT)
2282 {
2283 tree type;
2284
2285 /* We're looking at the CALL_EXPR, we need the type of the function. */
2286 type = TREE_OPERAND (exp, 0); /* pointer expression */
2287 type = TREE_TYPE (type); /* pointer type */
2288 type = TREE_TYPE (type); /* function type */
2289
2290 if (ix86_function_regparm (type, NULL) >= 3)
2291 {
2292 /* ??? Need to count the actual number of registers to be used,
2293 not the possible number of registers. Fix later. */
2294 return false;
2295 }
2296 }
2297
2298 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2299 /* Dllimport'd functions are also called indirectly. */
2300 if (decl && DECL_DLLIMPORT_P (decl)
2301 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
2302 return false;
2303 #endif
2304
2305 /* If we forced aligned the stack, then sibcalling would unalign the
2306 stack, which may break the called function. */
2307 if (cfun->machine->force_align_arg_pointer)
2308 return false;
2309
2310 /* Otherwise okay. That also includes certain types of indirect calls. */
2311 return true;
2312 }
2313
2314 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
2315 calling convention attributes;
2316 arguments as in struct attribute_spec.handler. */
2317
2318 static tree
2319 ix86_handle_cconv_attribute (tree *node, tree name,
2320 tree args,
2321 int flags ATTRIBUTE_UNUSED,
2322 bool *no_add_attrs)
2323 {
2324 if (TREE_CODE (*node) != FUNCTION_TYPE
2325 && TREE_CODE (*node) != METHOD_TYPE
2326 && TREE_CODE (*node) != FIELD_DECL
2327 && TREE_CODE (*node) != TYPE_DECL)
2328 {
2329 warning (OPT_Wattributes, "%qs attribute only applies to functions",
2330 IDENTIFIER_POINTER (name));
2331 *no_add_attrs = true;
2332 return NULL_TREE;
2333 }
2334
2335 /* Can combine regparm with all attributes but fastcall. */
2336 if (is_attribute_p ("regparm", name))
2337 {
2338 tree cst;
2339
2340 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2341 {
2342 error ("fastcall and regparm attributes are not compatible");
2343 }
2344
2345 cst = TREE_VALUE (args);
2346 if (TREE_CODE (cst) != INTEGER_CST)
2347 {
2348 warning (OPT_Wattributes,
2349 "%qs attribute requires an integer constant argument",
2350 IDENTIFIER_POINTER (name));
2351 *no_add_attrs = true;
2352 }
2353 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
2354 {
2355 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
2356 IDENTIFIER_POINTER (name), REGPARM_MAX);
2357 *no_add_attrs = true;
2358 }
2359
2360 if (!TARGET_64BIT
2361 && lookup_attribute (ix86_force_align_arg_pointer_string,
2362 TYPE_ATTRIBUTES (*node))
2363 && compare_tree_int (cst, REGPARM_MAX-1) > 0)
2364 {
2365 error ("%s functions limited to %d register parameters",
2366 ix86_force_align_arg_pointer_string, REGPARM_MAX-1);
2367 }
2368
2369 return NULL_TREE;
2370 }
2371
2372 if (TARGET_64BIT)
2373 {
2374 warning (OPT_Wattributes, "%qs attribute ignored",
2375 IDENTIFIER_POINTER (name));
2376 *no_add_attrs = true;
2377 return NULL_TREE;
2378 }
2379
2380 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
2381 if (is_attribute_p ("fastcall", name))
2382 {
2383 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2384 {
2385 error ("fastcall and cdecl attributes are not compatible");
2386 }
2387 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2388 {
2389 error ("fastcall and stdcall attributes are not compatible");
2390 }
2391 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
2392 {
2393 error ("fastcall and regparm attributes are not compatible");
2394 }
2395 }
2396
2397 /* Can combine stdcall with fastcall (redundant), regparm and
2398 sseregparm. */
2399 else if (is_attribute_p ("stdcall", name))
2400 {
2401 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2402 {
2403 error ("stdcall and cdecl attributes are not compatible");
2404 }
2405 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2406 {
2407 error ("stdcall and fastcall attributes are not compatible");
2408 }
2409 }
2410
2411 /* Can combine cdecl with regparm and sseregparm. */
2412 else if (is_attribute_p ("cdecl", name))
2413 {
2414 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2415 {
2416 error ("stdcall and cdecl attributes are not compatible");
2417 }
2418 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2419 {
2420 error ("fastcall and cdecl attributes are not compatible");
2421 }
2422 }
2423
2424 /* Can combine sseregparm with all attributes. */
2425
2426 return NULL_TREE;
2427 }
2428
2429 /* Return 0 if the attributes for two types are incompatible, 1 if they
2430 are compatible, and 2 if they are nearly compatible (which causes a
2431 warning to be generated). */
2432
2433 static int
2434 ix86_comp_type_attributes (tree type1, tree type2)
2435 {
2436 /* Check for mismatch of non-default calling convention. */
2437 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
2438
2439 if (TREE_CODE (type1) != FUNCTION_TYPE)
2440 return 1;
2441
2442 /* Check for mismatched fastcall/regparm types. */
2443 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
2444 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
2445 || (ix86_function_regparm (type1, NULL)
2446 != ix86_function_regparm (type2, NULL)))
2447 return 0;
2448
2449 /* Check for mismatched sseregparm types. */
2450 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
2451 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
2452 return 0;
2453
2454 /* Check for mismatched return types (cdecl vs stdcall). */
2455 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
2456 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
2457 return 0;
2458
2459 return 1;
2460 }
2461 \f
2462 /* Return the regparm value for a function with the indicated TYPE and DECL.
2463 DECL may be NULL when calling function indirectly
2464 or considering a libcall. */
2465
2466 static int
2467 ix86_function_regparm (tree type, tree decl)
2468 {
2469 tree attr;
2470 int regparm = ix86_regparm;
2471 bool user_convention = false;
2472
2473 if (!TARGET_64BIT)
2474 {
2475 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
2476 if (attr)
2477 {
2478 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
2479 user_convention = true;
2480 }
2481
2482 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
2483 {
2484 regparm = 2;
2485 user_convention = true;
2486 }
2487
2488 /* Use register calling convention for local functions when possible. */
2489 if (!TARGET_64BIT && !user_convention && decl
2490 && flag_unit_at_a_time && !profile_flag)
2491 {
2492 struct cgraph_local_info *i = cgraph_local_info (decl);
2493 if (i && i->local)
2494 {
2495 int local_regparm, globals = 0, regno;
2496
2497 /* Make sure no regparm register is taken by a global register
2498 variable. */
2499 for (local_regparm = 0; local_regparm < 3; local_regparm++)
2500 if (global_regs[local_regparm])
2501 break;
2502 /* We can't use regparm(3) for nested functions, as these pass the
2503 static chain pointer in the third argument register. */
2504 if (local_regparm == 3
2505 && decl_function_context (decl)
2506 && !DECL_NO_STATIC_CHAIN (decl))
2507 local_regparm = 2;
2508 /* If the function realigns its stack pointer, the
2509 prologue will clobber %ecx. If we've already
2510 generated code for the callee, the callee
2511 DECL_STRUCT_FUNCTION is gone, so we fall back to
2512 scanning the attributes for the self-realigning
2513 property. */
2514 if ((DECL_STRUCT_FUNCTION (decl)
2515 && DECL_STRUCT_FUNCTION (decl)->machine->force_align_arg_pointer)
2516 || (!DECL_STRUCT_FUNCTION (decl)
2517 && lookup_attribute (ix86_force_align_arg_pointer_string,
2518 TYPE_ATTRIBUTES (TREE_TYPE (decl)))))
2519 local_regparm = 2;
2520 /* Each global register variable increases register pressure,
2521 so the more global reg vars there are, the less useful the regparm
2522 optimization is, unless requested by the user explicitly. */
2523 for (regno = 0; regno < 6; regno++)
2524 if (global_regs[regno])
2525 globals++;
2526 local_regparm
2527 = globals < local_regparm ? local_regparm - globals : 0;
2528
2529 if (local_regparm > regparm)
2530 regparm = local_regparm;
2531 }
2532 }
2533 }
2534 return regparm;
2535 }
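/* Illustrative sketch of the local promotion above (added, not original):
   with -funit-at-a-time and no profiling, a 32-bit static function whose
   address never escapes, e.g.

       static int add3 (int a, int b, int c) { return a + b + c; }

   may get all three arguments in registers even without a regparm
   attribute, unless global register variables, a static chain, or stack
   realignment reduce the count as computed above.  */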
2536
2537 /* Return 1 or 2, if we can pass up to 8 SFmode (1) and DFmode (2) arguments
2538 in SSE registers for a function with the indicated TYPE and DECL.
2539 DECL may be NULL when calling function indirectly
2540 or considering a libcall. Otherwise return 0. */
2541
2542 static int
2543 ix86_function_sseregparm (tree type, tree decl)
2544 {
2545 /* Use SSE registers to pass SFmode and DFmode arguments if requested
2546 by the sseregparm attribute. */
2547 if (TARGET_SSEREGPARM
2548 || (type
2549 && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
2550 {
2551 if (!TARGET_SSE)
2552 {
2553 if (decl)
2554 error ("Calling %qD with attribute sseregparm without "
2555 "SSE/SSE2 enabled", decl);
2556 else
2557 error ("Calling %qT with attribute sseregparm without "
2558 "SSE/SSE2 enabled", type);
2559 return 0;
2560 }
2561
2562 return 2;
2563 }
2564
2565 /* For local functions, pass SFmode (and DFmode for SSE2) arguments
2566 in SSE registers even for 32-bit mode and not just 3, but up to
2567 8 SSE arguments in registers. */
2568 if (!TARGET_64BIT && decl
2569 && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
2570 {
2571 struct cgraph_local_info *i = cgraph_local_info (decl);
2572 if (i && i->local)
2573 return TARGET_SSE2 ? 2 : 1;
2574 }
2575
2576 return 0;
2577 }
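/* Hedged example for the helper above (not from the original source):
   for a local 32-bit function compiled with -mfpmath=sse, such as

       static double scale (double x) { return x * 2.0; }

   the function returns 2 when SSE2 is available, so DFmode (and SFmode)
   arguments travel in SSE registers; it returns 1 when only SFmode
   arguments qualify, and 0 otherwise.  */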
2578
2579 /* Return true if EAX is live at the start of the function. Used by
2580 ix86_expand_prologue to determine if we need special help before
2581 calling allocate_stack_worker. */
2582
2583 static bool
2584 ix86_eax_live_at_start_p (void)
2585 {
2586 /* Cheat. Don't bother working forward from ix86_function_regparm
2587 to the function type to whether an actual argument is located in
2588 eax. Instead just look at cfg info, which is still close enough
2589 to correct at this point. This gives false positives for broken
2590 functions that might use uninitialized data that happens to be
2591 allocated in eax, but who cares? */
2592 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->il.rtl->global_live_at_end, 0);
2593 }
2594
2595 /* Value is the number of bytes of arguments automatically
2596 popped when returning from a subroutine call.
2597 FUNDECL is the declaration node of the function (as a tree),
2598 FUNTYPE is the data type of the function (as a tree),
2599 or for a library call it is an identifier node for the subroutine name.
2600 SIZE is the number of bytes of arguments passed on the stack.
2601
2602 On the 80386, the RTD insn may be used to pop them if the number
2603 of args is fixed, but if the number is variable then the caller
2604 must pop them all. RTD can't be used for library calls now
2605 because the library is compiled with the Unix compiler.
2606 Use of RTD is a selectable option, since it is incompatible with
2607 standard Unix calling sequences. If the option is not selected,
2608 the caller must always pop the args.
2609
2610 The attribute stdcall is equivalent to RTD on a per module basis. */
2611
2612 int
2613 ix86_return_pops_args (tree fundecl, tree funtype, int size)
2614 {
2615 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
2616
2617 /* Cdecl functions override -mrtd, and never pop the stack. */
2618 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
2619
2620 /* Stdcall and fastcall functions will pop the stack if not
2621 variable args. */
2622 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
2623 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
2624 rtd = 1;
2625
2626 if (rtd
2627 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
2628 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
2629 == void_type_node)))
2630 return size;
2631 }
2632
2633 /* Lose any fake structure return argument if it is passed on the stack. */
2634 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
2635 && !TARGET_64BIT
2636 && !KEEP_AGGREGATE_RETURN_POINTER)
2637 {
2638 int nregs = ix86_function_regparm (funtype, fundecl);
2639
2640 if (!nregs)
2641 return GET_MODE_SIZE (Pmode);
2642 }
2643
2644 return 0;
2645 }
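/* Illustrative example (added, not original): for a 32-bit call to

       int __attribute__ ((stdcall)) f (int a, int b);

   the function above returns 8, so the callee pops its own arguments
   (typically with "ret $8") and the caller makes no stack adjustment;
   a cdecl or variadic function returns 0 and leaves the cleanup to the
   caller.  */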
2646 \f
2647 /* Argument support functions. */
2648
2649 /* Return true when register may be used to pass function parameters. */
2650 bool
2651 ix86_function_arg_regno_p (int regno)
2652 {
2653 int i;
2654 if (!TARGET_64BIT)
2655 return (regno < REGPARM_MAX
2656 || (TARGET_MMX && MMX_REGNO_P (regno)
2657 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
2658 || (TARGET_SSE && SSE_REGNO_P (regno)
2659 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
2660
2661 if (TARGET_SSE && SSE_REGNO_P (regno)
2662 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
2663 return true;
2664 /* RAX is used as hidden argument to va_arg functions. */
2665 if (!regno)
2666 return true;
2667 for (i = 0; i < REGPARM_MAX; i++)
2668 if (regno == x86_64_int_parameter_registers[i])
2669 return true;
2670 return false;
2671 }
2672
2673 /* Return true if we do not know how to pass TYPE solely in registers. */
2674
2675 static bool
2676 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
2677 {
2678 if (must_pass_in_stack_var_size_or_pad (mode, type))
2679 return true;
2680
2681 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
2682 The layout_type routine is crafty and tries to trick us into passing
2683 currently unsupported vector types on the stack by using TImode. */
2684 return (!TARGET_64BIT && mode == TImode
2685 && type && TREE_CODE (type) != VECTOR_TYPE);
2686 }
2687
2688 /* Initialize a variable CUM of type CUMULATIVE_ARGS
2689 for a call to a function whose data type is FNTYPE.
2690 For a library call, FNTYPE is 0. */
2691
2692 void
2693 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
2694 tree fntype, /* tree ptr for function decl */
2695 rtx libname, /* SYMBOL_REF of library name or 0 */
2696 tree fndecl)
2697 {
2698 static CUMULATIVE_ARGS zero_cum;
2699 tree param, next_param;
2700
2701 if (TARGET_DEBUG_ARG)
2702 {
2703 fprintf (stderr, "\ninit_cumulative_args (");
2704 if (fntype)
2705 fprintf (stderr, "fntype code = %s, ret code = %s",
2706 tree_code_name[(int) TREE_CODE (fntype)],
2707 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
2708 else
2709 fprintf (stderr, "no fntype");
2710
2711 if (libname)
2712 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
2713 }
2714
2715 *cum = zero_cum;
2716
2717 /* Set up the number of registers to use for passing arguments. */
2718 cum->nregs = ix86_regparm;
2719 if (TARGET_SSE)
2720 cum->sse_nregs = SSE_REGPARM_MAX;
2721 if (TARGET_MMX)
2722 cum->mmx_nregs = MMX_REGPARM_MAX;
2723 cum->warn_sse = true;
2724 cum->warn_mmx = true;
2725 cum->maybe_vaarg = false;
2726
2727 /* Use ecx and edx registers if function has fastcall attribute,
2728 else look for regparm information. */
2729 if (fntype && !TARGET_64BIT)
2730 {
2731 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
2732 {
2733 cum->nregs = 2;
2734 cum->fastcall = 1;
2735 }
2736 else
2737 cum->nregs = ix86_function_regparm (fntype, fndecl);
2738 }
2739
2740 /* Set up the number of SSE registers used for passing SFmode
2741 and DFmode arguments. Warn for mismatching ABI. */
2742 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl);
2743
2744 /* Determine if this function has variable arguments. This is
2745 indicated by the last argument being 'void_type_node' if there
2746 are no variable arguments. If there are variable arguments, then
2747 we won't pass anything in registers in 32-bit mode. */
2748
2749 if (cum->nregs || cum->mmx_nregs || cum->sse_nregs)
2750 {
2751 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
2752 param != 0; param = next_param)
2753 {
2754 next_param = TREE_CHAIN (param);
2755 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
2756 {
2757 if (!TARGET_64BIT)
2758 {
2759 cum->nregs = 0;
2760 cum->sse_nregs = 0;
2761 cum->mmx_nregs = 0;
2762 cum->warn_sse = 0;
2763 cum->warn_mmx = 0;
2764 cum->fastcall = 0;
2765 cum->float_in_sse = 0;
2766 }
2767 cum->maybe_vaarg = true;
2768 }
2769 }
2770 }
2771 if ((!fntype && !libname)
2772 || (fntype && !TYPE_ARG_TYPES (fntype)))
2773 cum->maybe_vaarg = true;
2774
2775 if (TARGET_DEBUG_ARG)
2776 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
2777
2778 return;
2779 }
2780
2781 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
2782 But in the case of vector types, it is some vector mode.
2783
2784 When we have only some of our vector isa extensions enabled, then there
2785 are some modes for which vector_mode_supported_p is false. For these
2786 modes, the generic vector support in gcc will choose some non-vector mode
2787 in order to implement the type. By computing the natural mode, we'll
2788 select the proper ABI location for the operand and not depend on whatever
2789 the middle-end decides to do with these vector types. */
2790
2791 static enum machine_mode
2792 type_natural_mode (tree type)
2793 {
2794 enum machine_mode mode = TYPE_MODE (type);
2795
2796 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
2797 {
2798 HOST_WIDE_INT size = int_size_in_bytes (type);
2799 if ((size == 8 || size == 16)
2800 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
2801 && TYPE_VECTOR_SUBPARTS (type) > 1)
2802 {
2803 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
2804
2805 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
2806 mode = MIN_MODE_VECTOR_FLOAT;
2807 else
2808 mode = MIN_MODE_VECTOR_INT;
2809
2810 /* Get the mode which has this inner mode and number of units. */
2811 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
2812 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
2813 && GET_MODE_INNER (mode) == innermode)
2814 return mode;
2815
2816 gcc_unreachable ();
2817 }
2818 }
2819
2820 return mode;
2821 }
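/* Worked example (illustrative, not in the original source): a generic
   vector type such as

       typedef float v4sf __attribute__ ((vector_size (16)));

   is 16 bytes with 4 subparts, so the loop above picks V4SFmode as the
   natural mode even if the middle end fell back to a non-vector
   TYPE_MODE because the matching SSE support was not enabled.  */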
2822
2823 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
2824 this may not agree with the mode that the type system has chosen for the
2825 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
2826 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
2827
2828 static rtx
2829 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
2830 unsigned int regno)
2831 {
2832 rtx tmp;
2833
2834 if (orig_mode != BLKmode)
2835 tmp = gen_rtx_REG (orig_mode, regno);
2836 else
2837 {
2838 tmp = gen_rtx_REG (mode, regno);
2839 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
2840 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
2841 }
2842
2843 return tmp;
2844 }
2845
2846 /* x86-64 register passing implementation. See the x86-64 ABI for details.
2847 The goal of this code is to classify each eightbyte of an incoming argument
2848 by register class and assign registers accordingly. */
2849
2850 /* Return the union class of CLASS1 and CLASS2.
2851 See the x86-64 PS ABI for details. */
2852
2853 static enum x86_64_reg_class
2854 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
2855 {
2856 /* Rule #1: If both classes are equal, this is the resulting class. */
2857 if (class1 == class2)
2858 return class1;
2859
2860 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
2861 the other class. */
2862 if (class1 == X86_64_NO_CLASS)
2863 return class2;
2864 if (class2 == X86_64_NO_CLASS)
2865 return class1;
2866
2867 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
2868 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
2869 return X86_64_MEMORY_CLASS;
2870
2871 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
2872 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
2873 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
2874 return X86_64_INTEGERSI_CLASS;
2875 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
2876 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
2877 return X86_64_INTEGER_CLASS;
2878
2879 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
2880 MEMORY is used. */
2881 if (class1 == X86_64_X87_CLASS
2882 || class1 == X86_64_X87UP_CLASS
2883 || class1 == X86_64_COMPLEX_X87_CLASS
2884 || class2 == X86_64_X87_CLASS
2885 || class2 == X86_64_X87UP_CLASS
2886 || class2 == X86_64_COMPLEX_X87_CLASS)
2887 return X86_64_MEMORY_CLASS;
2888
2889 /* Rule #6: Otherwise class SSE is used. */
2890 return X86_64_SSE_CLASS;
2891 }
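/* Worked example for the merge rules above (added, not original):
   classifying

       struct s { int i; float f; };

   places both fields in the same eightbyte; merging INTEGERSI with
   SSESF gives INTEGERSI by rule #4, so the whole structure is passed
   in a single general-purpose register.  */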
2892
2893 /* Classify the argument of type TYPE and mode MODE.
2894 CLASSES will be filled by the register class used to pass each word
2895 of the operand. The number of words is returned. In case the parameter
2896 should be passed in memory, 0 is returned. As a special case for zero
2897 sized containers, classes[0] will be NO_CLASS and 1 is returned.
2898
2899 BIT_OFFSET is used internally for handling records and specifies the
2900 offset in bits modulo 256 to avoid overflow cases.
2901
2902 See the x86-64 PS ABI for details.
2903 */
2904
2905 static int
2906 classify_argument (enum machine_mode mode, tree type,
2907 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
2908 {
2909 HOST_WIDE_INT bytes =
2910 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2911 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2912
2913 /* Variable sized entities are always passed/returned in memory. */
2914 if (bytes < 0)
2915 return 0;
2916
2917 if (mode != VOIDmode
2918 && targetm.calls.must_pass_in_stack (mode, type))
2919 return 0;
2920
2921 if (type && AGGREGATE_TYPE_P (type))
2922 {
2923 int i;
2924 tree field;
2925 enum x86_64_reg_class subclasses[MAX_CLASSES];
2926
2927 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
2928 if (bytes > 16)
2929 return 0;
2930
2931 for (i = 0; i < words; i++)
2932 classes[i] = X86_64_NO_CLASS;
2933
2934 /* Zero-sized arrays or structures are NO_CLASS. We return 0 to
2935 signal the memory class, so handle this as a special case. */
2936 if (!words)
2937 {
2938 classes[0] = X86_64_NO_CLASS;
2939 return 1;
2940 }
2941
2942 /* Classify each field of record and merge classes. */
2943 switch (TREE_CODE (type))
2944 {
2945 case RECORD_TYPE:
2946 /* For classes first merge in the field of the subclasses. */
2947 if (TYPE_BINFO (type))
2948 {
2949 tree binfo, base_binfo;
2950 int basenum;
2951
2952 for (binfo = TYPE_BINFO (type), basenum = 0;
2953 BINFO_BASE_ITERATE (binfo, basenum, base_binfo); basenum++)
2954 {
2955 int num;
2956 int offset = tree_low_cst (BINFO_OFFSET (base_binfo), 0) * 8;
2957 tree type = BINFO_TYPE (base_binfo);
2958
2959 num = classify_argument (TYPE_MODE (type),
2960 type, subclasses,
2961 (offset + bit_offset) % 256);
2962 if (!num)
2963 return 0;
2964 for (i = 0; i < num; i++)
2965 {
2966 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2967 classes[i + pos] =
2968 merge_classes (subclasses[i], classes[i + pos]);
2969 }
2970 }
2971 }
2972 /* And now merge the fields of the structure. */
2973 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2974 {
2975 if (TREE_CODE (field) == FIELD_DECL)
2976 {
2977 int num;
2978
2979 if (TREE_TYPE (field) == error_mark_node)
2980 continue;
2981
2982 /* Bitfields are always classified as integer. Handle them
2983 early, since later code would consider them to be
2984 misaligned integers. */
2985 if (DECL_BIT_FIELD (field))
2986 {
2987 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
2988 i < ((int_bit_position (field) + (bit_offset % 64))
2989 + tree_low_cst (DECL_SIZE (field), 0)
2990 + 63) / 8 / 8; i++)
2991 classes[i] =
2992 merge_classes (X86_64_INTEGER_CLASS,
2993 classes[i]);
2994 }
2995 else
2996 {
2997 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2998 TREE_TYPE (field), subclasses,
2999 (int_bit_position (field)
3000 + bit_offset) % 256);
3001 if (!num)
3002 return 0;
3003 for (i = 0; i < num; i++)
3004 {
3005 int pos =
3006 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3007 classes[i + pos] =
3008 merge_classes (subclasses[i], classes[i + pos]);
3009 }
3010 }
3011 }
3012 }
3013 break;
3014
3015 case ARRAY_TYPE:
3016 /* Arrays are handled as small records. */
3017 {
3018 int num;
3019 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
3020 TREE_TYPE (type), subclasses, bit_offset);
3021 if (!num)
3022 return 0;
3023
3024 /* The partial classes are now full classes. */
3025 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
3026 subclasses[0] = X86_64_SSE_CLASS;
3027 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
3028 subclasses[0] = X86_64_INTEGER_CLASS;
3029
3030 for (i = 0; i < words; i++)
3031 classes[i] = subclasses[i % num];
3032
3033 break;
3034 }
3035 case UNION_TYPE:
3036 case QUAL_UNION_TYPE:
3037 /* Unions are similar to RECORD_TYPE but offset is always 0.
3038 */
3039
3040 /* Unions are not derived. */
3041 gcc_assert (!TYPE_BINFO (type)
3042 || !BINFO_N_BASE_BINFOS (TYPE_BINFO (type)));
3043 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3044 {
3045 if (TREE_CODE (field) == FIELD_DECL)
3046 {
3047 int num;
3048
3049 if (TREE_TYPE (field) == error_mark_node)
3050 continue;
3051
3052 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3053 TREE_TYPE (field), subclasses,
3054 bit_offset);
3055 if (!num)
3056 return 0;
3057 for (i = 0; i < num; i++)
3058 classes[i] = merge_classes (subclasses[i], classes[i]);
3059 }
3060 }
3061 break;
3062
3063 default:
3064 gcc_unreachable ();
3065 }
3066
3067 /* Final merger cleanup. */
3068 for (i = 0; i < words; i++)
3069 {
3070 /* If one class is MEMORY, everything should be passed in
3071 memory. */
3072 if (classes[i] == X86_64_MEMORY_CLASS)
3073 return 0;
3074
3075 /* The X86_64_SSEUP_CLASS should always be preceded by
3076 X86_64_SSE_CLASS. */
3077 if (classes[i] == X86_64_SSEUP_CLASS
3078 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
3079 classes[i] = X86_64_SSE_CLASS;
3080
3081 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
3082 if (classes[i] == X86_64_X87UP_CLASS
3083 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
3084 classes[i] = X86_64_SSE_CLASS;
3085 }
3086 return words;
3087 }
3088
3089 /* Compute the alignment needed.  We align all types to their natural
3090 boundaries, with the exception of XFmode, which is aligned to 64 bits. */
3091 if (mode != VOIDmode && mode != BLKmode)
3092 {
3093 int mode_alignment = GET_MODE_BITSIZE (mode);
3094
3095 if (mode == XFmode)
3096 mode_alignment = 128;
3097 else if (mode == XCmode)
3098 mode_alignment = 256;
3099 if (COMPLEX_MODE_P (mode))
3100 mode_alignment /= 2;
3101 /* Misaligned fields are always returned in memory. */
3102 if (bit_offset % mode_alignment)
3103 return 0;
3104 }
3105
3106 /* For V1xx modes, just use the base mode. */
3107 if (VECTOR_MODE_P (mode)
3108 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
3109 mode = GET_MODE_INNER (mode);
3110
3111 /* Classification of atomic types. */
3112 switch (mode)
3113 {
3114 case SDmode:
3115 case DDmode:
3116 classes[0] = X86_64_SSE_CLASS;
3117 return 1;
3118 case TDmode:
3119 classes[0] = X86_64_SSE_CLASS;
3120 classes[1] = X86_64_SSEUP_CLASS;
3121 return 2;
3122 case DImode:
3123 case SImode:
3124 case HImode:
3125 case QImode:
3126 case CSImode:
3127 case CHImode:
3128 case CQImode:
3129 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3130 classes[0] = X86_64_INTEGERSI_CLASS;
3131 else
3132 classes[0] = X86_64_INTEGER_CLASS;
3133 return 1;
3134 case CDImode:
3135 case TImode:
3136 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
3137 return 2;
3138 case CTImode:
3139 return 0;
3140 case SFmode:
3141 if (!(bit_offset % 64))
3142 classes[0] = X86_64_SSESF_CLASS;
3143 else
3144 classes[0] = X86_64_SSE_CLASS;
3145 return 1;
3146 case DFmode:
3147 classes[0] = X86_64_SSEDF_CLASS;
3148 return 1;
3149 case XFmode:
3150 classes[0] = X86_64_X87_CLASS;
3151 classes[1] = X86_64_X87UP_CLASS;
3152 return 2;
3153 case TFmode:
3154 classes[0] = X86_64_SSE_CLASS;
3155 classes[1] = X86_64_SSEUP_CLASS;
3156 return 2;
3157 case SCmode:
3158 classes[0] = X86_64_SSE_CLASS;
3159 return 1;
3160 case DCmode:
3161 classes[0] = X86_64_SSEDF_CLASS;
3162 classes[1] = X86_64_SSEDF_CLASS;
3163 return 2;
3164 case XCmode:
3165 classes[0] = X86_64_COMPLEX_X87_CLASS;
3166 return 1;
3167 case TCmode:
3168 /* This mode is larger than 16 bytes. */
3169 return 0;
3170 case V4SFmode:
3171 case V4SImode:
3172 case V16QImode:
3173 case V8HImode:
3174 case V2DFmode:
3175 case V2DImode:
3176 classes[0] = X86_64_SSE_CLASS;
3177 classes[1] = X86_64_SSEUP_CLASS;
3178 return 2;
3179 case V2SFmode:
3180 case V2SImode:
3181 case V4HImode:
3182 case V8QImode:
3183 classes[0] = X86_64_SSE_CLASS;
3184 return 1;
3185 case BLKmode:
3186 case VOIDmode:
3187 return 0;
3188 default:
3189 gcc_assert (VECTOR_MODE_P (mode));
3190
3191 if (bytes > 16)
3192 return 0;
3193
3194 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
3195
3196 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3197 classes[0] = X86_64_INTEGERSI_CLASS;
3198 else
3199 classes[0] = X86_64_INTEGER_CLASS;
3200 classes[1] = X86_64_INTEGER_CLASS;
3201 return 1 + (bytes > 8);
3202 }
3203 }
3204
3205 /* Examine the argument and set the number of registers required in each
3206 class.  Return 0 iff the parameter should be passed in memory. */
3207 static int
3208 examine_argument (enum machine_mode mode, tree type, int in_return,
3209 int *int_nregs, int *sse_nregs)
3210 {
3211 enum x86_64_reg_class class[MAX_CLASSES];
3212 int n = classify_argument (mode, type, class, 0);
3213
3214 *int_nregs = 0;
3215 *sse_nregs = 0;
3216 if (!n)
3217 return 0;
3218 for (n--; n >= 0; n--)
3219 switch (class[n])
3220 {
3221 case X86_64_INTEGER_CLASS:
3222 case X86_64_INTEGERSI_CLASS:
3223 (*int_nregs)++;
3224 break;
3225 case X86_64_SSE_CLASS:
3226 case X86_64_SSESF_CLASS:
3227 case X86_64_SSEDF_CLASS:
3228 (*sse_nregs)++;
3229 break;
3230 case X86_64_NO_CLASS:
3231 case X86_64_SSEUP_CLASS:
3232 break;
3233 case X86_64_X87_CLASS:
3234 case X86_64_X87UP_CLASS:
3235 if (!in_return)
3236 return 0;
3237 break;
3238 case X86_64_COMPLEX_X87_CLASS:
3239 return in_return ? 2 : 0;
3240 case X86_64_MEMORY_CLASS:
3241 gcc_unreachable ();
3242 }
3243 return 1;
3244 }
3245
3246 /* Construct container for the argument used by GCC interface. See
3247 FUNCTION_ARG for the detailed description. */
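/* Illustrative sketch (added for exposition; the register names depend on
   INTREG and SSE_REGNO and are hypothetical here): for the two-word
   aggregate classified above as { SSEDF, INTEGER }, this function builds
   roughly

       (parallel [(expr_list (reg:DF xmm0) (const_int 0))
                  (expr_list (reg:DI rdi)  (const_int 8))])

   i.e. the low eightbyte travels in an SSE register and the high eightbyte
   in the next free integer register, each tagged with its byte offset.  */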
3248
3249 static rtx
3250 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
3251 tree type, int in_return, int nintregs, int nsseregs,
3252 const int *intreg, int sse_regno)
3253 {
3254 /* The following variables hold the static issued_error state. */
3255 static bool issued_sse_arg_error;
3256 static bool issued_sse_ret_error;
3257 static bool issued_x87_ret_error;
3258
3259 enum machine_mode tmpmode;
3260 int bytes =
3261 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3262 enum x86_64_reg_class class[MAX_CLASSES];
3263 int n;
3264 int i;
3265 int nexps = 0;
3266 int needed_sseregs, needed_intregs;
3267 rtx exp[MAX_CLASSES];
3268 rtx ret;
3269
3270 n = classify_argument (mode, type, class, 0);
3271 if (TARGET_DEBUG_ARG)
3272 {
3273 if (!n)
3274 fprintf (stderr, "Memory class\n");
3275 else
3276 {
3277 fprintf (stderr, "Classes:");
3278 for (i = 0; i < n; i++)
3279 {
3280 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
3281 }
3282 fprintf (stderr, "\n");
3283 }
3284 }
3285 if (!n)
3286 return NULL;
3287 if (!examine_argument (mode, type, in_return, &needed_intregs,
3288 &needed_sseregs))
3289 return NULL;
3290 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
3291 return NULL;
3292
3293 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
3294 some less clueful developer tries to use floating-point anyway. */
3295 if (needed_sseregs && !TARGET_SSE)
3296 {
3297 if (in_return)
3298 {
3299 if (!issued_sse_ret_error)
3300 {
3301 error ("SSE register return with SSE disabled");
3302 issued_sse_ret_error = true;
3303 }
3304 }
3305 else if (!issued_sse_arg_error)
3306 {
3307 error ("SSE register argument with SSE disabled");
3308 issued_sse_arg_error = true;
3309 }
3310 return NULL;
3311 }
3312
3313 /* Likewise, error if the ABI requires us to return values in the
3314 x87 registers and the user specified -mno-80387. */
3315 if (!TARGET_80387 && in_return)
3316 for (i = 0; i < n; i++)
3317 if (class[i] == X86_64_X87_CLASS
3318 || class[i] == X86_64_X87UP_CLASS
3319 || class[i] == X86_64_COMPLEX_X87_CLASS)
3320 {
3321 if (!issued_x87_ret_error)
3322 {
3323 error ("x87 register return with x87 disabled");
3324 issued_x87_ret_error = true;
3325 }
3326 return NULL;
3327 }
3328
3329 /* First construct simple cases.  Avoid SCmode, since we want to use a
3330 single register to pass this type. */
3331 if (n == 1 && mode != SCmode)
3332 switch (class[0])
3333 {
3334 case X86_64_INTEGER_CLASS:
3335 case X86_64_INTEGERSI_CLASS:
3336 return gen_rtx_REG (mode, intreg[0]);
3337 case X86_64_SSE_CLASS:
3338 case X86_64_SSESF_CLASS:
3339 case X86_64_SSEDF_CLASS:
3340 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
3341 case X86_64_X87_CLASS:
3342 case X86_64_COMPLEX_X87_CLASS:
3343 return gen_rtx_REG (mode, FIRST_STACK_REG);
3344 case X86_64_NO_CLASS:
3345 /* Zero sized array, struct or class. */
3346 return NULL;
3347 default:
3348 gcc_unreachable ();
3349 }
3350 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
3351 && mode != BLKmode)
3352 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
3353 if (n == 2
3354 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
3355 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
3356 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
3357 && class[1] == X86_64_INTEGER_CLASS
3358 && (mode == CDImode || mode == TImode || mode == TFmode)
3359 && intreg[0] + 1 == intreg[1])
3360 return gen_rtx_REG (mode, intreg[0]);
3361
3362 /* Otherwise figure out the entries of the PARALLEL. */
3363 for (i = 0; i < n; i++)
3364 {
3365 switch (class[i])
3366 {
3367 case X86_64_NO_CLASS:
3368 break;
3369 case X86_64_INTEGER_CLASS:
3370 case X86_64_INTEGERSI_CLASS:
3371 /* Merge TImodes on aligned occasions here too. */
3372 if (i * 8 + 8 > bytes)
3373 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
3374 else if (class[i] == X86_64_INTEGERSI_CLASS)
3375 tmpmode = SImode;
3376 else
3377 tmpmode = DImode;
3378 /* We've requested 24 bits we don't have a mode for.  Use DImode. */
3379 if (tmpmode == BLKmode)
3380 tmpmode = DImode;
3381 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3382 gen_rtx_REG (tmpmode, *intreg),
3383 GEN_INT (i*8));
3384 intreg++;
3385 break;
3386 case X86_64_SSESF_CLASS:
3387 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3388 gen_rtx_REG (SFmode,
3389 SSE_REGNO (sse_regno)),
3390 GEN_INT (i*8));
3391 sse_regno++;
3392 break;
3393 case X86_64_SSEDF_CLASS:
3394 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3395 gen_rtx_REG (DFmode,
3396 SSE_REGNO (sse_regno)),
3397 GEN_INT (i*8));
3398 sse_regno++;
3399 break;
3400 case X86_64_SSE_CLASS:
3401 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
3402 tmpmode = TImode;
3403 else
3404 tmpmode = DImode;
3405 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3406 gen_rtx_REG (tmpmode,
3407 SSE_REGNO (sse_regno)),
3408 GEN_INT (i*8));
3409 if (tmpmode == TImode)
3410 i++;
3411 sse_regno++;
3412 break;
3413 default:
3414 gcc_unreachable ();
3415 }
3416 }
3417
3418 /* Empty aligned struct, union or class. */
3419 if (nexps == 0)
3420 return NULL;
3421
3422 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
3423 for (i = 0; i < nexps; i++)
3424 XVECEXP (ret, 0, i) = exp [i];
3425 return ret;
3426 }
3427
3428 /* Update the data in CUM to advance over an argument
3429 of mode MODE and data type TYPE.
3430 (TYPE is null for libcalls where that information may not be available.) */
3431
3432 void
3433 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3434 tree type, int named)
3435 {
3436 int bytes =
3437 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3438 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3439
3440 if (type)
3441 mode = type_natural_mode (type);
3442
3443 if (TARGET_DEBUG_ARG)
3444 fprintf (stderr, "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, "
3445 "mode=%s, named=%d)\n\n",
3446 words, cum->words, cum->nregs, cum->sse_nregs,
3447 GET_MODE_NAME (mode), named);
3448
3449 if (TARGET_64BIT)
3450 {
3451 int int_nregs, sse_nregs;
3452 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
3453 cum->words += words;
3454 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
3455 {
3456 cum->nregs -= int_nregs;
3457 cum->sse_nregs -= sse_nregs;
3458 cum->regno += int_nregs;
3459 cum->sse_regno += sse_nregs;
3460 }
3461 else
3462 cum->words += words;
3463 }
3464 else
3465 {
3466 switch (mode)
3467 {
3468 default:
3469 break;
3470
3471 case BLKmode:
3472 if (bytes < 0)
3473 break;
3474 /* FALLTHRU */
3475
3476 case DImode:
3477 case SImode:
3478 case HImode:
3479 case QImode:
3480 cum->words += words;
3481 cum->nregs -= words;
3482 cum->regno += words;
3483
3484 if (cum->nregs <= 0)
3485 {
3486 cum->nregs = 0;
3487 cum->regno = 0;
3488 }
3489 break;
3490
3491 case DFmode:
3492 if (cum->float_in_sse < 2)
3493 break;
3494 case SFmode:
3495 if (cum->float_in_sse < 1)
3496 break;
3497 /* FALLTHRU */
3498
3499 case TImode:
3500 case V16QImode:
3501 case V8HImode:
3502 case V4SImode:
3503 case V2DImode:
3504 case V4SFmode:
3505 case V2DFmode:
3506 if (!type || !AGGREGATE_TYPE_P (type))
3507 {
3508 cum->sse_words += words;
3509 cum->sse_nregs -= 1;
3510 cum->sse_regno += 1;
3511 if (cum->sse_nregs <= 0)
3512 {
3513 cum->sse_nregs = 0;
3514 cum->sse_regno = 0;
3515 }
3516 }
3517 break;
3518
3519 case V8QImode:
3520 case V4HImode:
3521 case V2SImode:
3522 case V2SFmode:
3523 if (!type || !AGGREGATE_TYPE_P (type))
3524 {
3525 cum->mmx_words += words;
3526 cum->mmx_nregs -= 1;
3527 cum->mmx_regno += 1;
3528 if (cum->mmx_nregs <= 0)
3529 {
3530 cum->mmx_nregs = 0;
3531 cum->mmx_regno = 0;
3532 }
3533 }
3534 break;
3535 }
3536 }
3537 }
3538
3539 /* Define where to put the arguments to a function.
3540 Value is zero to push the argument on the stack,
3541 or a hard register in which to store the argument.
3542
3543 MODE is the argument's machine mode.
3544 TYPE is the data type of the argument (as a tree).
3545 This is null for libcalls where that information may
3546 not be available.
3547 CUM is a variable of type CUMULATIVE_ARGS which gives info about
3548 the preceding args and about the function being called.
3549 NAMED is nonzero if this argument is a named parameter
3550 (otherwise it is an extra parameter matching an ellipsis). */
3551
3552 rtx
3553 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode orig_mode,
3554 tree type, int named)
3555 {
3556 enum machine_mode mode = orig_mode;
3557 rtx ret = NULL_RTX;
3558 int bytes =
3559 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3560 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3561 static bool warnedsse, warnedmmx;
3562
3563 /* To simplify the code below, represent vector types with a vector mode
3564 even if MMX/SSE are not active. */
3565 if (type && TREE_CODE (type) == VECTOR_TYPE)
3566 mode = type_natural_mode (type);
3567
3568 /* Handle a hidden AL argument containing the number of SSE registers for
3569 varargs x86-64 functions.  For the i386 ABI just return constm1_rtx to
3570 avoid any AL settings. */
3571 if (mode == VOIDmode)
3572 {
3573 if (TARGET_64BIT)
3574 return GEN_INT (cum->maybe_vaarg
3575 ? (cum->sse_nregs < 0
3576 ? SSE_REGPARM_MAX
3577 : cum->sse_regno)
3578 : -1);
3579 else
3580 return constm1_rtx;
3581 }
3582 if (TARGET_64BIT)
3583 ret = construct_container (mode, orig_mode, type, 0, cum->nregs,
3584 cum->sse_nregs,
3585 &x86_64_int_parameter_registers [cum->regno],
3586 cum->sse_regno);
3587 else
3588 switch (mode)
3589 {
3590 /* For now, pass fp/complex values on the stack. */
3591 default:
3592 break;
3593
3594 case BLKmode:
3595 if (bytes < 0)
3596 break;
3597 /* FALLTHRU */
3598 case DImode:
3599 case SImode:
3600 case HImode:
3601 case QImode:
3602 if (words <= cum->nregs)
3603 {
3604 int regno = cum->regno;
3605
3606 /* Fastcall allocates the first two DWORD (SImode) or
3607 smaller arguments to ECX and EDX. */
3608 if (cum->fastcall)
3609 {
3610 if (mode == BLKmode || mode == DImode)
3611 break;
3612
3613 /* ECX, not EAX, is the first allocated register. */
3614 if (regno == 0)
3615 regno = 2;
3616 }
3617 ret = gen_rtx_REG (mode, regno);
3618 }
3619 break;
3620 case DFmode:
3621 if (cum->float_in_sse < 2)
3622 break;
3623 case SFmode:
3624 if (cum->float_in_sse < 1)
3625 break;
3626 /* FALLTHRU */
3627 case TImode:
3628 case V16QImode:
3629 case V8HImode:
3630 case V4SImode:
3631 case V2DImode:
3632 case V4SFmode:
3633 case V2DFmode:
3634 if (!type || !AGGREGATE_TYPE_P (type))
3635 {
3636 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
3637 {
3638 warnedsse = true;
3639 warning (0, "SSE vector argument without SSE enabled "
3640 "changes the ABI");
3641 }
3642 if (cum->sse_nregs)
3643 ret = gen_reg_or_parallel (mode, orig_mode,
3644 cum->sse_regno + FIRST_SSE_REG);
3645 }
3646 break;
3647 case V8QImode:
3648 case V4HImode:
3649 case V2SImode:
3650 case V2SFmode:
3651 if (!type || !AGGREGATE_TYPE_P (type))
3652 {
3653 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
3654 {
3655 warnedmmx = true;
3656 warning (0, "MMX vector argument without MMX enabled "
3657 "changes the ABI");
3658 }
3659 if (cum->mmx_nregs)
3660 ret = gen_reg_or_parallel (mode, orig_mode,
3661 cum->mmx_regno + FIRST_MMX_REG);
3662 }
3663 break;
3664 }
3665
3666 if (TARGET_DEBUG_ARG)
3667 {
3668 fprintf (stderr,
3669 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
3670 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
3671
3672 if (ret)
3673 print_simple_rtl (stderr, ret);
3674 else
3675 fprintf (stderr, ", stack");
3676
3677 fprintf (stderr, " )\n");
3678 }
3679
3680 return ret;
3681 }
3682
3683 /* A C expression that indicates when an argument must be passed by
3684 reference. If nonzero for an argument, a copy of that argument is
3685 made in memory and a pointer to the argument is passed instead of
3686 the argument itself. The pointer is passed in whatever way is
3687 appropriate for passing a pointer to that type. */
3688
3689 static bool
3690 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
3691 enum machine_mode mode ATTRIBUTE_UNUSED,
3692 tree type, bool named ATTRIBUTE_UNUSED)
3693 {
3694 if (!TARGET_64BIT)
3695 return 0;
3696
3697 if (type && int_size_in_bytes (type) == -1)
3698 {
3699 if (TARGET_DEBUG_ARG)
3700 fprintf (stderr, "function_arg_pass_by_reference\n");
3701 return 1;
3702 }
3703
3704 return 0;
3705 }
3706
3707 /* Return true when TYPE should be 128-bit aligned for the 32-bit
3708 argument-passing ABI.  Only called if TARGET_SSE. */
3709 static bool
3710 contains_128bit_aligned_vector_p (tree type)
3711 {
3712 enum machine_mode mode = TYPE_MODE (type);
3713 if (SSE_REG_MODE_P (mode)
3714 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
3715 return true;
3716 if (TYPE_ALIGN (type) < 128)
3717 return false;
3718
3719 if (AGGREGATE_TYPE_P (type))
3720 {
3721 /* Walk the aggregates recursively. */
3722 switch (TREE_CODE (type))
3723 {
3724 case RECORD_TYPE:
3725 case UNION_TYPE:
3726 case QUAL_UNION_TYPE:
3727 {
3728 tree field;
3729
3730 if (TYPE_BINFO (type))
3731 {
3732 tree binfo, base_binfo;
3733 int i;
3734
3735 for (binfo = TYPE_BINFO (type), i = 0;
3736 BINFO_BASE_ITERATE (binfo, i, base_binfo); i++)
3737 if (contains_128bit_aligned_vector_p
3738 (BINFO_TYPE (base_binfo)))
3739 return true;
3740 }
3741 /* And now check the fields of the structure. */
3742 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3743 {
3744 if (TREE_CODE (field) == FIELD_DECL
3745 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
3746 return true;
3747 }
3748 break;
3749 }
3750
3751 case ARRAY_TYPE:
3752 /* Just for use if some language passes arrays by value. */
3753 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
3754 return true;
3755 break;
3756
3757 default:
3758 gcc_unreachable ();
3759 }
3760 }
3761 return false;
3762 }
3763
3764 /* Gives the alignment boundary, in bits, of an argument with the
3765 specified mode and type. */
3766
3767 int
3768 ix86_function_arg_boundary (enum machine_mode mode, tree type)
3769 {
3770 int align;
3771 if (type)
3772 align = TYPE_ALIGN (type);
3773 else
3774 align = GET_MODE_ALIGNMENT (mode);
3775 if (align < PARM_BOUNDARY)
3776 align = PARM_BOUNDARY;
3777 if (!TARGET_64BIT)
3778 {
3779 /* The i386 ABI defines all arguments to be 4-byte aligned.  We have to
3780 make an exception for SSE modes, since these require 128-bit
3781 alignment.
3782
3783 The handling here differs from field_alignment.  ICC aligns MMX
3784 arguments to 4-byte boundaries, while structure fields are aligned
3785 to 8-byte boundaries. */
3786 if (!TARGET_SSE)
3787 align = PARM_BOUNDARY;
3788 else if (!type)
3789 {
3790 if (!SSE_REG_MODE_P (mode))
3791 align = PARM_BOUNDARY;
3792 }
3793 else
3794 {
3795 if (!contains_128bit_aligned_vector_p (type))
3796 align = PARM_BOUNDARY;
3797 }
3798 }
3799 if (align > 128)
3800 align = 128;
3801 return align;
3802 }
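/* Worked example (added for exposition): with the 32-bit ABI a plain double
   argument ends up with PARM_BOUNDARY (32-bit) alignment, while an argument
   whose type contains a 128-bit vector (e.g. __m128, when TARGET_SSE) keeps
   its 128-bit alignment; the result is always capped at 128 bits.  */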
3803
3804 /* Return true if N is a possible register number of function value. */
3805 bool
3806 ix86_function_value_regno_p (int regno)
3807 {
3808 if (regno == 0
3809 || (regno == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
3810 || (regno == FIRST_SSE_REG && TARGET_SSE))
3811 return true;
3812
3813 if (!TARGET_64BIT
3814 && (regno == FIRST_MMX_REG && TARGET_MMX))
3815 return true;
3816
3817 return false;
3818 }
3819
3820 /* Define how to find the value returned by a function.
3821 VALTYPE is the data type of the value (as a tree).
3822 If the precise function being called is known, FUNC is its FUNCTION_DECL;
3823 otherwise, FUNC is 0. */
3824 rtx
3825 ix86_function_value (tree valtype, tree fntype_or_decl,
3826 bool outgoing ATTRIBUTE_UNUSED)
3827 {
3828 enum machine_mode natmode = type_natural_mode (valtype);
3829
3830 if (TARGET_64BIT)
3831 {
3832 rtx ret = construct_container (natmode, TYPE_MODE (valtype), valtype,
3833 1, REGPARM_MAX, SSE_REGPARM_MAX,
3834 x86_64_int_return_registers, 0);
3835 /* For zero-sized structures, construct_container returns NULL, but we
3836 need to keep the rest of the compiler happy by returning a meaningful value. */
3837 if (!ret)
3838 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
3839 return ret;
3840 }
3841 else
3842 {
3843 tree fn = NULL_TREE, fntype;
3844 if (fntype_or_decl
3845 && DECL_P (fntype_or_decl))
3846 fn = fntype_or_decl;
3847 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
3848 return gen_rtx_REG (TYPE_MODE (valtype),
3849 ix86_value_regno (natmode, fn, fntype));
3850 }
3851 }
3852
3853 /* Return true iff type is returned in memory. */
3854 int
3855 ix86_return_in_memory (tree type)
3856 {
3857 int needed_intregs, needed_sseregs, size;
3858 enum machine_mode mode = type_natural_mode (type);
3859
3860 if (TARGET_64BIT)
3861 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
3862
3863 if (mode == BLKmode)
3864 return 1;
3865
3866 size = int_size_in_bytes (type);
3867
3868 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
3869 return 0;
3870
3871 if (VECTOR_MODE_P (mode) || mode == TImode)
3872 {
3873 /* User-created vectors small enough to fit in EAX. */
3874 if (size < 8)
3875 return 0;
3876
3877 /* MMX/3dNow values are returned in MM0,
3878 except when it doesn't exist. */
3879 if (size == 8)
3880 return (TARGET_MMX ? 0 : 1);
3881
3882 /* SSE values are returned in XMM0, except when it doesn't exist. */
3883 if (size == 16)
3884 return (TARGET_SSE ? 0 : 1);
3885 }
3886
3887 if (mode == XFmode)
3888 return 0;
3889
3890 if (mode == TDmode)
3891 return 1;
3892
3893 if (size > 12)
3894 return 1;
3895 return 0;
3896 }
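/* Illustrative consequences (added for exposition): on the 32-bit target a
   16-byte __m128 value is returned in %xmm0 only when TARGET_SSE is set and
   in memory otherwise, an 8-byte MMX vector likewise depends on TARGET_MMX,
   and any BLKmode aggregate or value wider than 12 bytes goes to memory.  */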
3897
3898 /* When returning SSE vector types, we have a choice of either
3899 (1) being ABI incompatible with a -march switch, or
3900 (2) generating an error.
3901 Given no good solution, I think the safest thing is one warning.
3902 The user won't be able to use -Werror, but....
3903
3904 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
3905 called in response to actually generating a caller or callee that
3906 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
3907 via aggregate_value_p for general type probing from tree-ssa. */
3908
3909 static rtx
3910 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
3911 {
3912 static bool warnedsse, warnedmmx;
3913
3914 if (type)
3915 {
3916 /* Look at the return type of the function, not the function type. */
3917 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
3918
3919 if (!TARGET_SSE && !warnedsse)
3920 {
3921 if (mode == TImode
3922 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
3923 {
3924 warnedsse = true;
3925 warning (0, "SSE vector return without SSE enabled "
3926 "changes the ABI");
3927 }
3928 }
3929
3930 if (!TARGET_MMX && !warnedmmx)
3931 {
3932 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
3933 {
3934 warnedmmx = true;
3935 warning (0, "MMX vector return without MMX enabled "
3936 "changes the ABI");
3937 }
3938 }
3939 }
3940
3941 return NULL;
3942 }
3943
3944 /* Define how to find the value returned by a library function
3945 assuming the value has mode MODE. */
3946 rtx
3947 ix86_libcall_value (enum machine_mode mode)
3948 {
3949 if (TARGET_64BIT)
3950 {
3951 switch (mode)
3952 {
3953 case SFmode:
3954 case SCmode:
3955 case DFmode:
3956 case DCmode:
3957 case TFmode:
3958 case SDmode:
3959 case DDmode:
3960 case TDmode:
3961 return gen_rtx_REG (mode, FIRST_SSE_REG);
3962 case XFmode:
3963 case XCmode:
3964 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
3965 case TCmode:
3966 return NULL;
3967 default:
3968 return gen_rtx_REG (mode, 0);
3969 }
3970 }
3971 else
3972 return gen_rtx_REG (mode, ix86_value_regno (mode, NULL, NULL));
3973 }
3974
3975 /* Given a mode, return the register to use for a return value. */
3976
3977 static int
3978 ix86_value_regno (enum machine_mode mode, tree func, tree fntype)
3979 {
3980 gcc_assert (!TARGET_64BIT);
3981
3982 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
3983 we prevent this case when MMX is not available. */
3984 if ((VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8))
3985 return FIRST_MMX_REG;
3986
3987 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
3988 we prevent this case when SSE is not available. */
3989 if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
3990 return FIRST_SSE_REG;
3991
3992 /* Decimal floating point values can go in %eax, unlike other float modes. */
3993 if (DECIMAL_FLOAT_MODE_P (mode))
3994 return 0;
3995
3996 /* Most things go in %eax, except (unless -mno-fp-ret-in-387) fp values. */
3997 if (!SCALAR_FLOAT_MODE_P (mode) || !TARGET_FLOAT_RETURNS_IN_80387)
3998 return 0;
3999
4000 /* Floating point return values in %st(0), except for local functions when
4001 SSE math is enabled or for functions with sseregparm attribute. */
4002 if ((func || fntype)
4003 && (mode == SFmode || mode == DFmode))
4004 {
4005 int sse_level = ix86_function_sseregparm (fntype, func);
4006 if ((sse_level >= 1 && mode == SFmode)
4007 || (sse_level == 2 && mode == DFmode))
4008 return FIRST_SSE_REG;
4009 }
4010
4011 return FIRST_FLOAT_REG;
4012 }
4013 \f
4014 /* Create the va_list data type. */
4015
4016 static tree
4017 ix86_build_builtin_va_list (void)
4018 {
4019 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
4020
4021 /* For i386 we use plain pointer to argument area. */
4022 if (!TARGET_64BIT)
4023 return build_pointer_type (char_type_node);
4024
4025 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
4026 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
4027
4028 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
4029 unsigned_type_node);
4030 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
4031 unsigned_type_node);
4032 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
4033 ptr_type_node);
4034 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
4035 ptr_type_node);
4036
4037 va_list_gpr_counter_field = f_gpr;
4038 va_list_fpr_counter_field = f_fpr;
4039
4040 DECL_FIELD_CONTEXT (f_gpr) = record;
4041 DECL_FIELD_CONTEXT (f_fpr) = record;
4042 DECL_FIELD_CONTEXT (f_ovf) = record;
4043 DECL_FIELD_CONTEXT (f_sav) = record;
4044
4045 TREE_CHAIN (record) = type_decl;
4046 TYPE_NAME (record) = type_decl;
4047 TYPE_FIELDS (record) = f_gpr;
4048 TREE_CHAIN (f_gpr) = f_fpr;
4049 TREE_CHAIN (f_fpr) = f_ovf;
4050 TREE_CHAIN (f_ovf) = f_sav;
4051
4052 layout_type (record);
4053
4054 /* The correct type is an array type of one element. */
4055 return build_array_type (record, build_index_type (size_zero_node));
4056 }
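/* For reference (added for exposition): the record built above corresponds
   roughly to the well-known x86-64 va_list layout

       typedef struct __va_list_tag {
         unsigned int gp_offset;
         unsigned int fp_offset;
         void *overflow_arg_area;
         void *reg_save_area;
       } va_list[1];

   as described by the psABI.  */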
4057
4058 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
4059
4060 static void
4061 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4062 tree type, int *pretend_size ATTRIBUTE_UNUSED,
4063 int no_rtl)
4064 {
4065 CUMULATIVE_ARGS next_cum;
4066 rtx save_area = NULL_RTX, mem;
4067 rtx label;
4068 rtx label_ref;
4069 rtx tmp_reg;
4070 rtx nsse_reg;
4071 int set;
4072 tree fntype;
4073 int stdarg_p;
4074 int i;
4075
4076 if (!TARGET_64BIT)
4077 return;
4078
4079 if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
4080 return;
4081
4082 /* Indicate that space should be allocated on the stack for the varargs save area. */
4083 ix86_save_varrargs_registers = 1;
4084
4085 cfun->stack_alignment_needed = 128;
4086
4087 fntype = TREE_TYPE (current_function_decl);
4088 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
4089 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
4090 != void_type_node));
4091
4092 /* For varargs, we do not want to skip the dummy va_dcl argument.
4093 For stdargs, we do want to skip the last named argument. */
4094 next_cum = *cum;
4095 if (stdarg_p)
4096 function_arg_advance (&next_cum, mode, type, 1);
4097
4098 if (!no_rtl)
4099 save_area = frame_pointer_rtx;
4100
4101 set = get_varargs_alias_set ();
4102
4103 for (i = next_cum.regno;
4104 i < ix86_regparm
4105 && i < next_cum.regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
4106 i++)
4107 {
4108 mem = gen_rtx_MEM (Pmode,
4109 plus_constant (save_area, i * UNITS_PER_WORD));
4110 MEM_NOTRAP_P (mem) = 1;
4111 set_mem_alias_set (mem, set);
4112 emit_move_insn (mem, gen_rtx_REG (Pmode,
4113 x86_64_int_parameter_registers[i]));
4114 }
4115
4116 if (next_cum.sse_nregs && cfun->va_list_fpr_size)
4117 {
4118 /* Now emit code to save SSE registers.  The AX parameter contains the
4119 number of SSE parameter registers used to call this function.  We use
4120 the sse_prologue_save insn template, which produces a computed jump across
4121 the SSE saves.  We need some preparation work to get this working. */
4122
4123 label = gen_label_rtx ();
4124 label_ref = gen_rtx_LABEL_REF (Pmode, label);
4125
4126 /* Compute the address to jump to:
4127 label - eax*4 + nnamed_sse_arguments*4.  */
4128 tmp_reg = gen_reg_rtx (Pmode);
4129 nsse_reg = gen_reg_rtx (Pmode);
4130 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
4131 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4132 gen_rtx_MULT (Pmode, nsse_reg,
4133 GEN_INT (4))));
4134 if (next_cum.sse_regno)
4135 emit_move_insn
4136 (nsse_reg,
4137 gen_rtx_CONST (DImode,
4138 gen_rtx_PLUS (DImode,
4139 label_ref,
4140 GEN_INT (next_cum.sse_regno * 4))));
4141 else
4142 emit_move_insn (nsse_reg, label_ref);
4143 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
4144
4145 /* Compute the address of the memory block we save into.  We always use a
4146 pointer pointing 127 bytes after the first byte to store - this is needed
4147 to keep the instruction size limited to 4 bytes. */
4148 tmp_reg = gen_reg_rtx (Pmode);
4149 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4150 plus_constant (save_area,
4151 8 * REGPARM_MAX + 127)));
4152 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
4153 MEM_NOTRAP_P (mem) = 1;
4154 set_mem_alias_set (mem, set);
4155 set_mem_align (mem, BITS_PER_WORD);
4156
4157 /* And finally do the dirty job! */
4158 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
4159 GEN_INT (next_cum.sse_regno), label));
4160 }
4161
4162 }
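/* Layout note (added for exposition): the register save area filled in
   above holds the integer argument registers in its first
   REGPARM_MAX * 8 = 48 bytes, followed by the SSE argument registers in
   16-byte slots; these are exactly the offsets that the gp_offset and
   fp_offset fields of va_list index into.  */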
4163
4164 /* Implement va_start. */
4165
4166 void
4167 ix86_va_start (tree valist, rtx nextarg)
4168 {
4169 HOST_WIDE_INT words, n_gpr, n_fpr;
4170 tree f_gpr, f_fpr, f_ovf, f_sav;
4171 tree gpr, fpr, ovf, sav, t;
4172 tree type;
4173
4174 /* Only the 64-bit target needs something special. */
4175 if (!TARGET_64BIT)
4176 {
4177 std_expand_builtin_va_start (valist, nextarg);
4178 return;
4179 }
4180
4181 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4182 f_fpr = TREE_CHAIN (f_gpr);
4183 f_ovf = TREE_CHAIN (f_fpr);
4184 f_sav = TREE_CHAIN (f_ovf);
4185
4186 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
4187 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4188 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4189 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4190 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4191
4192 /* Count number of gp and fp argument registers used. */
4193 words = current_function_args_info.words;
4194 n_gpr = current_function_args_info.regno;
4195 n_fpr = current_function_args_info.sse_regno;
4196
4197 if (TARGET_DEBUG_ARG)
4198 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
4199 (int) words, (int) n_gpr, (int) n_fpr);
4200
4201 if (cfun->va_list_gpr_size)
4202 {
4203 type = TREE_TYPE (gpr);
4204 t = build2 (MODIFY_EXPR, type, gpr,
4205 build_int_cst (type, n_gpr * 8));
4206 TREE_SIDE_EFFECTS (t) = 1;
4207 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4208 }
4209
4210 if (cfun->va_list_fpr_size)
4211 {
4212 type = TREE_TYPE (fpr);
4213 t = build2 (MODIFY_EXPR, type, fpr,
4214 build_int_cst (type, n_fpr * 16 + 8*REGPARM_MAX));
4215 TREE_SIDE_EFFECTS (t) = 1;
4216 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4217 }
4218
4219 /* Find the overflow area. */
4220 type = TREE_TYPE (ovf);
4221 t = make_tree (type, virtual_incoming_args_rtx);
4222 if (words != 0)
4223 t = build2 (PLUS_EXPR, type, t,
4224 build_int_cst (type, words * UNITS_PER_WORD));
4225 t = build2 (MODIFY_EXPR, type, ovf, t);
4226 TREE_SIDE_EFFECTS (t) = 1;
4227 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4228
4229 if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
4230 {
4231 /* Find the register save area.
4232 The function prologue saves it right above the stack frame. */
4233 type = TREE_TYPE (sav);
4234 t = make_tree (type, frame_pointer_rtx);
4235 t = build2 (MODIFY_EXPR, type, sav, t);
4236 TREE_SIDE_EFFECTS (t) = 1;
4237 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4238 }
4239 }
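/* Worked example (added for exposition): for a variadic function such as
   f (int n, ...) called with one named integer argument and no named SSE
   arguments, n_gpr is 1 and n_fpr is 0, so va_start initializes
   gp_offset = 8 and fp_offset = 48 (8 * REGPARM_MAX), pointing just past
   the named arguments within the register save area described above.  */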
4240
4241 /* Implement va_arg. */
4242
4243 tree
4244 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
4245 {
4246 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
4247 tree f_gpr, f_fpr, f_ovf, f_sav;
4248 tree gpr, fpr, ovf, sav, t;
4249 int size, rsize;
4250 tree lab_false, lab_over = NULL_TREE;
4251 tree addr, t2;
4252 rtx container;
4253 int indirect_p = 0;
4254 tree ptrtype;
4255 enum machine_mode nat_mode;
4256
4257 /* Only the 64-bit target needs something special. */
4258 if (!TARGET_64BIT)
4259 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
4260
4261 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4262 f_fpr = TREE_CHAIN (f_gpr);
4263 f_ovf = TREE_CHAIN (f_fpr);
4264 f_sav = TREE_CHAIN (f_ovf);
4265
4266 valist = build_va_arg_indirect_ref (valist);
4267 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4268 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4269 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4270 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4271
4272 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
4273 if (indirect_p)
4274 type = build_pointer_type (type);
4275 size = int_size_in_bytes (type);
4276 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4277
4278 nat_mode = type_natural_mode (type);
4279 container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
4280 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
4281
4282 /* Pull the value out of the saved registers. */
4283
4284 addr = create_tmp_var (ptr_type_node, "addr");
4285 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
4286
4287 if (container)
4288 {
4289 int needed_intregs, needed_sseregs;
4290 bool need_temp;
4291 tree int_addr, sse_addr;
4292
4293 lab_false = create_artificial_label ();
4294 lab_over = create_artificial_label ();
4295
4296 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
4297
4298 need_temp = (!REG_P (container)
4299 && ((needed_intregs && TYPE_ALIGN (type) > 64)
4300 || TYPE_ALIGN (type) > 128));
4301
4302 /* In case we are passing a structure, verify that it is a consecutive block
4303 in the register save area.  If not, we need to do moves. */
4304 if (!need_temp && !REG_P (container))
4305 {
4306 /* Verify that all registers are strictly consecutive.  */
4307 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
4308 {
4309 int i;
4310
4311 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4312 {
4313 rtx slot = XVECEXP (container, 0, i);
4314 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
4315 || INTVAL (XEXP (slot, 1)) != i * 16)
4316 need_temp = 1;
4317 }
4318 }
4319 else
4320 {
4321 int i;
4322
4323 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4324 {
4325 rtx slot = XVECEXP (container, 0, i);
4326 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
4327 || INTVAL (XEXP (slot, 1)) != i * 8)
4328 need_temp = 1;
4329 }
4330 }
4331 }
4332 if (!need_temp)
4333 {
4334 int_addr = addr;
4335 sse_addr = addr;
4336 }
4337 else
4338 {
4339 int_addr = create_tmp_var (ptr_type_node, "int_addr");
4340 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
4341 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
4342 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
4343 }
4344
4345 /* First ensure that we fit completely in registers. */
4346 if (needed_intregs)
4347 {
4348 t = build_int_cst (TREE_TYPE (gpr),
4349 (REGPARM_MAX - needed_intregs + 1) * 8);
4350 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
4351 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4352 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4353 gimplify_and_add (t, pre_p);
4354 }
4355 if (needed_sseregs)
4356 {
4357 t = build_int_cst (TREE_TYPE (fpr),
4358 (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
4359 + REGPARM_MAX * 8);
4360 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
4361 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4362 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4363 gimplify_and_add (t, pre_p);
4364 }
4365
4366 /* Compute index to start of area used for integer regs. */
4367 if (needed_intregs)
4368 {
4369 /* int_addr = gpr + sav; */
4370 t = fold_convert (ptr_type_node, gpr);
4371 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4372 t = build2 (MODIFY_EXPR, void_type_node, int_addr, t);
4373 gimplify_and_add (t, pre_p);
4374 }
4375 if (needed_sseregs)
4376 {
4377 /* sse_addr = fpr + sav; */
4378 t = fold_convert (ptr_type_node, fpr);
4379 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4380 t = build2 (MODIFY_EXPR, void_type_node, sse_addr, t);
4381 gimplify_and_add (t, pre_p);
4382 }
4383 if (need_temp)
4384 {
4385 int i;
4386 tree temp = create_tmp_var (type, "va_arg_tmp");
4387
4388 /* addr = &temp; */
4389 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
4390 t = build2 (MODIFY_EXPR, void_type_node, addr, t);
4391 gimplify_and_add (t, pre_p);
4392
4393 for (i = 0; i < XVECLEN (container, 0); i++)
4394 {
4395 rtx slot = XVECEXP (container, 0, i);
4396 rtx reg = XEXP (slot, 0);
4397 enum machine_mode mode = GET_MODE (reg);
4398 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
4399 tree addr_type = build_pointer_type (piece_type);
4400 tree src_addr, src;
4401 int src_offset;
4402 tree dest_addr, dest;
4403
4404 if (SSE_REGNO_P (REGNO (reg)))
4405 {
4406 src_addr = sse_addr;
4407 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
4408 }
4409 else
4410 {
4411 src_addr = int_addr;
4412 src_offset = REGNO (reg) * 8;
4413 }
4414 src_addr = fold_convert (addr_type, src_addr);
4415 src_addr = fold (build2 (PLUS_EXPR, addr_type, src_addr,
4416 size_int (src_offset)));
4417 src = build_va_arg_indirect_ref (src_addr);
4418
4419 dest_addr = fold_convert (addr_type, addr);
4420 dest_addr = fold (build2 (PLUS_EXPR, addr_type, dest_addr,
4421 size_int (INTVAL (XEXP (slot, 1)))));
4422 dest = build_va_arg_indirect_ref (dest_addr);
4423
4424 t = build2 (MODIFY_EXPR, void_type_node, dest, src);
4425 gimplify_and_add (t, pre_p);
4426 }
4427 }
4428
4429 if (needed_intregs)
4430 {
4431 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
4432 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
4433 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
4434 gimplify_and_add (t, pre_p);
4435 }
4436 if (needed_sseregs)
4437 {
4438 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
4439 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
4440 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
4441 gimplify_and_add (t, pre_p);
4442 }
4443
4444 t = build1 (GOTO_EXPR, void_type_node, lab_over);
4445 gimplify_and_add (t, pre_p);
4446
4447 t = build1 (LABEL_EXPR, void_type_node, lab_false);
4448 append_to_statement_list (t, pre_p);
4449 }
4450
4451 /* ... otherwise out of the overflow area. */
4452
4453 /* Care for on-stack alignment if needed. */
4454 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64
4455 || integer_zerop (TYPE_SIZE (type)))
4456 t = ovf;
4457 else
4458 {
4459 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
4460 t = build2 (PLUS_EXPR, TREE_TYPE (ovf), ovf,
4461 build_int_cst (TREE_TYPE (ovf), align - 1));
4462 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
4463 build_int_cst (TREE_TYPE (t), -align));
4464 }
4465 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
4466
4467 t2 = build2 (MODIFY_EXPR, void_type_node, addr, t);
4468 gimplify_and_add (t2, pre_p);
4469
4470 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
4471 build_int_cst (TREE_TYPE (t), rsize * UNITS_PER_WORD));
4472 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
4473 gimplify_and_add (t, pre_p);
4474
4475 if (container)
4476 {
4477 t = build1 (LABEL_EXPR, void_type_node, lab_over);
4478 append_to_statement_list (t, pre_p);
4479 }
4480
4481 ptrtype = build_pointer_type (type);
4482 addr = fold_convert (ptrtype, addr);
4483
4484 if (indirect_p)
4485 addr = build_va_arg_indirect_ref (addr);
4486 return build_va_arg_indirect_ref (addr);
4487 }
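/* Control-flow sketch (added for exposition) of the GIMPLE emitted above
   for a va_arg of a plain int (needed_intregs == 1, needed_sseregs == 0):

       if (gp_offset >= 48) goto lab_false;
       addr = reg_save_area + gp_offset;
       gp_offset += 8;
       goto lab_over;
     lab_false:
       addr = overflow_arg_area;        (aligned first if the type needs it)
       overflow_arg_area = addr + 8;
     lab_over:
       result = *(int *) addr;
   */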
4488 \f
4489 /* Return nonzero if OPNUM's MEM should be matched
4490 in movabs* patterns. */
4491
4492 int
4493 ix86_check_movabs (rtx insn, int opnum)
4494 {
4495 rtx set, mem;
4496
4497 set = PATTERN (insn);
4498 if (GET_CODE (set) == PARALLEL)
4499 set = XVECEXP (set, 0, 0);
4500 gcc_assert (GET_CODE (set) == SET);
4501 mem = XEXP (set, opnum);
4502 while (GET_CODE (mem) == SUBREG)
4503 mem = SUBREG_REG (mem);
4504 gcc_assert (GET_CODE (mem) == MEM);
4505 return (volatile_ok || !MEM_VOLATILE_P (mem));
4506 }
4507 \f
4508 /* Initialize the table of extra 80387 mathematical constants. */
4509
4510 static void
4511 init_ext_80387_constants (void)
4512 {
4513 static const char * cst[5] =
4514 {
4515 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4516 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4517 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4518 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4519 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4520 };
4521 int i;
4522
4523 for (i = 0; i < 5; i++)
4524 {
4525 real_from_string (&ext_80387_constants_table[i], cst[i]);
4526 /* Ensure each constant is rounded to XFmode precision. */
4527 real_convert (&ext_80387_constants_table[i],
4528 XFmode, &ext_80387_constants_table[i]);
4529 }
4530
4531 ext_80387_constants_init = 1;
4532 }
4533
4534 /* Return true if the constant is something that can be loaded with
4535 a special instruction. */
4536
4537 int
4538 standard_80387_constant_p (rtx x)
4539 {
4540 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
4541 return -1;
4542
4543 if (x == CONST0_RTX (GET_MODE (x)))
4544 return 1;
4545 if (x == CONST1_RTX (GET_MODE (x)))
4546 return 2;
4547
4548 /* For XFmode constants, try to find a special 80387 instruction when
4549 optimizing for size or on those CPUs that benefit from them. */
4550 if (GET_MODE (x) == XFmode
4551 && (optimize_size || x86_ext_80387_constants & TUNEMASK))
4552 {
4553 REAL_VALUE_TYPE r;
4554 int i;
4555
4556 if (! ext_80387_constants_init)
4557 init_ext_80387_constants ();
4558
4559 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4560 for (i = 0; i < 5; i++)
4561 if (real_identical (&r, &ext_80387_constants_table[i]))
4562 return i + 3;
4563 }
4564
4565 return 0;
4566 }
4567
4568 /* Return the opcode of the special instruction to be used to load
4569 the constant X. */
4570
4571 const char *
4572 standard_80387_constant_opcode (rtx x)
4573 {
4574 switch (standard_80387_constant_p (x))
4575 {
4576 case 1:
4577 return "fldz";
4578 case 2:
4579 return "fld1";
4580 case 3:
4581 return "fldlg2";
4582 case 4:
4583 return "fldln2";
4584 case 5:
4585 return "fldl2e";
4586 case 6:
4587 return "fldl2t";
4588 case 7:
4589 return "fldpi";
4590 default:
4591 gcc_unreachable ();
4592 }
4593 }
4594
4595 /* Return the CONST_DOUBLE representing the 80387 constant that is
4596 loaded by the specified special instruction. The argument IDX
4597 matches the return value from standard_80387_constant_p. */
4598
4599 rtx
4600 standard_80387_constant_rtx (int idx)
4601 {
4602 int i;
4603
4604 if (! ext_80387_constants_init)
4605 init_ext_80387_constants ();
4606
4607 switch (idx)
4608 {
4609 case 3:
4610 case 4:
4611 case 5:
4612 case 6:
4613 case 7:
4614 i = idx - 3;
4615 break;
4616
4617 default:
4618 gcc_unreachable ();
4619 }
4620
4621 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
4622 XFmode);
4623 }
4624
4625 /* Return 1 if MODE is a valid mode for SSE. */
4626 static int
4627 standard_sse_mode_p (enum machine_mode mode)
4628 {
4629 switch (mode)
4630 {
4631 case V16QImode:
4632 case V8HImode:
4633 case V4SImode:
4634 case V2DImode:
4635 case V4SFmode:
4636 case V2DFmode:
4637 return 1;
4638
4639 default:
4640 return 0;
4641 }
4642 }
4643
4644 /* Return 1 if X is an FP constant we can load into an SSE register
4645 without using memory. */
4646 int
4647 standard_sse_constant_p (rtx x)
4648 {
4649 enum machine_mode mode = GET_MODE (x);
4650
4651 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
4652 return 1;
4653 if (vector_all_ones_operand (x, mode)
4654 && standard_sse_mode_p (mode))
4655 return TARGET_SSE2 ? 2 : -1;
4656
4657 return 0;
4658 }
4659
4660 /* Return the opcode of the special instruction to be used to load
4661 the constant X. */
4662
4663 const char *
4664 standard_sse_constant_opcode (rtx insn, rtx x)
4665 {
4666 switch (standard_sse_constant_p (x))
4667 {
4668 case 1:
4669 if (get_attr_mode (insn) == MODE_V4SF)
4670 return "xorps\t%0, %0";
4671 else if (get_attr_mode (insn) == MODE_V2DF)
4672 return "xorpd\t%0, %0";
4673 else
4674 return "pxor\t%0, %0";
4675 case 2:
4676 return "pcmpeqd\t%0, %0";
4677 }
4678 gcc_unreachable ();
4679 }
4680
4681 /* Returns 1 if OP contains a symbol reference. */
4682
4683 int
4684 symbolic_reference_mentioned_p (rtx op)
4685 {
4686 const char *fmt;
4687 int i;
4688
4689 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
4690 return 1;
4691
4692 fmt = GET_RTX_FORMAT (GET_CODE (op));
4693 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4694 {
4695 if (fmt[i] == 'E')
4696 {
4697 int j;
4698
4699 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4700 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4701 return 1;
4702 }
4703
4704 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4705 return 1;
4706 }
4707
4708 return 0;
4709 }
4710
4711 /* Return 1 if it is appropriate to emit `ret' instructions in the
4712 body of a function. Do this only if the epilogue is simple, needing a
4713 couple of insns. Prior to reloading, we can't tell how many registers
4714 must be saved, so return 0 then. Return 0 if there is no frame
4715 marker to de-allocate. */
4716
4717 int
4718 ix86_can_use_return_insn_p (void)
4719 {
4720 struct ix86_frame frame;
4721
4722 if (! reload_completed || frame_pointer_needed)
4723 return 0;
4724
4725 /* Don't allow popping more than 32K bytes of arguments, since that's
4726 all we can do with one instruction. */
4727 if (current_function_pops_args
4728 && current_function_args_size >= 32768)
4729 return 0;
4730
4731 ix86_compute_frame_layout (&frame);
4732 return frame.to_allocate == 0 && frame.nregs == 0;
4733 }
4734 \f
4735 /* Value should be nonzero if functions must have frame pointers.
4736 Zero means the frame pointer need not be set up (and parms may
4737 be accessed via the stack pointer) in functions that seem suitable. */
4738
4739 int
4740 ix86_frame_pointer_required (void)
4741 {
4742 /* If we accessed previous frames, then the generated code expects
4743 to be able to access the saved ebp value in our frame. */
4744 if (cfun->machine->accesses_prev_frame)
4745 return 1;
4746
4747 /* Several x86 OSes need a frame pointer for other reasons,
4748 usually pertaining to setjmp. */
4749 if (SUBTARGET_FRAME_POINTER_REQUIRED)
4750 return 1;
4751
4752 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4753 the frame pointer by default. Turn it back on now if we've not
4754 got a leaf function. */
4755 if (TARGET_OMIT_LEAF_FRAME_POINTER
4756 && (!current_function_is_leaf
4757 || ix86_current_function_calls_tls_descriptor))
4758 return 1;
4759
4760 if (current_function_profile)
4761 return 1;
4762
4763 return 0;
4764 }
4765
4766 /* Record that the current function accesses previous call frames. */
4767
4768 void
4769 ix86_setup_frame_addresses (void)
4770 {
4771 cfun->machine->accesses_prev_frame = 1;
4772 }
4773 \f
4774 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
4775 # define USE_HIDDEN_LINKONCE 1
4776 #else
4777 # define USE_HIDDEN_LINKONCE 0
4778 #endif
4779
4780 static int pic_labels_used;
4781
4782 /* Fills in the label name that should be used for a pc thunk for
4783 the given register. */
4784
4785 static void
4786 get_pc_thunk_name (char name[32], unsigned int regno)
4787 {
4788 if (USE_HIDDEN_LINKONCE)
4789 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
4790 else
4791 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
4792 }
4793
4794
4795 /* This function generates, for -fpic, the pc thunks that load a register
4796 with the return address of the caller and then return. */
4797
4798 void
4799 ix86_file_end (void)
4800 {
4801 rtx xops[2];
4802 int regno;
4803
4804 for (regno = 0; regno < 8; ++regno)
4805 {
4806 char name[32];
4807
4808 if (! ((pic_labels_used >> regno) & 1))
4809 continue;
4810
4811 get_pc_thunk_name (name, regno);
4812
4813 #if TARGET_MACHO
4814 if (TARGET_MACHO)
4815 {
4816 switch_to_section (darwin_sections[text_coal_section]);
4817 fputs ("\t.weak_definition\t", asm_out_file);
4818 assemble_name (asm_out_file, name);
4819 fputs ("\n\t.private_extern\t", asm_out_file);
4820 assemble_name (asm_out_file, name);
4821 fputs ("\n", asm_out_file);
4822 ASM_OUTPUT_LABEL (asm_out_file, name);
4823 }
4824 else
4825 #endif
4826 if (USE_HIDDEN_LINKONCE)
4827 {
4828 tree decl;
4829
4830 decl = build_decl (FUNCTION_DECL, get_identifier (name),
4831 error_mark_node);
4832 TREE_PUBLIC (decl) = 1;
4833 TREE_STATIC (decl) = 1;
4834 DECL_ONE_ONLY (decl) = 1;
4835
4836 (*targetm.asm_out.unique_section) (decl, 0);
4837 switch_to_section (get_named_section (decl, NULL, 0));
4838
4839 (*targetm.asm_out.globalize_label) (asm_out_file, name);
4840 fputs ("\t.hidden\t", asm_out_file);
4841 assemble_name (asm_out_file, name);
4842 fputc ('\n', asm_out_file);
4843 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
4844 }
4845 else
4846 {
4847 switch_to_section (text_section);
4848 ASM_OUTPUT_LABEL (asm_out_file, name);
4849 }
4850
4851 xops[0] = gen_rtx_REG (SImode, regno);
4852 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
4853 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
4854 output_asm_insn ("ret", xops);
4855 }
4856
4857 if (NEED_INDICATE_EXEC_STACK)
4858 file_end_indicate_exec_stack ();
4859 }
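/* Example output (added for exposition): for %ebx the loop above emits a
   thunk along the lines of

       __i686.get_pc_thunk.bx:
               movl    (%esp), %ebx
               ret

   which callers pair with an add of _GLOBAL_OFFSET_TABLE_ to form the PIC
   register; see output_set_got below.  */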
4860
4861 /* Emit code for the SET_GOT patterns. */
4862
4863 const char *
4864 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
4865 {
4866 rtx xops[3];
4867
4868 xops[0] = dest;
4869 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
4870
4871 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
4872 {
4873 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
4874
4875 if (!flag_pic)
4876 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4877 else
4878 output_asm_insn ("call\t%a2", xops);
4879
4880 #if TARGET_MACHO
4881 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
4882 is what will be referenced by the Mach-O PIC subsystem. */
4883 if (!label)
4884 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4885 #endif
4886
4887 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4888 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4889
4890 if (flag_pic)
4891 output_asm_insn ("pop{l}\t%0", xops);
4892 }
4893 else
4894 {
4895 char name[32];
4896 get_pc_thunk_name (name, REGNO (dest));
4897 pic_labels_used |= 1 << REGNO (dest);
4898
4899 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4900 xops[2] = gen_rtx_MEM (QImode, xops[2]);
4901 output_asm_insn ("call\t%X2", xops);
4902 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
4903 is what will be referenced by the Mach-O PIC subsystem. */
4904 #if TARGET_MACHO
4905 if (!label)
4906 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4907 else
4908 targetm.asm_out.internal_label (asm_out_file, "L",
4909 CODE_LABEL_NUMBER (label));
4910 #endif
4911 }
4912
4913 if (TARGET_MACHO)
4914 return "";
4915
4916 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4917 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
4918 else
4919 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
4920
4921 return "";
4922 }
4923
4924 /* Generate a "push" pattern for input ARG. */
4925
4926 static rtx
4927 gen_push (rtx arg)
4928 {
4929 return gen_rtx_SET (VOIDmode,
4930 gen_rtx_MEM (Pmode,
4931 gen_rtx_PRE_DEC (Pmode,
4932 stack_pointer_rtx)),
4933 arg);
4934 }
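/* For illustration (added for exposition): on a 32-bit target with ARG being
   the hard frame pointer, the RTL built here is roughly

       (set (mem:SI (pre_dec:SI (reg:SI sp))) (reg:SI bp))

   i.e. an ordinary push; on 64-bit targets Pmode is DImode instead.  */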
4935
4936 /* Return >= 0 if there is an unused call-clobbered register available
4937 for the entire function. */
4938
4939 static unsigned int
4940 ix86_select_alt_pic_regnum (void)
4941 {
4942 if (current_function_is_leaf && !current_function_profile
4943 && !ix86_current_function_calls_tls_descriptor)
4944 {
4945 int i;
4946 for (i = 2; i >= 0; --i)
4947 if (!regs_ever_live[i])
4948 return i;
4949 }
4950
4951 return INVALID_REGNUM;
4952 }
4953
4954 /* Return 1 if we need to save REGNO. */
4955 static int
4956 ix86_save_reg (unsigned int regno, int maybe_eh_return)
4957 {
4958 if (pic_offset_table_rtx
4959 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4960 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4961 || current_function_profile
4962 || current_function_calls_eh_return
4963 || current_function_uses_const_pool))
4964 {
4965 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
4966 return 0;
4967 return 1;
4968 }
4969
4970 if (current_function_calls_eh_return && maybe_eh_return)
4971 {
4972 unsigned i;
4973 for (i = 0; ; i++)
4974 {
4975 unsigned test = EH_RETURN_DATA_REGNO (i);
4976 if (test == INVALID_REGNUM)
4977 break;
4978 if (test == regno)
4979 return 1;
4980 }
4981 }
4982
4983 if (cfun->machine->force_align_arg_pointer
4984 && regno == REGNO (cfun->machine->force_align_arg_pointer))
4985 return 1;
4986
4987 return (regs_ever_live[regno]
4988 && !call_used_regs[regno]
4989 && !fixed_regs[regno]
4990 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4991 }
4992
4993 /* Return the number of registers to be saved on the stack. */
4994
4995 static int
4996 ix86_nsaved_regs (void)
4997 {
4998 int nregs = 0;
4999 int regno;
5000
5001 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5002 if (ix86_save_reg (regno, true))
5003 nregs++;
5004 return nregs;
5005 }
5006
5007 /* Return the offset between two registers, one to be eliminated, and the other
5008 its replacement, at the start of a routine. */
5009
5010 HOST_WIDE_INT
5011 ix86_initial_elimination_offset (int from, int to)
5012 {
5013 struct ix86_frame frame;
5014 ix86_compute_frame_layout (&frame);
5015
5016 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
5017 return frame.hard_frame_pointer_offset;
5018 else if (from == FRAME_POINTER_REGNUM
5019 && to == HARD_FRAME_POINTER_REGNUM)
5020 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
5021 else
5022 {
5023 gcc_assert (to == STACK_POINTER_REGNUM);
5024
5025 if (from == ARG_POINTER_REGNUM)
5026 return frame.stack_pointer_offset;
5027
5028 gcc_assert (from == FRAME_POINTER_REGNUM);
5029 return frame.stack_pointer_offset - frame.frame_pointer_offset;
5030 }
5031 }
5032
5033 /* Fill the ix86_frame structure describing the frame of the currently compiled function. */
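/* A rough sketch of the layout computed below, from higher toward lower
   addresses (see the offset computations that follow):

	return address
	saved frame pointer	<- hard_frame_pointer_offset
	register save area
	va-arg save area
	padding1
	local variables		<- frame_pointer_offset
	outgoing arguments
	padding2		<- stack_pointer_offset  */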
5034
5035 static void
5036 ix86_compute_frame_layout (struct ix86_frame *frame)
5037 {
5038 HOST_WIDE_INT total_size;
5039 unsigned int stack_alignment_needed;
5040 HOST_WIDE_INT offset;
5041 unsigned int preferred_alignment;
5042 HOST_WIDE_INT size = get_frame_size ();
5043
5044 frame->nregs = ix86_nsaved_regs ();
5045 total_size = size;
5046
5047 stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
5048 preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
5049
5050 /* During reload iterations the number of registers saved can change.
5051 Recompute the value as needed. Do not recompute when the number of
5052 registers didn't change, as reload makes multiple calls to this function
5053 and does not expect the decision to change within a single iteration. */
5054 if (!optimize_size
5055 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
5056 {
5057 int count = frame->nregs;
5058
5059 cfun->machine->use_fast_prologue_epilogue_nregs = count;
5060 /* The fast prologue uses move instead of push to save registers. This
5061 is significantly longer, but it also executes faster, as modern hardware
5062 can execute the moves in parallel but cannot do so for push/pop.
5063 
5064 Be careful about choosing which prologue to emit: when the function takes
5065 many instructions to execute we may use the slow version, as well as when
5066 the function is known to be outside a hot spot (this is known only with
5067 feedback). Weight the size of the function by the number of registers
5068 to save, as it is cheap to use one or two push instructions but very
5069 slow to use many of them. */
5070 if (count)
5071 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
5072 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
5073 || (flag_branch_probabilities
5074 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
5075 cfun->machine->use_fast_prologue_epilogue = false;
5076 else
5077 cfun->machine->use_fast_prologue_epilogue
5078 = !expensive_function_p (count);
5079 }
5080 if (TARGET_PROLOGUE_USING_MOVE
5081 && cfun->machine->use_fast_prologue_epilogue)
5082 frame->save_regs_using_mov = true;
5083 else
5084 frame->save_regs_using_mov = false;
5085
5086
5087 /* Skip return address and saved base pointer. */
5088 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
5089
5090 frame->hard_frame_pointer_offset = offset;
5091
5092 /* Do some sanity checking of stack_alignment_needed and
5093 preferred_alignment, since the i386 port is the only one using these
5094 features, which may break easily. */
5095
5096 gcc_assert (!size || stack_alignment_needed);
5097 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
5098 gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5099 gcc_assert (stack_alignment_needed
5100 <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5101
5102 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
5103 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
5104
5105 /* Register save area */
5106 offset += frame->nregs * UNITS_PER_WORD;
5107
5108 /* Va-arg area */
5109 if (ix86_save_varrargs_registers)
5110 {
5111 offset += X86_64_VARARGS_SIZE;
5112 frame->va_arg_size = X86_64_VARARGS_SIZE;
5113 }
5114 else
5115 frame->va_arg_size = 0;
5116
5117 /* Align start of frame for local function. */
5118 frame->padding1 = ((offset + stack_alignment_needed - 1)
5119 & -stack_alignment_needed) - offset;
5120
5121 offset += frame->padding1;
5122
5123 /* Frame pointer points here. */
5124 frame->frame_pointer_offset = offset;
5125
5126 offset += size;
5127
5128 /* Add the outgoing arguments area. It can be skipped if we eliminated
5129 all the function calls as dead code.
5130 Skipping is however impossible when the function calls alloca, as the
5131 alloca expander assumes that the last current_function_outgoing_args_size
5132 bytes of the stack frame are unused. */
5133 if (ACCUMULATE_OUTGOING_ARGS
5134 && (!current_function_is_leaf || current_function_calls_alloca
5135 || ix86_current_function_calls_tls_descriptor))
5136 {
5137 offset += current_function_outgoing_args_size;
5138 frame->outgoing_arguments_size = current_function_outgoing_args_size;
5139 }
5140 else
5141 frame->outgoing_arguments_size = 0;
5142
5143 /* Align stack boundary. Only needed if we're calling another function
5144 or using alloca. */
5145 if (!current_function_is_leaf || current_function_calls_alloca
5146 || ix86_current_function_calls_tls_descriptor)
5147 frame->padding2 = ((offset + preferred_alignment - 1)
5148 & -preferred_alignment) - offset;
5149 else
5150 frame->padding2 = 0;
5151
5152 offset += frame->padding2;
5153
5154 /* We've reached end of stack frame. */
5155 frame->stack_pointer_offset = offset;
5156
5157 /* Size prologue needs to allocate. */
5158 frame->to_allocate =
5159 (size + frame->padding1 + frame->padding2
5160 + frame->outgoing_arguments_size + frame->va_arg_size);
5161
5162 if ((!frame->to_allocate && frame->nregs <= 1)
5163 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
5164 frame->save_regs_using_mov = false;
5165
5166 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
5167 && current_function_is_leaf
5168 && !ix86_current_function_calls_tls_descriptor)
5169 {
5170 frame->red_zone_size = frame->to_allocate;
5171 if (frame->save_regs_using_mov)
5172 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
5173 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
5174 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
5175 }
5176 else
5177 frame->red_zone_size = 0;
5178 frame->to_allocate -= frame->red_zone_size;
5179 frame->stack_pointer_offset -= frame->red_zone_size;
5180 #if 0
5181 fprintf (stderr, "nregs: %i\n", frame->nregs);
5182 fprintf (stderr, "size: %i\n", size);
5183 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
5184 fprintf (stderr, "padding1: %i\n", frame->padding1);
5185 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
5186 fprintf (stderr, "padding2: %i\n", frame->padding2);
5187 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
5188 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
5189 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
5190 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
5191 frame->hard_frame_pointer_offset);
5192 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
5193 #endif
5194 }
5195
5196 /* Emit code to save registers in the prologue. */
5197
5198 static void
5199 ix86_emit_save_regs (void)
5200 {
5201 unsigned int regno;
5202 rtx insn;
5203
5204 for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
5205 if (ix86_save_reg (regno, true))
5206 {
5207 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5208 RTX_FRAME_RELATED_P (insn) = 1;
5209 }
5210 }
5211
5212 /* Emit code to save registers using MOV insns. The first register
5213 is stored at POINTER + OFFSET. */
5214 static void
5215 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
5216 {
5217 unsigned int regno;
5218 rtx insn;
5219
5220 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5221 if (ix86_save_reg (regno, true))
5222 {
5223 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5224 Pmode, offset),
5225 gen_rtx_REG (Pmode, regno));
5226 RTX_FRAME_RELATED_P (insn) = 1;
5227 offset += UNITS_PER_WORD;
5228 }
5229 }
5230
5231 /* Expand a prologue or epilogue stack adjustment.
5232 The pattern exists to put a dependency on all ebp-based memory accesses.
5233 STYLE should be negative if instructions should be marked as frame related,
5234 zero if the %r11 register is live and cannot be freely used, and positive
5235 otherwise. */
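/* In effect this emits DEST = SRC + OFFSET (typically an %esp adjustment);
   on x86-64 an offset that does not fit in a 32-bit immediate is first
   loaded into %r11. */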
5236
5237 static void
5238 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
5239 {
5240 rtx insn;
5241
5242 if (! TARGET_64BIT)
5243 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
5244 else if (x86_64_immediate_operand (offset, DImode))
5245 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
5246 else
5247 {
5248 rtx r11;
5249 /* r11 is used by indirect sibcall return as well, set before the
5250 epilogue and used after the epilogue. ATM indirect sibcall
5251 shouldn't be used together with huge frame sizes in one
5252 function because of the frame_size check in sibcall.c. */
5253 gcc_assert (style);
5254 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5255 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
5256 if (style < 0)
5257 RTX_FRAME_RELATED_P (insn) = 1;
5258 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
5259 offset));
5260 }
5261 if (style < 0)
5262 RTX_FRAME_RELATED_P (insn) = 1;
5263 }
5264
5265 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
5266
5267 static rtx
5268 ix86_internal_arg_pointer (void)
5269 {
5270 bool has_force_align_arg_pointer =
5271 (0 != lookup_attribute (ix86_force_align_arg_pointer_string,
5272 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))));
5273 if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
5274 && DECL_NAME (current_function_decl)
5275 && MAIN_NAME_P (DECL_NAME (current_function_decl))
5276 && DECL_FILE_SCOPE_P (current_function_decl))
5277 || ix86_force_align_arg_pointer
5278 || has_force_align_arg_pointer)
5279 {
5280 /* Nested functions can't realign the stack due to a register
5281 conflict. */
5282 if (DECL_CONTEXT (current_function_decl)
5283 && TREE_CODE (DECL_CONTEXT (current_function_decl)) == FUNCTION_DECL)
5284 {
5285 if (ix86_force_align_arg_pointer)
5286 warning (0, "-mstackrealign ignored for nested functions");
5287 if (has_force_align_arg_pointer)
5288 error ("%s not supported for nested functions",
5289 ix86_force_align_arg_pointer_string);
5290 return virtual_incoming_args_rtx;
5291 }
5292 cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, 2);
5293 return copy_to_reg (cfun->machine->force_align_arg_pointer);
5294 }
5295 else
5296 return virtual_incoming_args_rtx;
5297 }
5298
5299 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
5300 This is called from dwarf2out.c to emit call frame instructions
5301 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
5302 static void
5303 ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
5304 {
5305 rtx unspec = SET_SRC (pattern);
5306 gcc_assert (GET_CODE (unspec) == UNSPEC);
5307
5308 switch (index)
5309 {
5310 case UNSPEC_REG_SAVE:
5311 dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
5312 SET_DEST (pattern));
5313 break;
5314 case UNSPEC_DEF_CFA:
5315 dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
5316 INTVAL (XVECEXP (unspec, 0, 0)));
5317 break;
5318 default:
5319 gcc_unreachable ();
5320 }
5321 }
5322
5323 /* Expand the prologue into a bunch of separate insns. */
5324
5325 void
5326 ix86_expand_prologue (void)
5327 {
5328 rtx insn;
5329 bool pic_reg_used;
5330 struct ix86_frame frame;
5331 HOST_WIDE_INT allocate;
5332
5333 ix86_compute_frame_layout (&frame);
5334
5335 if (cfun->machine->force_align_arg_pointer)
5336 {
5337 rtx x, y;
5338
5339 /* Grab the argument pointer. */
5340 x = plus_constant (stack_pointer_rtx, 4);
5341 y = cfun->machine->force_align_arg_pointer;
5342 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
5343 RTX_FRAME_RELATED_P (insn) = 1;
5344
5345 /* The unwind info consists of two parts: install the fafp as the cfa,
5346 and record the fafp as the "save register" of the stack pointer.
5347 The latter is there so that the unwinder can see where it should
5348 restore the stack pointer across the stack-aligning and insn. */
5349 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA);
5350 x = gen_rtx_SET (VOIDmode, y, x);
5351 RTX_FRAME_RELATED_P (x) = 1;
5352 y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx),
5353 UNSPEC_REG_SAVE);
5354 y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y);
5355 RTX_FRAME_RELATED_P (y) = 1;
5356 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y));
5357 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5358 REG_NOTES (insn) = x;
5359
5360 /* Align the stack. */
5361 emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx,
5362 GEN_INT (-16)));
5363
5364 /* And here we cheat like madmen with the unwind info. We force the
5365 cfa register back to sp+4, which is exactly what it was at the
5366 start of the function. Re-pushing the return address results in
5367 the return at the same spot relative to the cfa, and thus is
5368 correct wrt the unwind info. */
5369 x = cfun->machine->force_align_arg_pointer;
5370 x = gen_frame_mem (Pmode, plus_constant (x, -4));
5371 insn = emit_insn (gen_push (x));
5372 RTX_FRAME_RELATED_P (insn) = 1;
5373
5374 x = GEN_INT (4);
5375 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA);
5376 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
5377 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5378 REG_NOTES (insn) = x;
5379 }
5380
5381 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5382 slower on all targets. Also sdb doesn't like it. */
5383
5384 if (frame_pointer_needed)
5385 {
5386 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
5387 RTX_FRAME_RELATED_P (insn) = 1;
5388
5389 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
5390 RTX_FRAME_RELATED_P (insn) = 1;
5391 }
5392
5393 allocate = frame.to_allocate;
5394
5395 if (!frame.save_regs_using_mov)
5396 ix86_emit_save_regs ();
5397 else
5398 allocate += frame.nregs * UNITS_PER_WORD;
5399
5400 /* When using the red zone we may start register saving before allocating
5401 the stack frame, saving one cycle of the prologue. */
5402 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
5403 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
5404 : stack_pointer_rtx,
5405 -frame.nregs * UNITS_PER_WORD);
5406
5407 if (allocate == 0)
5408 ;
5409 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
5410 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5411 GEN_INT (-allocate), -1);
5412 else
5413 {
5414 /* Only valid for Win32. */
5415 rtx eax = gen_rtx_REG (SImode, 0);
5416 bool eax_live = ix86_eax_live_at_start_p ();
5417 rtx t;
5418
5419 gcc_assert (!TARGET_64BIT);
5420
5421 if (eax_live)
5422 {
5423 emit_insn (gen_push (eax));
5424 allocate -= 4;
5425 }
5426
5427 emit_move_insn (eax, GEN_INT (allocate));
5428
5429 insn = emit_insn (gen_allocate_stack_worker (eax));
5430 RTX_FRAME_RELATED_P (insn) = 1;
5431 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
5432 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
5433 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
5434 t, REG_NOTES (insn));
5435
5436 if (eax_live)
5437 {
5438 if (frame_pointer_needed)
5439 t = plus_constant (hard_frame_pointer_rtx,
5440 allocate
5441 - frame.to_allocate
5442 - frame.nregs * UNITS_PER_WORD);
5443 else
5444 t = plus_constant (stack_pointer_rtx, allocate);
5445 emit_move_insn (eax, gen_rtx_MEM (SImode, t));
5446 }
5447 }
5448
5449 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
5450 {
5451 if (!frame_pointer_needed || !frame.to_allocate)
5452 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
5453 else
5454 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
5455 -frame.nregs * UNITS_PER_WORD);
5456 }
5457
5458 pic_reg_used = false;
5459 if (pic_offset_table_rtx
5460 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5461 || current_function_profile))
5462 {
5463 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
5464
5465 if (alt_pic_reg_used != INVALID_REGNUM)
5466 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
5467
5468 pic_reg_used = true;
5469 }
5470
5471 if (pic_reg_used)
5472 {
5473 if (TARGET_64BIT)
5474 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
5475 else
5476 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
5477
5478 /* Even with accurate pre-reload life analysis, we can wind up
5479 deleting all references to the pic register after reload.
5480 Consider if cross-jumping unifies two sides of a branch
5481 controlled by a comparison vs the only read from a global.
5482 In which case, allow the set_got to be deleted, though we're
5483 too late to do anything about the ebx save in the prologue. */
5484 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
5485 }
5486
5487 /* Prevent function calls from being scheduled before the call to mcount.
5488 In the pic_reg_used case, make sure that the got load isn't deleted. */
5489 if (current_function_profile)
5490 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
5491 }
5492
5493 /* Emit code to restore saved registers using MOV insns. First register
5494 is restored from POINTER + OFFSET. */
5495 static void
5496 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
5497 int maybe_eh_return)
5498 {
5499 int regno;
5500 rtx base_address = gen_rtx_MEM (Pmode, pointer);
5501
5502 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5503 if (ix86_save_reg (regno, maybe_eh_return))
5504 {
5505 /* Ensure that adjust_address won't be forced to produce a pointer
5506 out of the range allowed by the x86-64 instruction set. */
5507 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
5508 {
5509 rtx r11;
5510
5511 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5512 emit_move_insn (r11, GEN_INT (offset));
5513 emit_insn (gen_adddi3 (r11, r11, pointer));
5514 base_address = gen_rtx_MEM (Pmode, r11);
5515 offset = 0;
5516 }
5517 emit_move_insn (gen_rtx_REG (Pmode, regno),
5518 adjust_address (base_address, Pmode, offset));
5519 offset += UNITS_PER_WORD;
5520 }
5521 }
5522
5523 /* Restore function stack, frame, and registers. */
5524
5525 void
5526 ix86_expand_epilogue (int style)
5527 {
5528 int regno;
5529 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
5530 struct ix86_frame frame;
5531 HOST_WIDE_INT offset;
5532
5533 ix86_compute_frame_layout (&frame);
5534
5535 /* Calculate start of saved registers relative to ebp. Special care
5536 must be taken for the normal return case of a function using
5537 eh_return: the eax and edx registers are marked as saved, but not
5538 restored along this path. */
5539 offset = frame.nregs;
5540 if (current_function_calls_eh_return && style != 2)
5541 offset -= 2;
5542 offset *= -UNITS_PER_WORD;
5543
5544 /* If we're only restoring one register and sp is not valid then
5545 use a move instruction to restore the register, since it's
5546 less work than reloading sp and popping the register.
5547 
5548 The default code results in a stack adjustment using an add/lea instruction,
5549 while this code results in a LEAVE instruction (or discrete equivalent),
5550 so it is profitable in some other cases as well, especially when there
5551 are no registers to restore. We also use this code when TARGET_USE_LEAVE
5552 and there is exactly one register to pop. This heuristic may need some
5553 tuning in the future. */
5554 if ((!sp_valid && frame.nregs <= 1)
5555 || (TARGET_EPILOGUE_USING_MOVE
5556 && cfun->machine->use_fast_prologue_epilogue
5557 && (frame.nregs > 1 || frame.to_allocate))
5558 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
5559 || (frame_pointer_needed && TARGET_USE_LEAVE
5560 && cfun->machine->use_fast_prologue_epilogue
5561 && frame.nregs == 1)
5562 || current_function_calls_eh_return)
5563 {
5564 /* Restore registers. We can use ebp or esp to address the memory
5565 locations. If both are available, default to ebp, since offsets
5566 are known to be small. The only exception is esp pointing directly to
5567 the end of the block of saved registers, where we may simplify the
5568 addressing mode. */
5569
5570 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
5571 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
5572 frame.to_allocate, style == 2);
5573 else
5574 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
5575 offset, style == 2);
5576
5577 /* eh_return epilogues need %ecx added to the stack pointer. */
5578 if (style == 2)
5579 {
5580 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
5581
5582 if (frame_pointer_needed)
5583 {
5584 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
5585 tmp = plus_constant (tmp, UNITS_PER_WORD);
5586 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
5587
5588 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
5589 emit_move_insn (hard_frame_pointer_rtx, tmp);
5590
5591 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
5592 const0_rtx, style);
5593 }
5594 else
5595 {
5596 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
5597 tmp = plus_constant (tmp, (frame.to_allocate
5598 + frame.nregs * UNITS_PER_WORD));
5599 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
5600 }
5601 }
5602 else if (!frame_pointer_needed)
5603 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5604 GEN_INT (frame.to_allocate
5605 + frame.nregs * UNITS_PER_WORD),
5606 style);
5607 /* If not an i386, mov & pop is faster than "leave". */
5608 else if (TARGET_USE_LEAVE || optimize_size
5609 || !cfun->machine->use_fast_prologue_epilogue)
5610 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5611 else
5612 {
5613 pro_epilogue_adjust_stack (stack_pointer_rtx,
5614 hard_frame_pointer_rtx,
5615 const0_rtx, style);
5616 if (TARGET_64BIT)
5617 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5618 else
5619 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5620 }
5621 }
5622 else
5623 {
5624 /* First step is to deallocate the stack frame so that we can
5625 pop the registers. */
5626 if (!sp_valid)
5627 {
5628 gcc_assert (frame_pointer_needed);
5629 pro_epilogue_adjust_stack (stack_pointer_rtx,
5630 hard_frame_pointer_rtx,
5631 GEN_INT (offset), style);
5632 }
5633 else if (frame.to_allocate)
5634 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5635 GEN_INT (frame.to_allocate), style);
5636
5637 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5638 if (ix86_save_reg (regno, false))
5639 {
5640 if (TARGET_64BIT)
5641 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
5642 else
5643 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
5644 }
5645 if (frame_pointer_needed)
5646 {
5647 /* Leave results in shorter dependency chains on CPUs that are
5648 able to grok it fast. */
5649 if (TARGET_USE_LEAVE)
5650 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5651 else if (TARGET_64BIT)
5652 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5653 else
5654 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5655 }
5656 }
5657
5658 if (cfun->machine->force_align_arg_pointer)
5659 {
5660 emit_insn (gen_addsi3 (stack_pointer_rtx,
5661 cfun->machine->force_align_arg_pointer,
5662 GEN_INT (-4)));
5663 }
5664
5665 /* Sibcall epilogues don't want a return instruction. */
5666 if (style == 0)
5667 return;
5668
5669 if (current_function_pops_args && current_function_args_size)
5670 {
5671 rtx popc = GEN_INT (current_function_pops_args);
5672
5673 /* i386 can only pop 64K bytes. If asked to pop more, pop
5674 return address, do explicit add, and jump indirectly to the
5675 caller. */
5676
5677 if (current_function_pops_args >= 65536)
5678 {
5679 rtx ecx = gen_rtx_REG (SImode, 2);
5680
5681 /* There is no "pascal" calling convention in 64bit ABI. */
5682 gcc_assert (!TARGET_64BIT);
5683
5684 emit_insn (gen_popsi1 (ecx));
5685 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
5686 emit_jump_insn (gen_return_indirect_internal (ecx));
5687 }
5688 else
5689 emit_jump_insn (gen_return_pop_internal (popc));
5690 }
5691 else
5692 emit_jump_insn (gen_return_internal ());
5693 }
5694
5695 /* Reset from the function's potential modifications. */
5696
5697 static void
5698 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
5699 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5700 {
5701 if (pic_offset_table_rtx)
5702 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
5703 #if TARGET_MACHO
5704 /* Mach-O doesn't support labels at the end of objects, so if
5705 it looks like we might want one, insert a NOP. */
5706 {
5707 rtx insn = get_last_insn ();
5708 while (insn
5709 && NOTE_P (insn)
5710 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_DELETED_LABEL)
5711 insn = PREV_INSN (insn);
5712 if (insn
5713 && (LABEL_P (insn)
5714 || (NOTE_P (insn)
5715 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_DELETED_LABEL)))
5716 fputs ("\tnop\n", file);
5717 }
5718 #endif
5719
5720 }
5721 \f
5722 /* Extract the parts of an RTL expression that is a valid memory address
5723 for an instruction. Return 0 if the structure of the address is
5724 grossly off. Return -1 if the address contains an ASHIFT, so it is not
5725 strictly valid but is still used for computing the length of a lea instruction. */
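/* The parts correspond to the general i386 addressing form
   base + index*scale + disp, with an optional segment override;
   they are returned in *OUT. */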
5726
5727 int
5728 ix86_decompose_address (rtx addr, struct ix86_address *out)
5729 {
5730 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
5731 rtx base_reg, index_reg;
5732 HOST_WIDE_INT scale = 1;
5733 rtx scale_rtx = NULL_RTX;
5734 int retval = 1;
5735 enum ix86_address_seg seg = SEG_DEFAULT;
5736
5737 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
5738 base = addr;
5739 else if (GET_CODE (addr) == PLUS)
5740 {
5741 rtx addends[4], op;
5742 int n = 0, i;
5743
5744 op = addr;
5745 do
5746 {
5747 if (n >= 4)
5748 return 0;
5749 addends[n++] = XEXP (op, 1);
5750 op = XEXP (op, 0);
5751 }
5752 while (GET_CODE (op) == PLUS);
5753 if (n >= 4)
5754 return 0;
5755 addends[n] = op;
5756
5757 for (i = n; i >= 0; --i)
5758 {
5759 op = addends[i];
5760 switch (GET_CODE (op))
5761 {
5762 case MULT:
5763 if (index)
5764 return 0;
5765 index = XEXP (op, 0);
5766 scale_rtx = XEXP (op, 1);
5767 break;
5768
5769 case UNSPEC:
5770 if (XINT (op, 1) == UNSPEC_TP
5771 && TARGET_TLS_DIRECT_SEG_REFS
5772 && seg == SEG_DEFAULT)
5773 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
5774 else
5775 return 0;
5776 break;
5777
5778 case REG:
5779 case SUBREG:
5780 if (!base)
5781 base = op;
5782 else if (!index)
5783 index = op;
5784 else
5785 return 0;
5786 break;
5787
5788 case CONST:
5789 case CONST_INT:
5790 case SYMBOL_REF:
5791 case LABEL_REF:
5792 if (disp)
5793 return 0;
5794 disp = op;
5795 break;
5796
5797 default:
5798 return 0;
5799 }
5800 }
5801 }
5802 else if (GET_CODE (addr) == MULT)
5803 {
5804 index = XEXP (addr, 0); /* index*scale */
5805 scale_rtx = XEXP (addr, 1);
5806 }
5807 else if (GET_CODE (addr) == ASHIFT)
5808 {
5809 rtx tmp;
5810
5811 /* We're called for lea too, which implements ashift on occasion. */
5812 index = XEXP (addr, 0);
5813 tmp = XEXP (addr, 1);
5814 if (GET_CODE (tmp) != CONST_INT)
5815 return 0;
5816 scale = INTVAL (tmp);
5817 if ((unsigned HOST_WIDE_INT) scale > 3)
5818 return 0;
5819 scale = 1 << scale;
5820 retval = -1;
5821 }
5822 else
5823 disp = addr; /* displacement */
5824
5825 /* Extract the integral value of scale. */
5826 if (scale_rtx)
5827 {
5828 if (GET_CODE (scale_rtx) != CONST_INT)
5829 return 0;
5830 scale = INTVAL (scale_rtx);
5831 }
5832
5833 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
5834 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
5835
5836 /* Allow the arg pointer and stack pointer as index if there is no scaling. */
5837 if (base_reg && index_reg && scale == 1
5838 && (index_reg == arg_pointer_rtx
5839 || index_reg == frame_pointer_rtx
5840 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
5841 {
5842 rtx tmp;
5843 tmp = base, base = index, index = tmp;
5844 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
5845 }
5846
5847 /* Special case: %ebp cannot be encoded as a base without a displacement. */
5848 if ((base_reg == hard_frame_pointer_rtx
5849 || base_reg == frame_pointer_rtx
5850 || base_reg == arg_pointer_rtx) && !disp)
5851 disp = const0_rtx;
5852
5853 /* Special case: on K6, [%esi] makes the instruction vector decoded.
5854 Avoid this by transforming to [%esi+0]. */
5855 if (ix86_tune == PROCESSOR_K6 && !optimize_size
5856 && base_reg && !index_reg && !disp
5857 && REG_P (base_reg)
5858 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
5859 disp = const0_rtx;
5860
5861 /* Special case: encode reg+reg instead of reg*2. */
5862 if (!base && index && scale && scale == 2)
5863 base = index, base_reg = index_reg, scale = 1;
5864
5865 /* Special case: scaling cannot be encoded without base or displacement. */
5866 if (!base && !disp && index && scale != 1)
5867 disp = const0_rtx;
5868
5869 out->base = base;
5870 out->index = index;
5871 out->disp = disp;
5872 out->scale = scale;
5873 out->seg = seg;
5874
5875 return retval;
5876 }
5877 \f
5878 /* Return the cost of the memory address X.
5879 For i386, it is better to use a complex address than let gcc copy
5880 the address into a reg and make a new pseudo. But not if the address
5881 requires two regs - that would mean more pseudos with longer
5882 lifetimes. */
5883 static int
5884 ix86_address_cost (rtx x)
5885 {
5886 struct ix86_address parts;
5887 int cost = 1;
5888 int ok = ix86_decompose_address (x, &parts);
5889
5890 gcc_assert (ok);
5891
5892 if (parts.base && GET_CODE (parts.base) == SUBREG)
5893 parts.base = SUBREG_REG (parts.base);
5894 if (parts.index && GET_CODE (parts.index) == SUBREG)
5895 parts.index = SUBREG_REG (parts.index);
5896
5897 /* More complex memory references are better. */
5898 if (parts.disp && parts.disp != const0_rtx)
5899 cost--;
5900 if (parts.seg != SEG_DEFAULT)
5901 cost--;
5902
5903 /* Attempt to minimize number of registers in the address. */
5904 if ((parts.base
5905 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
5906 || (parts.index
5907 && (!REG_P (parts.index)
5908 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
5909 cost++;
5910
5911 if (parts.base
5912 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
5913 && parts.index
5914 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
5915 && parts.base != parts.index)
5916 cost++;
5917
5918 /* The AMD K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
5919 since its predecode logic can't detect the length of such instructions
5920 and decoding degenerates to vector decode. Increase the cost of such
5921 addresses here. The penalty is at least 2 cycles. It may be worthwhile
5922 to split such addresses or even refuse them altogether.
5923
5924 Following addressing modes are affected:
5925 [base+scale*index]
5926 [scale*index+disp]
5927 [base+index]
5928
5929 The first and last cases may be avoidable by explicitly coding the zero in
5930 the memory address, but I don't have an AMD K6 machine handy to check this
5931 theory. */
5932
5933 if (TARGET_K6
5934 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
5935 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
5936 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
5937 cost += 10;
5938
5939 return cost;
5940 }
5941 \f
5942 /* If X is a machine specific address (i.e. a symbol or label being
5943 referenced as a displacement from the GOT implemented using an
5944 UNSPEC), then return the base term. Otherwise return X. */
5945
5946 rtx
5947 ix86_find_base_term (rtx x)
5948 {
5949 rtx term;
5950
5951 if (TARGET_64BIT)
5952 {
5953 if (GET_CODE (x) != CONST)
5954 return x;
5955 term = XEXP (x, 0);
5956 if (GET_CODE (term) == PLUS
5957 && (GET_CODE (XEXP (term, 1)) == CONST_INT
5958 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
5959 term = XEXP (term, 0);
5960 if (GET_CODE (term) != UNSPEC
5961 || XINT (term, 1) != UNSPEC_GOTPCREL)
5962 return x;
5963
5964 term = XVECEXP (term, 0, 0);
5965
5966 if (GET_CODE (term) != SYMBOL_REF
5967 && GET_CODE (term) != LABEL_REF)
5968 return x;
5969
5970 return term;
5971 }
5972
5973 term = ix86_delegitimize_address (x);
5974
5975 if (GET_CODE (term) != SYMBOL_REF
5976 && GET_CODE (term) != LABEL_REF)
5977 return x;
5978
5979 return term;
5980 }
5981
5982 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
5983 this is used to form addresses to local data when -fPIC is in
5984 use. */
5985
5986 static bool
5987 darwin_local_data_pic (rtx disp)
5988 {
5989 if (GET_CODE (disp) == MINUS)
5990 {
5991 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
5992 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
5993 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
5994 {
5995 const char *sym_name = XSTR (XEXP (disp, 1), 0);
5996 if (! strcmp (sym_name, "<pic base>"))
5997 return true;
5998 }
5999 }
6000
6001 return false;
6002 }
6003 \f
6004 /* Determine if a given RTX is a valid constant. We already know this
6005 satisfies CONSTANT_P. */
6006
6007 bool
6008 legitimate_constant_p (rtx x)
6009 {
6010 switch (GET_CODE (x))
6011 {
6012 case CONST:
6013 x = XEXP (x, 0);
6014
6015 if (GET_CODE (x) == PLUS)
6016 {
6017 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
6018 return false;
6019 x = XEXP (x, 0);
6020 }
6021
6022 if (TARGET_MACHO && darwin_local_data_pic (x))
6023 return true;
6024
6025 /* Only some unspecs are valid as "constants". */
6026 if (GET_CODE (x) == UNSPEC)
6027 switch (XINT (x, 1))
6028 {
6029 case UNSPEC_GOTOFF:
6030 return TARGET_64BIT;
6031 case UNSPEC_TPOFF:
6032 case UNSPEC_NTPOFF:
6033 x = XVECEXP (x, 0, 0);
6034 return (GET_CODE (x) == SYMBOL_REF
6035 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6036 case UNSPEC_DTPOFF:
6037 x = XVECEXP (x, 0, 0);
6038 return (GET_CODE (x) == SYMBOL_REF
6039 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
6040 default:
6041 return false;
6042 }
6043
6044 /* We must have drilled down to a symbol. */
6045 if (GET_CODE (x) == LABEL_REF)
6046 return true;
6047 if (GET_CODE (x) != SYMBOL_REF)
6048 return false;
6049 /* FALLTHRU */
6050
6051 case SYMBOL_REF:
6052 /* TLS symbols are never valid. */
6053 if (SYMBOL_REF_TLS_MODEL (x))
6054 return false;
6055 break;
6056
6057 case CONST_DOUBLE:
6058 if (GET_MODE (x) == TImode
6059 && x != CONST0_RTX (TImode)
6060 && !TARGET_64BIT)
6061 return false;
6062 break;
6063
6064 case CONST_VECTOR:
6065 if (x == CONST0_RTX (GET_MODE (x)))
6066 return true;
6067 return false;
6068
6069 default:
6070 break;
6071 }
6072
6073 /* Otherwise we handle everything else in the move patterns. */
6074 return true;
6075 }
6076
6077 /* Determine if it's legal to put X into the constant pool. This
6078 is not possible for the address of thread-local symbols, which
6079 is checked above. */
6080
6081 static bool
6082 ix86_cannot_force_const_mem (rtx x)
6083 {
6084 /* We can always put integral constants and vectors in memory. */
6085 switch (GET_CODE (x))
6086 {
6087 case CONST_INT:
6088 case CONST_DOUBLE:
6089 case CONST_VECTOR:
6090 return false;
6091
6092 default:
6093 break;
6094 }
6095 return !legitimate_constant_p (x);
6096 }
6097
6098 /* Determine if a given RTX is a valid constant address. */
6099
6100 bool
6101 constant_address_p (rtx x)
6102 {
6103 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
6104 }
6105
6106 /* Nonzero if the constant value X is a legitimate general operand
6107 when generating PIC code. It is given that flag_pic is on and
6108 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
6109
6110 bool
6111 legitimate_pic_operand_p (rtx x)
6112 {
6113 rtx inner;
6114
6115 switch (GET_CODE (x))
6116 {
6117 case CONST:
6118 inner = XEXP (x, 0);
6119 if (GET_CODE (inner) == PLUS
6120 && GET_CODE (XEXP (inner, 1)) == CONST_INT)
6121 inner = XEXP (inner, 0);
6122
6123 /* Only some unspecs are valid as "constants". */
6124 if (GET_CODE (inner) == UNSPEC)
6125 switch (XINT (inner, 1))
6126 {
6127 case UNSPEC_GOTOFF:
6128 return TARGET_64BIT;
6129 case UNSPEC_TPOFF:
6130 x = XVECEXP (inner, 0, 0);
6131 return (GET_CODE (x) == SYMBOL_REF
6132 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6133 default:
6134 return false;
6135 }
6136 /* FALLTHRU */
6137
6138 case SYMBOL_REF:
6139 case LABEL_REF:
6140 return legitimate_pic_address_disp_p (x);
6141
6142 default:
6143 return true;
6144 }
6145 }
6146
6147 /* Determine if a given CONST RTX is a valid memory displacement
6148 in PIC mode. */
6149
6150 int
6151 legitimate_pic_address_disp_p (rtx disp)
6152 {
6153 bool saw_plus;
6154
6155 /* In 64bit mode we can allow direct addresses of symbols and labels
6156 when they are not dynamic symbols. */
6157 if (TARGET_64BIT)
6158 {
6159 rtx op0 = disp, op1;
6160
6161 switch (GET_CODE (disp))
6162 {
6163 case LABEL_REF:
6164 return true;
6165
6166 case CONST:
6167 if (GET_CODE (XEXP (disp, 0)) != PLUS)
6168 break;
6169 op0 = XEXP (XEXP (disp, 0), 0);
6170 op1 = XEXP (XEXP (disp, 0), 1);
6171 if (GET_CODE (op1) != CONST_INT
6172 || INTVAL (op1) >= 16*1024*1024
6173 || INTVAL (op1) < -16*1024*1024)
6174 break;
6175 if (GET_CODE (op0) == LABEL_REF)
6176 return true;
6177 if (GET_CODE (op0) != SYMBOL_REF)
6178 break;
6179 /* FALLTHRU */
6180
6181 case SYMBOL_REF:
6182 /* TLS references should always be enclosed in UNSPEC. */
6183 if (SYMBOL_REF_TLS_MODEL (op0))
6184 return false;
6185 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0))
6186 return true;
6187 break;
6188
6189 default:
6190 break;
6191 }
6192 }
6193 if (GET_CODE (disp) != CONST)
6194 return 0;
6195 disp = XEXP (disp, 0);
6196
6197 if (TARGET_64BIT)
6198 {
6199 /* It is unsafe to allow PLUS expressions; this limits the allowed
6200 distance of GOT tables. We should not need these anyway. */
6201 if (GET_CODE (disp) != UNSPEC
6202 || (XINT (disp, 1) != UNSPEC_GOTPCREL
6203 && XINT (disp, 1) != UNSPEC_GOTOFF))
6204 return 0;
6205
6206 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
6207 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
6208 return 0;
6209 return 1;
6210 }
6211
6212 saw_plus = false;
6213 if (GET_CODE (disp) == PLUS)
6214 {
6215 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
6216 return 0;
6217 disp = XEXP (disp, 0);
6218 saw_plus = true;
6219 }
6220
6221 if (TARGET_MACHO && darwin_local_data_pic (disp))
6222 return 1;
6223
6224 if (GET_CODE (disp) != UNSPEC)
6225 return 0;
6226
6227 switch (XINT (disp, 1))
6228 {
6229 case UNSPEC_GOT:
6230 if (saw_plus)
6231 return false;
6232 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
6233 case UNSPEC_GOTOFF:
6234 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
6235 While the ABI also specifies a 32bit relocation, we don't produce it
6236 in the small PIC model at all. */
6237 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
6238 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
6239 && !TARGET_64BIT)
6240 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
6241 return false;
6242 case UNSPEC_GOTTPOFF:
6243 case UNSPEC_GOTNTPOFF:
6244 case UNSPEC_INDNTPOFF:
6245 if (saw_plus)
6246 return false;
6247 disp = XVECEXP (disp, 0, 0);
6248 return (GET_CODE (disp) == SYMBOL_REF
6249 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
6250 case UNSPEC_NTPOFF:
6251 disp = XVECEXP (disp, 0, 0);
6252 return (GET_CODE (disp) == SYMBOL_REF
6253 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
6254 case UNSPEC_DTPOFF:
6255 disp = XVECEXP (disp, 0, 0);
6256 return (GET_CODE (disp) == SYMBOL_REF
6257 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
6258 }
6259
6260 return 0;
6261 }
6262
6263 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
6264 memory address for an instruction. The MODE argument is the machine mode
6265 for the MEM expression that wants to use this address.
6266
6267 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
6268 convert common non-canonical forms to canonical form so that they will
6269 be recognized. */
6270
6271 int
6272 legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
6273 {
6274 struct ix86_address parts;
6275 rtx base, index, disp;
6276 HOST_WIDE_INT scale;
6277 const char *reason = NULL;
6278 rtx reason_rtx = NULL_RTX;
6279
6280 if (TARGET_DEBUG_ADDR)
6281 {
6282 fprintf (stderr,
6283 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
6284 GET_MODE_NAME (mode), strict);
6285 debug_rtx (addr);
6286 }
6287
6288 if (ix86_decompose_address (addr, &parts) <= 0)
6289 {
6290 reason = "decomposition failed";
6291 goto report_error;
6292 }
6293
6294 base = parts.base;
6295 index = parts.index;
6296 disp = parts.disp;
6297 scale = parts.scale;
6298
6299 /* Validate base register.
6300
6301 Don't allow SUBREG's that span more than a word here. It can lead to spill
6302 failures when the base is one word out of a two word structure, which is
6303 represented internally as a DImode int. */
6304
6305 if (base)
6306 {
6307 rtx reg;
6308 reason_rtx = base;
6309
6310 if (REG_P (base))
6311 reg = base;
6312 else if (GET_CODE (base) == SUBREG
6313 && REG_P (SUBREG_REG (base))
6314 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
6315 <= UNITS_PER_WORD)
6316 reg = SUBREG_REG (base);
6317 else
6318 {
6319 reason = "base is not a register";
6320 goto report_error;
6321 }
6322
6323 if (GET_MODE (base) != Pmode)
6324 {
6325 reason = "base is not in Pmode";
6326 goto report_error;
6327 }
6328
6329 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
6330 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
6331 {
6332 reason = "base is not valid";
6333 goto report_error;
6334 }
6335 }
6336
6337 /* Validate index register.
6338
6339 Don't allow SUBREG's that span more than a word here -- same as above. */
6340
6341 if (index)
6342 {
6343 rtx reg;
6344 reason_rtx = index;
6345
6346 if (REG_P (index))
6347 reg = index;
6348 else if (GET_CODE (index) == SUBREG
6349 && REG_P (SUBREG_REG (index))
6350 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
6351 <= UNITS_PER_WORD)
6352 reg = SUBREG_REG (index);
6353 else
6354 {
6355 reason = "index is not a register";
6356 goto report_error;
6357 }
6358
6359 if (GET_MODE (index) != Pmode)
6360 {
6361 reason = "index is not in Pmode";
6362 goto report_error;
6363 }
6364
6365 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
6366 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
6367 {
6368 reason = "index is not valid";
6369 goto report_error;
6370 }
6371 }
6372
6373 /* Validate scale factor. */
6374 if (scale != 1)
6375 {
6376 reason_rtx = GEN_INT (scale);
6377 if (!index)
6378 {
6379 reason = "scale without index";
6380 goto report_error;
6381 }
6382
6383 if (scale != 2 && scale != 4 && scale != 8)
6384 {
6385 reason = "scale is not a valid multiplier";
6386 goto report_error;
6387 }
6388 }
6389
6390 /* Validate displacement. */
6391 if (disp)
6392 {
6393 reason_rtx = disp;
6394
6395 if (GET_CODE (disp) == CONST
6396 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
6397 switch (XINT (XEXP (disp, 0), 1))
6398 {
6399 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
6400 used. While the ABI also specifies 32bit relocations, we don't produce
6401 them at all and use IP-relative addressing instead. */
6402 case UNSPEC_GOT:
6403 case UNSPEC_GOTOFF:
6404 gcc_assert (flag_pic);
6405 if (!TARGET_64BIT)
6406 goto is_legitimate_pic;
6407 reason = "64bit address unspec";
6408 goto report_error;
6409
6410 case UNSPEC_GOTPCREL:
6411 gcc_assert (flag_pic);
6412 goto is_legitimate_pic;
6413
6414 case UNSPEC_GOTTPOFF:
6415 case UNSPEC_GOTNTPOFF:
6416 case UNSPEC_INDNTPOFF:
6417 case UNSPEC_NTPOFF:
6418 case UNSPEC_DTPOFF:
6419 break;
6420
6421 default:
6422 reason = "invalid address unspec";
6423 goto report_error;
6424 }
6425
6426 else if (flag_pic && (SYMBOLIC_CONST (disp)
6427 #if TARGET_MACHO
6428 && !machopic_operand_p (disp)
6429 #endif
6430 ))
6431 {
6432 is_legitimate_pic:
6433 if (TARGET_64BIT && (index || base))
6434 {
6435 /* foo@dtpoff(%rX) is ok. */
6436 if (GET_CODE (disp) != CONST
6437 || GET_CODE (XEXP (disp, 0)) != PLUS
6438 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
6439 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
6440 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
6441 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
6442 {
6443 reason = "non-constant pic memory reference";
6444 goto report_error;
6445 }
6446 }
6447 else if (! legitimate_pic_address_disp_p (disp))
6448 {
6449 reason = "displacement is an invalid pic construct";
6450 goto report_error;
6451 }
6452
6453 /* This code used to verify that a symbolic pic displacement
6454 includes the pic_offset_table_rtx register.
6455
6456 While this is a good idea, unfortunately these constructs may
6457 be created by the "adds using lea" optimization for incorrect
6458 code like:
6459
6460 int a;
6461 int foo(int i)
6462 {
6463 return *(&a+i);
6464 }
6465
6466 This code is nonsensical, but results in addressing the
6467 GOT table with a pic_offset_table_rtx base. We can't
6468 just refuse it easily, since it gets matched by the
6469 "addsi3" pattern, which later gets split to lea in the
6470 case the output register differs from the input. While this
6471 could be handled by a separate addsi pattern for this case
6472 that never results in lea, disabling this test seems the
6473 easier and correct fix for the crash. */
6474 }
6475 else if (GET_CODE (disp) != LABEL_REF
6476 && GET_CODE (disp) != CONST_INT
6477 && (GET_CODE (disp) != CONST
6478 || !legitimate_constant_p (disp))
6479 && (GET_CODE (disp) != SYMBOL_REF
6480 || !legitimate_constant_p (disp)))
6481 {
6482 reason = "displacement is not constant";
6483 goto report_error;
6484 }
6485 else if (TARGET_64BIT
6486 && !x86_64_immediate_operand (disp, VOIDmode))
6487 {
6488 reason = "displacement is out of range";
6489 goto report_error;
6490 }
6491 }
6492
6493 /* Everything looks valid. */
6494 if (TARGET_DEBUG_ADDR)
6495 fprintf (stderr, "Success.\n");
6496 return TRUE;
6497
6498 report_error:
6499 if (TARGET_DEBUG_ADDR)
6500 {
6501 fprintf (stderr, "Error: %s\n", reason);
6502 debug_rtx (reason_rtx);
6503 }
6504 return FALSE;
6505 }
6506 \f
6507 /* Return a unique alias set for the GOT. */
6508
6509 static HOST_WIDE_INT
6510 ix86_GOT_alias_set (void)
6511 {
6512 static HOST_WIDE_INT set = -1;
6513 if (set == -1)
6514 set = new_alias_set ();
6515 return set;
6516 }
6517
6518 /* Return a legitimate reference for ORIG (an address) using the
6519 register REG. If REG is 0, a new pseudo is generated.
6520
6521 There are two types of references that must be handled:
6522
6523 1. Global data references must load the address from the GOT, via
6524 the PIC reg. An insn is emitted to do this load, and the reg is
6525 returned.
6526
6527 2. Static data references, constant pool addresses, and code labels
6528 compute the address as an offset from the GOT, whose base is in
6529 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
6530 differentiate them from global data objects. The returned
6531 address is the PIC reg + an unspec constant.
6532
6533 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
6534 reg also appears in the address. */
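/* As a rough sketch at the assembly level (32-bit PIC): global data
   becomes a load such as "movl foo@GOT(%ebx), %reg", while local data
   becomes an address computation such as "leal foo@GOTOFF(%ebx), %reg";
   64-bit code uses foo@GOTPCREL(%rip) loads instead. The exact
   relocations follow from the UNSPECs generated below. */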
6535
6536 static rtx
6537 legitimize_pic_address (rtx orig, rtx reg)
6538 {
6539 rtx addr = orig;
6540 rtx new = orig;
6541 rtx base;
6542
6543 #if TARGET_MACHO
6544 if (reg == 0)
6545 reg = gen_reg_rtx (Pmode);
6546 /* Use the generic Mach-O PIC machinery. */
6547 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
6548 #endif
6549
6550 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
6551 new = addr;
6552 else if (TARGET_64BIT
6553 && ix86_cmodel != CM_SMALL_PIC
6554 && local_symbolic_operand (addr, Pmode))
6555 {
6556 rtx tmpreg;
6557 /* This symbol may be referenced via a displacement from the PIC
6558 base address (@GOTOFF). */
6559
6560 if (reload_in_progress)
6561 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6562 if (GET_CODE (addr) == CONST)
6563 addr = XEXP (addr, 0);
6564 if (GET_CODE (addr) == PLUS)
6565 {
6566 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6567 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6568 }
6569 else
6570 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6571 new = gen_rtx_CONST (Pmode, new);
6572 if (!reg)
6573 tmpreg = gen_reg_rtx (Pmode);
6574 else
6575 tmpreg = reg;
6576 emit_move_insn (tmpreg, new);
6577
6578 if (reg != 0)
6579 {
6580 new = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
6581 tmpreg, 1, OPTAB_DIRECT);
6582 new = reg;
6583 }
6584 else new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
6585 }
6586 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
6587 {
6588 /* This symbol may be referenced via a displacement from the PIC
6589 base address (@GOTOFF). */
6590
6591 if (reload_in_progress)
6592 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6593 if (GET_CODE (addr) == CONST)
6594 addr = XEXP (addr, 0);
6595 if (GET_CODE (addr) == PLUS)
6596 {
6597 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6598 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6599 }
6600 else
6601 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6602 new = gen_rtx_CONST (Pmode, new);
6603 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6604
6605 if (reg != 0)
6606 {
6607 emit_move_insn (reg, new);
6608 new = reg;
6609 }
6610 }
6611 else if (GET_CODE (addr) == SYMBOL_REF)
6612 {
6613 if (TARGET_64BIT)
6614 {
6615 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
6616 new = gen_rtx_CONST (Pmode, new);
6617 new = gen_const_mem (Pmode, new);
6618 set_mem_alias_set (new, ix86_GOT_alias_set ());
6619
6620 if (reg == 0)
6621 reg = gen_reg_rtx (Pmode);
6622 /* Use gen_movsi directly, otherwise the address is loaded
6623 into a register for CSE. We don't want to CSE these addresses;
6624 instead we CSE addresses from the GOT table, so skip this. */
6625 emit_insn (gen_movsi (reg, new));
6626 new = reg;
6627 }
6628 else
6629 {
6630 /* This symbol must be referenced via a load from the
6631 Global Offset Table (@GOT). */
6632
6633 if (reload_in_progress)
6634 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6635 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
6636 new = gen_rtx_CONST (Pmode, new);
6637 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6638 new = gen_const_mem (Pmode, new);
6639 set_mem_alias_set (new, ix86_GOT_alias_set ());
6640
6641 if (reg == 0)
6642 reg = gen_reg_rtx (Pmode);
6643 emit_move_insn (reg, new);
6644 new = reg;
6645 }
6646 }
6647 else
6648 {
6649 if (GET_CODE (addr) == CONST_INT
6650 && !x86_64_immediate_operand (addr, VOIDmode))
6651 {
6652 if (reg)
6653 {
6654 emit_move_insn (reg, addr);
6655 new = reg;
6656 }
6657 else
6658 new = force_reg (Pmode, addr);
6659 }
6660 else if (GET_CODE (addr) == CONST)
6661 {
6662 addr = XEXP (addr, 0);
6663
6664 /* We must match stuff we generate before. Assume the only
6665 unspecs that can get here are ours. Not that we could do
6666 anything with them anyway.... */
6667 if (GET_CODE (addr) == UNSPEC
6668 || (GET_CODE (addr) == PLUS
6669 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
6670 return orig;
6671 gcc_assert (GET_CODE (addr) == PLUS);
6672 }
6673 if (GET_CODE (addr) == PLUS)
6674 {
6675 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
6676
6677 /* Check first to see if this is a constant offset from a @GOTOFF
6678 symbol reference. */
6679 if (local_symbolic_operand (op0, Pmode)
6680 && GET_CODE (op1) == CONST_INT)
6681 {
6682 if (!TARGET_64BIT)
6683 {
6684 if (reload_in_progress)
6685 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6686 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
6687 UNSPEC_GOTOFF);
6688 new = gen_rtx_PLUS (Pmode, new, op1);
6689 new = gen_rtx_CONST (Pmode, new);
6690 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6691
6692 if (reg != 0)
6693 {
6694 emit_move_insn (reg, new);
6695 new = reg;
6696 }
6697 }
6698 else
6699 {
6700 if (INTVAL (op1) < -16*1024*1024
6701 || INTVAL (op1) >= 16*1024*1024)
6702 {
6703 if (!x86_64_immediate_operand (op1, Pmode))
6704 op1 = force_reg (Pmode, op1);
6705 new = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
6706 }
6707 }
6708 }
6709 else
6710 {
6711 base = legitimize_pic_address (XEXP (addr, 0), reg);
6712 new = legitimize_pic_address (XEXP (addr, 1),
6713 base == reg ? NULL_RTX : reg);
6714
6715 if (GET_CODE (new) == CONST_INT)
6716 new = plus_constant (base, INTVAL (new));
6717 else
6718 {
6719 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
6720 {
6721 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
6722 new = XEXP (new, 1);
6723 }
6724 new = gen_rtx_PLUS (Pmode, base, new);
6725 }
6726 }
6727 }
6728 }
6729 return new;
6730 }
6731 \f
6732 /* Load the thread pointer. If TO_REG is true, force it into a register. */
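/* The result is the (unspec [(const_int 0)] UNSPEC_TP) pattern; with
   TARGET_TLS_DIRECT_SEG_REFS, ix86_decompose_address maps it to a
   %gs (or %fs on 64-bit) segment override rather than a register. */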
6733
6734 static rtx
6735 get_thread_pointer (int to_reg)
6736 {
6737 rtx tp, reg, insn;
6738
6739 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
6740 if (!to_reg)
6741 return tp;
6742
6743 reg = gen_reg_rtx (Pmode);
6744 insn = gen_rtx_SET (VOIDmode, reg, tp);
6745 insn = emit_insn (insn);
6746
6747 return reg;
6748 }
6749
6750 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
6751 false if we expect this to be used for a memory address and true if
6752 we expect to load the address into a register. */
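/* A brief summary of the cases handled below: the global-dynamic and
   local-dynamic models go through the tls_get_addr machinery (or the
   GNU2 TLS scheme when enabled), while the initial-exec and local-exec
   models compute the address as the thread pointer plus an offset that
   is either loaded from the GOT or known at link time. */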
6753
6754 static rtx
6755 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
6756 {
6757 rtx dest, base, off, pic, tp;
6758 int type;
6759
6760 switch (model)
6761 {
6762 case TLS_MODEL_GLOBAL_DYNAMIC:
6763 dest = gen_reg_rtx (Pmode);
6764 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
6765
6766 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
6767 {
6768 rtx rax = gen_rtx_REG (Pmode, 0), insns;
6769
6770 start_sequence ();
6771 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
6772 insns = get_insns ();
6773 end_sequence ();
6774
6775 emit_libcall_block (insns, dest, rax, x);
6776 }
6777 else if (TARGET_64BIT && TARGET_GNU2_TLS)
6778 emit_insn (gen_tls_global_dynamic_64 (dest, x));
6779 else
6780 emit_insn (gen_tls_global_dynamic_32 (dest, x));
6781
6782 if (TARGET_GNU2_TLS)
6783 {
6784 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
6785
6786 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
6787 }
6788 break;
6789
6790 case TLS_MODEL_LOCAL_DYNAMIC:
6791 base = gen_reg_rtx (Pmode);
6792 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
6793
6794 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
6795 {
6796 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
6797
6798 start_sequence ();
6799 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
6800 insns = get_insns ();
6801 end_sequence ();
6802
6803 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
6804 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
6805 emit_libcall_block (insns, base, rax, note);
6806 }
6807 else if (TARGET_64BIT && TARGET_GNU2_TLS)
6808 emit_insn (gen_tls_local_dynamic_base_64 (base));
6809 else
6810 emit_insn (gen_tls_local_dynamic_base_32 (base));
6811
6812 if (TARGET_GNU2_TLS)
6813 {
6814 rtx x = ix86_tls_module_base ();
6815
6816 set_unique_reg_note (get_last_insn (), REG_EQUIV,
6817 gen_rtx_MINUS (Pmode, x, tp));
6818 }
6819
6820 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
6821 off = gen_rtx_CONST (Pmode, off);
6822
6823 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
6824
6825 if (TARGET_GNU2_TLS)
6826 {
6827 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
6828
6829 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
6830 }
6831
6832 break;
6833
6834 case TLS_MODEL_INITIAL_EXEC:
6835 if (TARGET_64BIT)
6836 {
6837 pic = NULL;
6838 type = UNSPEC_GOTNTPOFF;
6839 }
6840 else if (flag_pic)
6841 {
6842 if (reload_in_progress)
6843 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6844 pic = pic_offset_table_rtx;
6845 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
6846 }
6847 else if (!TARGET_ANY_GNU_TLS)
6848 {
6849 pic = gen_reg_rtx (Pmode);
6850 emit_insn (gen_set_got (pic));
6851 type = UNSPEC_GOTTPOFF;
6852 }
6853 else
6854 {
6855 pic = NULL;
6856 type = UNSPEC_INDNTPOFF;
6857 }
6858
6859 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
6860 off = gen_rtx_CONST (Pmode, off);
6861 if (pic)
6862 off = gen_rtx_PLUS (Pmode, pic, off);
6863 off = gen_const_mem (Pmode, off);
6864 set_mem_alias_set (off, ix86_GOT_alias_set ());
6865
6866 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
6867 {
6868 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6869 off = force_reg (Pmode, off);
6870 return gen_rtx_PLUS (Pmode, base, off);
6871 }
6872 else
6873 {
6874 base = get_thread_pointer (true);
6875 dest = gen_reg_rtx (Pmode);
6876 emit_insn (gen_subsi3 (dest, base, off));
6877 }
6878 break;
6879
6880 case TLS_MODEL_LOCAL_EXEC:
6881 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
6882 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
6883 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
6884 off = gen_rtx_CONST (Pmode, off);
6885
6886 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
6887 {
6888 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6889 return gen_rtx_PLUS (Pmode, base, off);
6890 }
6891 else
6892 {
6893 base = get_thread_pointer (true);
6894 dest = gen_reg_rtx (Pmode);
6895 emit_insn (gen_subsi3 (dest, base, off));
6896 }
6897 break;
6898
6899 default:
6900 gcc_unreachable ();
6901 }
6902
6903 return dest;
6904 }
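
/* Roughly, the models above correspond to the usual IA-32 TLS access
   sequences, for example (32-bit, GNU assembler; the exact code depends
   on the flags handled above):
     global-dynamic: leal x@tlsgd(,%ebx,1), %eax; call ___tls_get_addr
     initial-exec:   movl %gs:0, %eax; addl x@gotntpoff(%ebx), %eax
     local-exec:     movl %gs:0, %eax; leal x@ntpoff(%eax), %eax  */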
6905
6906 /* Try machine-dependent ways of modifying an illegitimate address
6907 to be legitimate. If we find one, return the new, valid address.
6908 This macro is used in only one place: `memory_address' in explow.c.
6909
6910 OLDX is the address as it was before break_out_memory_refs was called.
6911 In some cases it is useful to look at this to decide what needs to be done.
6912
6913 MODE and WIN are passed so that this macro can use
6914 GO_IF_LEGITIMATE_ADDRESS.
6915
6916 It is always safe for this macro to do nothing. It exists to recognize
6917 opportunities to optimize the output.
6918
6919 For the 80386, we handle X+REG by loading X into a register R and
6920 using R+REG. R will go in a general reg and indexing will be used.
6921 However, if REG is a broken-out memory address or multiplication,
6922 nothing needs to be done because REG can certainly go in a general reg.
6923
6924 When -fpic is used, special handling is needed for symbolic references.
6925 See comments by legitimize_pic_address in i386.c for details. */
6926
6927 rtx
6928 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
6929 {
6930 int changed = 0;
6931 unsigned log;
6932
6933 if (TARGET_DEBUG_ADDR)
6934 {
6935 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
6936 GET_MODE_NAME (mode));
6937 debug_rtx (x);
6938 }
6939
6940 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
6941 if (log)
6942 return legitimize_tls_address (x, log, false);
6943 if (GET_CODE (x) == CONST
6944 && GET_CODE (XEXP (x, 0)) == PLUS
6945 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
6946 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
6947 {
6948 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false);
6949 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
6950 }
6951
6952 if (flag_pic && SYMBOLIC_CONST (x))
6953 return legitimize_pic_address (x, 0);
6954
6955 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
6956 if (GET_CODE (x) == ASHIFT
6957 && GET_CODE (XEXP (x, 1)) == CONST_INT
6958 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
6959 {
6960 changed = 1;
6961 log = INTVAL (XEXP (x, 1));
6962 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
6963 GEN_INT (1 << log));
6964 }
6965
6966 if (GET_CODE (x) == PLUS)
6967 {
6968 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
6969
6970 if (GET_CODE (XEXP (x, 0)) == ASHIFT
6971 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
6972 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
6973 {
6974 changed = 1;
6975 log = INTVAL (XEXP (XEXP (x, 0), 1));
6976 XEXP (x, 0) = gen_rtx_MULT (Pmode,
6977 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
6978 GEN_INT (1 << log));
6979 }
6980
6981 if (GET_CODE (XEXP (x, 1)) == ASHIFT
6982 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
6983 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
6984 {
6985 changed = 1;
6986 log = INTVAL (XEXP (XEXP (x, 1), 1));
6987 XEXP (x, 1) = gen_rtx_MULT (Pmode,
6988 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
6989 GEN_INT (1 << log));
6990 }
6991
6992 /* Put multiply first if it isn't already. */
6993 if (GET_CODE (XEXP (x, 1)) == MULT)
6994 {
6995 rtx tmp = XEXP (x, 0);
6996 XEXP (x, 0) = XEXP (x, 1);
6997 XEXP (x, 1) = tmp;
6998 changed = 1;
6999 }
7000
7001 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
7002 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
7003 created by virtual register instantiation, register elimination, and
7004 similar optimizations. */
7005 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
7006 {
7007 changed = 1;
7008 x = gen_rtx_PLUS (Pmode,
7009 gen_rtx_PLUS (Pmode, XEXP (x, 0),
7010 XEXP (XEXP (x, 1), 0)),
7011 XEXP (XEXP (x, 1), 1));
7012 }
7013
7014 /* Canonicalize
7015 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
7016 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
7017 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
7018 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
7019 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
7020 && CONSTANT_P (XEXP (x, 1)))
7021 {
7022 rtx constant;
7023 rtx other = NULL_RTX;
7024
7025 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7026 {
7027 constant = XEXP (x, 1);
7028 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
7029 }
7030 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
7031 {
7032 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
7033 other = XEXP (x, 1);
7034 }
7035 else
7036 constant = 0;
7037
7038 if (constant)
7039 {
7040 changed = 1;
7041 x = gen_rtx_PLUS (Pmode,
7042 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
7043 XEXP (XEXP (XEXP (x, 0), 1), 0)),
7044 plus_constant (other, INTVAL (constant)));
7045 }
7046 }
7047
7048 if (changed && legitimate_address_p (mode, x, FALSE))
7049 return x;
7050
7051 if (GET_CODE (XEXP (x, 0)) == MULT)
7052 {
7053 changed = 1;
7054 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
7055 }
7056
7057 if (GET_CODE (XEXP (x, 1)) == MULT)
7058 {
7059 changed = 1;
7060 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
7061 }
7062
7063 if (changed
7064 && GET_CODE (XEXP (x, 1)) == REG
7065 && GET_CODE (XEXP (x, 0)) == REG)
7066 return x;
7067
7068 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
7069 {
7070 changed = 1;
7071 x = legitimize_pic_address (x, 0);
7072 }
7073
7074 if (changed && legitimate_address_p (mode, x, FALSE))
7075 return x;
7076
7077 if (GET_CODE (XEXP (x, 0)) == REG)
7078 {
7079 rtx temp = gen_reg_rtx (Pmode);
7080 rtx val = force_operand (XEXP (x, 1), temp);
7081 if (val != temp)
7082 emit_move_insn (temp, val);
7083
7084 XEXP (x, 1) = temp;
7085 return x;
7086 }
7087
7088 else if (GET_CODE (XEXP (x, 1)) == REG)
7089 {
7090 rtx temp = gen_reg_rtx (Pmode);
7091 rtx val = force_operand (XEXP (x, 0), temp);
7092 if (val != temp)
7093 emit_move_insn (temp, val);
7094
7095 XEXP (x, 0) = temp;
7096 return x;
7097 }
7098 }
7099
7100 return x;
7101 }
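
/* A worked example of the canonicalization above:
     (plus (mult (reg A) (const_int 4)) (plus (reg B) (const_int 8)))
   is rewritten as
     (plus (plus (mult (reg A) (const_int 4)) (reg B)) (const_int 8)),
   i.e. the base + index*scale + displacement form that
   legitimate_address_p accepts.  */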
7102 \f
7103 /* Print an integer constant expression in assembler syntax. Addition
7104 and subtraction are the only arithmetic that may appear in these
7105 expressions. FILE is the stdio stream to write to, X is the rtx, and
7106 CODE is the operand print code from the output string. */
7107
7108 static void
7109 output_pic_addr_const (FILE *file, rtx x, int code)
7110 {
7111 char buf[256];
7112
7113 switch (GET_CODE (x))
7114 {
7115 case PC:
7116 gcc_assert (flag_pic);
7117 putc ('.', file);
7118 break;
7119
7120 case SYMBOL_REF:
7121 output_addr_const (file, x);
7122 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
7123 fputs ("@PLT", file);
7124 break;
7125
7126 case LABEL_REF:
7127 x = XEXP (x, 0);
7128 /* FALLTHRU */
7129 case CODE_LABEL:
7130 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
7131 assemble_name (asm_out_file, buf);
7132 break;
7133
7134 case CONST_INT:
7135 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7136 break;
7137
7138 case CONST:
7139 /* This used to output parentheses around the expression,
7140 but that does not work on the 386 (either ATT or BSD assembler). */
7141 output_pic_addr_const (file, XEXP (x, 0), code);
7142 break;
7143
7144 case CONST_DOUBLE:
7145 if (GET_MODE (x) == VOIDmode)
7146 {
7147 /* We can use %d if the number is <32 bits and positive. */
7148 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
7149 fprintf (file, "0x%lx%08lx",
7150 (unsigned long) CONST_DOUBLE_HIGH (x),
7151 (unsigned long) CONST_DOUBLE_LOW (x));
7152 else
7153 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
7154 }
7155 else
7156 /* We can't handle floating point constants;
7157 PRINT_OPERAND must handle them. */
7158 output_operand_lossage ("floating constant misused");
7159 break;
7160
7161 case PLUS:
7162 /* Some assemblers need integer constants to appear first. */
7163 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
7164 {
7165 output_pic_addr_const (file, XEXP (x, 0), code);
7166 putc ('+', file);
7167 output_pic_addr_const (file, XEXP (x, 1), code);
7168 }
7169 else
7170 {
7171 gcc_assert (GET_CODE (XEXP (x, 1)) == CONST_INT);
7172 output_pic_addr_const (file, XEXP (x, 1), code);
7173 putc ('+', file);
7174 output_pic_addr_const (file, XEXP (x, 0), code);
7175 }
7176 break;
7177
7178 case MINUS:
7179 if (!TARGET_MACHO)
7180 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
7181 output_pic_addr_const (file, XEXP (x, 0), code);
7182 putc ('-', file);
7183 output_pic_addr_const (file, XEXP (x, 1), code);
7184 if (!TARGET_MACHO)
7185 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
7186 break;
7187
7188 case UNSPEC:
7189 gcc_assert (XVECLEN (x, 0) == 1);
7190 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
7191 switch (XINT (x, 1))
7192 {
7193 case UNSPEC_GOT:
7194 fputs ("@GOT", file);
7195 break;
7196 case UNSPEC_GOTOFF:
7197 fputs ("@GOTOFF", file);
7198 break;
7199 case UNSPEC_GOTPCREL:
7200 fputs ("@GOTPCREL(%rip)", file);
7201 break;
7202 case UNSPEC_GOTTPOFF:
7203 /* FIXME: This might be @TPOFF in Sun ld too. */
7204 fputs ("@GOTTPOFF", file);
7205 break;
7206 case UNSPEC_TPOFF:
7207 fputs ("@TPOFF", file);
7208 break;
7209 case UNSPEC_NTPOFF:
7210 if (TARGET_64BIT)
7211 fputs ("@TPOFF", file);
7212 else
7213 fputs ("@NTPOFF", file);
7214 break;
7215 case UNSPEC_DTPOFF:
7216 fputs ("@DTPOFF", file);
7217 break;
7218 case UNSPEC_GOTNTPOFF:
7219 if (TARGET_64BIT)
7220 fputs ("@GOTTPOFF(%rip)", file);
7221 else
7222 fputs ("@GOTNTPOFF", file);
7223 break;
7224 case UNSPEC_INDNTPOFF:
7225 fputs ("@INDNTPOFF", file);
7226 break;
7227 default:
7228 output_operand_lossage ("invalid UNSPEC as operand");
7229 break;
7230 }
7231 break;
7232
7233 default:
7234 output_operand_lossage ("invalid expression as operand");
7235 }
7236 }
7237
7238 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7239 We need to emit DTP-relative relocations. */
7240
7241 static void
7242 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
7243 {
7244 fputs (ASM_LONG, file);
7245 output_addr_const (file, x);
7246 fputs ("@DTPOFF", file);
7247 switch (size)
7248 {
7249 case 4:
7250 break;
7251 case 8:
7252 fputs (", 0", file);
7253 break;
7254 default:
7255 gcc_unreachable ();
7256 }
7257 }
7258
7259 /* In the name of slightly smaller debug output, and to cater to
7260 general assembler lossage, recognize PIC+GOTOFF and turn it back
7261 into a direct symbol reference.
7262
7263 On Darwin, this is necessary to avoid a crash, because Darwin
7264 has a different PIC label for each routine but the DWARF debugging
7265 information is not associated with any particular routine, so it's
7266 necessary to remove references to the PIC label from RTL stored by
7267 the DWARF output code. */
7268
7269 static rtx
7270 ix86_delegitimize_address (rtx orig_x)
7271 {
7272 rtx x = orig_x;
7273 /* reg_addend is NULL or a multiple of some register. */
7274 rtx reg_addend = NULL_RTX;
7275 /* const_addend is NULL or a const_int. */
7276 rtx const_addend = NULL_RTX;
7277 /* This is the result, or NULL. */
7278 rtx result = NULL_RTX;
7279
7280 if (GET_CODE (x) == MEM)
7281 x = XEXP (x, 0);
7282
7283 if (TARGET_64BIT)
7284 {
7285 if (GET_CODE (x) != CONST
7286 || GET_CODE (XEXP (x, 0)) != UNSPEC
7287 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
7288 || GET_CODE (orig_x) != MEM)
7289 return orig_x;
7290 return XVECEXP (XEXP (x, 0), 0, 0);
7291 }
7292
7293 if (GET_CODE (x) != PLUS
7294 || GET_CODE (XEXP (x, 1)) != CONST)
7295 return orig_x;
7296
7297 if (GET_CODE (XEXP (x, 0)) == REG
7298 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
7299 /* %ebx + GOT/GOTOFF */
7300 ;
7301 else if (GET_CODE (XEXP (x, 0)) == PLUS)
7302 {
7303 /* %ebx + %reg * scale + GOT/GOTOFF */
7304 reg_addend = XEXP (x, 0);
7305 if (GET_CODE (XEXP (reg_addend, 0)) == REG
7306 && REGNO (XEXP (reg_addend, 0)) == PIC_OFFSET_TABLE_REGNUM)
7307 reg_addend = XEXP (reg_addend, 1);
7308 else if (GET_CODE (XEXP (reg_addend, 1)) == REG
7309 && REGNO (XEXP (reg_addend, 1)) == PIC_OFFSET_TABLE_REGNUM)
7310 reg_addend = XEXP (reg_addend, 0);
7311 else
7312 return orig_x;
7313 if (GET_CODE (reg_addend) != REG
7314 && GET_CODE (reg_addend) != MULT
7315 && GET_CODE (reg_addend) != ASHIFT)
7316 return orig_x;
7317 }
7318 else
7319 return orig_x;
7320
7321 x = XEXP (XEXP (x, 1), 0);
7322 if (GET_CODE (x) == PLUS
7323 && GET_CODE (XEXP (x, 1)) == CONST_INT)
7324 {
7325 const_addend = XEXP (x, 1);
7326 x = XEXP (x, 0);
7327 }
7328
7329 if (GET_CODE (x) == UNSPEC
7330 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
7331 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
7332 result = XVECEXP (x, 0, 0);
7333
7334 if (TARGET_MACHO && darwin_local_data_pic (x)
7335 && GET_CODE (orig_x) != MEM)
7336 result = XEXP (x, 0);
7337
7338 if (! result)
7339 return orig_x;
7340
7341 if (const_addend)
7342 result = gen_rtx_PLUS (Pmode, result, const_addend);
7343 if (reg_addend)
7344 result = gen_rtx_PLUS (Pmode, reg_addend, result);
7345 return result;
7346 }
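
/* For example, the 32-bit PIC address
     (plus (reg %ebx) (const (unspec [(symbol_ref "x")] UNSPEC_GOTOFF)))
   is turned back into plain (symbol_ref "x") by the code above.  */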
7347 \f
7348 static void
7349 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
7350 int fp, FILE *file)
7351 {
7352 const char *suffix;
7353
7354 if (mode == CCFPmode || mode == CCFPUmode)
7355 {
7356 enum rtx_code second_code, bypass_code;
7357 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
7358 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
7359 code = ix86_fp_compare_code_to_integer (code);
7360 mode = CCmode;
7361 }
7362 if (reverse)
7363 code = reverse_condition (code);
7364
7365 switch (code)
7366 {
7367 case EQ:
7368 suffix = "e";
7369 break;
7370 case NE:
7371 suffix = "ne";
7372 break;
7373 case GT:
7374 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
7375 suffix = "g";
7376 break;
7377 case GTU:
7378 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
7379 Those same assemblers have the same but opposite lossage on cmov. */
7380 gcc_assert (mode == CCmode);
7381 suffix = fp ? "nbe" : "a";
7382 break;
7383 case LT:
7384 switch (mode)
7385 {
7386 case CCNOmode:
7387 case CCGOCmode:
7388 suffix = "s";
7389 break;
7390
7391 case CCmode:
7392 case CCGCmode:
7393 suffix = "l";
7394 break;
7395
7396 default:
7397 gcc_unreachable ();
7398 }
7399 break;
7400 case LTU:
7401 gcc_assert (mode == CCmode);
7402 suffix = "b";
7403 break;
7404 case GE:
7405 switch (mode)
7406 {
7407 case CCNOmode:
7408 case CCGOCmode:
7409 suffix = "ns";
7410 break;
7411
7412 case CCmode:
7413 case CCGCmode:
7414 suffix = "ge";
7415 break;
7416
7417 default:
7418 gcc_unreachable ();
7419 }
7420 break;
7421 case GEU:
7422 /* ??? As above. */
7423 gcc_assert (mode == CCmode);
7424 suffix = fp ? "nb" : "ae";
7425 break;
7426 case LE:
7427 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
7428 suffix = "le";
7429 break;
7430 case LEU:
7431 gcc_assert (mode == CCmode);
7432 suffix = "be";
7433 break;
7434 case UNORDERED:
7435 suffix = fp ? "u" : "p";
7436 break;
7437 case ORDERED:
7438 suffix = fp ? "nu" : "np";
7439 break;
7440 default:
7441 gcc_unreachable ();
7442 }
7443 fputs (suffix, file);
7444 }
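
/* For example, a GT comparison in CCGCmode prints the suffix "g", and
   its reversed form (via the 'c' operand code) prints "le", so the same
   rtx can produce either "jg"- or "jle"-style mnemonics.  */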
7445
7446 /* Print the name of register X to FILE based on its machine mode and number.
7447 If CODE is 'w', pretend the mode is HImode.
7448 If CODE is 'b', pretend the mode is QImode.
7449 If CODE is 'k', pretend the mode is SImode.
7450 If CODE is 'q', pretend the mode is DImode.
7451 If CODE is 'h', pretend the reg is the 'high' byte register.
7452 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
7453
7454 void
7455 print_reg (rtx x, int code, FILE *file)
7456 {
7457 gcc_assert (REGNO (x) != ARG_POINTER_REGNUM
7458 && REGNO (x) != FRAME_POINTER_REGNUM
7459 && REGNO (x) != FLAGS_REG
7460 && REGNO (x) != FPSR_REG);
7461
7462 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
7463 putc ('%', file);
7464
7465 if (code == 'w' || MMX_REG_P (x))
7466 code = 2;
7467 else if (code == 'b')
7468 code = 1;
7469 else if (code == 'k')
7470 code = 4;
7471 else if (code == 'q')
7472 code = 8;
7473 else if (code == 'y')
7474 code = 3;
7475 else if (code == 'h')
7476 code = 0;
7477 else
7478 code = GET_MODE_SIZE (GET_MODE (x));
7479
7480 /* Irritatingly, AMD extended registers use a different naming convention
7481 from the normal registers. */
7482 if (REX_INT_REG_P (x))
7483 {
7484 gcc_assert (TARGET_64BIT);
7485 switch (code)
7486 {
7487 case 0:
7488 error ("extended registers have no high halves");
7489 break;
7490 case 1:
7491 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
7492 break;
7493 case 2:
7494 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
7495 break;
7496 case 4:
7497 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
7498 break;
7499 case 8:
7500 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
7501 break;
7502 default:
7503 error ("unsupported operand size for extended register");
7504 break;
7505 }
7506 return;
7507 }
7508 switch (code)
7509 {
7510 case 3:
7511 if (STACK_TOP_P (x))
7512 {
7513 fputs ("st(0)", file);
7514 break;
7515 }
7516 /* FALLTHRU */
7517 case 8:
7518 case 4:
7519 case 12:
7520 if (! ANY_FP_REG_P (x))
7521 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
7522 /* FALLTHRU */
7523 case 16:
7524 case 2:
7525 normal:
7526 fputs (hi_reg_name[REGNO (x)], file);
7527 break;
7528 case 1:
7529 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
7530 goto normal;
7531 fputs (qi_reg_name[REGNO (x)], file);
7532 break;
7533 case 0:
7534 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
7535 goto normal;
7536 fputs (qi_high_reg_name[REGNO (x)], file);
7537 break;
7538 default:
7539 gcc_unreachable ();
7540 }
7541 }
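
/* For example, with code 'k' (SImode) register 0 prints as "eax" while
   the first extended register prints as "r8d"; with code 'h' register 0
   prints as "ah".  */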
7542
7543 /* Locate some local-dynamic symbol still in use by this function
7544 so that we can print its name in some tls_local_dynamic_base
7545 pattern. */
7546
7547 static const char *
7548 get_some_local_dynamic_name (void)
7549 {
7550 rtx insn;
7551
7552 if (cfun->machine->some_ld_name)
7553 return cfun->machine->some_ld_name;
7554
7555 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
7556 if (INSN_P (insn)
7557 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
7558 return cfun->machine->some_ld_name;
7559
7560 gcc_unreachable ();
7561 }
7562
7563 static int
7564 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
7565 {
7566 rtx x = *px;
7567
7568 if (GET_CODE (x) == SYMBOL_REF
7569 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
7570 {
7571 cfun->machine->some_ld_name = XSTR (x, 0);
7572 return 1;
7573 }
7574
7575 return 0;
7576 }
7577
7578 /* Meaning of CODE:
7579 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
7580 C -- print opcode suffix for set/cmov insn.
7581 c -- like C, but print reversed condition
7582 F,f -- likewise, but for floating-point.
7583 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
7584 otherwise nothing
7585 R -- print the prefix for register names.
7586 z -- print the opcode suffix for the size of the current operand.
7587 * -- print a star (in certain assembler syntax)
7588 A -- print an absolute memory reference.
7589 w -- print the operand as if it's a "word" (HImode) even if it isn't.
7590 s -- print a shift double count, followed by the assembler's argument
7591 delimiter.
7592 b -- print the QImode name of the register for the indicated operand.
7593 %b0 would print %al if operands[0] is reg 0.
7594 w -- likewise, print the HImode name of the register.
7595 k -- likewise, print the SImode name of the register.
7596 q -- likewise, print the DImode name of the register.
7597 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
7598 y -- print "st(0)" instead of "st" as a register.
7599 D -- print condition for SSE cmp instruction.
7600 P -- if PIC, print an @PLT suffix.
7601 X -- don't print any sort of PIC '@' suffix for a symbol.
7602 & -- print some in-use local-dynamic symbol name.
7603 H -- print a memory address offset by 8; used for sse high-parts
7604 */
7605
7606 void
7607 print_operand (FILE *file, rtx x, int code)
7608 {
7609 if (code)
7610 {
7611 switch (code)
7612 {
7613 case '*':
7614 if (ASSEMBLER_DIALECT == ASM_ATT)
7615 putc ('*', file);
7616 return;
7617
7618 case '&':
7619 assemble_name (file, get_some_local_dynamic_name ());
7620 return;
7621
7622 case 'A':
7623 switch (ASSEMBLER_DIALECT)
7624 {
7625 case ASM_ATT:
7626 putc ('*', file);
7627 break;
7628
7629 case ASM_INTEL:
7630 /* Intel syntax. For absolute addresses, registers should not
7631 be surrounded by brackets. */
7632 if (GET_CODE (x) != REG)
7633 {
7634 putc ('[', file);
7635 PRINT_OPERAND (file, x, 0);
7636 putc (']', file);
7637 return;
7638 }
7639 break;
7640
7641 default:
7642 gcc_unreachable ();
7643 }
7644
7645 PRINT_OPERAND (file, x, 0);
7646 return;
7647
7648
7649 case 'L':
7650 if (ASSEMBLER_DIALECT == ASM_ATT)
7651 putc ('l', file);
7652 return;
7653
7654 case 'W':
7655 if (ASSEMBLER_DIALECT == ASM_ATT)
7656 putc ('w', file);
7657 return;
7658
7659 case 'B':
7660 if (ASSEMBLER_DIALECT == ASM_ATT)
7661 putc ('b', file);
7662 return;
7663
7664 case 'Q':
7665 if (ASSEMBLER_DIALECT == ASM_ATT)
7666 putc ('l', file);
7667 return;
7668
7669 case 'S':
7670 if (ASSEMBLER_DIALECT == ASM_ATT)
7671 putc ('s', file);
7672 return;
7673
7674 case 'T':
7675 if (ASSEMBLER_DIALECT == ASM_ATT)
7676 putc ('t', file);
7677 return;
7678
7679 case 'z':
7680 /* 387 opcodes don't get size suffixes if the operands are
7681 registers. */
7682 if (STACK_REG_P (x))
7683 return;
7684
7685 /* Likewise if using Intel opcodes. */
7686 if (ASSEMBLER_DIALECT == ASM_INTEL)
7687 return;
7688
7689 /* Derive the opcode size suffix from the size of the operand. */
7690 switch (GET_MODE_SIZE (GET_MODE (x)))
7691 {
7692 case 2:
7693 #ifdef HAVE_GAS_FILDS_FISTS
7694 putc ('s', file);
7695 #endif
7696 return;
7697
7698 case 4:
7699 if (GET_MODE (x) == SFmode)
7700 {
7701 putc ('s', file);
7702 return;
7703 }
7704 else
7705 putc ('l', file);
7706 return;
7707
7708 case 12:
7709 case 16:
7710 putc ('t', file);
7711 return;
7712
7713 case 8:
7714 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
7715 {
7716 #ifdef GAS_MNEMONICS
7717 putc ('q', file);
7718 #else
7719 putc ('l', file);
7720 putc ('l', file);
7721 #endif
7722 }
7723 else
7724 putc ('l', file);
7725 return;
7726
7727 default:
7728 gcc_unreachable ();
7729 }
7730
7731 case 'b':
7732 case 'w':
7733 case 'k':
7734 case 'q':
7735 case 'h':
7736 case 'y':
7737 case 'X':
7738 case 'P':
7739 break;
7740
7741 case 's':
7742 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
7743 {
7744 PRINT_OPERAND (file, x, 0);
7745 putc (',', file);
7746 }
7747 return;
7748
7749 case 'D':
7750 /* A little bit of brain damage here. The SSE compare instructions
7751 use completely different names for the comparisons than the
7752 fp conditional moves do. */
7753 switch (GET_CODE (x))
7754 {
7755 case EQ:
7756 case UNEQ:
7757 fputs ("eq", file);
7758 break;
7759 case LT:
7760 case UNLT:
7761 fputs ("lt", file);
7762 break;
7763 case LE:
7764 case UNLE:
7765 fputs ("le", file);
7766 break;
7767 case UNORDERED:
7768 fputs ("unord", file);
7769 break;
7770 case NE:
7771 case LTGT:
7772 fputs ("neq", file);
7773 break;
7774 case UNGE:
7775 case GE:
7776 fputs ("nlt", file);
7777 break;
7778 case UNGT:
7779 case GT:
7780 fputs ("nle", file);
7781 break;
7782 case ORDERED:
7783 fputs ("ord", file);
7784 break;
7785 default:
7786 gcc_unreachable ();
7787 }
7788 return;
7789 case 'O':
7790 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7791 if (ASSEMBLER_DIALECT == ASM_ATT)
7792 {
7793 switch (GET_MODE (x))
7794 {
7795 case HImode: putc ('w', file); break;
7796 case SImode:
7797 case SFmode: putc ('l', file); break;
7798 case DImode:
7799 case DFmode: putc ('q', file); break;
7800 default: gcc_unreachable ();
7801 }
7802 putc ('.', file);
7803 }
7804 #endif
7805 return;
7806 case 'C':
7807 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
7808 return;
7809 case 'F':
7810 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7811 if (ASSEMBLER_DIALECT == ASM_ATT)
7812 putc ('.', file);
7813 #endif
7814 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
7815 return;
7816
7817 /* Like above, but reverse condition */
7818 case 'c':
7819 /* Check to see if argument to %c is really a constant
7820 and not a condition code which needs to be reversed. */
7821 if (!COMPARISON_P (x))
7822 {
7823 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
7824 return;
7825 }
7826 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
7827 return;
7828 case 'f':
7829 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7830 if (ASSEMBLER_DIALECT == ASM_ATT)
7831 putc ('.', file);
7832 #endif
7833 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
7834 return;
7835
7836 case 'H':
7837 /* It doesn't actually matter what mode we use here, as we're
7838 only going to use this for printing. */
7839 x = adjust_address_nv (x, DImode, 8);
7840 break;
7841
7842 case '+':
7843 {
7844 rtx x;
7845
7846 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
7847 return;
7848
7849 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
7850 if (x)
7851 {
7852 int pred_val = INTVAL (XEXP (x, 0));
7853
7854 if (pred_val < REG_BR_PROB_BASE * 45 / 100
7855 || pred_val > REG_BR_PROB_BASE * 55 / 100)
7856 {
7857 int taken = pred_val > REG_BR_PROB_BASE / 2;
7858 int cputaken = final_forward_branch_p (current_output_insn) == 0;
7859
7860 /* Emit hints only when the default branch prediction
7861 heuristics would fail. */
7862 if (taken != cputaken)
7863 {
7864 /* We use 3e (DS) prefix for taken branches and
7865 2e (CS) prefix for not taken branches. */
7866 if (taken)
7867 fputs ("ds ; ", file);
7868 else
7869 fputs ("cs ; ", file);
7870 }
7871 }
7872 }
7873 return;
7874 }
7875 default:
7876 output_operand_lossage ("invalid operand code '%c'", code);
7877 }
7878 }
7879
7880 if (GET_CODE (x) == REG)
7881 print_reg (x, code, file);
7882
7883 else if (GET_CODE (x) == MEM)
7884 {
7885 /* No `byte ptr' prefix for call instructions. */
7886 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
7887 {
7888 const char * size;
7889 switch (GET_MODE_SIZE (GET_MODE (x)))
7890 {
7891 case 1: size = "BYTE"; break;
7892 case 2: size = "WORD"; break;
7893 case 4: size = "DWORD"; break;
7894 case 8: size = "QWORD"; break;
7895 case 12: size = "XWORD"; break;
7896 case 16: size = "XMMWORD"; break;
7897 default:
7898 gcc_unreachable ();
7899 }
7900
7901 /* Check for explicit size override (codes 'b', 'w' and 'k'). */
7902 if (code == 'b')
7903 size = "BYTE";
7904 else if (code == 'w')
7905 size = "WORD";
7906 else if (code == 'k')
7907 size = "DWORD";
7908
7909 fputs (size, file);
7910 fputs (" PTR ", file);
7911 }
7912
7913 x = XEXP (x, 0);
7914 /* Avoid (%rip) for call operands. */
7915 if (CONSTANT_ADDRESS_P (x) && code == 'P'
7916 && GET_CODE (x) != CONST_INT)
7917 output_addr_const (file, x);
7918 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
7919 output_operand_lossage ("invalid constraints for operand");
7920 else
7921 output_address (x);
7922 }
7923
7924 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
7925 {
7926 REAL_VALUE_TYPE r;
7927 long l;
7928
7929 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7930 REAL_VALUE_TO_TARGET_SINGLE (r, l);
7931
7932 if (ASSEMBLER_DIALECT == ASM_ATT)
7933 putc ('$', file);
7934 fprintf (file, "0x%08lx", l);
7935 }
7936
7937 /* These float cases don't actually occur as immediate operands. */
7938 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
7939 {
7940 char dstr[30];
7941
7942 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7943 fprintf (file, "%s", dstr);
7944 }
7945
7946 else if (GET_CODE (x) == CONST_DOUBLE
7947 && GET_MODE (x) == XFmode)
7948 {
7949 char dstr[30];
7950
7951 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7952 fprintf (file, "%s", dstr);
7953 }
7954
7955 else
7956 {
7957 /* We have patterns that allow zero sets of memory, for instance.
7958 In 64-bit mode, we should probably support all 8-byte vectors,
7959 since we can in fact encode that into an immediate. */
7960 if (GET_CODE (x) == CONST_VECTOR)
7961 {
7962 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
7963 x = const0_rtx;
7964 }
7965
7966 if (code != 'P')
7967 {
7968 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
7969 {
7970 if (ASSEMBLER_DIALECT == ASM_ATT)
7971 putc ('$', file);
7972 }
7973 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
7974 || GET_CODE (x) == LABEL_REF)
7975 {
7976 if (ASSEMBLER_DIALECT == ASM_ATT)
7977 putc ('$', file);
7978 else
7979 fputs ("OFFSET FLAT:", file);
7980 }
7981 }
7982 if (GET_CODE (x) == CONST_INT)
7983 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7984 else if (flag_pic)
7985 output_pic_addr_const (file, x, code);
7986 else
7987 output_addr_const (file, x);
7988 }
7989 }
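
/* As an example, a template such as "fld%z1\t%1" applied to a DFmode
   memory operand prints "fldl" plus the address in AT&T syntax, while
   in Intel syntax '%z' prints nothing and the memory operand itself is
   prefixed with "QWORD PTR" by the code above.  */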
7990 \f
7991 /* Print a memory operand whose address is ADDR. */
7992
7993 void
7994 print_operand_address (FILE *file, rtx addr)
7995 {
7996 struct ix86_address parts;
7997 rtx base, index, disp;
7998 int scale;
7999 int ok = ix86_decompose_address (addr, &parts);
8000
8001 gcc_assert (ok);
8002
8003 base = parts.base;
8004 index = parts.index;
8005 disp = parts.disp;
8006 scale = parts.scale;
8007
8008 switch (parts.seg)
8009 {
8010 case SEG_DEFAULT:
8011 break;
8012 case SEG_FS:
8013 case SEG_GS:
8014 if (USER_LABEL_PREFIX[0] == 0)
8015 putc ('%', file);
8016 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
8017 break;
8018 default:
8019 gcc_unreachable ();
8020 }
8021
8022 if (!base && !index)
8023 {
8024 /* Displacement-only addresses require special attention. */
8025
8026 if (GET_CODE (disp) == CONST_INT)
8027 {
8028 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
8029 {
8030 if (USER_LABEL_PREFIX[0] == 0)
8031 putc ('%', file);
8032 fputs ("ds:", file);
8033 }
8034 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
8035 }
8036 else if (flag_pic)
8037 output_pic_addr_const (file, disp, 0);
8038 else
8039 output_addr_const (file, disp);
8040
8041 /* Use one byte shorter RIP relative addressing for 64bit mode. */
8042 if (TARGET_64BIT)
8043 {
8044 if (GET_CODE (disp) == CONST
8045 && GET_CODE (XEXP (disp, 0)) == PLUS
8046 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
8047 disp = XEXP (XEXP (disp, 0), 0);
8048 if (GET_CODE (disp) == LABEL_REF
8049 || (GET_CODE (disp) == SYMBOL_REF
8050 && SYMBOL_REF_TLS_MODEL (disp) == 0))
8051 fputs ("(%rip)", file);
8052 }
8053 }
8054 else
8055 {
8056 if (ASSEMBLER_DIALECT == ASM_ATT)
8057 {
8058 if (disp)
8059 {
8060 if (flag_pic)
8061 output_pic_addr_const (file, disp, 0);
8062 else if (GET_CODE (disp) == LABEL_REF)
8063 output_asm_label (disp);
8064 else
8065 output_addr_const (file, disp);
8066 }
8067
8068 putc ('(', file);
8069 if (base)
8070 print_reg (base, 0, file);
8071 if (index)
8072 {
8073 putc (',', file);
8074 print_reg (index, 0, file);
8075 if (scale != 1)
8076 fprintf (file, ",%d", scale);
8077 }
8078 putc (')', file);
8079 }
8080 else
8081 {
8082 rtx offset = NULL_RTX;
8083
8084 if (disp)
8085 {
8086 /* Pull out the offset of a symbol; print any symbol itself. */
8087 if (GET_CODE (disp) == CONST
8088 && GET_CODE (XEXP (disp, 0)) == PLUS
8089 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
8090 {
8091 offset = XEXP (XEXP (disp, 0), 1);
8092 disp = gen_rtx_CONST (VOIDmode,
8093 XEXP (XEXP (disp, 0), 0));
8094 }
8095
8096 if (flag_pic)
8097 output_pic_addr_const (file, disp, 0);
8098 else if (GET_CODE (disp) == LABEL_REF)
8099 output_asm_label (disp);
8100 else if (GET_CODE (disp) == CONST_INT)
8101 offset = disp;
8102 else
8103 output_addr_const (file, disp);
8104 }
8105
8106 putc ('[', file);
8107 if (base)
8108 {
8109 print_reg (base, 0, file);
8110 if (offset)
8111 {
8112 if (INTVAL (offset) >= 0)
8113 putc ('+', file);
8114 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8115 }
8116 }
8117 else if (offset)
8118 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8119 else
8120 putc ('0', file);
8121
8122 if (index)
8123 {
8124 putc ('+', file);
8125 print_reg (index, 0, file);
8126 if (scale != 1)
8127 fprintf (file, "*%d", scale);
8128 }
8129 putc (']', file);
8130 }
8131 }
8132 }
8133
8134 bool
8135 output_addr_const_extra (FILE *file, rtx x)
8136 {
8137 rtx op;
8138
8139 if (GET_CODE (x) != UNSPEC)
8140 return false;
8141
8142 op = XVECEXP (x, 0, 0);
8143 switch (XINT (x, 1))
8144 {
8145 case UNSPEC_GOTTPOFF:
8146 output_addr_const (file, op);
8147 /* FIXME: This might be @TPOFF in Sun ld. */
8148 fputs ("@GOTTPOFF", file);
8149 break;
8150 case UNSPEC_TPOFF:
8151 output_addr_const (file, op);
8152 fputs ("@TPOFF", file);
8153 break;
8154 case UNSPEC_NTPOFF:
8155 output_addr_const (file, op);
8156 if (TARGET_64BIT)
8157 fputs ("@TPOFF", file);
8158 else
8159 fputs ("@NTPOFF", file);
8160 break;
8161 case UNSPEC_DTPOFF:
8162 output_addr_const (file, op);
8163 fputs ("@DTPOFF", file);
8164 break;
8165 case UNSPEC_GOTNTPOFF:
8166 output_addr_const (file, op);
8167 if (TARGET_64BIT)
8168 fputs ("@GOTTPOFF(%rip)", file);
8169 else
8170 fputs ("@GOTNTPOFF", file);
8171 break;
8172 case UNSPEC_INDNTPOFF:
8173 output_addr_const (file, op);
8174 fputs ("@INDNTPOFF", file);
8175 break;
8176
8177 default:
8178 return false;
8179 }
8180
8181 return true;
8182 }
8183 \f
8184 /* Split one or more DImode RTL references into pairs of SImode
8185 references. The RTL can be REG, offsettable MEM, integer constant, or
8186 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
8187 split and "num" is its length. lo_half and hi_half are output arrays
8188 that parallel "operands". */
8189
8190 void
8191 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8192 {
8193 while (num--)
8194 {
8195 rtx op = operands[num];
8196
8197 /* simplify_subreg refuses to split volatile memory addresses,
8198 but we still have to handle them. */
8199 if (GET_CODE (op) == MEM)
8200 {
8201 lo_half[num] = adjust_address (op, SImode, 0);
8202 hi_half[num] = adjust_address (op, SImode, 4);
8203 }
8204 else
8205 {
8206 lo_half[num] = simplify_gen_subreg (SImode, op,
8207 GET_MODE (op) == VOIDmode
8208 ? DImode : GET_MODE (op), 0);
8209 hi_half[num] = simplify_gen_subreg (SImode, op,
8210 GET_MODE (op) == VOIDmode
8211 ? DImode : GET_MODE (op), 4);
8212 }
8213 }
8214 }
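
/* For example, (reg:DI 60) is split into (subreg:SI (reg:DI 60) 0) and
   (subreg:SI (reg:DI 60) 4), and a DImode MEM into two SImode MEMs at
   offsets 0 and 4 (low and high words on this little-endian target).  */
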
8215 /* Split one or more TImode RTL references into pairs of DImode
8216 references. The RTL can be REG, offsettable MEM, integer constant, or
8217 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
8218 split and "num" is its length. lo_half and hi_half are output arrays
8219 that parallel "operands". */
8220
8221 void
8222 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8223 {
8224 while (num--)
8225 {
8226 rtx op = operands[num];
8227
8228 /* simplify_subreg refuses to split volatile memory addresses, but we
8229 still have to handle them. */
8230 if (GET_CODE (op) == MEM)
8231 {
8232 lo_half[num] = adjust_address (op, DImode, 0);
8233 hi_half[num] = adjust_address (op, DImode, 8);
8234 }
8235 else
8236 {
8237 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
8238 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
8239 }
8240 }
8241 }
8242 \f
8243 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
8244 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
8245 is the expression of the binary operation. The output may either be
8246 emitted here, or returned to the caller, like all output_* functions.
8247
8248 There is no guarantee that the operands are the same mode, as they
8249 might be within FLOAT or FLOAT_EXTEND expressions. */
8250
8251 #ifndef SYSV386_COMPAT
8252 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
8253 wants to fix the assemblers because that causes incompatibility
8254 with gcc. No-one wants to fix gcc because that causes
8255 incompatibility with assemblers... You can use the option of
8256 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
8257 #define SYSV386_COMPAT 1
8258 #endif
8259
8260 const char *
8261 output_387_binary_op (rtx insn, rtx *operands)
8262 {
8263 static char buf[30];
8264 const char *p;
8265 const char *ssep;
8266 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
8267
8268 #ifdef ENABLE_CHECKING
8269 /* Even if we do not want to check the inputs, this documents the input
8270 constraints, which helps in understanding the following code. */
8271 if (STACK_REG_P (operands[0])
8272 && ((REG_P (operands[1])
8273 && REGNO (operands[0]) == REGNO (operands[1])
8274 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
8275 || (REG_P (operands[2])
8276 && REGNO (operands[0]) == REGNO (operands[2])
8277 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
8278 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
8279 ; /* ok */
8280 else
8281 gcc_assert (is_sse);
8282 #endif
8283
8284 switch (GET_CODE (operands[3]))
8285 {
8286 case PLUS:
8287 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8288 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8289 p = "fiadd";
8290 else
8291 p = "fadd";
8292 ssep = "add";
8293 break;
8294
8295 case MINUS:
8296 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8297 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8298 p = "fisub";
8299 else
8300 p = "fsub";
8301 ssep = "sub";
8302 break;
8303
8304 case MULT:
8305 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8306 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8307 p = "fimul";
8308 else
8309 p = "fmul";
8310 ssep = "mul";
8311 break;
8312
8313 case DIV:
8314 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8315 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8316 p = "fidiv";
8317 else
8318 p = "fdiv";
8319 ssep = "div";
8320 break;
8321
8322 default:
8323 gcc_unreachable ();
8324 }
8325
8326 if (is_sse)
8327 {
8328 strcpy (buf, ssep);
8329 if (GET_MODE (operands[0]) == SFmode)
8330 strcat (buf, "ss\t{%2, %0|%0, %2}");
8331 else
8332 strcat (buf, "sd\t{%2, %0|%0, %2}");
8333 return buf;
8334 }
8335 strcpy (buf, p);
8336
8337 switch (GET_CODE (operands[3]))
8338 {
8339 case MULT:
8340 case PLUS:
8341 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
8342 {
8343 rtx temp = operands[2];
8344 operands[2] = operands[1];
8345 operands[1] = temp;
8346 }
8347
8348 /* We know operands[0] == operands[1]. */
8349
8350 if (GET_CODE (operands[2]) == MEM)
8351 {
8352 p = "%z2\t%2";
8353 break;
8354 }
8355
8356 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8357 {
8358 if (STACK_TOP_P (operands[0]))
8359 /* How is it that we are storing to a dead operand[2]?
8360 Well, presumably operands[1] is dead too. We can't
8361 store the result to st(0) as st(0) gets popped on this
8362 instruction. Instead store to operands[2] (which I
8363 think has to be st(1)). st(1) will be popped later.
8364 gcc <= 2.8.1 didn't have this check and generated
8365 assembly code that the Unixware assembler rejected. */
8366 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8367 else
8368 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8369 break;
8370 }
8371
8372 if (STACK_TOP_P (operands[0]))
8373 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8374 else
8375 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8376 break;
8377
8378 case MINUS:
8379 case DIV:
8380 if (GET_CODE (operands[1]) == MEM)
8381 {
8382 p = "r%z1\t%1";
8383 break;
8384 }
8385
8386 if (GET_CODE (operands[2]) == MEM)
8387 {
8388 p = "%z2\t%2";
8389 break;
8390 }
8391
8392 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8393 {
8394 #if SYSV386_COMPAT
8395 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
8396 derived assemblers, confusingly reverse the direction of
8397 the operation for fsub{r} and fdiv{r} when the
8398 destination register is not st(0). The Intel assembler
8399 doesn't have this brain damage. Read !SYSV386_COMPAT to
8400 figure out what the hardware really does. */
8401 if (STACK_TOP_P (operands[0]))
8402 p = "{p\t%0, %2|rp\t%2, %0}";
8403 else
8404 p = "{rp\t%2, %0|p\t%0, %2}";
8405 #else
8406 if (STACK_TOP_P (operands[0]))
8407 /* As above for fmul/fadd, we can't store to st(0). */
8408 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8409 else
8410 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8411 #endif
8412 break;
8413 }
8414
8415 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
8416 {
8417 #if SYSV386_COMPAT
8418 if (STACK_TOP_P (operands[0]))
8419 p = "{rp\t%0, %1|p\t%1, %0}";
8420 else
8421 p = "{p\t%1, %0|rp\t%0, %1}";
8422 #else
8423 if (STACK_TOP_P (operands[0]))
8424 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
8425 else
8426 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
8427 #endif
8428 break;
8429 }
8430
8431 if (STACK_TOP_P (operands[0]))
8432 {
8433 if (STACK_TOP_P (operands[1]))
8434 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8435 else
8436 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
8437 break;
8438 }
8439 else if (STACK_TOP_P (operands[1]))
8440 {
8441 #if SYSV386_COMPAT
8442 p = "{\t%1, %0|r\t%0, %1}";
8443 #else
8444 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
8445 #endif
8446 }
8447 else
8448 {
8449 #if SYSV386_COMPAT
8450 p = "{r\t%2, %0|\t%0, %2}";
8451 #else
8452 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8453 #endif
8454 }
8455 break;
8456
8457 default:
8458 gcc_unreachable ();
8459 }
8460
8461 strcat (buf, p);
8462 return buf;
8463 }
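
/* For instance, computing st(0) = st(0) + st(2) with the second operand
   staying live produces "fadd %st(2), %st" in AT&T syntax from the
   templates built above; the text after the '|' in each template is the
   Intel-syntax variant.  */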
8464
8465 /* Return needed mode for entity in optimize_mode_switching pass. */
8466
8467 int
8468 ix86_mode_needed (int entity, rtx insn)
8469 {
8470 enum attr_i387_cw mode;
8471
8472 /* The mode UNINITIALIZED is used to store the control word after a
8473 function call or ASM pattern. The mode ANY specifies that the function
8474 has no requirements on the control word and makes no changes to the
8475 bits we are interested in. */
8476
8477 if (CALL_P (insn)
8478 || (NONJUMP_INSN_P (insn)
8479 && (asm_noperands (PATTERN (insn)) >= 0
8480 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
8481 return I387_CW_UNINITIALIZED;
8482
8483 if (recog_memoized (insn) < 0)
8484 return I387_CW_ANY;
8485
8486 mode = get_attr_i387_cw (insn);
8487
8488 switch (entity)
8489 {
8490 case I387_TRUNC:
8491 if (mode == I387_CW_TRUNC)
8492 return mode;
8493 break;
8494
8495 case I387_FLOOR:
8496 if (mode == I387_CW_FLOOR)
8497 return mode;
8498 break;
8499
8500 case I387_CEIL:
8501 if (mode == I387_CW_CEIL)
8502 return mode;
8503 break;
8504
8505 case I387_MASK_PM:
8506 if (mode == I387_CW_MASK_PM)
8507 return mode;
8508 break;
8509
8510 default:
8511 gcc_unreachable ();
8512 }
8513
8514 return I387_CW_ANY;
8515 }
8516
8517 /* Output code to initialize control word copies used by trunc?f?i and
8518 rounding patterns. MODE selects the rounding mode to prepare; the
8519 current control word is saved and a modified copy is stored in a
8520 dedicated stack slot. */
8520
8521 void
8522 emit_i387_cw_initialization (int mode)
8523 {
8524 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
8525 rtx new_mode;
8526
8527 int slot;
8528
8529 rtx reg = gen_reg_rtx (HImode);
8530
8531 emit_insn (gen_x86_fnstcw_1 (stored_mode));
8532 emit_move_insn (reg, stored_mode);
8533
8534 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
8535 {
8536 switch (mode)
8537 {
8538 case I387_CW_TRUNC:
8539 /* round toward zero (truncate) */
8540 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
8541 slot = SLOT_CW_TRUNC;
8542 break;
8543
8544 case I387_CW_FLOOR:
8545 /* round down toward -oo */
8546 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
8547 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
8548 slot = SLOT_CW_FLOOR;
8549 break;
8550
8551 case I387_CW_CEIL:
8552 /* round up toward +oo */
8553 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
8554 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
8555 slot = SLOT_CW_CEIL;
8556 break;
8557
8558 case I387_CW_MASK_PM:
8559 /* mask precision exception for nearbyint() */
8560 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
8561 slot = SLOT_CW_MASK_PM;
8562 break;
8563
8564 default:
8565 gcc_unreachable ();
8566 }
8567 }
8568 else
8569 {
8570 switch (mode)
8571 {
8572 case I387_CW_TRUNC:
8573 /* round toward zero (truncate) */
8574 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
8575 slot = SLOT_CW_TRUNC;
8576 break;
8577
8578 case I387_CW_FLOOR:
8579 /* round down toward -oo */
8580 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
8581 slot = SLOT_CW_FLOOR;
8582 break;
8583
8584 case I387_CW_CEIL:
8585 /* round up toward +oo */
8586 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
8587 slot = SLOT_CW_CEIL;
8588 break;
8589
8590 case I387_CW_MASK_PM:
8591 /* mask precision exception for nearbyint() */
8592 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
8593 slot = SLOT_CW_MASK_PM;
8594 break;
8595
8596 default:
8597 gcc_unreachable ();
8598 }
8599 }
8600
8601 gcc_assert (slot < MAX_386_STACK_LOCALS);
8602
8603 new_mode = assign_386_stack_local (HImode, slot);
8604 emit_move_insn (new_mode, reg);
8605 }
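
/* The masks above follow the x87 control word layout: bits 10-11 form
   the rounding-control field (0x0400 rounds down, 0x0800 rounds up,
   0x0c00 truncates toward zero) and bit 5 (0x0020) masks the precision
   exception, as needed for nearbyint.  */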
8606
8607 /* Output code for INSN to convert a float to a signed int. OPERANDS
8608 are the insn operands. The output may be [HSD]Imode and the input
8609 operand may be [SDX]Fmode. */
8610
8611 const char *
8612 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
8613 {
8614 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8615 int dimode_p = GET_MODE (operands[0]) == DImode;
8616 int round_mode = get_attr_i387_cw (insn);
8617
8618 /* Jump through a hoop or two for DImode, since the hardware has no
8619 non-popping instruction. We used to do this a different way, but
8620 that was somewhat fragile and broke with post-reload splitters. */
8621 if ((dimode_p || fisttp) && !stack_top_dies)
8622 output_asm_insn ("fld\t%y1", operands);
8623
8624 gcc_assert (STACK_TOP_P (operands[1]));
8625 gcc_assert (GET_CODE (operands[0]) == MEM);
8626
8627 if (fisttp)
8628 output_asm_insn ("fisttp%z0\t%0", operands);
8629 else
8630 {
8631 if (round_mode != I387_CW_ANY)
8632 output_asm_insn ("fldcw\t%3", operands);
8633 if (stack_top_dies || dimode_p)
8634 output_asm_insn ("fistp%z0\t%0", operands);
8635 else
8636 output_asm_insn ("fist%z0\t%0", operands);
8637 if (round_mode != I387_CW_ANY)
8638 output_asm_insn ("fldcw\t%2", operands);
8639 }
8640
8641 return "";
8642 }
8643
8644 /* Output code for x87 ffreep insn. The OPNO argument, which may only
8645 have the values zero or one, indicates the ffreep insn's operand
8646 from the OPERANDS array. */
8647
8648 static const char *
8649 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
8650 {
8651 if (TARGET_USE_FFREEP)
8652 #if HAVE_AS_IX86_FFREEP
8653 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
8654 #else
8655 switch (REGNO (operands[opno]))
8656 {
8657 case FIRST_STACK_REG + 0: return ".word\t0xc0df";
8658 case FIRST_STACK_REG + 1: return ".word\t0xc1df";
8659 case FIRST_STACK_REG + 2: return ".word\t0xc2df";
8660 case FIRST_STACK_REG + 3: return ".word\t0xc3df";
8661 case FIRST_STACK_REG + 4: return ".word\t0xc4df";
8662 case FIRST_STACK_REG + 5: return ".word\t0xc5df";
8663 case FIRST_STACK_REG + 6: return ".word\t0xc6df";
8664 case FIRST_STACK_REG + 7: return ".word\t0xc7df";
8665 }
8666 #endif
8667
8668 return opno ? "fstp\t%y1" : "fstp\t%y0";
8669 }
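
/* The .word constants above are the raw encodings of
   "ffreep %st(0)" .. "ffreep %st(7)" (opcode bytes 0xdf 0xc0+i, emitted
   little-endian), used when the assembler lacks the ffreep mnemonic.  */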
8670
8671
8672 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
8673 should be used. UNORDERED_P is true when fucom should be used. */
8674
8675 const char *
8676 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
8677 {
8678 int stack_top_dies;
8679 rtx cmp_op0, cmp_op1;
8680 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
8681
8682 if (eflags_p)
8683 {
8684 cmp_op0 = operands[0];
8685 cmp_op1 = operands[1];
8686 }
8687 else
8688 {
8689 cmp_op0 = operands[1];
8690 cmp_op1 = operands[2];
8691 }
8692
8693 if (is_sse)
8694 {
8695 if (GET_MODE (operands[0]) == SFmode)
8696 if (unordered_p)
8697 return "ucomiss\t{%1, %0|%0, %1}";
8698 else
8699 return "comiss\t{%1, %0|%0, %1}";
8700 else
8701 if (unordered_p)
8702 return "ucomisd\t{%1, %0|%0, %1}";
8703 else
8704 return "comisd\t{%1, %0|%0, %1}";
8705 }
8706
8707 gcc_assert (STACK_TOP_P (cmp_op0));
8708
8709 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8710
8711 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
8712 {
8713 if (stack_top_dies)
8714 {
8715 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
8716 return output_387_ffreep (operands, 1);
8717 }
8718 else
8719 return "ftst\n\tfnstsw\t%0";
8720 }
8721
8722 if (STACK_REG_P (cmp_op1)
8723 && stack_top_dies
8724 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
8725 && REGNO (cmp_op1) != FIRST_STACK_REG)
8726 {
8727 /* If the top of the 387 stack dies, and the other operand
8728 is also a stack register that dies, then this must be a
8729 `fcompp' float compare. */
8730
8731 if (eflags_p)
8732 {
8733 /* There is no double popping fcomi variant. Fortunately,
8734 eflags is immune from the fstp's cc clobbering. */
8735 if (unordered_p)
8736 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
8737 else
8738 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
8739 return output_387_ffreep (operands, 0);
8740 }
8741 else
8742 {
8743 if (unordered_p)
8744 return "fucompp\n\tfnstsw\t%0";
8745 else
8746 return "fcompp\n\tfnstsw\t%0";
8747 }
8748 }
8749 else
8750 {
8751 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
8752
8753 static const char * const alt[16] =
8754 {
8755 "fcom%z2\t%y2\n\tfnstsw\t%0",
8756 "fcomp%z2\t%y2\n\tfnstsw\t%0",
8757 "fucom%z2\t%y2\n\tfnstsw\t%0",
8758 "fucomp%z2\t%y2\n\tfnstsw\t%0",
8759
8760 "ficom%z2\t%y2\n\tfnstsw\t%0",
8761 "ficomp%z2\t%y2\n\tfnstsw\t%0",
8762 NULL,
8763 NULL,
8764
8765 "fcomi\t{%y1, %0|%0, %y1}",
8766 "fcomip\t{%y1, %0|%0, %y1}",
8767 "fucomi\t{%y1, %0|%0, %y1}",
8768 "fucomip\t{%y1, %0|%0, %y1}",
8769
8770 NULL,
8771 NULL,
8772 NULL,
8773 NULL
8774 };
8775
8776 int mask;
8777 const char *ret;
8778
8779 mask = eflags_p << 3;
8780 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
8781 mask |= unordered_p << 1;
8782 mask |= stack_top_dies;
8783
8784 gcc_assert (mask < 16);
8785 ret = alt[mask];
8786 gcc_assert (ret);
8787
8788 return ret;
8789 }
8790 }
8791
8792 void
8793 ix86_output_addr_vec_elt (FILE *file, int value)
8794 {
8795 const char *directive = ASM_LONG;
8796
8797 #ifdef ASM_QUAD
8798 if (TARGET_64BIT)
8799 directive = ASM_QUAD;
8800 #else
8801 gcc_assert (!TARGET_64BIT);
8802 #endif
8803
8804 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
8805 }
8806
8807 void
8808 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
8809 {
8810 if (TARGET_64BIT)
8811 fprintf (file, "%s%s%d-%s%d\n",
8812 ASM_LONG, LPREFIX, value, LPREFIX, rel);
8813 else if (HAVE_AS_GOTOFF_IN_DATA)
8814 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
8815 #if TARGET_MACHO
8816 else if (TARGET_MACHO)
8817 {
8818 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
8819 machopic_output_function_base_name (file);
8820 fprintf(file, "\n");
8821 }
8822 #endif
8823 else
8824 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
8825 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
8826 }
8827 \f
8828 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
8829 for the target. */
8830
8831 void
8832 ix86_expand_clear (rtx dest)
8833 {
8834 rtx tmp;
8835
8836 /* We play register width games, which are only valid after reload. */
8837 gcc_assert (reload_completed);
8838
8839 /* Avoid HImode and its attendant prefix byte. */
8840 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
8841 dest = gen_rtx_REG (SImode, REGNO (dest));
8842
8843 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
8844
8845 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
8846 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
8847 {
8848 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
8849 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
8850 }
8851
8852 emit_insn (tmp);
8853 }
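
/* For an SImode destination this emits either a plain "mov $0, reg" set
   or, when xor is preferred, the set of the register to zero together
   with a clobber of the flags register, matching the movsi_xor pattern
   mentioned above.  */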
8854
8855 /* X is an unchanging MEM. If it is a constant pool reference, return
8856 the constant pool rtx, else NULL. */
8857
8858 rtx
8859 maybe_get_pool_constant (rtx x)
8860 {
8861 x = ix86_delegitimize_address (XEXP (x, 0));
8862
8863 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8864 return get_pool_constant (x);
8865
8866 return NULL_RTX;
8867 }
8868
8869 void
8870 ix86_expand_move (enum machine_mode mode, rtx operands[])
8871 {
8872 int strict = (reload_in_progress || reload_completed);
8873 rtx op0, op1;
8874 enum tls_model model;
8875
8876 op0 = operands[0];
8877 op1 = operands[1];
8878
8879 if (GET_CODE (op1) == SYMBOL_REF)
8880 {
8881 model = SYMBOL_REF_TLS_MODEL (op1);
8882 if (model)
8883 {
8884 op1 = legitimize_tls_address (op1, model, true);
8885 op1 = force_operand (op1, op0);
8886 if (op1 == op0)
8887 return;
8888 }
8889 }
8890 else if (GET_CODE (op1) == CONST
8891 && GET_CODE (XEXP (op1, 0)) == PLUS
8892 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
8893 {
8894 model = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (op1, 0), 0));
8895 if (model)
8896 {
8897 rtx addend = XEXP (XEXP (op1, 0), 1);
8898 op1 = legitimize_tls_address (XEXP (XEXP (op1, 0), 0), model, true);
8899 op1 = force_operand (op1, NULL);
8900 op1 = expand_simple_binop (Pmode, PLUS, op1, addend,
8901 op0, 1, OPTAB_DIRECT);
8902 if (op1 == op0)
8903 return;
8904 }
8905 }
8906
8907 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
8908 {
8909 #if TARGET_MACHO
8910 if (MACHOPIC_PURE)
8911 {
8912 rtx temp = ((reload_in_progress
8913 || ((op0 && GET_CODE (op0) == REG)
8914 && mode == Pmode))
8915 ? op0 : gen_reg_rtx (Pmode));
8916 op1 = machopic_indirect_data_reference (op1, temp);
8917 op1 = machopic_legitimize_pic_address (op1, mode,
8918 temp == op1 ? 0 : temp);
8919 }
8920 else if (MACHOPIC_INDIRECT)
8921 op1 = machopic_indirect_data_reference (op1, 0);
8922 if (op0 == op1)
8923 return;
8924 #else
8925 if (GET_CODE (op0) == MEM)
8926 op1 = force_reg (Pmode, op1);
8927 else
8928 op1 = legitimize_address (op1, op1, Pmode);
8929 #endif /* TARGET_MACHO */
8930 }
8931 else
8932 {
8933 if (GET_CODE (op0) == MEM
8934 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
8935 || !push_operand (op0, mode))
8936 && GET_CODE (op1) == MEM)
8937 op1 = force_reg (mode, op1);
8938
8939 if (push_operand (op0, mode)
8940 && ! general_no_elim_operand (op1, mode))
8941 op1 = copy_to_mode_reg (mode, op1);
8942
8943 /* Force large constants in 64bit compilation into register
8944 to get them CSEed. */
8945 if (TARGET_64BIT && mode == DImode
8946 && immediate_operand (op1, mode)
8947 && !x86_64_zext_immediate_operand (op1, VOIDmode)
8948 && !register_operand (op0, mode)
8949 && optimize && !reload_completed && !reload_in_progress)
8950 op1 = copy_to_mode_reg (mode, op1);
8951
8952 if (FLOAT_MODE_P (mode))
8953 {
8954 /* If we are loading a floating point constant to a register,
8955 force the value to memory now, since we'll get better code
8956 out the back end. */
8957
8958 if (strict)
8959 ;
8960 else if (GET_CODE (op1) == CONST_DOUBLE)
8961 {
8962 op1 = validize_mem (force_const_mem (mode, op1));
8963 if (!register_operand (op0, mode))
8964 {
8965 rtx temp = gen_reg_rtx (mode);
8966 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
8967 emit_move_insn (op0, temp);
8968 return;
8969 }
8970 }
8971 }
8972 }
8973
8974 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
8975 }
8976
8977 void
8978 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
8979 {
8980 rtx op0 = operands[0], op1 = operands[1];
8981
8982 /* Force constants other than zero into memory. We do not know how
8983 the instructions used to build constants modify the upper 64 bits
8984 of the register; once we have that information we may be able
8985 to handle some of them more efficiently. */
8986 if ((reload_in_progress | reload_completed) == 0
8987 && register_operand (op0, mode)
8988 && CONSTANT_P (op1)
8989 && standard_sse_constant_p (op1) <= 0)
8990 op1 = validize_mem (force_const_mem (mode, op1));
8991
8992 /* Make operand1 a register if it isn't already. */
8993 if (!no_new_pseudos
8994 && !register_operand (op0, mode)
8995 && !register_operand (op1, mode))
8996 {
8997 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
8998 return;
8999 }
9000
9001 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9002 }
9003
9004 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
9005 straight to ix86_expand_vector_move. */
9006
9007 void
9008 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
9009 {
9010 rtx op0, op1, m;
9011
9012 op0 = operands[0];
9013 op1 = operands[1];
9014
9015 if (MEM_P (op1))
9016 {
9017 /* If we're optimizing for size, movups is the smallest. */
9018 if (optimize_size)
9019 {
9020 op0 = gen_lowpart (V4SFmode, op0);
9021 op1 = gen_lowpart (V4SFmode, op1);
9022 emit_insn (gen_sse_movups (op0, op1));
9023 return;
9024 }
9025
9026 /* ??? If we have typed data, then it would appear that using
9027 movdqu is the only way to get unaligned data loaded with
9028 integer type. */
9029 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9030 {
9031 op0 = gen_lowpart (V16QImode, op0);
9032 op1 = gen_lowpart (V16QImode, op1);
9033 emit_insn (gen_sse2_movdqu (op0, op1));
9034 return;
9035 }
9036
9037 if (TARGET_SSE2 && mode == V2DFmode)
9038 {
9039 rtx zero;
9040
9041 /* When SSE registers are split into halves, we can avoid
9042 writing to the top half twice. */
9043 if (TARGET_SSE_SPLIT_REGS)
9044 {
9045 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
9046 zero = op0;
9047 }
9048 else
9049 {
9050 /* ??? Not sure about the best option for the Intel chips.
9051 The following would seem to satisfy; the register is
9052 entirely cleared, breaking the dependency chain. We
9053 then store to the upper half, with a dependency depth
9054 of one. A rumor has it that Intel recommends two movsd
9055 followed by an unpacklpd, but this is unconfirmed. And
9056 given that the dependency depth of the unpacklpd would
9057 still be one, I'm not sure why this would be better. */
9058 zero = CONST0_RTX (V2DFmode);
9059 }
9060
9061 m = adjust_address (op1, DFmode, 0);
9062 emit_insn (gen_sse2_loadlpd (op0, zero, m));
9063 m = adjust_address (op1, DFmode, 8);
9064 emit_insn (gen_sse2_loadhpd (op0, op0, m));
9065 }
9066 else
9067 {
9068 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
9069 emit_move_insn (op0, CONST0_RTX (mode));
9070 else
9071 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
9072
9073 if (mode != V4SFmode)
9074 op0 = gen_lowpart (V4SFmode, op0);
9075 m = adjust_address (op1, V2SFmode, 0);
9076 emit_insn (gen_sse_loadlps (op0, op0, m));
9077 m = adjust_address (op1, V2SFmode, 8);
9078 emit_insn (gen_sse_loadhps (op0, op0, m));
9079 }
9080 }
9081 else if (MEM_P (op0))
9082 {
9083 /* If we're optimizing for size, movups is the smallest. */
9084 if (optimize_size)
9085 {
9086 op0 = gen_lowpart (V4SFmode, op0);
9087 op1 = gen_lowpart (V4SFmode, op1);
9088 emit_insn (gen_sse_movups (op0, op1));
9089 return;
9090 }
9091
9092 /* ??? Similar to above, only less clear because of "typeless
9093 stores". */
9094 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
9095 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9096 {
9097 op0 = gen_lowpart (V16QImode, op0);
9098 op1 = gen_lowpart (V16QImode, op1);
9099 emit_insn (gen_sse2_movdqu (op0, op1));
9100 return;
9101 }
9102
9103 if (TARGET_SSE2 && mode == V2DFmode)
9104 {
9105 m = adjust_address (op0, DFmode, 0);
9106 emit_insn (gen_sse2_storelpd (m, op1));
9107 m = adjust_address (op0, DFmode, 8);
9108 emit_insn (gen_sse2_storehpd (m, op1));
9109 }
9110 else
9111 {
9112 if (mode != V4SFmode)
9113 op1 = gen_lowpart (V4SFmode, op1);
9114 m = adjust_address (op0, V2SFmode, 0);
9115 emit_insn (gen_sse_storelps (m, op1));
9116 m = adjust_address (op0, V2SFmode, 8);
9117 emit_insn (gen_sse_storehps (m, op1));
9118 }
9119 }
9120 else
9121 gcc_unreachable ();
9122 }
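
/* As an illustration (a sketch only, not taken from any particular
   manual), an unaligned V4SF load handled above may end up as

       movups  mem, %xmm0          ; smallest form, used for -Os

   or as the split pair

       movlps  mem, %xmm0
       movhps  mem+8, %xmm0

   while integer vectors on SSE2 targets use movdqu and V2DFmode uses
   the movlpd/movhpd pair; the mnemonics are only meant to suggest what
   the gen_sse_* patterns expand to. */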
9123
9124 /* Expand a push in MODE. This is some mode for which we do not support
9125 proper push instructions, at least from the registers that we expect
9126 the value to live in. */
9127
9128 void
9129 ix86_expand_push (enum machine_mode mode, rtx x)
9130 {
9131 rtx tmp;
9132
9133 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
9134 GEN_INT (-GET_MODE_SIZE (mode)),
9135 stack_pointer_rtx, 1, OPTAB_DIRECT);
9136 if (tmp != stack_pointer_rtx)
9137 emit_move_insn (stack_pointer_rtx, tmp);
9138
9139 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
9140 emit_move_insn (tmp, x);
9141 }
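
/* For illustration, the expansion above amounts to a two-insn sequence
   of the shape (operands are hypothetical)

       sub  $GET_MODE_SIZE (mode), %sp
       mov  x, (%sp)

   i.e. the stack pointer is adjusted explicitly and the value is then
   stored at the new stack top. */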
9142
9143 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
9144 destination to use for the operation. If different from the true
9145 destination in operands[0], a copy operation will be required. */
9146
9147 rtx
9148 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
9149 rtx operands[])
9150 {
9151 int matching_memory;
9152 rtx src1, src2, dst;
9153
9154 dst = operands[0];
9155 src1 = operands[1];
9156 src2 = operands[2];
9157
9158 /* Recognize <var1> = <value> <op> <var1> for commutative operators. */
9159 if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
9160 && (rtx_equal_p (dst, src2)
9161 || immediate_operand (src1, mode)))
9162 {
9163 rtx temp = src1;
9164 src1 = src2;
9165 src2 = temp;
9166 }
9167
9168 /* If the destination is memory, and we do not have matching source
9169 operands, do things in registers. */
9170 matching_memory = 0;
9171 if (GET_CODE (dst) == MEM)
9172 {
9173 if (rtx_equal_p (dst, src1))
9174 matching_memory = 1;
9175 else if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
9176 && rtx_equal_p (dst, src2))
9177 matching_memory = 2;
9178 else
9179 dst = gen_reg_rtx (mode);
9180 }
9181
9182 /* Both source operands cannot be in memory. */
9183 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
9184 {
9185 if (matching_memory != 2)
9186 src2 = force_reg (mode, src2);
9187 else
9188 src1 = force_reg (mode, src1);
9189 }
9190
9191 /* If the operation is not commutative, source 1 cannot be a constant
9192 or non-matching memory. */
9193 if ((CONSTANT_P (src1)
9194 || (!matching_memory && GET_CODE (src1) == MEM))
9195 && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
9196 src1 = force_reg (mode, src1);
9197
9198 src1 = operands[1] = src1;
9199 src2 = operands[2] = src2;
9200 return dst;
9201 }
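
/* Worked example for the fixups above (hypothetical operands): for a
   commutative PLUS where operands[0] and operands[2] are the same MEM
   and operands[1] is a REG, the swap turns the insn into mem = mem + reg
   so the memory destination can be matched; if the destination is a MEM
   matching neither source, a fresh pseudo is used instead and the caller
   copies the result back. */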
9202
9203 /* Similarly, but assume that the destination has already been
9204 set up properly. */
9205
9206 void
9207 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
9208 enum machine_mode mode, rtx operands[])
9209 {
9210 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
9211 gcc_assert (dst == operands[0]);
9212 }
9213
9214 /* Attempt to expand a binary operator. Make the expansion closer to the
9215 actual machine than just general_operand, which would allow 3 separate
9216 memory references (one output, two input) in a single insn. */
9217
9218 void
9219 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
9220 rtx operands[])
9221 {
9222 rtx src1, src2, dst, op, clob;
9223
9224 dst = ix86_fixup_binary_operands (code, mode, operands);
9225 src1 = operands[1];
9226 src2 = operands[2];
9227
9228 /* Emit the instruction. */
9229
9230 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
9231 if (reload_in_progress)
9232 {
9233 /* Reload doesn't know about the flags register, and doesn't know that
9234 it doesn't want to clobber it. We can only do this with PLUS. */
9235 gcc_assert (code == PLUS);
9236 emit_insn (op);
9237 }
9238 else
9239 {
9240 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9241 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
9242 }
9243
9244 /* Fix up the destination if needed. */
9245 if (dst != operands[0])
9246 emit_move_insn (operands[0], dst);
9247 }
9248
9249 /* Return TRUE or FALSE depending on whether the binary operator meets the
9250 appropriate constraints. */
9251
9252 int
9253 ix86_binary_operator_ok (enum rtx_code code,
9254 enum machine_mode mode ATTRIBUTE_UNUSED,
9255 rtx operands[3])
9256 {
9257 /* Both source operands cannot be in memory. */
9258 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
9259 return 0;
9260 /* If the operation is not commutative, source 1 cannot be a constant. */
9261 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
9262 return 0;
9263 /* If the destination is memory, we must have a matching source operand. */
9264 if (GET_CODE (operands[0]) == MEM
9265 && ! (rtx_equal_p (operands[0], operands[1])
9266 || (GET_RTX_CLASS (code) == RTX_COMM_ARITH
9267 && rtx_equal_p (operands[0], operands[2]))))
9268 return 0;
9269 /* If the operation is not commutative and source 1 is memory, we must
9270 have a matching destination. */
9271 if (GET_CODE (operands[1]) == MEM
9272 && GET_RTX_CLASS (code) != RTX_COMM_ARITH
9273 && ! rtx_equal_p (operands[0], operands[1]))
9274 return 0;
9275 return 1;
9276 }
9277
9278 /* Attempt to expand a unary operator. Make the expansion closer to the
9279 actual machine than just general_operand, which would allow 2 separate
9280 memory references (one output, one input) in a single insn. */
9281
9282 void
9283 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
9284 rtx operands[])
9285 {
9286 int matching_memory;
9287 rtx src, dst, op, clob;
9288
9289 dst = operands[0];
9290 src = operands[1];
9291
9292 /* If the destination is memory, and we do not have matching source
9293 operands, do things in registers. */
9294 matching_memory = 0;
9295 if (MEM_P (dst))
9296 {
9297 if (rtx_equal_p (dst, src))
9298 matching_memory = 1;
9299 else
9300 dst = gen_reg_rtx (mode);
9301 }
9302
9303 /* When source operand is memory, destination must match. */
9304 if (MEM_P (src) && !matching_memory)
9305 src = force_reg (mode, src);
9306
9307 /* Emit the instruction. */
9308
9309 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
9310 if (reload_in_progress || code == NOT)
9311 {
9312 /* Reload doesn't know about the flags register, and doesn't know that
9313 it doesn't want to clobber it. */
9314 gcc_assert (code == NOT);
9315 emit_insn (op);
9316 }
9317 else
9318 {
9319 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9320 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
9321 }
9322
9323 /* Fix up the destination if needed. */
9324 if (dst != operands[0])
9325 emit_move_insn (operands[0], dst);
9326 }
9327
9328 /* Return TRUE or FALSE depending on whether the unary operator meets the
9329 appropriate constraints. */
9330
9331 int
9332 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
9333 enum machine_mode mode ATTRIBUTE_UNUSED,
9334 rtx operands[2] ATTRIBUTE_UNUSED)
9335 {
9336 /* If one of operands is memory, source and destination must match. */
9337 if ((GET_CODE (operands[0]) == MEM
9338 || GET_CODE (operands[1]) == MEM)
9339 && ! rtx_equal_p (operands[0], operands[1]))
9340 return FALSE;
9341 return TRUE;
9342 }
9343
9344 /* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders.
9345 Create a mask for the sign bit in MODE for an SSE register. If VECT is
9346 true, then replicate the mask for all elements of the vector register.
9347 If INVERT is true, then create a mask excluding the sign bit. */
9348
9349 rtx
9350 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
9351 {
9352 enum machine_mode vec_mode;
9353 HOST_WIDE_INT hi, lo;
9354 int shift = 63;
9355 rtvec v;
9356 rtx mask;
9357
9358 /* Find the sign bit, sign extended to 2*HWI. */
9359 if (mode == SFmode)
9360 lo = 0x80000000, hi = lo < 0;
9361 else if (HOST_BITS_PER_WIDE_INT >= 64)
9362 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
9363 else
9364 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
9365
9366 if (invert)
9367 lo = ~lo, hi = ~hi;
9368
9369 /* Force this value into the low part of a fp vector constant. */
9370 mask = immed_double_const (lo, hi, mode == SFmode ? SImode : DImode);
9371 mask = gen_lowpart (mode, mask);
9372
9373 if (mode == SFmode)
9374 {
9375 if (vect)
9376 v = gen_rtvec (4, mask, mask, mask, mask);
9377 else
9378 v = gen_rtvec (4, mask, CONST0_RTX (SFmode),
9379 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
9380 vec_mode = V4SFmode;
9381 }
9382 else
9383 {
9384 if (vect)
9385 v = gen_rtvec (2, mask, mask);
9386 else
9387 v = gen_rtvec (2, mask, CONST0_RTX (DFmode));
9388 vec_mode = V2DFmode;
9389 }
9390
9391 return force_reg (vec_mode, gen_rtx_CONST_VECTOR (vec_mode, v));
9392 }
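
/* Concrete values produced above, as a worked example: for SFmode the
   per-element mask is 0x80000000, or 0x7fffffff when INVERT; for DFmode
   it is the 64-bit sign bit 0x8000000000000000, or its complement when
   INVERT.  With VECT the value is replicated into every element,
   otherwise only element 0 is nonzero. */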
9393
9394 /* Generate code for floating point ABS or NEG. */
9395
9396 void
9397 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
9398 rtx operands[])
9399 {
9400 rtx mask, set, use, clob, dst, src;
9401 bool matching_memory;
9402 bool use_sse = false;
9403 bool vector_mode = VECTOR_MODE_P (mode);
9404 enum machine_mode elt_mode = mode;
9405
9406 if (vector_mode)
9407 {
9408 elt_mode = GET_MODE_INNER (mode);
9409 use_sse = true;
9410 }
9411 else if (TARGET_SSE_MATH)
9412 use_sse = SSE_FLOAT_MODE_P (mode);
9413
9414 /* NEG and ABS performed with SSE use bitwise mask operations.
9415 Create the appropriate mask now. */
9416 if (use_sse)
9417 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
9418 else
9419 mask = NULL_RTX;
9420
9421 dst = operands[0];
9422 src = operands[1];
9423
9424 /* If the destination is memory, and we don't have matching source
9425 operands or we're using the x87, do things in registers. */
9426 matching_memory = false;
9427 if (MEM_P (dst))
9428 {
9429 if (use_sse && rtx_equal_p (dst, src))
9430 matching_memory = true;
9431 else
9432 dst = gen_reg_rtx (mode);
9433 }
9434 if (MEM_P (src) && !matching_memory)
9435 src = force_reg (mode, src);
9436
9437 if (vector_mode)
9438 {
9439 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
9440 set = gen_rtx_SET (VOIDmode, dst, set);
9441 emit_insn (set);
9442 }
9443 else
9444 {
9445 set = gen_rtx_fmt_e (code, mode, src);
9446 set = gen_rtx_SET (VOIDmode, dst, set);
9447 if (mask)
9448 {
9449 use = gen_rtx_USE (VOIDmode, mask);
9450 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9451 emit_insn (gen_rtx_PARALLEL (VOIDmode,
9452 gen_rtvec (3, set, use, clob)));
9453 }
9454 else
9455 emit_insn (set);
9456 }
9457
9458 if (dst != operands[0])
9459 emit_move_insn (operands[0], dst);
9460 }
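
/* The SSE path above reduces NEG and ABS to bit operations on the sign
   bit; as a worked example for SFmode,

       neg (x) == x XOR 0x80000000    (flip the sign bit)
       abs (x) == x AND 0x7fffffff    (clear the sign bit)

   which is why the mask is built with INVERT equal to (code == ABS). */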
9461
9462 /* Expand a copysign operation. Special case operand 0 being a constant. */
9463
9464 void
9465 ix86_expand_copysign (rtx operands[])
9466 {
9467 enum machine_mode mode, vmode;
9468 rtx dest, op0, op1, mask, nmask;
9469
9470 dest = operands[0];
9471 op0 = operands[1];
9472 op1 = operands[2];
9473
9474 mode = GET_MODE (dest);
9475 vmode = mode == SFmode ? V4SFmode : V2DFmode;
9476
9477 if (GET_CODE (op0) == CONST_DOUBLE)
9478 {
9479 rtvec v;
9480
9481 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
9482 op0 = simplify_unary_operation (ABS, mode, op0, mode);
9483
9484 if (op0 == CONST0_RTX (mode))
9485 op0 = CONST0_RTX (vmode);
9486 else
9487 {
9488 if (mode == SFmode)
9489 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
9490 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
9491 else
9492 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
9493 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
9494 }
9495
9496 mask = ix86_build_signbit_mask (mode, 0, 0);
9497
9498 if (mode == SFmode)
9499 emit_insn (gen_copysignsf3_const (dest, op0, op1, mask));
9500 else
9501 emit_insn (gen_copysigndf3_const (dest, op0, op1, mask));
9502 }
9503 else
9504 {
9505 nmask = ix86_build_signbit_mask (mode, 0, 1);
9506 mask = ix86_build_signbit_mask (mode, 0, 0);
9507
9508 if (mode == SFmode)
9509 emit_insn (gen_copysignsf3_var (dest, NULL, op0, op1, nmask, mask));
9510 else
9511 emit_insn (gen_copysigndf3_var (dest, NULL, op0, op1, nmask, mask));
9512 }
9513 }
9514
9515 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
9516 be a constant, and so has already been expanded into a vector constant. */
9517
9518 void
9519 ix86_split_copysign_const (rtx operands[])
9520 {
9521 enum machine_mode mode, vmode;
9522 rtx dest, op0, op1, mask, x;
9523
9524 dest = operands[0];
9525 op0 = operands[1];
9526 op1 = operands[2];
9527 mask = operands[3];
9528
9529 mode = GET_MODE (dest);
9530 vmode = GET_MODE (mask);
9531
9532 dest = simplify_gen_subreg (vmode, dest, mode, 0);
9533 x = gen_rtx_AND (vmode, dest, mask);
9534 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9535
9536 if (op0 != CONST0_RTX (vmode))
9537 {
9538 x = gen_rtx_IOR (vmode, dest, op0);
9539 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9540 }
9541 }
9542
9543 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
9544 so we have to do two masks. */
9545
9546 void
9547 ix86_split_copysign_var (rtx operands[])
9548 {
9549 enum machine_mode mode, vmode;
9550 rtx dest, scratch, op0, op1, mask, nmask, x;
9551
9552 dest = operands[0];
9553 scratch = operands[1];
9554 op0 = operands[2];
9555 op1 = operands[3];
9556 nmask = operands[4];
9557 mask = operands[5];
9558
9559 mode = GET_MODE (dest);
9560 vmode = GET_MODE (mask);
9561
9562 if (rtx_equal_p (op0, op1))
9563 {
9564 /* Shouldn't happen often (it's useless, obviously), but when it does
9565 we'd generate incorrect code if we continue below. */
9566 emit_move_insn (dest, op0);
9567 return;
9568 }
9569
9570 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
9571 {
9572 gcc_assert (REGNO (op1) == REGNO (scratch));
9573
9574 x = gen_rtx_AND (vmode, scratch, mask);
9575 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
9576
9577 dest = mask;
9578 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
9579 x = gen_rtx_NOT (vmode, dest);
9580 x = gen_rtx_AND (vmode, x, op0);
9581 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9582 }
9583 else
9584 {
9585 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
9586 {
9587 x = gen_rtx_AND (vmode, scratch, mask);
9588 }
9589 else /* alternative 2,4 */
9590 {
9591 gcc_assert (REGNO (mask) == REGNO (scratch));
9592 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
9593 x = gen_rtx_AND (vmode, scratch, op1);
9594 }
9595 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
9596
9597 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
9598 {
9599 dest = simplify_gen_subreg (vmode, op0, mode, 0);
9600 x = gen_rtx_AND (vmode, dest, nmask);
9601 }
9602 else /* alternative 3,4 */
9603 {
9604 gcc_assert (REGNO (nmask) == REGNO (dest));
9605 dest = nmask;
9606 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
9607 x = gen_rtx_AND (vmode, dest, op0);
9608 }
9609 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9610 }
9611
9612 x = gen_rtx_IOR (vmode, dest, scratch);
9613 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9614 }
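
/* In terms of plain bit operations, both split routines above compute

       copysign (x, y) = (x & ~signmask) | (y & signmask)

   where signmask selects only the sign bit; the _const variant has the
   (x & ~signmask) part folded into a vector constant, while the _var
   variant needs both masks and a scratch register. */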
9615
9616 /* Return TRUE or FALSE depending on whether the first SET in INSN
9617 has source and destination with matching CC modes and the
9618 CC mode is at least as constrained as REQ_MODE. */
9619
9620 int
9621 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
9622 {
9623 rtx set;
9624 enum machine_mode set_mode;
9625
9626 set = PATTERN (insn);
9627 if (GET_CODE (set) == PARALLEL)
9628 set = XVECEXP (set, 0, 0);
9629 gcc_assert (GET_CODE (set) == SET);
9630 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
9631
9632 set_mode = GET_MODE (SET_DEST (set));
9633 switch (set_mode)
9634 {
9635 case CCNOmode:
9636 if (req_mode != CCNOmode
9637 && (req_mode != CCmode
9638 || XEXP (SET_SRC (set), 1) != const0_rtx))
9639 return 0;
9640 break;
9641 case CCmode:
9642 if (req_mode == CCGCmode)
9643 return 0;
9644 /* FALLTHRU */
9645 case CCGCmode:
9646 if (req_mode == CCGOCmode || req_mode == CCNOmode)
9647 return 0;
9648 /* FALLTHRU */
9649 case CCGOCmode:
9650 if (req_mode == CCZmode)
9651 return 0;
9652 /* FALLTHRU */
9653 case CCZmode:
9654 break;
9655
9656 default:
9657 gcc_unreachable ();
9658 }
9659
9660 return (GET_MODE (SET_SRC (set)) == set_mode);
9661 }
9662
9663 /* Generate insn patterns to do an integer compare of OPERANDS. */
9664
9665 static rtx
9666 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
9667 {
9668 enum machine_mode cmpmode;
9669 rtx tmp, flags;
9670
9671 cmpmode = SELECT_CC_MODE (code, op0, op1);
9672 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
9673
9674 /* This is very simple, but making the interface the same as in the
9675 FP case makes the rest of the code easier. */
9676 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
9677 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
9678
9679 /* Return the test that should be put into the flags user, i.e.
9680 the bcc, scc, or cmov instruction. */
9681 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
9682 }
9683
9684 /* Figure out whether to use ordered or unordered fp comparisons.
9685 Return the appropriate mode to use. */
9686
9687 enum machine_mode
9688 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
9689 {
9690 /* ??? In order to make all comparisons reversible, we do all comparisons
9691 non-trapping when compiling for IEEE. Once gcc is able to distinguish
9692 all forms of trapping and nontrapping comparisons, we can make inequality
9693 comparisons trapping again, since that results in better code when using
9694 FCOM based compares. */
9695 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
9696 }
9697
9698 enum machine_mode
9699 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
9700 {
9701 if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
9702 return ix86_fp_compare_mode (code);
9703 switch (code)
9704 {
9705 /* Only zero flag is needed. */
9706 case EQ: /* ZF=0 */
9707 case NE: /* ZF!=0 */
9708 return CCZmode;
9709 /* Codes needing carry flag. */
9710 case GEU: /* CF=0 */
9711 case GTU: /* CF=0 & ZF=0 */
9712 case LTU: /* CF=1 */
9713 case LEU: /* CF=1 | ZF=1 */
9714 return CCmode;
9715 /* Codes possibly doable only with sign flag when
9716 comparing against zero. */
9717 case GE: /* SF=OF or SF=0 */
9718 case LT: /* SF<>OF or SF=1 */
9719 if (op1 == const0_rtx)
9720 return CCGOCmode;
9721 else
9722 /* For other cases Carry flag is not required. */
9723 return CCGCmode;
9724 /* Codes doable only with the sign flag when comparing
9725 against zero, but we lack a jump instruction for it,
9726 so we need to use relational tests against overflow,
9727 which thus needs to be zero. */
9728 case GT: /* ZF=0 & SF=OF */
9729 case LE: /* ZF=1 | SF<>OF */
9730 if (op1 == const0_rtx)
9731 return CCNOmode;
9732 else
9733 return CCGCmode;
9734 /* The strcmp pattern does (use flags), and combine may ask us for a
9735 proper mode. */
9736 case USE:
9737 return CCmode;
9738 default:
9739 gcc_unreachable ();
9740 }
9741 }
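
/* Worked examples of the mapping above (hypothetical source): "a == b"
   needs only ZF, hence CCZmode; an unsigned "a < b" needs CF, hence
   CCmode; a signed "a < 0" can be decided from the sign flag alone,
   hence CCGOCmode; a general signed "a < b" gets CCGCmode. */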
9742
9743 /* Return the fixed registers used for condition codes. */
9744
9745 static bool
9746 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
9747 {
9748 *p1 = FLAGS_REG;
9749 *p2 = FPSR_REG;
9750 return true;
9751 }
9752
9753 /* If two condition code modes are compatible, return a condition code
9754 mode which is compatible with both. Otherwise, return
9755 VOIDmode. */
9756
9757 static enum machine_mode
9758 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
9759 {
9760 if (m1 == m2)
9761 return m1;
9762
9763 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
9764 return VOIDmode;
9765
9766 if ((m1 == CCGCmode && m2 == CCGOCmode)
9767 || (m1 == CCGOCmode && m2 == CCGCmode))
9768 return CCGCmode;
9769
9770 switch (m1)
9771 {
9772 default:
9773 gcc_unreachable ();
9774
9775 case CCmode:
9776 case CCGCmode:
9777 case CCGOCmode:
9778 case CCNOmode:
9779 case CCZmode:
9780 switch (m2)
9781 {
9782 default:
9783 return VOIDmode;
9784
9785 case CCmode:
9786 case CCGCmode:
9787 case CCGOCmode:
9788 case CCNOmode:
9789 case CCZmode:
9790 return CCmode;
9791 }
9792
9793 case CCFPmode:
9794 case CCFPUmode:
9795 /* These are only compatible with themselves, which we already
9796 checked above. */
9797 return VOIDmode;
9798 }
9799 }
9800
9801 /* Return true if we should use an FCOMI instruction for this fp comparison. */
9802
9803 int
9804 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
9805 {
9806 enum rtx_code swapped_code = swap_condition (code);
9807 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
9808 || (ix86_fp_comparison_cost (swapped_code)
9809 == ix86_fp_comparison_fcomi_cost (swapped_code)));
9810 }
9811
9812 /* Swap, force into registers, or otherwise massage the two operands
9813 to a fp comparison. The operands are updated in place; the new
9814 comparison code is returned. */
9815
9816 static enum rtx_code
9817 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
9818 {
9819 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
9820 rtx op0 = *pop0, op1 = *pop1;
9821 enum machine_mode op_mode = GET_MODE (op0);
9822 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
9823
9824 /* All of the unordered compare instructions only work on registers.
9825 The same is true of the fcomi compare instructions. The XFmode
9826 compare instructions require registers except when comparing
9827 against zero or when converting operand 1 from fixed point to
9828 floating point. */
9829
9830 if (!is_sse
9831 && (fpcmp_mode == CCFPUmode
9832 || (op_mode == XFmode
9833 && ! (standard_80387_constant_p (op0) == 1
9834 || standard_80387_constant_p (op1) == 1)
9835 && GET_CODE (op1) != FLOAT)
9836 || ix86_use_fcomi_compare (code)))
9837 {
9838 op0 = force_reg (op_mode, op0);
9839 op1 = force_reg (op_mode, op1);
9840 }
9841 else
9842 {
9843 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
9844 things around if they appear profitable, otherwise force op0
9845 into a register. */
9846
9847 if (standard_80387_constant_p (op0) == 0
9848 || (GET_CODE (op0) == MEM
9849 && ! (standard_80387_constant_p (op1) == 0
9850 || GET_CODE (op1) == MEM)))
9851 {
9852 rtx tmp;
9853 tmp = op0, op0 = op1, op1 = tmp;
9854 code = swap_condition (code);
9855 }
9856
9857 if (GET_CODE (op0) != REG)
9858 op0 = force_reg (op_mode, op0);
9859
9860 if (CONSTANT_P (op1))
9861 {
9862 int tmp = standard_80387_constant_p (op1);
9863 if (tmp == 0)
9864 op1 = validize_mem (force_const_mem (op_mode, op1));
9865 else if (tmp == 1)
9866 {
9867 if (TARGET_CMOVE)
9868 op1 = force_reg (op_mode, op1);
9869 }
9870 else
9871 op1 = force_reg (op_mode, op1);
9872 }
9873 }
9874
9875 /* Try to rearrange the comparison to make it cheaper. */
9876 if (ix86_fp_comparison_cost (code)
9877 > ix86_fp_comparison_cost (swap_condition (code))
9878 && (GET_CODE (op1) == REG || !no_new_pseudos))
9879 {
9880 rtx tmp;
9881 tmp = op0, op0 = op1, op1 = tmp;
9882 code = swap_condition (code);
9883 if (GET_CODE (op0) != REG)
9884 op0 = force_reg (op_mode, op0);
9885 }
9886
9887 *pop0 = op0;
9888 *pop1 = op1;
9889 return code;
9890 }
9891
9892 /* Convert the comparison codes we use to represent FP comparisons to the
9893 integer code that will result in a proper branch. Return UNKNOWN if no
9894 such code is available. */
9895
9896 enum rtx_code
9897 ix86_fp_compare_code_to_integer (enum rtx_code code)
9898 {
9899 switch (code)
9900 {
9901 case GT:
9902 return GTU;
9903 case GE:
9904 return GEU;
9905 case ORDERED:
9906 case UNORDERED:
9907 return code;
9908 break;
9909 case UNEQ:
9910 return EQ;
9911 break;
9912 case UNLT:
9913 return LTU;
9914 break;
9915 case UNLE:
9916 return LEU;
9917 break;
9918 case LTGT:
9919 return NE;
9920 break;
9921 default:
9922 return UNKNOWN;
9923 }
9924 }
9925
9926 /* Split comparison code CODE into comparisons we can do using branch
9927 instructions. BYPASS_CODE is the comparison code for a branch that
9928 will branch around FIRST_CODE and SECOND_CODE. If one of the branches
9929 is not required, its value is set to UNKNOWN.
9930 We never require more than two branches. */
9931
9932 void
9933 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
9934 enum rtx_code *first_code,
9935 enum rtx_code *second_code)
9936 {
9937 *first_code = code;
9938 *bypass_code = UNKNOWN;
9939 *second_code = UNKNOWN;
9940
9941 /* The fcomi comparison sets flags as follows:
9942
9943 cmp ZF PF CF
9944 > 0 0 0
9945 < 0 0 1
9946 = 1 0 0
9947 un 1 1 1 */
9948
9949 switch (code)
9950 {
9951 case GT: /* GTU - CF=0 & ZF=0 */
9952 case GE: /* GEU - CF=0 */
9953 case ORDERED: /* PF=0 */
9954 case UNORDERED: /* PF=1 */
9955 case UNEQ: /* EQ - ZF=1 */
9956 case UNLT: /* LTU - CF=1 */
9957 case UNLE: /* LEU - CF=1 | ZF=1 */
9958 case LTGT: /* EQ - ZF=0 */
9959 break;
9960 case LT: /* LTU - CF=1 - fails on unordered */
9961 *first_code = UNLT;
9962 *bypass_code = UNORDERED;
9963 break;
9964 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
9965 *first_code = UNLE;
9966 *bypass_code = UNORDERED;
9967 break;
9968 case EQ: /* EQ - ZF=1 - fails on unordered */
9969 *first_code = UNEQ;
9970 *bypass_code = UNORDERED;
9971 break;
9972 case NE: /* NE - ZF=0 - fails on unordered */
9973 *first_code = LTGT;
9974 *second_code = UNORDERED;
9975 break;
9976 case UNGE: /* GEU - CF=0 - fails on unordered */
9977 *first_code = GE;
9978 *second_code = UNORDERED;
9979 break;
9980 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
9981 *first_code = GT;
9982 *second_code = UNORDERED;
9983 break;
9984 default:
9985 gcc_unreachable ();
9986 }
9987 if (!TARGET_IEEE_FP)
9988 {
9989 *second_code = UNKNOWN;
9990 *bypass_code = UNKNOWN;
9991 }
9992 }
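
/* Two worked examples of the splitting above when TARGET_IEEE_FP: EQ
   becomes FIRST_CODE == UNEQ with BYPASS_CODE == UNORDERED (branch
   around the equality test when the operands are unordered), while NE
   becomes FIRST_CODE == LTGT with SECOND_CODE == UNORDERED (also branch
   to the target when the operands are unordered). */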
9993
9994 /* Return the cost of a comparison done with fcom + arithmetic operations on AX.
9995 All following functions use the number of instructions as a cost metric.
9996 In the future this should be tweaked to compute bytes for optimize_size and
9997 take into account the performance of various instructions on various CPUs. */
9998 static int
9999 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
10000 {
10001 if (!TARGET_IEEE_FP)
10002 return 4;
10003 /* The cost of code output by ix86_expand_fp_compare. */
10004 switch (code)
10005 {
10006 case UNLE:
10007 case UNLT:
10008 case LTGT:
10009 case GT:
10010 case GE:
10011 case UNORDERED:
10012 case ORDERED:
10013 case UNEQ:
10014 return 4;
10015 break;
10016 case LT:
10017 case NE:
10018 case EQ:
10019 case UNGE:
10020 return 5;
10021 break;
10022 case LE:
10023 case UNGT:
10024 return 6;
10025 break;
10026 default:
10027 gcc_unreachable ();
10028 }
10029 }
10030
10031 /* Return cost of comparison done using fcomi operation.
10032 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10033 static int
10034 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
10035 {
10036 enum rtx_code bypass_code, first_code, second_code;
10037 /* Return an arbitrarily high cost when the instruction is not supported -
10038 this prevents gcc from using it. */
10039 if (!TARGET_CMOVE)
10040 return 1024;
10041 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10042 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
10043 }
10044
10045 /* Return cost of comparison done using sahf operation.
10046 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10047 static int
10048 ix86_fp_comparison_sahf_cost (enum rtx_code code)
10049 {
10050 enum rtx_code bypass_code, first_code, second_code;
10051 /* Return an arbitrarily high cost when the instruction is not preferred -
10052 this keeps gcc from using it. */
10053 if (!TARGET_USE_SAHF && !optimize_size)
10054 return 1024;
10055 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10056 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
10057 }
10058
10059 /* Compute cost of the comparison done using any method.
10060 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10061 static int
10062 ix86_fp_comparison_cost (enum rtx_code code)
10063 {
10064 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
10065 int min;
10066
10067 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
10068 sahf_cost = ix86_fp_comparison_sahf_cost (code);
10069
10070 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
10071 if (min > sahf_cost)
10072 min = sahf_cost;
10073 if (min > fcomi_cost)
10074 min = fcomi_cost;
10075 return min;
10076 }
10077
10078 /* Generate insn patterns to do a floating point compare of OPERANDS. */
10079
10080 static rtx
10081 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
10082 rtx *second_test, rtx *bypass_test)
10083 {
10084 enum machine_mode fpcmp_mode, intcmp_mode;
10085 rtx tmp, tmp2;
10086 int cost = ix86_fp_comparison_cost (code);
10087 enum rtx_code bypass_code, first_code, second_code;
10088
10089 fpcmp_mode = ix86_fp_compare_mode (code);
10090 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
10091
10092 if (second_test)
10093 *second_test = NULL_RTX;
10094 if (bypass_test)
10095 *bypass_test = NULL_RTX;
10096
10097 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10098
10099 /* Do fcomi/sahf based test when profitable. */
10100 if ((bypass_code == UNKNOWN || bypass_test)
10101 && (second_code == UNKNOWN || second_test)
10102 && ix86_fp_comparison_arithmetics_cost (code) > cost)
10103 {
10104 if (TARGET_CMOVE)
10105 {
10106 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10107 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
10108 tmp);
10109 emit_insn (tmp);
10110 }
10111 else
10112 {
10113 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10114 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
10115 if (!scratch)
10116 scratch = gen_reg_rtx (HImode);
10117 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
10118 emit_insn (gen_x86_sahf_1 (scratch));
10119 }
10120
10121 /* The FP codes work out to act like unsigned. */
10122 intcmp_mode = fpcmp_mode;
10123 code = first_code;
10124 if (bypass_code != UNKNOWN)
10125 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
10126 gen_rtx_REG (intcmp_mode, FLAGS_REG),
10127 const0_rtx);
10128 if (second_code != UNKNOWN)
10129 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
10130 gen_rtx_REG (intcmp_mode, FLAGS_REG),
10131 const0_rtx);
10132 }
10133 else
10134 {
10135 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
10136 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10137 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
10138 if (!scratch)
10139 scratch = gen_reg_rtx (HImode);
10140 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
10141
10142 /* In the unordered case, we have to check C2 for NaN's, which
10143 doesn't happen to work out to anything nice combination-wise.
10144 So do some bit twiddling on the value we've got in AH to come
10145 up with an appropriate set of condition codes. */
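/* For reference (fnstsw layout as commonly documented): AH holds C0 in
   bit 0 (0x01), C2 in bit 2 (0x04) and C3 in bit 6 (0x40), so the magic
   constants below test combinations such as 0x45 == C3|C2|C0 and
   0x05 == C2|C0. */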
10146
10147 intcmp_mode = CCNOmode;
10148 switch (code)
10149 {
10150 case GT:
10151 case UNGT:
10152 if (code == GT || !TARGET_IEEE_FP)
10153 {
10154 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
10155 code = EQ;
10156 }
10157 else
10158 {
10159 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10160 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
10161 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
10162 intcmp_mode = CCmode;
10163 code = GEU;
10164 }
10165 break;
10166 case LT:
10167 case UNLT:
10168 if (code == LT && TARGET_IEEE_FP)
10169 {
10170 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10171 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
10172 intcmp_mode = CCmode;
10173 code = EQ;
10174 }
10175 else
10176 {
10177 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
10178 code = NE;
10179 }
10180 break;
10181 case GE:
10182 case UNGE:
10183 if (code == GE || !TARGET_IEEE_FP)
10184 {
10185 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
10186 code = EQ;
10187 }
10188 else
10189 {
10190 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10191 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
10192 GEN_INT (0x01)));
10193 code = NE;
10194 }
10195 break;
10196 case LE:
10197 case UNLE:
10198 if (code == LE && TARGET_IEEE_FP)
10199 {
10200 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10201 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
10202 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
10203 intcmp_mode = CCmode;
10204 code = LTU;
10205 }
10206 else
10207 {
10208 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
10209 code = NE;
10210 }
10211 break;
10212 case EQ:
10213 case UNEQ:
10214 if (code == EQ && TARGET_IEEE_FP)
10215 {
10216 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10217 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
10218 intcmp_mode = CCmode;
10219 code = EQ;
10220 }
10221 else
10222 {
10223 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
10224 code = NE;
10225 break;
10226 }
10227 break;
10228 case NE:
10229 case LTGT:
10230 if (code == NE && TARGET_IEEE_FP)
10231 {
10232 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10233 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
10234 GEN_INT (0x40)));
10235 code = NE;
10236 }
10237 else
10238 {
10239 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
10240 code = EQ;
10241 }
10242 break;
10243
10244 case UNORDERED:
10245 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
10246 code = NE;
10247 break;
10248 case ORDERED:
10249 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
10250 code = EQ;
10251 break;
10252
10253 default:
10254 gcc_unreachable ();
10255 }
10256 }
10257
10258 /* Return the test that should be put into the flags user, i.e.
10259 the bcc, scc, or cmov instruction. */
10260 return gen_rtx_fmt_ee (code, VOIDmode,
10261 gen_rtx_REG (intcmp_mode, FLAGS_REG),
10262 const0_rtx);
10263 }
10264
10265 rtx
10266 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
10267 {
10268 rtx op0, op1, ret;
10269 op0 = ix86_compare_op0;
10270 op1 = ix86_compare_op1;
10271
10272 if (second_test)
10273 *second_test = NULL_RTX;
10274 if (bypass_test)
10275 *bypass_test = NULL_RTX;
10276
10277 if (ix86_compare_emitted)
10278 {
10279 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
10280 ix86_compare_emitted = NULL_RTX;
10281 }
10282 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
10283 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
10284 second_test, bypass_test);
10285 else
10286 ret = ix86_expand_int_compare (code, op0, op1);
10287
10288 return ret;
10289 }
10290
10291 /* Return true if CODE will result in a nontrivial jump sequence. */
10292 bool
10293 ix86_fp_jump_nontrivial_p (enum rtx_code code)
10294 {
10295 enum rtx_code bypass_code, first_code, second_code;
10296 if (!TARGET_CMOVE)
10297 return true;
10298 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10299 return bypass_code != UNKNOWN || second_code != UNKNOWN;
10300 }
10301
10302 void
10303 ix86_expand_branch (enum rtx_code code, rtx label)
10304 {
10305 rtx tmp;
10306
10307 /* If we have emitted a compare insn, go straight to simple.
10308 ix86_expand_compare won't emit anything if ix86_compare_emitted
10309 is non-NULL. */
10310 if (ix86_compare_emitted)
10311 goto simple;
10312
10313 switch (GET_MODE (ix86_compare_op0))
10314 {
10315 case QImode:
10316 case HImode:
10317 case SImode:
10318 simple:
10319 tmp = ix86_expand_compare (code, NULL, NULL);
10320 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
10321 gen_rtx_LABEL_REF (VOIDmode, label),
10322 pc_rtx);
10323 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
10324 return;
10325
10326 case SFmode:
10327 case DFmode:
10328 case XFmode:
10329 {
10330 rtvec vec;
10331 int use_fcomi;
10332 enum rtx_code bypass_code, first_code, second_code;
10333
10334 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
10335 &ix86_compare_op1);
10336
10337 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10338
10339 /* Check whether we will use the natural sequence with one jump. If
10340 so, we can expand the jump early. Otherwise delay expansion by
10341 creating a compound insn so as not to confuse the optimizers. */
10342 if (bypass_code == UNKNOWN && second_code == UNKNOWN
10343 && TARGET_CMOVE)
10344 {
10345 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
10346 gen_rtx_LABEL_REF (VOIDmode, label),
10347 pc_rtx, NULL_RTX, NULL_RTX);
10348 }
10349 else
10350 {
10351 tmp = gen_rtx_fmt_ee (code, VOIDmode,
10352 ix86_compare_op0, ix86_compare_op1);
10353 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
10354 gen_rtx_LABEL_REF (VOIDmode, label),
10355 pc_rtx);
10356 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
10357
10358 use_fcomi = ix86_use_fcomi_compare (code);
10359 vec = rtvec_alloc (3 + !use_fcomi);
10360 RTVEC_ELT (vec, 0) = tmp;
10361 RTVEC_ELT (vec, 1)
10362 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
10363 RTVEC_ELT (vec, 2)
10364 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
10365 if (! use_fcomi)
10366 RTVEC_ELT (vec, 3)
10367 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
10368
10369 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
10370 }
10371 return;
10372 }
10373
10374 case DImode:
10375 if (TARGET_64BIT)
10376 goto simple;
10377 case TImode:
10378 /* Expand DImode branch into multiple compare+branch. */
10379 {
10380 rtx lo[2], hi[2], label2;
10381 enum rtx_code code1, code2, code3;
10382 enum machine_mode submode;
10383
10384 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
10385 {
10386 tmp = ix86_compare_op0;
10387 ix86_compare_op0 = ix86_compare_op1;
10388 ix86_compare_op1 = tmp;
10389 code = swap_condition (code);
10390 }
10391 if (GET_MODE (ix86_compare_op0) == DImode)
10392 {
10393 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
10394 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
10395 submode = SImode;
10396 }
10397 else
10398 {
10399 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
10400 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
10401 submode = DImode;
10402 }
10403
10404 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
10405 avoid two branches. This costs one extra insn, so disable when
10406 optimizing for size. */
10407
10408 if ((code == EQ || code == NE)
10409 && (!optimize_size
10410 || hi[1] == const0_rtx || lo[1] == const0_rtx))
10411 {
10412 rtx xor0, xor1;
10413
10414 xor1 = hi[0];
10415 if (hi[1] != const0_rtx)
10416 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
10417 NULL_RTX, 0, OPTAB_WIDEN);
10418
10419 xor0 = lo[0];
10420 if (lo[1] != const0_rtx)
10421 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
10422 NULL_RTX, 0, OPTAB_WIDEN);
10423
10424 tmp = expand_binop (submode, ior_optab, xor1, xor0,
10425 NULL_RTX, 0, OPTAB_WIDEN);
10426
10427 ix86_compare_op0 = tmp;
10428 ix86_compare_op1 = const0_rtx;
10429 ix86_expand_branch (code, label);
10430 return;
10431 }
10432
10433 /* Otherwise, if we are doing less-than or greater-or-equal-than,
10434 op1 is a constant, and the low word is zero, then we can just
10435 examine the high word. */
10436
10437 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
10438 switch (code)
10439 {
10440 case LT: case LTU: case GE: case GEU:
10441 ix86_compare_op0 = hi[0];
10442 ix86_compare_op1 = hi[1];
10443 ix86_expand_branch (code, label);
10444 return;
10445 default:
10446 break;
10447 }
10448
10449 /* Otherwise, we need two or three jumps. */
10450
10451 label2 = gen_label_rtx ();
10452
10453 code1 = code;
10454 code2 = swap_condition (code);
10455 code3 = unsigned_condition (code);
10456
10457 switch (code)
10458 {
10459 case LT: case GT: case LTU: case GTU:
10460 break;
10461
10462 case LE: code1 = LT; code2 = GT; break;
10463 case GE: code1 = GT; code2 = LT; break;
10464 case LEU: code1 = LTU; code2 = GTU; break;
10465 case GEU: code1 = GTU; code2 = LTU; break;
10466
10467 case EQ: code1 = UNKNOWN; code2 = NE; break;
10468 case NE: code2 = UNKNOWN; break;
10469
10470 default:
10471 gcc_unreachable ();
10472 }
10473
10474 /*
10475 * a < b =>
10476 * if (hi(a) < hi(b)) goto true;
10477 * if (hi(a) > hi(b)) goto false;
10478 * if (lo(a) < lo(b)) goto true;
10479 * false:
10480 */
10481
10482 ix86_compare_op0 = hi[0];
10483 ix86_compare_op1 = hi[1];
10484
10485 if (code1 != UNKNOWN)
10486 ix86_expand_branch (code1, label);
10487 if (code2 != UNKNOWN)
10488 ix86_expand_branch (code2, label2);
10489
10490 ix86_compare_op0 = lo[0];
10491 ix86_compare_op1 = lo[1];
10492 ix86_expand_branch (code3, label);
10493
10494 if (code2 != UNKNOWN)
10495 emit_label (label2);
10496 return;
10497 }
10498
10499 default:
10500 gcc_unreachable ();
10501 }
10502 }
10503
10504 /* Split branch based on floating point condition. */
10505 void
10506 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
10507 rtx target1, rtx target2, rtx tmp, rtx pushed)
10508 {
10509 rtx second, bypass;
10510 rtx label = NULL_RTX;
10511 rtx condition;
10512 int bypass_probability = -1, second_probability = -1, probability = -1;
10513 rtx i;
10514
10515 if (target2 != pc_rtx)
10516 {
10517 rtx tmp = target2;
10518 code = reverse_condition_maybe_unordered (code);
10519 target2 = target1;
10520 target1 = tmp;
10521 }
10522
10523 condition = ix86_expand_fp_compare (code, op1, op2,
10524 tmp, &second, &bypass);
10525
10526 /* Remove pushed operand from stack. */
10527 if (pushed)
10528 ix86_free_from_memory (GET_MODE (pushed));
10529
10530 if (split_branch_probability >= 0)
10531 {
10532 /* Distribute the probabilities across the jumps.
10533 Assume that BYPASS and SECOND always test
10534 for UNORDERED. */
10535 probability = split_branch_probability;
10536
10537 /* A value of 1 is low enough that there is no need for the probability
10538 to be updated. Later we may run some experiments and see
10539 whether unordered values are more frequent in practice. */
10540 if (bypass)
10541 bypass_probability = 1;
10542 if (second)
10543 second_probability = 1;
10544 }
10545 if (bypass != NULL_RTX)
10546 {
10547 label = gen_label_rtx ();
10548 i = emit_jump_insn (gen_rtx_SET
10549 (VOIDmode, pc_rtx,
10550 gen_rtx_IF_THEN_ELSE (VOIDmode,
10551 bypass,
10552 gen_rtx_LABEL_REF (VOIDmode,
10553 label),
10554 pc_rtx)));
10555 if (bypass_probability >= 0)
10556 REG_NOTES (i)
10557 = gen_rtx_EXPR_LIST (REG_BR_PROB,
10558 GEN_INT (bypass_probability),
10559 REG_NOTES (i));
10560 }
10561 i = emit_jump_insn (gen_rtx_SET
10562 (VOIDmode, pc_rtx,
10563 gen_rtx_IF_THEN_ELSE (VOIDmode,
10564 condition, target1, target2)));
10565 if (probability >= 0)
10566 REG_NOTES (i)
10567 = gen_rtx_EXPR_LIST (REG_BR_PROB,
10568 GEN_INT (probability),
10569 REG_NOTES (i));
10570 if (second != NULL_RTX)
10571 {
10572 i = emit_jump_insn (gen_rtx_SET
10573 (VOIDmode, pc_rtx,
10574 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
10575 target2)));
10576 if (second_probability >= 0)
10577 REG_NOTES (i)
10578 = gen_rtx_EXPR_LIST (REG_BR_PROB,
10579 GEN_INT (second_probability),
10580 REG_NOTES (i));
10581 }
10582 if (label != NULL_RTX)
10583 emit_label (label);
10584 }
10585
10586 int
10587 ix86_expand_setcc (enum rtx_code code, rtx dest)
10588 {
10589 rtx ret, tmp, tmpreg, equiv;
10590 rtx second_test, bypass_test;
10591
10592 if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
10593 return 0; /* FAIL */
10594
10595 gcc_assert (GET_MODE (dest) == QImode);
10596
10597 ret = ix86_expand_compare (code, &second_test, &bypass_test);
10598 PUT_MODE (ret, QImode);
10599
10600 tmp = dest;
10601 tmpreg = dest;
10602
10603 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
10604 if (bypass_test || second_test)
10605 {
10606 rtx test = second_test;
10607 int bypass = 0;
10608 rtx tmp2 = gen_reg_rtx (QImode);
10609 if (bypass_test)
10610 {
10611 gcc_assert (!second_test);
10612 test = bypass_test;
10613 bypass = 1;
10614 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
10615 }
10616 PUT_MODE (test, QImode);
10617 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
10618
10619 if (bypass)
10620 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
10621 else
10622 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
10623 }
10624
10625 /* Attach a REG_EQUAL note describing the comparison result. */
10626 if (ix86_compare_op0 && ix86_compare_op1)
10627 {
10628 equiv = simplify_gen_relational (code, QImode,
10629 GET_MODE (ix86_compare_op0),
10630 ix86_compare_op0, ix86_compare_op1);
10631 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
10632 }
10633
10634 return 1; /* DONE */
10635 }
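
/* Sketch of the second/bypass handling above for an IEEE-mode FP "a == b"
   (hypothetical): one setcc materializes the UNEQ flag test, a second
   setcc materializes the bypass test with its condition reversed from
   UNORDERED to ORDERED, and the two QImode results are ANDed so that an
   unordered operand pair yields 0. */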
10636
10637 /* Expand a comparison setting or clearing the carry flag. Return true when
10638 successful and set *POP to the comparison operation. */
10639 static bool
10640 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
10641 {
10642 enum machine_mode mode =
10643 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
10644
10645 /* Do not handle DImode compares that go through the special path. Also we
10646 can't deal with FP compares yet. It would be possible to add this. */
10647 if (mode == (TARGET_64BIT ? TImode : DImode))
10648 return false;
10649 if (FLOAT_MODE_P (mode))
10650 {
10651 rtx second_test = NULL, bypass_test = NULL;
10652 rtx compare_op, compare_seq;
10653
10654 /* Shortcut: the following common codes never translate into carry flag compares. */
10655 if (code == EQ || code == NE || code == UNEQ || code == LTGT
10656 || code == ORDERED || code == UNORDERED)
10657 return false;
10658
10659 /* These comparisons require zero flag; swap operands so they won't. */
10660 if ((code == GT || code == UNLE || code == LE || code == UNGT)
10661 && !TARGET_IEEE_FP)
10662 {
10663 rtx tmp = op0;
10664 op0 = op1;
10665 op1 = tmp;
10666 code = swap_condition (code);
10667 }
10668
10669 /* Try to expand the comparison and verify that we end up with a carry flag
10670 based comparison. This fails only when we decide to expand the
10671 comparison using arithmetic, which is not a common scenario. */
10672 start_sequence ();
10673 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
10674 &second_test, &bypass_test);
10675 compare_seq = get_insns ();
10676 end_sequence ();
10677
10678 if (second_test || bypass_test)
10679 return false;
10680 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10681 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10682 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
10683 else
10684 code = GET_CODE (compare_op);
10685 if (code != LTU && code != GEU)
10686 return false;
10687 emit_insn (compare_seq);
10688 *pop = compare_op;
10689 return true;
10690 }
10691 if (!INTEGRAL_MODE_P (mode))
10692 return false;
10693 switch (code)
10694 {
10695 case LTU:
10696 case GEU:
10697 break;
10698
10699 /* Convert a==0 into (unsigned)a<1. */
10700 case EQ:
10701 case NE:
10702 if (op1 != const0_rtx)
10703 return false;
10704 op1 = const1_rtx;
10705 code = (code == EQ ? LTU : GEU);
10706 break;
10707
10708 /* Convert a>b into b<a or a>=b-1. */
10709 case GTU:
10710 case LEU:
10711 if (GET_CODE (op1) == CONST_INT)
10712 {
10713 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
10714 /* Bail out on overflow. We could still swap the operands, but that
10715 would force loading of the constant into a register. */
10716 if (op1 == const0_rtx
10717 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
10718 return false;
10719 code = (code == GTU ? GEU : LTU);
10720 }
10721 else
10722 {
10723 rtx tmp = op1;
10724 op1 = op0;
10725 op0 = tmp;
10726 code = (code == GTU ? LTU : GEU);
10727 }
10728 break;
10729
10730 /* Convert a>=0 into (unsigned)a<0x80000000. */
10731 case LT:
10732 case GE:
10733 if (mode == DImode || op1 != const0_rtx)
10734 return false;
10735 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
10736 code = (code == LT ? GEU : LTU);
10737 break;
10738 case LE:
10739 case GT:
10740 if (mode == DImode || op1 != constm1_rtx)
10741 return false;
10742 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
10743 code = (code == LE ? GEU : LTU);
10744 break;
10745
10746 default:
10747 return false;
10748 }
10749 /* Swapping operands may cause a constant to appear as the first operand. */
10750 if (!nonimmediate_operand (op0, VOIDmode))
10751 {
10752 if (no_new_pseudos)
10753 return false;
10754 op0 = force_reg (mode, op0);
10755 }
10756 ix86_compare_op0 = op0;
10757 ix86_compare_op1 = op1;
10758 *pop = ix86_expand_compare (code, NULL, NULL);
10759 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
10760 return true;
10761 }
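
/* Worked examples of the rewrites above (hypothetical operands):
   "a == 0" becomes "(unsigned) a < 1" (LTU), an unsigned "a > 41"
   becomes "a >= 42" (GEU), and a signed "a >= 0" becomes
   "(unsigned) a < 0x80000000" (LTU); each form is decided by a single
   carry flag test. */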
10762
10763 int
10764 ix86_expand_int_movcc (rtx operands[])
10765 {
10766 enum rtx_code code = GET_CODE (operands[1]), compare_code;
10767 rtx compare_seq, compare_op;
10768 rtx second_test, bypass_test;
10769 enum machine_mode mode = GET_MODE (operands[0]);
10770 bool sign_bit_compare_p = false;
10771
10772 start_sequence ();
10773 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10774 compare_seq = get_insns ();
10775 end_sequence ();
10776
10777 compare_code = GET_CODE (compare_op);
10778
10779 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
10780 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
10781 sign_bit_compare_p = true;
10782
10783 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
10784 HImode insns, we'd be swallowed in word prefix ops. */
10785
10786 if ((mode != HImode || TARGET_FAST_PREFIX)
10787 && (mode != (TARGET_64BIT ? TImode : DImode))
10788 && GET_CODE (operands[2]) == CONST_INT
10789 && GET_CODE (operands[3]) == CONST_INT)
10790 {
10791 rtx out = operands[0];
10792 HOST_WIDE_INT ct = INTVAL (operands[2]);
10793 HOST_WIDE_INT cf = INTVAL (operands[3]);
10794 HOST_WIDE_INT diff;
10795
10796 diff = ct - cf;
10797 /* Sign bit compares are better done using shifts than by using
10798 sbb. */
10799 if (sign_bit_compare_p
10800 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
10801 ix86_compare_op1, &compare_op))
10802 {
10803 /* Detect overlap between destination and compare sources. */
10804 rtx tmp = out;
10805
10806 if (!sign_bit_compare_p)
10807 {
10808 bool fpcmp = false;
10809
10810 compare_code = GET_CODE (compare_op);
10811
10812 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10813 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10814 {
10815 fpcmp = true;
10816 compare_code = ix86_fp_compare_code_to_integer (compare_code);
10817 }
10818
10819 /* To simplify the rest of the code, restrict to the GEU case. */
10820 if (compare_code == LTU)
10821 {
10822 HOST_WIDE_INT tmp = ct;
10823 ct = cf;
10824 cf = tmp;
10825 compare_code = reverse_condition (compare_code);
10826 code = reverse_condition (code);
10827 }
10828 else
10829 {
10830 if (fpcmp)
10831 PUT_CODE (compare_op,
10832 reverse_condition_maybe_unordered
10833 (GET_CODE (compare_op)));
10834 else
10835 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
10836 }
10837 diff = ct - cf;
10838
10839 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
10840 || reg_overlap_mentioned_p (out, ix86_compare_op1))
10841 tmp = gen_reg_rtx (mode);
10842
10843 if (mode == DImode)
10844 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
10845 else
10846 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
10847 }
10848 else
10849 {
10850 if (code == GT || code == GE)
10851 code = reverse_condition (code);
10852 else
10853 {
10854 HOST_WIDE_INT tmp = ct;
10855 ct = cf;
10856 cf = tmp;
10857 diff = ct - cf;
10858 }
10859 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
10860 ix86_compare_op1, VOIDmode, 0, -1);
10861 }
10862
10863 if (diff == 1)
10864 {
10865 /*
10866 * cmpl op0,op1
10867 * sbbl dest,dest
10868 * [addl dest, ct]
10869 *
10870 * Size 5 - 8.
10871 */
10872 if (ct)
10873 tmp = expand_simple_binop (mode, PLUS,
10874 tmp, GEN_INT (ct),
10875 copy_rtx (tmp), 1, OPTAB_DIRECT);
10876 }
10877 else if (cf == -1)
10878 {
10879 /*
10880 * cmpl op0,op1
10881 * sbbl dest,dest
10882 * orl $ct, dest
10883 *
10884 * Size 8.
10885 */
10886 tmp = expand_simple_binop (mode, IOR,
10887 tmp, GEN_INT (ct),
10888 copy_rtx (tmp), 1, OPTAB_DIRECT);
10889 }
10890 else if (diff == -1 && ct)
10891 {
10892 /*
10893 * cmpl op0,op1
10894 * sbbl dest,dest
10895 * notl dest
10896 * [addl dest, cf]
10897 *
10898 * Size 8 - 11.
10899 */
10900 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
10901 if (cf)
10902 tmp = expand_simple_binop (mode, PLUS,
10903 copy_rtx (tmp), GEN_INT (cf),
10904 copy_rtx (tmp), 1, OPTAB_DIRECT);
10905 }
10906 else
10907 {
10908 /*
10909 * cmpl op0,op1
10910 * sbbl dest,dest
10911 * [notl dest]
10912 * andl cf - ct, dest
10913 * [addl dest, ct]
10914 *
10915 * Size 8 - 11.
10916 */
10917
10918 if (cf == 0)
10919 {
10920 cf = ct;
10921 ct = 0;
10922 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
10923 }
10924
10925 tmp = expand_simple_binop (mode, AND,
10926 copy_rtx (tmp),
10927 gen_int_mode (cf - ct, mode),
10928 copy_rtx (tmp), 1, OPTAB_DIRECT);
10929 if (ct)
10930 tmp = expand_simple_binop (mode, PLUS,
10931 copy_rtx (tmp), GEN_INT (ct),
10932 copy_rtx (tmp), 1, OPTAB_DIRECT);
10933 }
10934
10935 if (!rtx_equal_p (tmp, out))
10936 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
10937
10938 return 1; /* DONE */
10939 }
10940
10941 if (diff < 0)
10942 {
10943 HOST_WIDE_INT tmp;
10944 tmp = ct, ct = cf, cf = tmp;
10945 diff = -diff;
10946 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
10947 {
10948 /* We may be reversing an unordered compare to a normal compare, which
10949 is not valid in general (we may convert a non-trapping condition
10950 to a trapping one); however, on i386 we currently emit all
10951 comparisons unordered. */
10952 compare_code = reverse_condition_maybe_unordered (compare_code);
10953 code = reverse_condition_maybe_unordered (code);
10954 }
10955 else
10956 {
10957 compare_code = reverse_condition (compare_code);
10958 code = reverse_condition (code);
10959 }
10960 }
10961
10962 compare_code = UNKNOWN;
10963 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
10964 && GET_CODE (ix86_compare_op1) == CONST_INT)
10965 {
10966 if (ix86_compare_op1 == const0_rtx
10967 && (code == LT || code == GE))
10968 compare_code = code;
10969 else if (ix86_compare_op1 == constm1_rtx)
10970 {
10971 if (code == LE)
10972 compare_code = LT;
10973 else if (code == GT)
10974 compare_code = GE;
10975 }
10976 }
10977
10978 /* Optimize dest = (op0 < 0) ? -1 : cf. */
10979 if (compare_code != UNKNOWN
10980 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
10981 && (cf == -1 || ct == -1))
10982 {
10983 /* If lea code below could be used, only optimize
10984 if it results in a 2 insn sequence. */
10985
10986 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
10987 || diff == 3 || diff == 5 || diff == 9)
10988 || (compare_code == LT && ct == -1)
10989 || (compare_code == GE && cf == -1))
10990 {
10991 /*
10992 * notl op1 (if necessary)
10993 * sarl $31, op1
10994 * orl cf, op1
10995 */
10996 if (ct != -1)
10997 {
10998 cf = ct;
10999 ct = -1;
11000 code = reverse_condition (code);
11001 }
11002
11003 out = emit_store_flag (out, code, ix86_compare_op0,
11004 ix86_compare_op1, VOIDmode, 0, -1);
11005
11006 out = expand_simple_binop (mode, IOR,
11007 out, GEN_INT (cf),
11008 out, 1, OPTAB_DIRECT);
11009 if (out != operands[0])
11010 emit_move_insn (operands[0], out);
11011
11012 return 1; /* DONE */
11013 }
11014 }
11015
11016
11017 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
11018 || diff == 3 || diff == 5 || diff == 9)
11019 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
11020 && (mode != DImode
11021 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
11022 {
11023 /*
11024 * xorl dest,dest
11025 * cmpl op1,op2
11026 * setcc dest
11027 * lea cf(dest*(ct-cf)),dest
11028 *
11029 * Size 14.
11030 *
11031 * This also catches the degenerate setcc-only case.
11032 */
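/* Illustrative instance: with ct == 5 and cf == 2 we have diff == 3, so
   once setcc has left 0 or 1 in DEST this emits roughly
	leal 2(%reg,%reg,2), %reg
   i.e. DEST = DEST * 3 + 2, yielding 2 or 5 without a branch.  */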
11033
11034 rtx tmp;
11035 int nops;
11036
11037 out = emit_store_flag (out, code, ix86_compare_op0,
11038 ix86_compare_op1, VOIDmode, 0, 1);
11039
11040 nops = 0;
11041 /* On x86_64 the lea instruction operates on Pmode, so we need
11042 the arithmetic done in the proper mode to match. */
11043 if (diff == 1)
11044 tmp = copy_rtx (out);
11045 else
11046 {
11047 rtx out1;
11048 out1 = copy_rtx (out);
11049 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
11050 nops++;
11051 if (diff & 1)
11052 {
11053 tmp = gen_rtx_PLUS (mode, tmp, out1);
11054 nops++;
11055 }
11056 }
11057 if (cf != 0)
11058 {
11059 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
11060 nops++;
11061 }
11062 if (!rtx_equal_p (tmp, out))
11063 {
11064 if (nops == 1)
11065 out = force_operand (tmp, copy_rtx (out));
11066 else
11067 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
11068 }
11069 if (!rtx_equal_p (out, operands[0]))
11070 emit_move_insn (operands[0], copy_rtx (out));
11071
11072 return 1; /* DONE */
11073 }
11074
11075 /*
11076 * General case: Jumpful:
11077 * xorl dest,dest cmpl op1, op2
11078 * cmpl op1, op2 movl ct, dest
11079 * setcc dest jcc 1f
11080 * decl dest movl cf, dest
11081 * andl (cf-ct),dest 1:
11082 * addl ct,dest
11083 *
11084 * Size 20. Size 14.
11085 *
11086 * This is reasonably steep, but branch mispredict costs are
11087 * high on modern cpus, so consider failing only if optimizing
11088 * for space.
11089 */
11090
11091 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
11092 && BRANCH_COST >= 2)
11093 {
11094 if (cf == 0)
11095 {
11096 cf = ct;
11097 ct = 0;
11098 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
11099 /* We may be reversing an unordered compare to a normal compare,
11100 which is not valid in general (we may convert a non-trapping
11101 condition to a trapping one); however, on i386 we currently
11102 emit all comparisons unordered. */
11103 code = reverse_condition_maybe_unordered (code);
11104 else
11105 {
11106 code = reverse_condition (code);
11107 if (compare_code != UNKNOWN)
11108 compare_code = reverse_condition (compare_code);
11109 }
11110 }
11111
11112 if (compare_code != UNKNOWN)
11113 {
11114 /* notl op1 (if needed)
11115 sarl $31, op1
11116 andl (cf-ct), op1
11117 addl ct, op1
11118
11119 For x < 0 (resp. x <= -1) there will be no notl,
11120 so if possible swap the constants to get rid of the
11121 complement.
11122 True/false will be -1/0 while code below (store flag
11123 followed by decrement) is 0/-1, so the constants need
11124 to be exchanged once more. */
11125
11126 if (compare_code == GE || !cf)
11127 {
11128 code = reverse_condition (code);
11129 compare_code = LT;
11130 }
11131 else
11132 {
11133 HOST_WIDE_INT tmp = cf;
11134 cf = ct;
11135 ct = tmp;
11136 }
11137
11138 out = emit_store_flag (out, code, ix86_compare_op0,
11139 ix86_compare_op1, VOIDmode, 0, -1);
11140 }
11141 else
11142 {
11143 out = emit_store_flag (out, code, ix86_compare_op0,
11144 ix86_compare_op1, VOIDmode, 0, 1);
11145
11146 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
11147 copy_rtx (out), 1, OPTAB_DIRECT);
11148 }
11149
11150 out = expand_simple_binop (mode, AND, copy_rtx (out),
11151 gen_int_mode (cf - ct, mode),
11152 copy_rtx (out), 1, OPTAB_DIRECT);
11153 if (ct)
11154 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
11155 copy_rtx (out), 1, OPTAB_DIRECT);
11156 if (!rtx_equal_p (out, operands[0]))
11157 emit_move_insn (operands[0], copy_rtx (out));
11158
11159 return 1; /* DONE */
11160 }
11161 }
11162
11163 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
11164 {
11165 /* Try a few more things with specific constants and a variable. */
11166
11167 optab op;
11168 rtx var, orig_out, out, tmp;
11169
11170 if (BRANCH_COST <= 2)
11171 return 0; /* FAIL */
11172
11173 /* If one of the two operands is an interesting constant, load a 0/-1
11174 constant for the other arm and mask the variable in with a logical operation. */
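/* Sketch of the expansion below: for dest = cond ? 0 : var we recurse to
   compute tmp = cond ? 0 : -1 and then emit dest = tmp & var; likewise
   dest = cond ? -1 : var becomes tmp = cond ? -1 : 0 followed by
   dest = tmp | var.  */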
11175
11176 if (GET_CODE (operands[2]) == CONST_INT)
11177 {
11178 var = operands[3];
11179 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
11180 operands[3] = constm1_rtx, op = and_optab;
11181 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
11182 operands[3] = const0_rtx, op = ior_optab;
11183 else
11184 return 0; /* FAIL */
11185 }
11186 else if (GET_CODE (operands[3]) == CONST_INT)
11187 {
11188 var = operands[2];
11189 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
11190 operands[2] = constm1_rtx, op = and_optab;
11191 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
11192 operands[2] = const0_rtx, op = ior_optab;
11193 else
11194 return 0; /* FAIL */
11195 }
11196 else
11197 return 0; /* FAIL */
11198
11199 orig_out = operands[0];
11200 tmp = gen_reg_rtx (mode);
11201 operands[0] = tmp;
11202
11203 /* Recurse to get the constant loaded. */
11204 if (ix86_expand_int_movcc (operands) == 0)
11205 return 0; /* FAIL */
11206
11207 /* Mask in the interesting variable. */
11208 out = expand_binop (mode, op, var, tmp, orig_out, 0,
11209 OPTAB_WIDEN);
11210 if (!rtx_equal_p (out, orig_out))
11211 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
11212
11213 return 1; /* DONE */
11214 }
11215
11216 /*
11217 * For comparison with above,
11218 *
11219 * movl cf,dest
11220 * movl ct,tmp
11221 * cmpl op1,op2
11222 * cmovcc tmp,dest
11223 *
11224 * Size 15.
11225 */
11226
11227 if (! nonimmediate_operand (operands[2], mode))
11228 operands[2] = force_reg (mode, operands[2]);
11229 if (! nonimmediate_operand (operands[3], mode))
11230 operands[3] = force_reg (mode, operands[3]);
11231
11232 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
11233 {
11234 rtx tmp = gen_reg_rtx (mode);
11235 emit_move_insn (tmp, operands[3]);
11236 operands[3] = tmp;
11237 }
11238 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
11239 {
11240 rtx tmp = gen_reg_rtx (mode);
11241 emit_move_insn (tmp, operands[2]);
11242 operands[2] = tmp;
11243 }
11244
11245 if (! register_operand (operands[2], VOIDmode)
11246 && (mode == QImode
11247 || ! register_operand (operands[3], VOIDmode)))
11248 operands[2] = force_reg (mode, operands[2]);
11249
11250 if (mode == QImode
11251 && ! register_operand (operands[3], VOIDmode))
11252 operands[3] = force_reg (mode, operands[3]);
11253
11254 emit_insn (compare_seq);
11255 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11256 gen_rtx_IF_THEN_ELSE (mode,
11257 compare_op, operands[2],
11258 operands[3])));
11259 if (bypass_test)
11260 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
11261 gen_rtx_IF_THEN_ELSE (mode,
11262 bypass_test,
11263 copy_rtx (operands[3]),
11264 copy_rtx (operands[0]))));
11265 if (second_test)
11266 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
11267 gen_rtx_IF_THEN_ELSE (mode,
11268 second_test,
11269 copy_rtx (operands[2]),
11270 copy_rtx (operands[0]))));
11271
11272 return 1; /* DONE */
11273 }
11274
11275 /* Swap, force into registers, or otherwise massage the two operands
11276 to an sse comparison with a mask result. Thus we differ a bit from
11277 ix86_prepare_fp_compare_args which expects to produce a flags result.
11278
11279 The DEST operand exists to help determine whether to commute commutative
11280 operators. The POP0/POP1 operands are updated in place. The new
11281 comparison code is returned, or UNKNOWN if not implementable. */
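/* For example (illustrative): GT is not available as an SSE compare
   predicate, so op0 > op1 is handled below by swapping the operands and
   using LT, which maps directly onto the hardware (cmpltps/cmpltsd).  */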
11282
11283 static enum rtx_code
11284 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
11285 rtx *pop0, rtx *pop1)
11286 {
11287 rtx tmp;
11288
11289 switch (code)
11290 {
11291 case LTGT:
11292 case UNEQ:
11293 /* We have no LTGT as an operator. We could implement it with
11294 NE & ORDERED, but this requires an extra temporary. It's
11295 not clear that it's worth it. */
11296 return UNKNOWN;
11297
11298 case LT:
11299 case LE:
11300 case UNGT:
11301 case UNGE:
11302 /* These are supported directly. */
11303 break;
11304
11305 case EQ:
11306 case NE:
11307 case UNORDERED:
11308 case ORDERED:
11309 /* For commutative operators, try to canonicalize the destination
11310 operand to be first in the comparison - this helps reload to
11311 avoid extra moves. */
11312 if (!dest || !rtx_equal_p (dest, *pop1))
11313 break;
11314 /* FALLTHRU */
11315
11316 case GE:
11317 case GT:
11318 case UNLE:
11319 case UNLT:
11320 /* These are not supported directly. Swap the comparison operands
11321 to transform into something that is supported. */
11322 tmp = *pop0;
11323 *pop0 = *pop1;
11324 *pop1 = tmp;
11325 code = swap_condition (code);
11326 break;
11327
11328 default:
11329 gcc_unreachable ();
11330 }
11331
11332 return code;
11333 }
11334
11335 /* Detect conditional moves that exactly match min/max operational
11336 semantics. Note that this is IEEE safe, as long as we don't
11337 interchange the operands.
11338
11339 Returns FALSE if this conditional move doesn't match a MIN/MAX,
11340 and TRUE if the operation is successful and instructions are emitted. */
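/* Illustrative example: dest = (a < b) ? a : b, with CMP_OP0 == IF_TRUE == a
   and CMP_OP1 == IF_FALSE == b, is recognized as a MIN.  Under
   -ffinite-math-only -funsafe-math-optimizations it becomes a plain SMIN
   (typically a single minss/minps); otherwise the IEEE-safe UNSPEC path
   below is used.  */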
11341
11342 static bool
11343 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
11344 rtx cmp_op1, rtx if_true, rtx if_false)
11345 {
11346 enum machine_mode mode;
11347 bool is_min;
11348 rtx tmp;
11349
11350 if (code == LT)
11351 ;
11352 else if (code == UNGE)
11353 {
11354 tmp = if_true;
11355 if_true = if_false;
11356 if_false = tmp;
11357 }
11358 else
11359 return false;
11360
11361 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
11362 is_min = true;
11363 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
11364 is_min = false;
11365 else
11366 return false;
11367
11368 mode = GET_MODE (dest);
11369
11370 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
11371 but MODE may be a vector mode and thus not appropriate. */
11372 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
11373 {
11374 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
11375 rtvec v;
11376
11377 if_true = force_reg (mode, if_true);
11378 v = gen_rtvec (2, if_true, if_false);
11379 tmp = gen_rtx_UNSPEC (mode, v, u);
11380 }
11381 else
11382 {
11383 code = is_min ? SMIN : SMAX;
11384 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
11385 }
11386
11387 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
11388 return true;
11389 }
11390
11391 /* Expand an sse vector comparison. Return the register with the result. */
11392
11393 static rtx
11394 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
11395 rtx op_true, rtx op_false)
11396 {
11397 enum machine_mode mode = GET_MODE (dest);
11398 rtx x;
11399
11400 cmp_op0 = force_reg (mode, cmp_op0);
11401 if (!nonimmediate_operand (cmp_op1, mode))
11402 cmp_op1 = force_reg (mode, cmp_op1);
11403
11404 if (optimize
11405 || reg_overlap_mentioned_p (dest, op_true)
11406 || reg_overlap_mentioned_p (dest, op_false))
11407 dest = gen_reg_rtx (mode);
11408
11409 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
11410 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11411
11412 return dest;
11413 }
11414
11415 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
11416 operations. This is used for both scalar and vector conditional moves. */
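/* In the general case the expansion below computes
	DEST = (CMP & OP_TRUE) | (~CMP & OP_FALSE);
   when OP_FALSE is zero this collapses to CMP & OP_TRUE, and when OP_TRUE
   is zero to ~CMP & OP_FALSE.  */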
11417
11418 static void
11419 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
11420 {
11421 enum machine_mode mode = GET_MODE (dest);
11422 rtx t2, t3, x;
11423
11424 if (op_false == CONST0_RTX (mode))
11425 {
11426 op_true = force_reg (mode, op_true);
11427 x = gen_rtx_AND (mode, cmp, op_true);
11428 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11429 }
11430 else if (op_true == CONST0_RTX (mode))
11431 {
11432 op_false = force_reg (mode, op_false);
11433 x = gen_rtx_NOT (mode, cmp);
11434 x = gen_rtx_AND (mode, x, op_false);
11435 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11436 }
11437 else
11438 {
11439 op_true = force_reg (mode, op_true);
11440 op_false = force_reg (mode, op_false);
11441
11442 t2 = gen_reg_rtx (mode);
11443 if (optimize)
11444 t3 = gen_reg_rtx (mode);
11445 else
11446 t3 = dest;
11447
11448 x = gen_rtx_AND (mode, op_true, cmp);
11449 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
11450
11451 x = gen_rtx_NOT (mode, cmp);
11452 x = gen_rtx_AND (mode, x, op_false);
11453 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
11454
11455 x = gen_rtx_IOR (mode, t3, t2);
11456 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11457 }
11458 }
11459
11460 /* Expand a floating-point conditional move. Return true if successful. */
11461
11462 int
11463 ix86_expand_fp_movcc (rtx operands[])
11464 {
11465 enum machine_mode mode = GET_MODE (operands[0]);
11466 enum rtx_code code = GET_CODE (operands[1]);
11467 rtx tmp, compare_op, second_test, bypass_test;
11468
11469 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
11470 {
11471 enum machine_mode cmode;
11472
11473 /* Since we have no cmove for sse registers, don't force bad register
11474 allocation just to gain access to it. Deny movcc when the
11475 comparison mode doesn't match the move mode. */
11476 cmode = GET_MODE (ix86_compare_op0);
11477 if (cmode == VOIDmode)
11478 cmode = GET_MODE (ix86_compare_op1);
11479 if (cmode != mode)
11480 return 0;
11481
11482 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
11483 &ix86_compare_op0,
11484 &ix86_compare_op1);
11485 if (code == UNKNOWN)
11486 return 0;
11487
11488 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
11489 ix86_compare_op1, operands[2],
11490 operands[3]))
11491 return 1;
11492
11493 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
11494 ix86_compare_op1, operands[2], operands[3]);
11495 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
11496 return 1;
11497 }
11498
11499 /* The floating point conditional move instructions don't directly
11500 support conditions resulting from a signed integer comparison. */
11501
11502 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
11503
11507 if (!fcmov_comparison_operator (compare_op, VOIDmode))
11508 {
11509 gcc_assert (!second_test && !bypass_test);
11510 tmp = gen_reg_rtx (QImode);
11511 ix86_expand_setcc (code, tmp);
11512 code = NE;
11513 ix86_compare_op0 = tmp;
11514 ix86_compare_op1 = const0_rtx;
11515 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
11516 }
11517 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
11518 {
11519 tmp = gen_reg_rtx (mode);
11520 emit_move_insn (tmp, operands[3]);
11521 operands[3] = tmp;
11522 }
11523 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
11524 {
11525 tmp = gen_reg_rtx (mode);
11526 emit_move_insn (tmp, operands[2]);
11527 operands[2] = tmp;
11528 }
11529
11530 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11531 gen_rtx_IF_THEN_ELSE (mode, compare_op,
11532 operands[2], operands[3])));
11533 if (bypass_test)
11534 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11535 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
11536 operands[3], operands[0])));
11537 if (second_test)
11538 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11539 gen_rtx_IF_THEN_ELSE (mode, second_test,
11540 operands[2], operands[0])));
11541
11542 return 1;
11543 }
11544
11545 /* Expand a floating-point vector conditional move; a vcond operation
11546 rather than a movcc operation. */
11547
11548 bool
11549 ix86_expand_fp_vcond (rtx operands[])
11550 {
11551 enum rtx_code code = GET_CODE (operands[3]);
11552 rtx cmp;
11553
11554 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
11555 &operands[4], &operands[5]);
11556 if (code == UNKNOWN)
11557 return false;
11558
11559 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
11560 operands[5], operands[1], operands[2]))
11561 return true;
11562
11563 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
11564 operands[1], operands[2]);
11565 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
11566 return true;
11567 }
11568
11569 /* Expand a signed integral vector conditional move. */
11570
11571 bool
11572 ix86_expand_int_vcond (rtx operands[])
11573 {
11574 enum machine_mode mode = GET_MODE (operands[0]);
11575 enum rtx_code code = GET_CODE (operands[3]);
11576 bool negate = false;
11577 rtx x, cop0, cop1;
11578
11579 cop0 = operands[4];
11580 cop1 = operands[5];
11581
11582 /* Canonicalize the comparison to EQ, GT, GTU. */
11583 switch (code)
11584 {
11585 case EQ:
11586 case GT:
11587 case GTU:
11588 break;
11589
11590 case NE:
11591 case LE:
11592 case LEU:
11593 code = reverse_condition (code);
11594 negate = true;
11595 break;
11596
11597 case GE:
11598 case GEU:
11599 code = reverse_condition (code);
11600 negate = true;
11601 /* FALLTHRU */
11602
11603 case LT:
11604 case LTU:
11605 code = swap_condition (code);
11606 x = cop0, cop0 = cop1, cop1 = x;
11607 break;
11608
11609 default:
11610 gcc_unreachable ();
11611 }
11612
11613 /* Unsigned parallel compare is not supported by the hardware. Play some
11614 tricks to turn this into a signed comparison against 0. */
11615 if (code == GTU)
11616 {
11617 cop0 = force_reg (mode, cop0);
11618
11619 switch (mode)
11620 {
11621 case V4SImode:
11622 {
11623 rtx t1, t2, mask;
11624
11625 /* Perform a parallel modulo subtraction. */
11626 t1 = gen_reg_rtx (mode);
11627 emit_insn (gen_subv4si3 (t1, cop0, cop1));
11628
11629 /* Extract the original sign bit of op0. */
11630 mask = GEN_INT (-0x80000000);
11631 mask = gen_rtx_CONST_VECTOR (mode,
11632 gen_rtvec (4, mask, mask, mask, mask));
11633 mask = force_reg (mode, mask);
11634 t2 = gen_reg_rtx (mode);
11635 emit_insn (gen_andv4si3 (t2, cop0, mask));
11636
11637 /* XOR it back into the result of the subtraction. This results
11638 in the sign bit set iff we saw unsigned underflow. */
11639 x = gen_reg_rtx (mode);
11640 emit_insn (gen_xorv4si3 (x, t1, t2));
11641
11642 code = GT;
11643 }
11644 break;
11645
11646 case V16QImode:
11647 case V8HImode:
11648 /* Perform a parallel unsigned saturating subtraction. */
11649 x = gen_reg_rtx (mode);
11650 emit_insn (gen_rtx_SET (VOIDmode, x,
11651 gen_rtx_US_MINUS (mode, cop0, cop1)));
11652
11653 code = EQ;
11654 negate = !negate;
11655 break;
11656
11657 default:
11658 gcc_unreachable ();
11659 }
11660
11661 cop0 = x;
11662 cop1 = CONST0_RTX (mode);
11663 }
11664
11665 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
11666 operands[1+negate], operands[2-negate]);
11667
11668 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
11669 operands[2-negate]);
11670 return true;
11671 }
11672
11673 /* Expand conditional increment or decrement using adc/sbb instructions.
11674 The default case using setcc followed by the conditional move can be
11675 done by generic code. */
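/* Illustrative example: for
	dest = op2 + ((unsigned) a < (unsigned) b)
   the carry flag produced by the compare feeds the addition directly,
   giving roughly cmpl %b, %a ; adcl $0, %dest instead of a setcc/add
   pair.  */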
11676 int
11677 ix86_expand_int_addcc (rtx operands[])
11678 {
11679 enum rtx_code code = GET_CODE (operands[1]);
11680 rtx compare_op;
11681 rtx val = const0_rtx;
11682 bool fpcmp = false;
11683 enum machine_mode mode = GET_MODE (operands[0]);
11684
11685 if (operands[3] != const1_rtx
11686 && operands[3] != constm1_rtx)
11687 return 0;
11688 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
11689 ix86_compare_op1, &compare_op))
11690 return 0;
11691 code = GET_CODE (compare_op);
11692
11693 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
11694 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
11695 {
11696 fpcmp = true;
11697 code = ix86_fp_compare_code_to_integer (code);
11698 }
11699
11700 if (code != LTU)
11701 {
11702 val = constm1_rtx;
11703 if (fpcmp)
11704 PUT_CODE (compare_op,
11705 reverse_condition_maybe_unordered
11706 (GET_CODE (compare_op)));
11707 else
11708 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
11709 }
11710 PUT_MODE (compare_op, mode);
11711
11712 /* Construct either adc or sbb insn. */
11713 if ((code == LTU) == (operands[3] == constm1_rtx))
11714 {
11715 switch (GET_MODE (operands[0]))
11716 {
11717 case QImode:
11718 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
11719 break;
11720 case HImode:
11721 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
11722 break;
11723 case SImode:
11724 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
11725 break;
11726 case DImode:
11727 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
11728 break;
11729 default:
11730 gcc_unreachable ();
11731 }
11732 }
11733 else
11734 {
11735 switch (GET_MODE (operands[0]))
11736 {
11737 case QImode:
11738 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
11739 break;
11740 case HImode:
11741 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
11742 break;
11743 case SImode:
11744 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
11745 break;
11746 case DImode:
11747 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
11748 break;
11749 default:
11750 gcc_unreachable ();
11751 }
11752 }
11753 return 1; /* DONE */
11754 }
11755
11756
11757 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
11758 works for floating point parameters and non-offsettable memories.
11759 For pushes, it returns just stack offsets; the values will be saved
11760 in the right order. At most three parts are generated. */
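/* For instance (illustrative): on a 32-bit target a DFmode value is
   returned as two SImode parts and an XFmode value as three, while on a
   64-bit target XFmode is split into a DImode part plus an SImode upper
   part.  */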
11761
11762 static int
11763 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
11764 {
11765 int size;
11766
11767 if (!TARGET_64BIT)
11768 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
11769 else
11770 size = (GET_MODE_SIZE (mode) + 4) / 8;
11771
11772 gcc_assert (GET_CODE (operand) != REG || !MMX_REGNO_P (REGNO (operand)));
11773 gcc_assert (size >= 2 && size <= 3);
11774
11775 /* Optimize constant pool references to immediates. This is used by fp
11776 moves, which force all constants to memory to allow combining. */
11777 if (GET_CODE (operand) == MEM && MEM_READONLY_P (operand))
11778 {
11779 rtx tmp = maybe_get_pool_constant (operand);
11780 if (tmp)
11781 operand = tmp;
11782 }
11783
11784 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
11785 {
11786 /* The only non-offsettable memories we handle are pushes. */
11787 int ok = push_operand (operand, VOIDmode);
11788
11789 gcc_assert (ok);
11790
11791 operand = copy_rtx (operand);
11792 PUT_MODE (operand, Pmode);
11793 parts[0] = parts[1] = parts[2] = operand;
11794 return size;
11795 }
11796
11797 if (GET_CODE (operand) == CONST_VECTOR)
11798 {
11799 enum machine_mode imode = int_mode_for_mode (mode);
11800 /* Caution: if we looked through a constant pool memory above,
11801 the operand may actually have a different mode now. That's
11802 ok, since we want to pun this all the way back to an integer. */
11803 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
11804 gcc_assert (operand != NULL);
11805 mode = imode;
11806 }
11807
11808 if (!TARGET_64BIT)
11809 {
11810 if (mode == DImode)
11811 split_di (&operand, 1, &parts[0], &parts[1]);
11812 else
11813 {
11814 if (REG_P (operand))
11815 {
11816 gcc_assert (reload_completed);
11817 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
11818 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
11819 if (size == 3)
11820 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
11821 }
11822 else if (offsettable_memref_p (operand))
11823 {
11824 operand = adjust_address (operand, SImode, 0);
11825 parts[0] = operand;
11826 parts[1] = adjust_address (operand, SImode, 4);
11827 if (size == 3)
11828 parts[2] = adjust_address (operand, SImode, 8);
11829 }
11830 else if (GET_CODE (operand) == CONST_DOUBLE)
11831 {
11832 REAL_VALUE_TYPE r;
11833 long l[4];
11834
11835 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
11836 switch (mode)
11837 {
11838 case XFmode:
11839 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
11840 parts[2] = gen_int_mode (l[2], SImode);
11841 break;
11842 case DFmode:
11843 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
11844 break;
11845 default:
11846 gcc_unreachable ();
11847 }
11848 parts[1] = gen_int_mode (l[1], SImode);
11849 parts[0] = gen_int_mode (l[0], SImode);
11850 }
11851 else
11852 gcc_unreachable ();
11853 }
11854 }
11855 else
11856 {
11857 if (mode == TImode)
11858 split_ti (&operand, 1, &parts[0], &parts[1]);
11859 if (mode == XFmode || mode == TFmode)
11860 {
11861 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
11862 if (REG_P (operand))
11863 {
11864 gcc_assert (reload_completed);
11865 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
11866 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
11867 }
11868 else if (offsettable_memref_p (operand))
11869 {
11870 operand = adjust_address (operand, DImode, 0);
11871 parts[0] = operand;
11872 parts[1] = adjust_address (operand, upper_mode, 8);
11873 }
11874 else if (GET_CODE (operand) == CONST_DOUBLE)
11875 {
11876 REAL_VALUE_TYPE r;
11877 long l[4];
11878
11879 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
11880 real_to_target (l, &r, mode);
11881
11882 /* Do not use shift by 32 to avoid warning on 32bit systems. */
11883 if (HOST_BITS_PER_WIDE_INT >= 64)
11884 parts[0]
11885 = gen_int_mode
11886 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
11887 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
11888 DImode);
11889 else
11890 parts[0] = immed_double_const (l[0], l[1], DImode);
11891
11892 if (upper_mode == SImode)
11893 parts[1] = gen_int_mode (l[2], SImode);
11894 else if (HOST_BITS_PER_WIDE_INT >= 64)
11895 parts[1]
11896 = gen_int_mode
11897 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
11898 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
11899 DImode);
11900 else
11901 parts[1] = immed_double_const (l[2], l[3], DImode);
11902 }
11903 else
11904 gcc_unreachable ();
11905 }
11906 }
11907
11908 return size;
11909 }
11910
11911 /* Emit insns to perform a move or push of DI, DF, and XF values.
11912 Nothing is returned; all required insns are emitted directly.
11913 Operands 2-4 will hold the destination parts in the correct order;
11914 operands 5-7 hold the corresponding source parts. */
11915
11916 void
11917 ix86_split_long_move (rtx operands[])
11918 {
11919 rtx part[2][3];
11920 int nparts;
11921 int push = 0;
11922 int collisions = 0;
11923 enum machine_mode mode = GET_MODE (operands[0]);
11924
11925 /* The DFmode expanders may ask us to move a double.
11926 For a 64-bit target this is a single move. By hiding that fact
11927 here we simplify the i386.md splitters. */
11928 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
11929 {
11930 /* Optimize constant pool references to immediates. This is used by
11931 fp moves, which force all constants to memory to allow combining. */
11932
11933 if (GET_CODE (operands[1]) == MEM
11934 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
11935 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
11936 operands[1] = get_pool_constant (XEXP (operands[1], 0));
11937 if (push_operand (operands[0], VOIDmode))
11938 {
11939 operands[0] = copy_rtx (operands[0]);
11940 PUT_MODE (operands[0], Pmode);
11941 }
11942 else
11943 operands[0] = gen_lowpart (DImode, operands[0]);
11944 operands[1] = gen_lowpart (DImode, operands[1]);
11945 emit_move_insn (operands[0], operands[1]);
11946 return;
11947 }
11948
11949 /* The only non-offsettable memory we handle is push. */
11950 if (push_operand (operands[0], VOIDmode))
11951 push = 1;
11952 else
11953 gcc_assert (GET_CODE (operands[0]) != MEM
11954 || offsettable_memref_p (operands[0]));
11955
11956 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
11957 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
11958
11959 /* When emitting a push, take care with source operands that live on the stack. */
11960 if (push && GET_CODE (operands[1]) == MEM
11961 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
11962 {
11963 if (nparts == 3)
11964 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
11965 XEXP (part[1][2], 0));
11966 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
11967 XEXP (part[1][1], 0));
11968 }
11969
11970 /* We need to do the copy in the right order in case an address register
11971 of the source overlaps the destination. */
11972 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
11973 {
11974 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
11975 collisions++;
11976 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
11977 collisions++;
11978 if (nparts == 3
11979 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
11980 collisions++;
11981
11982 /* Collision in the middle part can be handled by reordering. */
11983 if (collisions == 1 && nparts == 3
11984 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
11985 {
11986 rtx tmp;
11987 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
11988 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
11989 }
11990
11991 /* If there are more collisions, we can't handle them by reordering.
11992 Do an lea to the last part and use only one colliding move. */
11993 else if (collisions > 1)
11994 {
11995 rtx base;
11996
11997 collisions = 1;
11998
11999 base = part[0][nparts - 1];
12000
12001 /* Handle the case when the last part isn't valid for lea.
12002 Happens in 64-bit mode storing the 12-byte XFmode. */
12003 if (GET_MODE (base) != Pmode)
12004 base = gen_rtx_REG (Pmode, REGNO (base));
12005
12006 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
12007 part[1][0] = replace_equiv_address (part[1][0], base);
12008 part[1][1] = replace_equiv_address (part[1][1],
12009 plus_constant (base, UNITS_PER_WORD));
12010 if (nparts == 3)
12011 part[1][2] = replace_equiv_address (part[1][2],
12012 plus_constant (base, 8));
12013 }
12014 }
12015
12016 if (push)
12017 {
12018 if (!TARGET_64BIT)
12019 {
12020 if (nparts == 3)
12021 {
12022 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
12023 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
12024 emit_move_insn (part[0][2], part[1][2]);
12025 }
12026 }
12027 else
12028 {
12029 /* In 64-bit mode we don't have a 32-bit push available. In case this is
12030 a register, it is OK - we will just use the larger counterpart. We also
12031 retype memory - this comes from an attempt to avoid a REX prefix when
12032 moving the second half of a TFmode value. */
12033 if (GET_MODE (part[1][1]) == SImode)
12034 {
12035 switch (GET_CODE (part[1][1]))
12036 {
12037 case MEM:
12038 part[1][1] = adjust_address (part[1][1], DImode, 0);
12039 break;
12040
12041 case REG:
12042 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
12043 break;
12044
12045 default:
12046 gcc_unreachable ();
12047 }
12048
12049 if (GET_MODE (part[1][0]) == SImode)
12050 part[1][0] = part[1][1];
12051 }
12052 }
12053 emit_move_insn (part[0][1], part[1][1]);
12054 emit_move_insn (part[0][0], part[1][0]);
12055 return;
12056 }
12057
12058 /* Choose the correct order so we do not overwrite the source before it is copied. */
12059 if ((REG_P (part[0][0])
12060 && REG_P (part[1][1])
12061 && (REGNO (part[0][0]) == REGNO (part[1][1])
12062 || (nparts == 3
12063 && REGNO (part[0][0]) == REGNO (part[1][2]))))
12064 || (collisions > 0
12065 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
12066 {
12067 if (nparts == 3)
12068 {
12069 operands[2] = part[0][2];
12070 operands[3] = part[0][1];
12071 operands[4] = part[0][0];
12072 operands[5] = part[1][2];
12073 operands[6] = part[1][1];
12074 operands[7] = part[1][0];
12075 }
12076 else
12077 {
12078 operands[2] = part[0][1];
12079 operands[3] = part[0][0];
12080 operands[5] = part[1][1];
12081 operands[6] = part[1][0];
12082 }
12083 }
12084 else
12085 {
12086 if (nparts == 3)
12087 {
12088 operands[2] = part[0][0];
12089 operands[3] = part[0][1];
12090 operands[4] = part[0][2];
12091 operands[5] = part[1][0];
12092 operands[6] = part[1][1];
12093 operands[7] = part[1][2];
12094 }
12095 else
12096 {
12097 operands[2] = part[0][0];
12098 operands[3] = part[0][1];
12099 operands[5] = part[1][0];
12100 operands[6] = part[1][1];
12101 }
12102 }
12103
12104 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
12105 if (optimize_size)
12106 {
12107 if (GET_CODE (operands[5]) == CONST_INT
12108 && operands[5] != const0_rtx
12109 && REG_P (operands[2]))
12110 {
12111 if (GET_CODE (operands[6]) == CONST_INT
12112 && INTVAL (operands[6]) == INTVAL (operands[5]))
12113 operands[6] = operands[2];
12114
12115 if (nparts == 3
12116 && GET_CODE (operands[7]) == CONST_INT
12117 && INTVAL (operands[7]) == INTVAL (operands[5]))
12118 operands[7] = operands[2];
12119 }
12120
12121 if (nparts == 3
12122 && GET_CODE (operands[6]) == CONST_INT
12123 && operands[6] != const0_rtx
12124 && REG_P (operands[3])
12125 && GET_CODE (operands[7]) == CONST_INT
12126 && INTVAL (operands[7]) == INTVAL (operands[6]))
12127 operands[7] = operands[3];
12128 }
12129
12130 emit_move_insn (operands[2], operands[5]);
12131 emit_move_insn (operands[3], operands[6]);
12132 if (nparts == 3)
12133 emit_move_insn (operands[4], operands[7]);
12134
12135 return;
12136 }
12137
12138 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
12139 left shift by a constant, either using a single shift or
12140 a sequence of add instructions. */
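/* Illustrative example (using the cost model consulted below): a left
   shift by 2 becomes two self-additions (addl %reg, %reg twice) when not
   optimizing for size and 2 * ix86_cost->add <= ix86_cost->shift_const;
   otherwise a single sall $2, %reg is emitted.  */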
12141
12142 static void
12143 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
12144 {
12145 if (count == 1)
12146 {
12147 emit_insn ((mode == DImode
12148 ? gen_addsi3
12149 : gen_adddi3) (operand, operand, operand));
12150 }
12151 else if (!optimize_size
12152 && count * ix86_cost->add <= ix86_cost->shift_const)
12153 {
12154 int i;
12155 for (i=0; i<count; i++)
12156 {
12157 emit_insn ((mode == DImode
12158 ? gen_addsi3
12159 : gen_adddi3) (operand, operand, operand));
12160 }
12161 }
12162 else
12163 emit_insn ((mode == DImode
12164 ? gen_ashlsi3
12165 : gen_ashldi3) (operand, operand, GEN_INT (count)));
12166 }
12167
12168 void
12169 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
12170 {
12171 rtx low[2], high[2];
12172 int count;
12173 const int single_width = mode == DImode ? 32 : 64;
12174
12175 if (GET_CODE (operands[2]) == CONST_INT)
12176 {
12177 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12178 count = INTVAL (operands[2]) & (single_width * 2 - 1);
12179
12180 if (count >= single_width)
12181 {
12182 emit_move_insn (high[0], low[1]);
12183 emit_move_insn (low[0], const0_rtx);
12184
12185 if (count > single_width)
12186 ix86_expand_ashl_const (high[0], count - single_width, mode);
12187 }
12188 else
12189 {
12190 if (!rtx_equal_p (operands[0], operands[1]))
12191 emit_move_insn (operands[0], operands[1]);
12192 emit_insn ((mode == DImode
12193 ? gen_x86_shld_1
12194 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
12195 ix86_expand_ashl_const (low[0], count, mode);
12196 }
12197 return;
12198 }
12199
12200 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12201
12202 if (operands[1] == const1_rtx)
12203 {
12204 /* Assuming we've chosen QImode-capable registers, 1 << N
12205 can be done with two 32/64-bit shifts, no branches, no cmoves. */
12206 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
12207 {
12208 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
12209
12210 ix86_expand_clear (low[0]);
12211 ix86_expand_clear (high[0]);
12212 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
12213
12214 d = gen_lowpart (QImode, low[0]);
12215 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
12216 s = gen_rtx_EQ (QImode, flags, const0_rtx);
12217 emit_insn (gen_rtx_SET (VOIDmode, d, s));
12218
12219 d = gen_lowpart (QImode, high[0]);
12220 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
12221 s = gen_rtx_NE (QImode, flags, const0_rtx);
12222 emit_insn (gen_rtx_SET (VOIDmode, d, s));
12223 }
12224
12225 /* Otherwise, we can get the same results by manually performing
12226 a bit extract operation on bit 5/6, and then performing the two
12227 shifts. The two methods of getting 0/1 into low/high are exactly
12228 the same size. Avoiding the shift in the bit extract case helps
12229 pentium4 a bit; no one else seems to care much either way. */
12230 else
12231 {
12232 rtx x;
12233
12234 if (TARGET_PARTIAL_REG_STALL && !optimize_size)
12235 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
12236 else
12237 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
12238 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
12239
12240 emit_insn ((mode == DImode
12241 ? gen_lshrsi3
12242 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
12243 emit_insn ((mode == DImode
12244 ? gen_andsi3
12245 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
12246 emit_move_insn (low[0], high[0]);
12247 emit_insn ((mode == DImode
12248 ? gen_xorsi3
12249 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
12250 }
12251
12252 emit_insn ((mode == DImode
12253 ? gen_ashlsi3
12254 : gen_ashldi3) (low[0], low[0], operands[2]));
12255 emit_insn ((mode == DImode
12256 ? gen_ashlsi3
12257 : gen_ashldi3) (high[0], high[0], operands[2]));
12258 return;
12259 }
12260
12261 if (operands[1] == constm1_rtx)
12262 {
12263 /* For -1 << N, we can avoid the shld instruction, because we
12264 know that we're shifting 0...31/63 ones into a -1. */
12265 emit_move_insn (low[0], constm1_rtx);
12266 if (optimize_size)
12267 emit_move_insn (high[0], low[0]);
12268 else
12269 emit_move_insn (high[0], constm1_rtx);
12270 }
12271 else
12272 {
12273 if (!rtx_equal_p (operands[0], operands[1]))
12274 emit_move_insn (operands[0], operands[1]);
12275
12276 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12277 emit_insn ((mode == DImode
12278 ? gen_x86_shld_1
12279 : gen_x86_64_shld) (high[0], low[0], operands[2]));
12280 }
12281
12282 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
12283
12284 if (TARGET_CMOVE && scratch)
12285 {
12286 ix86_expand_clear (scratch);
12287 emit_insn ((mode == DImode
12288 ? gen_x86_shift_adj_1
12289 : gen_x86_64_shift_adj) (high[0], low[0], operands[2], scratch));
12290 }
12291 else
12292 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
12293 }
12294
12295 void
12296 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
12297 {
12298 rtx low[2], high[2];
12299 int count;
12300 const int single_width = mode == DImode ? 32 : 64;
12301
12302 if (GET_CODE (operands[2]) == CONST_INT)
12303 {
12304 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12305 count = INTVAL (operands[2]) & (single_width * 2 - 1);
12306
12307 if (count == single_width * 2 - 1)
12308 {
12309 emit_move_insn (high[0], high[1]);
12310 emit_insn ((mode == DImode
12311 ? gen_ashrsi3
12312 : gen_ashrdi3) (high[0], high[0],
12313 GEN_INT (single_width - 1)));
12314 emit_move_insn (low[0], high[0]);
12315
12316 }
12317 else if (count >= single_width)
12318 {
12319 emit_move_insn (low[0], high[1]);
12320 emit_move_insn (high[0], low[0]);
12321 emit_insn ((mode == DImode
12322 ? gen_ashrsi3
12323 : gen_ashrdi3) (high[0], high[0],
12324 GEN_INT (single_width - 1)));
12325 if (count > single_width)
12326 emit_insn ((mode == DImode
12327 ? gen_ashrsi3
12328 : gen_ashrdi3) (low[0], low[0],
12329 GEN_INT (count - single_width)));
12330 }
12331 else
12332 {
12333 if (!rtx_equal_p (operands[0], operands[1]))
12334 emit_move_insn (operands[0], operands[1]);
12335 emit_insn ((mode == DImode
12336 ? gen_x86_shrd_1
12337 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
12338 emit_insn ((mode == DImode
12339 ? gen_ashrsi3
12340 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
12341 }
12342 }
12343 else
12344 {
12345 if (!rtx_equal_p (operands[0], operands[1]))
12346 emit_move_insn (operands[0], operands[1]);
12347
12348 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12349
12350 emit_insn ((mode == DImode
12351 ? gen_x86_shrd_1
12352 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
12353 emit_insn ((mode == DImode
12354 ? gen_ashrsi3
12355 : gen_ashrdi3) (high[0], high[0], operands[2]));
12356
12357 if (TARGET_CMOVE && scratch)
12358 {
12359 emit_move_insn (scratch, high[0]);
12360 emit_insn ((mode == DImode
12361 ? gen_ashrsi3
12362 : gen_ashrdi3) (scratch, scratch,
12363 GEN_INT (single_width - 1)));
12364 emit_insn ((mode == DImode
12365 ? gen_x86_shift_adj_1
12366 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
12367 scratch));
12368 }
12369 else
12370 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
12371 }
12372 }
12373
12374 void
12375 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
12376 {
12377 rtx low[2], high[2];
12378 int count;
12379 const int single_width = mode == DImode ? 32 : 64;
12380
12381 if (GET_CODE (operands[2]) == CONST_INT)
12382 {
12383 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12384 count = INTVAL (operands[2]) & (single_width * 2 - 1);
12385
12386 if (count >= single_width)
12387 {
12388 emit_move_insn (low[0], high[1]);
12389 ix86_expand_clear (high[0]);
12390
12391 if (count > single_width)
12392 emit_insn ((mode == DImode
12393 ? gen_lshrsi3
12394 : gen_lshrdi3) (low[0], low[0],
12395 GEN_INT (count - single_width)));
12396 }
12397 else
12398 {
12399 if (!rtx_equal_p (operands[0], operands[1]))
12400 emit_move_insn (operands[0], operands[1]);
12401 emit_insn ((mode == DImode
12402 ? gen_x86_shrd_1
12403 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
12404 emit_insn ((mode == DImode
12405 ? gen_lshrsi3
12406 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
12407 }
12408 }
12409 else
12410 {
12411 if (!rtx_equal_p (operands[0], operands[1]))
12412 emit_move_insn (operands[0], operands[1]);
12413
12414 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12415
12416 emit_insn ((mode == DImode
12417 ? gen_x86_shrd_1
12418 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
12419 emit_insn ((mode == DImode
12420 ? gen_lshrsi3
12421 : gen_lshrdi3) (high[0], high[0], operands[2]));
12422
12423 /* Heh. By reversing the arguments, we can reuse this pattern. */
12424 if (TARGET_CMOVE && scratch)
12425 {
12426 ix86_expand_clear (scratch);
12427 emit_insn ((mode == DImode
12428 ? gen_x86_shift_adj_1
12429 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
12430 scratch));
12431 }
12432 else
12433 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
12434 }
12435 }
12436
12437 /* Helper function for the string operations below. Test whether the bits of
12438 VARIABLE selected by VALUE are zero (i.e. it is suitably aligned); if so, jump to the label that is returned. */
12439 static rtx
12440 ix86_expand_aligntest (rtx variable, int value)
12441 {
12442 rtx label = gen_label_rtx ();
12443 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
12444 if (GET_MODE (variable) == DImode)
12445 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
12446 else
12447 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
12448 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
12449 1, label);
12450 return label;
12451 }
12452
12453 /* Decrease COUNTREG by VALUE. */
12454 static void
12455 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
12456 {
12457 if (GET_MODE (countreg) == DImode)
12458 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
12459 else
12460 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
12461 }
12462
12463 /* Zero-extend the possibly SImode EXP into a Pmode register. */
12464 rtx
12465 ix86_zero_extend_to_Pmode (rtx exp)
12466 {
12467 rtx r;
12468 if (GET_MODE (exp) == VOIDmode)
12469 return force_reg (Pmode, exp);
12470 if (GET_MODE (exp) == Pmode)
12471 return copy_to_mode_reg (Pmode, exp);
12472 r = gen_reg_rtx (Pmode);
12473 emit_insn (gen_zero_extendsidi2 (r, exp));
12474 return r;
12475 }
12476
12477 /* Expand string move (memcpy) operation. Use i386 string operations when
12478 profitable. expand_clrmem contains similar code. */
12479 int
12480 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp)
12481 {
12482 rtx srcreg, destreg, countreg, srcexp, destexp;
12483 enum machine_mode counter_mode;
12484 HOST_WIDE_INT align = 0;
12485 unsigned HOST_WIDE_INT count = 0;
12486
12487 if (GET_CODE (align_exp) == CONST_INT)
12488 align = INTVAL (align_exp);
12489
12490 /* Can't use any of this if the user has appropriated esi or edi. */
12491 if (global_regs[4] || global_regs[5])
12492 return 0;
12493
12494 /* This simple hack avoids all inlining code and simplifies code below. */
12495 if (!TARGET_ALIGN_STRINGOPS)
12496 align = 64;
12497
12498 if (GET_CODE (count_exp) == CONST_INT)
12499 {
12500 count = INTVAL (count_exp);
12501 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
12502 return 0;
12503 }
12504
12505 /* Figure out proper mode for counter. For 32bits it is always SImode,
12506 for 64bits use SImode when possible, otherwise DImode.
12507 Set count to number of bytes copied when known at compile time. */
12508 if (!TARGET_64BIT
12509 || GET_MODE (count_exp) == SImode
12510 || x86_64_zext_immediate_operand (count_exp, VOIDmode))
12511 counter_mode = SImode;
12512 else
12513 counter_mode = DImode;
12514
12515 gcc_assert (counter_mode == SImode || counter_mode == DImode);
12516
12517 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
12518 if (destreg != XEXP (dst, 0))
12519 dst = replace_equiv_address_nv (dst, destreg);
12520 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
12521 if (srcreg != XEXP (src, 0))
12522 src = replace_equiv_address_nv (src, srcreg);
12523
12524 /* When optimizing for size, emit a simple rep ; movsb instruction for
12525 counts not divisible by 4, except when the (movsl;)*(movsw;)?(movsb;)?
12526 sequence is shorter than mov{b,l} $count, %{ecx,cl}; rep; movsb.
12527 The size of the (movsl;)*(movsw;)?(movsb;)? sequence is
12528 count / 4 + (count & 3) insns; the other sequence is either 4 or 7 bytes,
12529 but we don't know here whether the upper 24 (resp. 56) bits of %ecx
12530 will be known to be zero or not. The rep; movsb sequence causes higher
12531 register pressure though, so take that into account. */
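/* Worked example (illustrative): with -Os and count == 11 we get
   (count & 3) + count / 4 == 3 + 2 == 5, which is not greater than 6 and
   count does not exceed 5*4, so this branch is skipped and the unrolled
   movsl/movsw/movsb copy below is used instead of rep ; movsb.  */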
12532
12533 if ((!optimize || optimize_size)
12534 && (count == 0
12535 || ((count & 0x03)
12536 && (!optimize_size
12537 || count > 5 * 4
12538 || (count & 3) + count / 4 > 6))))
12539 {
12540 emit_insn (gen_cld ());
12541 countreg = ix86_zero_extend_to_Pmode (count_exp);
12542 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
12543 srcexp = gen_rtx_PLUS (Pmode, srcreg, countreg);
12544 emit_insn (gen_rep_mov (destreg, dst, srcreg, src, countreg,
12545 destexp, srcexp));
12546 }
12547
12548 /* For constant aligned (or small unaligned) copies use rep movsl
12549 followed by code copying the rest. For PentiumPro ensure 8 byte
12550 alignment to allow rep movsl acceleration. */
12551
12552 else if (count != 0
12553 && (align >= 8
12554 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
12555 || optimize_size || count < (unsigned int) 64))
12556 {
12557 unsigned HOST_WIDE_INT offset = 0;
12558 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
12559 rtx srcmem, dstmem;
12560
12561 emit_insn (gen_cld ());
12562 if (count & ~(size - 1))
12563 {
12564 if ((TARGET_SINGLE_STRINGOP || optimize_size) && count < 5 * 4)
12565 {
12566 enum machine_mode movs_mode = size == 4 ? SImode : DImode;
12567
12568 while (offset < (count & ~(size - 1)))
12569 {
12570 srcmem = adjust_automodify_address_nv (src, movs_mode,
12571 srcreg, offset);
12572 dstmem = adjust_automodify_address_nv (dst, movs_mode,
12573 destreg, offset);
12574 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12575 offset += size;
12576 }
12577 }
12578 else
12579 {
12580 countreg = GEN_INT ((count >> (size == 4 ? 2 : 3))
12581 & (TARGET_64BIT ? -1 : 0x3fffffff));
12582 countreg = copy_to_mode_reg (counter_mode, countreg);
12583 countreg = ix86_zero_extend_to_Pmode (countreg);
12584
12585 destexp = gen_rtx_ASHIFT (Pmode, countreg,
12586 GEN_INT (size == 4 ? 2 : 3));
12587 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
12588 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
12589
12590 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
12591 countreg, destexp, srcexp));
12592 offset = count & ~(size - 1);
12593 }
12594 }
12595 if (size == 8 && (count & 0x04))
12596 {
12597 srcmem = adjust_automodify_address_nv (src, SImode, srcreg,
12598 offset);
12599 dstmem = adjust_automodify_address_nv (dst, SImode, destreg,
12600 offset);
12601 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12602 offset += 4;
12603 }
12604 if (count & 0x02)
12605 {
12606 srcmem = adjust_automodify_address_nv (src, HImode, srcreg,
12607 offset);
12608 dstmem = adjust_automodify_address_nv (dst, HImode, destreg,
12609 offset);
12610 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12611 offset += 2;
12612 }
12613 if (count & 0x01)
12614 {
12615 srcmem = adjust_automodify_address_nv (src, QImode, srcreg,
12616 offset);
12617 dstmem = adjust_automodify_address_nv (dst, QImode, destreg,
12618 offset);
12619 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12620 }
12621 }
12622 /* The generic code based on the glibc implementation:
12623 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
12624 allowing accelerated copying there)
12625 - copy the data using rep movsl
12626 - copy the rest. */
12627 else
12628 {
12629 rtx countreg2;
12630 rtx label = NULL;
12631 rtx srcmem, dstmem;
12632 int desired_alignment = (TARGET_PENTIUMPRO
12633 && (count == 0 || count >= (unsigned int) 260)
12634 ? 8 : UNITS_PER_WORD);
12635 /* Get rid of MEM_OFFSETs, they won't be accurate. */
12636 dst = change_address (dst, BLKmode, destreg);
12637 src = change_address (src, BLKmode, srcreg);
12638
12639 /* In case we don't know anything about the alignment, default to the
12640 library version, since it is usually equally fast and results in
12641 shorter code.
12642
12643 Also emit a library call when we know that the count is large and call
12644 overhead will not be important. */
12645 if (!TARGET_INLINE_ALL_STRINGOPS
12646 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
12647 return 0;
12648
12649 if (TARGET_SINGLE_STRINGOP)
12650 emit_insn (gen_cld ());
12651
12652 countreg2 = gen_reg_rtx (Pmode);
12653 countreg = copy_to_mode_reg (counter_mode, count_exp);
12654
12655 /* We don't use loops to align destination and to copy parts smaller
12656 than 4 bytes, because gcc is able to optimize such code better (in
12657 the case the destination or the count really is aligned, gcc is often
12658 able to predict the branches) and also it is friendlier to the
12659 hardware branch prediction.
12660
12661 Using loops is beneficial for the generic case, because we can
12662 handle small counts using the loops. Many CPUs (such as Athlon)
12663 have large REP prefix setup costs.
12664
12665 This is quite costly. Maybe we can revisit this decision later or
12666 add some customizability to this code. */
12667
12668 if (count == 0 && align < desired_alignment)
12669 {
12670 label = gen_label_rtx ();
12671 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
12672 LEU, 0, counter_mode, 1, label);
12673 }
12674 if (align <= 1)
12675 {
12676 rtx label = ix86_expand_aligntest (destreg, 1);
12677 srcmem = change_address (src, QImode, srcreg);
12678 dstmem = change_address (dst, QImode, destreg);
12679 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12680 ix86_adjust_counter (countreg, 1);
12681 emit_label (label);
12682 LABEL_NUSES (label) = 1;
12683 }
12684 if (align <= 2)
12685 {
12686 rtx label = ix86_expand_aligntest (destreg, 2);
12687 srcmem = change_address (src, HImode, srcreg);
12688 dstmem = change_address (dst, HImode, destreg);
12689 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12690 ix86_adjust_counter (countreg, 2);
12691 emit_label (label);
12692 LABEL_NUSES (label) = 1;
12693 }
12694 if (align <= 4 && desired_alignment > 4)
12695 {
12696 rtx label = ix86_expand_aligntest (destreg, 4);
12697 srcmem = change_address (src, SImode, srcreg);
12698 dstmem = change_address (dst, SImode, destreg);
12699 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12700 ix86_adjust_counter (countreg, 4);
12701 emit_label (label);
12702 LABEL_NUSES (label) = 1;
12703 }
12704
12705 if (label && desired_alignment > 4 && !TARGET_64BIT)
12706 {
12707 emit_label (label);
12708 LABEL_NUSES (label) = 1;
12709 label = NULL_RTX;
12710 }
12711 if (!TARGET_SINGLE_STRINGOP)
12712 emit_insn (gen_cld ());
12713 if (TARGET_64BIT)
12714 {
12715 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
12716 GEN_INT (3)));
12717 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
12718 }
12719 else
12720 {
12721 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
12722 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
12723 }
12724 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
12725 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
12726 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
12727 countreg2, destexp, srcexp));
12728
12729 if (label)
12730 {
12731 emit_label (label);
12732 LABEL_NUSES (label) = 1;
12733 }
12734 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
12735 {
12736 srcmem = change_address (src, SImode, srcreg);
12737 dstmem = change_address (dst, SImode, destreg);
12738 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12739 }
12740 if ((align <= 4 || count == 0) && TARGET_64BIT)
12741 {
12742 rtx label = ix86_expand_aligntest (countreg, 4);
12743 srcmem = change_address (src, SImode, srcreg);
12744 dstmem = change_address (dst, SImode, destreg);
12745 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12746 emit_label (label);
12747 LABEL_NUSES (label) = 1;
12748 }
12749 if (align > 2 && count != 0 && (count & 2))
12750 {
12751 srcmem = change_address (src, HImode, srcreg);
12752 dstmem = change_address (dst, HImode, destreg);
12753 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12754 }
12755 if (align <= 2 || count == 0)
12756 {
12757 rtx label = ix86_expand_aligntest (countreg, 2);
12758 srcmem = change_address (src, HImode, srcreg);
12759 dstmem = change_address (dst, HImode, destreg);
12760 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12761 emit_label (label);
12762 LABEL_NUSES (label) = 1;
12763 }
12764 if (align > 1 && count != 0 && (count & 1))
12765 {
12766 srcmem = change_address (src, QImode, srcreg);
12767 dstmem = change_address (dst, QImode, destreg);
12768 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12769 }
12770 if (align <= 1 || count == 0)
12771 {
12772 rtx label = ix86_expand_aligntest (countreg, 1);
12773 srcmem = change_address (src, QImode, srcreg);
12774 dstmem = change_address (dst, QImode, destreg);
12775 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12776 emit_label (label);
12777 LABEL_NUSES (label) = 1;
12778 }
12779 }
12780
12781 return 1;
12782 }
12783
12784 /* Expand string clear operation (bzero). Use i386 string operations when
12785 profitable. ix86_expand_movmem above contains similar code. */
12786 int
12787 ix86_expand_clrmem (rtx dst, rtx count_exp, rtx align_exp)
12788 {
12789 rtx destreg, zeroreg, countreg, destexp;
12790 enum machine_mode counter_mode;
12791 HOST_WIDE_INT align = 0;
12792 unsigned HOST_WIDE_INT count = 0;
12793
12794 if (GET_CODE (align_exp) == CONST_INT)
12795 align = INTVAL (align_exp);
12796
12797 /* Can't use any of this if the user has appropriated esi. */
12798 if (global_regs[4])
12799 return 0;
12800
12801 /* This simple hack avoids all inlining code and simplifies code below. */
12802 if (!TARGET_ALIGN_STRINGOPS)
12803 align = 32;
12804
12805 if (GET_CODE (count_exp) == CONST_INT)
12806 {
12807 count = INTVAL (count_exp);
12808 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
12809 return 0;
12810 }
12811 /* Figure out the proper mode for the counter. For 32-bit it is always SImode,
12812 for 64-bit use SImode when possible, otherwise DImode.
12813 Set count to the number of bytes cleared when known at compile time. */
12814 if (!TARGET_64BIT
12815 || GET_MODE (count_exp) == SImode
12816 || x86_64_zext_immediate_operand (count_exp, VOIDmode))
12817 counter_mode = SImode;
12818 else
12819 counter_mode = DImode;
12820
12821 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
12822 if (destreg != XEXP (dst, 0))
12823 dst = replace_equiv_address_nv (dst, destreg);
12824
12825
12826 /* When optimizing for size emit a simple rep ; stosb instruction for
12827 counts not divisible by 4. The movl $N, %ecx; rep; stosb
12828 sequence is 7 bytes long, so if optimizing for size and the count is
12829 small enough that some stosl, stosw and stosb instructions without
12830 rep are shorter, fall through into the next if. */
12831
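  /* In the size comparison below, (count >> 2) stosl's plus (count & 3)
     stosb's cost one byte each, so the 7-byte rep sequence is only used
     when that total would be larger.  */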
12832 if ((!optimize || optimize_size)
12833 && (count == 0
12834 || ((count & 0x03)
12835 && (!optimize_size || (count & 0x03) + (count >> 2) > 7))))
12836 {
12837 emit_insn (gen_cld ());
12838
12839 countreg = ix86_zero_extend_to_Pmode (count_exp);
12840 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
12841 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
12842 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
12843 }
12844 else if (count != 0
12845 && (align >= 8
12846 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
12847 || optimize_size || count < (unsigned int) 64))
12848 {
12849 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
12850 unsigned HOST_WIDE_INT offset = 0;
12851
12852 emit_insn (gen_cld ());
12853
12854 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
12855 if (count & ~(size - 1))
12856 {
12857 unsigned HOST_WIDE_INT repcount;
12858 unsigned int max_nonrep;
12859
12860 repcount = count >> (size == 4 ? 2 : 3);
12861 if (!TARGET_64BIT)
12862 repcount &= 0x3fffffff;
12863
12864 /* movl $N, %ecx; rep; stosl is 7 bytes, while N x stosl is N bytes.
12865 movl $N, %ecx; rep; stosq is 8 bytes, while N x stosq is 2xN
12866 bytes. In both cases the latter seems to be faster for small
12867 values of N. */
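	  /* For example, when optimizing for size, clearing 20 bytes with
	     size == 4 gives repcount = 5 <= max_nonrep (7), so five
	     individual stores are emitted below instead of a
	     movl $5, %ecx; rep; stosl sequence.  */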
12868 max_nonrep = size == 4 ? 7 : 4;
12869 if (!optimize_size)
12870 switch (ix86_tune)
12871 {
12872 case PROCESSOR_PENTIUM4:
12873 case PROCESSOR_NOCONA:
12874 max_nonrep = 3;
12875 break;
12876 default:
12877 break;
12878 }
12879
12880 if (repcount <= max_nonrep)
12881 while (repcount-- > 0)
12882 {
12883 rtx mem = adjust_automodify_address_nv (dst,
12884 GET_MODE (zeroreg),
12885 destreg, offset);
12886 emit_insn (gen_strset (destreg, mem, zeroreg));
12887 offset += size;
12888 }
12889 else
12890 {
12891 countreg = copy_to_mode_reg (counter_mode, GEN_INT (repcount));
12892 countreg = ix86_zero_extend_to_Pmode (countreg);
12893 destexp = gen_rtx_ASHIFT (Pmode, countreg,
12894 GEN_INT (size == 4 ? 2 : 3));
12895 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
12896 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg,
12897 destexp));
12898 offset = count & ~(size - 1);
12899 }
12900 }
12901 if (size == 8 && (count & 0x04))
12902 {
12903 rtx mem = adjust_automodify_address_nv (dst, SImode, destreg,
12904 offset);
12905 emit_insn (gen_strset (destreg, mem,
12906 gen_rtx_SUBREG (SImode, zeroreg, 0)));
12907 offset += 4;
12908 }
12909 if (count & 0x02)
12910 {
12911 rtx mem = adjust_automodify_address_nv (dst, HImode, destreg,
12912 offset);
12913 emit_insn (gen_strset (destreg, mem,
12914 gen_rtx_SUBREG (HImode, zeroreg, 0)));
12915 offset += 2;
12916 }
12917 if (count & 0x01)
12918 {
12919 rtx mem = adjust_automodify_address_nv (dst, QImode, destreg,
12920 offset);
12921 emit_insn (gen_strset (destreg, mem,
12922 gen_rtx_SUBREG (QImode, zeroreg, 0)));
12923 }
12924 }
12925 else
12926 {
12927 rtx countreg2;
12928 rtx label = NULL;
12929 /* Compute desired alignment of the string operation. */
12930 int desired_alignment = (TARGET_PENTIUMPRO
12931 && (count == 0 || count >= (unsigned int) 260)
12932 ? 8 : UNITS_PER_WORD);
12933
12934 /* In case we don't know anything about the alignment, default to
12935 the library version, since it is usually equally fast and results in
12936 shorter code.
12937
12938 Also emit a call when we know that the count is large and the call
12939 overhead will not be important. */
12940 if (!TARGET_INLINE_ALL_STRINGOPS
12941 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
12942 return 0;
12943
12944 if (TARGET_SINGLE_STRINGOP)
12945 emit_insn (gen_cld ());
12946
12947 countreg2 = gen_reg_rtx (Pmode);
12948 countreg = copy_to_mode_reg (counter_mode, count_exp);
12949 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
12950 /* Get rid of MEM_OFFSET, it won't be accurate. */
12951 dst = change_address (dst, BLKmode, destreg);
12952
12953 if (count == 0 && align < desired_alignment)
12954 {
12955 label = gen_label_rtx ();
12956 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
12957 LEU, 0, counter_mode, 1, label);
12958 }
12959 if (align <= 1)
12960 {
12961 rtx label = ix86_expand_aligntest (destreg, 1);
12962 emit_insn (gen_strset (destreg, dst,
12963 gen_rtx_SUBREG (QImode, zeroreg, 0)));
12964 ix86_adjust_counter (countreg, 1);
12965 emit_label (label);
12966 LABEL_NUSES (label) = 1;
12967 }
12968 if (align <= 2)
12969 {
12970 rtx label = ix86_expand_aligntest (destreg, 2);
12971 emit_insn (gen_strset (destreg, dst,
12972 gen_rtx_SUBREG (HImode, zeroreg, 0)));
12973 ix86_adjust_counter (countreg, 2);
12974 emit_label (label);
12975 LABEL_NUSES (label) = 1;
12976 }
12977 if (align <= 4 && desired_alignment > 4)
12978 {
12979 rtx label = ix86_expand_aligntest (destreg, 4);
12980 emit_insn (gen_strset (destreg, dst,
12981 (TARGET_64BIT
12982 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
12983 : zeroreg)));
12984 ix86_adjust_counter (countreg, 4);
12985 emit_label (label);
12986 LABEL_NUSES (label) = 1;
12987 }
12988
12989 if (label && desired_alignment > 4 && !TARGET_64BIT)
12990 {
12991 emit_label (label);
12992 LABEL_NUSES (label) = 1;
12993 label = NULL_RTX;
12994 }
12995
12996 if (!TARGET_SINGLE_STRINGOP)
12997 emit_insn (gen_cld ());
12998 if (TARGET_64BIT)
12999 {
13000 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
13001 GEN_INT (3)));
13002 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
13003 }
13004 else
13005 {
13006 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
13007 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
13008 }
13009 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
13010 emit_insn (gen_rep_stos (destreg, countreg2, dst, zeroreg, destexp));
13011
13012 if (label)
13013 {
13014 emit_label (label);
13015 LABEL_NUSES (label) = 1;
13016 }
13017
13018 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
13019 emit_insn (gen_strset (destreg, dst,
13020 gen_rtx_SUBREG (SImode, zeroreg, 0)));
13021 if (TARGET_64BIT && (align <= 4 || count == 0))
13022 {
13023 rtx label = ix86_expand_aligntest (countreg, 4);
13024 emit_insn (gen_strset (destreg, dst,
13025 gen_rtx_SUBREG (SImode, zeroreg, 0)));
13026 emit_label (label);
13027 LABEL_NUSES (label) = 1;
13028 }
13029 if (align > 2 && count != 0 && (count & 2))
13030 emit_insn (gen_strset (destreg, dst,
13031 gen_rtx_SUBREG (HImode, zeroreg, 0)));
13032 if (align <= 2 || count == 0)
13033 {
13034 rtx label = ix86_expand_aligntest (countreg, 2);
13035 emit_insn (gen_strset (destreg, dst,
13036 gen_rtx_SUBREG (HImode, zeroreg, 0)));
13037 emit_label (label);
13038 LABEL_NUSES (label) = 1;
13039 }
13040 if (align > 1 && count != 0 && (count & 1))
13041 emit_insn (gen_strset (destreg, dst,
13042 gen_rtx_SUBREG (QImode, zeroreg, 0)));
13043 if (align <= 1 || count == 0)
13044 {
13045 rtx label = ix86_expand_aligntest (countreg, 1);
13046 emit_insn (gen_strset (destreg, dst,
13047 gen_rtx_SUBREG (QImode, zeroreg, 0)));
13048 emit_label (label);
13049 LABEL_NUSES (label) = 1;
13050 }
13051 }
13052 return 1;
13053 }
13054
13055 /* Expand strlen. */
13056 int
13057 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
13058 {
13059 rtx addr, scratch1, scratch2, scratch3, scratch4;
13060
13061 /* The generic case of the strlen expander is long. Avoid expanding it
13062 unless TARGET_INLINE_ALL_STRINGOPS. */
13063
13064 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
13065 && !TARGET_INLINE_ALL_STRINGOPS
13066 && !optimize_size
13067 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
13068 return 0;
13069
13070 addr = force_reg (Pmode, XEXP (src, 0));
13071 scratch1 = gen_reg_rtx (Pmode);
13072
13073 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
13074 && !optimize_size)
13075 {
13076 /* It seems that some optimizer does not combine a call like
13077 foo(strlen(bar), strlen(bar));
13078 when the move and the subtraction are done here. It does calculate
13079 the length just once when these instructions are done inside of
13080 output_strlen_unroll(). But since &bar[strlen(bar)] is often used,
13081 and this uses one fewer register for the lifetime of
13082 output_strlen_unroll(), this is better. */
13083
13084 emit_move_insn (out, addr);
13085
13086 ix86_expand_strlensi_unroll_1 (out, src, align);
13087
13088 /* strlensi_unroll_1 returns the address of the zero at the end of
13089 the string, like memchr(), so compute the length by subtracting
13090 the start address. */
13091 if (TARGET_64BIT)
13092 emit_insn (gen_subdi3 (out, out, addr));
13093 else
13094 emit_insn (gen_subsi3 (out, out, addr));
13095 }
13096 else
13097 {
13098 rtx unspec;
13099 scratch2 = gen_reg_rtx (Pmode);
13100 scratch3 = gen_reg_rtx (Pmode);
13101 scratch4 = force_reg (Pmode, constm1_rtx);
13102
13103 emit_move_insn (scratch3, addr);
13104 eoschar = force_reg (QImode, eoschar);
13105
13106 emit_insn (gen_cld ());
13107 src = replace_equiv_address_nv (src, scratch3);
13108
13109 /* If .md starts supporting :P, this can be done in .md. */
13110 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
13111 scratch4), UNSPEC_SCAS);
13112 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
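      /* strlenqi_1 expands to repnz scasb with the count register
	 initialized to -1 (scratch4 above) and leaves the final count in
	 scratch1.  After scanning len + 1 bytes (including the terminator)
	 that count is -(len + 2), so len = ~count - 1, which is what the
	 one's complement and the add of -1 below compute.  */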
13113 if (TARGET_64BIT)
13114 {
13115 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
13116 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
13117 }
13118 else
13119 {
13120 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
13121 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
13122 }
13123 }
13124 return 1;
13125 }
13126
13127 /* Expand the appropriate insns for doing strlen if not just doing
13128 repnz; scasb
13129
13130 out = result, initialized with the start address
13131 align_rtx = alignment of the address.
13132 scratch = scratch register, initialized with the start address when
13133 not aligned, otherwise undefined.
13134
13135 This is just the body. It needs the initializations mentioned above and
13136 some address computation at the end. These things are done in i386.md. */
13137
13138 static void
13139 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
13140 {
13141 int align;
13142 rtx tmp;
13143 rtx align_2_label = NULL_RTX;
13144 rtx align_3_label = NULL_RTX;
13145 rtx align_4_label = gen_label_rtx ();
13146 rtx end_0_label = gen_label_rtx ();
13147 rtx mem;
13148 rtx tmpreg = gen_reg_rtx (SImode);
13149 rtx scratch = gen_reg_rtx (SImode);
13150 rtx cmp;
13151
13152 align = 0;
13153 if (GET_CODE (align_rtx) == CONST_INT)
13154 align = INTVAL (align_rtx);
13155
13156 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
13157
13158 /* Is there a known alignment and is it less than 4? */
13159 if (align < 4)
13160 {
13161 rtx scratch1 = gen_reg_rtx (Pmode);
13162 emit_move_insn (scratch1, out);
13163 /* Is there a known alignment and is it not 2? */
13164 if (align != 2)
13165 {
13166 align_3_label = gen_label_rtx (); /* Label when address is 3 mod 4. */
13167 align_2_label = gen_label_rtx (); /* Label when address is 2 mod 4. */
13168
13169 /* Leave just the 3 lower bits. */
13170 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
13171 NULL_RTX, 0, OPTAB_WIDEN);
13172
13173 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
13174 Pmode, 1, align_4_label);
13175 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
13176 Pmode, 1, align_2_label);
13177 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
13178 Pmode, 1, align_3_label);
13179 }
13180 else
13181 {
13182 /* Since the alignment is 2, we have to check 2 or 0 bytes;
13183 check whether it is already aligned to a 4-byte boundary. */
13184
13185 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
13186 NULL_RTX, 0, OPTAB_WIDEN);
13187
13188 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
13189 Pmode, 1, align_4_label);
13190 }
13191
13192 mem = change_address (src, QImode, out);
13193
13194 /* Now compare the bytes. */
13195
13196 /* Compare the first 1..3 unaligned bytes one byte at a time. */
13197 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
13198 QImode, 1, end_0_label);
13199
13200 /* Increment the address. */
13201 if (TARGET_64BIT)
13202 emit_insn (gen_adddi3 (out, out, const1_rtx));
13203 else
13204 emit_insn (gen_addsi3 (out, out, const1_rtx));
13205
13206 /* Not needed with an alignment of 2 */
13207 if (align != 2)
13208 {
13209 emit_label (align_2_label);
13210
13211 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
13212 end_0_label);
13213
13214 if (TARGET_64BIT)
13215 emit_insn (gen_adddi3 (out, out, const1_rtx));
13216 else
13217 emit_insn (gen_addsi3 (out, out, const1_rtx));
13218
13219 emit_label (align_3_label);
13220 }
13221
13222 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
13223 end_0_label);
13224
13225 if (TARGET_64BIT)
13226 emit_insn (gen_adddi3 (out, out, const1_rtx));
13227 else
13228 emit_insn (gen_addsi3 (out, out, const1_rtx));
13229 }
13230
13231 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
13232 align this loop; that only makes the program bigger and does not
13233 speed it up. */
13234 emit_label (align_4_label);
13235
13236 mem = change_address (src, SImode, out);
13237 emit_move_insn (scratch, mem);
13238 if (TARGET_64BIT)
13239 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
13240 else
13241 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
13242
13243 /* This formula yields a nonzero result iff one of the bytes is zero.
13244 This saves three branches inside the loop and many cycles. */
13245
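  /* A worked example: for scratch = 0x12345600 the three operations give
     (0x12345600 - 0x01010101) & ~0x12345600 & 0x80808080 = 0x00000080,
     which is nonzero because the low byte is zero; for scratch = 0x12345678
     the same expression is 0, so the loop continues.  */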
13246 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
13247 emit_insn (gen_one_cmplsi2 (scratch, scratch));
13248 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
13249 emit_insn (gen_andsi3 (tmpreg, tmpreg,
13250 gen_int_mode (0x80808080, SImode)));
13251 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
13252 align_4_label);
13253
13254 if (TARGET_CMOVE)
13255 {
13256 rtx reg = gen_reg_rtx (SImode);
13257 rtx reg2 = gen_reg_rtx (Pmode);
13258 emit_move_insn (reg, tmpreg);
13259 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
13260
13261 /* If zero is not in the first two bytes, move two bytes forward. */
13262 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
13263 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
13264 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
13265 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
13266 gen_rtx_IF_THEN_ELSE (SImode, tmp,
13267 reg,
13268 tmpreg)));
13269 /* Emit lea manually to avoid clobbering of flags. */
13270 emit_insn (gen_rtx_SET (SImode, reg2,
13271 gen_rtx_PLUS (Pmode, out, const2_rtx)));
13272
13273 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
13274 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
13275 emit_insn (gen_rtx_SET (VOIDmode, out,
13276 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
13277 reg2,
13278 out)));
13279
13280 }
13281 else
13282 {
13283 rtx end_2_label = gen_label_rtx ();
13284 /* Is zero in the first two bytes? */
13285
13286 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
13287 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
13288 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
13289 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
13290 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
13291 pc_rtx);
13292 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
13293 JUMP_LABEL (tmp) = end_2_label;
13294
13295 /* Not in the first two. Move two bytes forward. */
13296 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
13297 if (TARGET_64BIT)
13298 emit_insn (gen_adddi3 (out, out, const2_rtx));
13299 else
13300 emit_insn (gen_addsi3 (out, out, const2_rtx));
13301
13302 emit_label (end_2_label);
13303
13304 }
13305
13306 /* Avoid branch in fixing the byte. */
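  /* At this point OUT is 4 bytes past the start of the two-byte group that
     contains the zero byte, and the low byte of TMPREG has bit 7 set exactly
     when the zero is the first byte of that group.  Adding the byte to
     itself moves that bit into the carry flag, and the subtract-with-borrow
     of 3 below then produces OUT - 4 or OUT - 3, i.e. the address of the
     terminating zero.  */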
13307 tmpreg = gen_lowpart (QImode, tmpreg);
13308 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
13309 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
13310 if (TARGET_64BIT)
13311 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
13312 else
13313 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
13314
13315 emit_label (end_0_label);
13316 }
13317
13318 void
13319 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
13320 rtx callarg2 ATTRIBUTE_UNUSED,
13321 rtx pop, int sibcall)
13322 {
13323 rtx use = NULL, call;
13324
13325 if (pop == const0_rtx)
13326 pop = NULL;
13327 gcc_assert (!TARGET_64BIT || !pop);
13328
13329 #if TARGET_MACHO
13330 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
13331 fnaddr = machopic_indirect_call_target (fnaddr);
13332 #else
13333 /* Static functions and indirect calls don't need the pic register. */
13334 if (! TARGET_64BIT && flag_pic
13335 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
13336 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
13337 use_reg (&use, pic_offset_table_rtx);
13338
13339 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
13340 {
13341 rtx al = gen_rtx_REG (QImode, 0);
13342 emit_move_insn (al, callarg2);
13343 use_reg (&use, al);
13344 }
13345 #endif /* TARGET_MACHO */
13346
13347 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
13348 {
13349 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
13350 fnaddr = gen_rtx_MEM (QImode, fnaddr);
13351 }
13352 if (sibcall && TARGET_64BIT
13353 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
13354 {
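      /* Load the target into r11: it is a call-clobbered register that is
	 not used to pass arguments, so using it cannot clobber anything the
	 sibcall still needs.  */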
13355 rtx addr;
13356 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
13357 fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
13358 emit_move_insn (fnaddr, addr);
13359 fnaddr = gen_rtx_MEM (QImode, fnaddr);
13360 }
13361
13362 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
13363 if (retval)
13364 call = gen_rtx_SET (VOIDmode, retval, call);
13365 if (pop)
13366 {
13367 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
13368 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
13369 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
13370 }
13371
13372 call = emit_call_insn (call);
13373 if (use)
13374 CALL_INSN_FUNCTION_USAGE (call) = use;
13375 }
13376
13377 \f
13378 /* Clear stack slot assignments remembered from previous functions.
13379 This is called from INIT_EXPANDERS once before RTL is emitted for each
13380 function. */
13381
13382 static struct machine_function *
13383 ix86_init_machine_status (void)
13384 {
13385 struct machine_function *f;
13386
13387 f = ggc_alloc_cleared (sizeof (struct machine_function));
13388 f->use_fast_prologue_epilogue_nregs = -1;
13389 f->tls_descriptor_call_expanded_p = 0;
13390
13391 return f;
13392 }
13393
13394 /* Return a MEM corresponding to a stack slot with mode MODE.
13395 Allocate a new slot if necessary.
13396
13397 The RTL for a function can have several slots available: N is
13398 which slot to use. */
13399
13400 rtx
13401 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
13402 {
13403 struct stack_local_entry *s;
13404
13405 gcc_assert (n < MAX_386_STACK_LOCALS);
13406
13407 for (s = ix86_stack_locals; s; s = s->next)
13408 if (s->mode == mode && s->n == n)
13409 return s->rtl;
13410
13411 s = (struct stack_local_entry *)
13412 ggc_alloc (sizeof (struct stack_local_entry));
13413 s->n = n;
13414 s->mode = mode;
13415 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
13416
13417 s->next = ix86_stack_locals;
13418 ix86_stack_locals = s;
13419 return s->rtl;
13420 }
13421
13422 /* Construct the SYMBOL_REF for the tls_get_addr function. */
13423
13424 static GTY(()) rtx ix86_tls_symbol;
13425 rtx
13426 ix86_tls_get_addr (void)
13427 {
13428
13429 if (!ix86_tls_symbol)
13430 {
13431 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
13432 (TARGET_ANY_GNU_TLS
13433 && !TARGET_64BIT)
13434 ? "___tls_get_addr"
13435 : "__tls_get_addr");
13436 }
13437
13438 return ix86_tls_symbol;
13439 }
13440
13441 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
13442
13443 static GTY(()) rtx ix86_tls_module_base_symbol;
13444 rtx
13445 ix86_tls_module_base (void)
13446 {
13447
13448 if (!ix86_tls_module_base_symbol)
13449 {
13450 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
13451 "_TLS_MODULE_BASE_");
13452 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
13453 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
13454 }
13455
13456 return ix86_tls_module_base_symbol;
13457 }
13458 \f
13459 /* Calculate the length of the memory address in the instruction
13460 encoding. Does not include the one-byte modrm, opcode, or prefix. */
13461
13462 int
13463 memory_address_length (rtx addr)
13464 {
13465 struct ix86_address parts;
13466 rtx base, index, disp;
13467 int len;
13468 int ok;
13469
13470 if (GET_CODE (addr) == PRE_DEC
13471 || GET_CODE (addr) == POST_INC
13472 || GET_CODE (addr) == PRE_MODIFY
13473 || GET_CODE (addr) == POST_MODIFY)
13474 return 0;
13475
13476 ok = ix86_decompose_address (addr, &parts);
13477 gcc_assert (ok);
13478
13479 if (parts.base && GET_CODE (parts.base) == SUBREG)
13480 parts.base = SUBREG_REG (parts.base);
13481 if (parts.index && GET_CODE (parts.index) == SUBREG)
13482 parts.index = SUBREG_REG (parts.index);
13483
13484 base = parts.base;
13485 index = parts.index;
13486 disp = parts.disp;
13487 len = 0;
13488
13489 /* Rule of thumb:
13490 - esp as the base always wants an index,
13491 - ebp as the base always wants a displacement. */
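  /* In ModRM terms: r/m = esp signals a following SIB byte, and mod = 00
     with base = ebp means disp32, so a zero disp8 must be emitted instead;
     either way such an address costs one extra byte.  */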
13492
13493 /* Register Indirect. */
13494 if (base && !index && !disp)
13495 {
13496 /* esp (for its index) and ebp (for its displacement) need
13497 the two-byte modrm form. */
13498 if (addr == stack_pointer_rtx
13499 || addr == arg_pointer_rtx
13500 || addr == frame_pointer_rtx
13501 || addr == hard_frame_pointer_rtx)
13502 len = 1;
13503 }
13504
13505 /* Direct Addressing. */
13506 else if (disp && !base && !index)
13507 len = 4;
13508
13509 else
13510 {
13511 /* Find the length of the displacement constant. */
13512 if (disp)
13513 {
13514 if (base && satisfies_constraint_K (disp))
13515 len = 1;
13516 else
13517 len = 4;
13518 }
13519 /* ebp always wants a displacement. */
13520 else if (base == hard_frame_pointer_rtx)
13521 len = 1;
13522
13523 /* An index requires the two-byte modrm form.... */
13524 if (index
13525 /* ...like esp, which always wants an index. */
13526 || base == stack_pointer_rtx
13527 || base == arg_pointer_rtx
13528 || base == frame_pointer_rtx)
13529 len += 1;
13530 }
13531
13532 return len;
13533 }
13534
13535 /* Compute default value for "length_immediate" attribute. When SHORTFORM
13536 is set, expect that the insn has an 8-bit immediate alternative. */
13537 int
13538 ix86_attr_length_immediate_default (rtx insn, int shortform)
13539 {
13540 int len = 0;
13541 int i;
13542 extract_insn_cached (insn);
13543 for (i = recog_data.n_operands - 1; i >= 0; --i)
13544 if (CONSTANT_P (recog_data.operand[i]))
13545 {
13546 gcc_assert (!len);
13547 if (shortform && satisfies_constraint_K (recog_data.operand[i]))
13548 len = 1;
13549 else
13550 {
13551 switch (get_attr_mode (insn))
13552 {
13553 case MODE_QI:
13554 len+=1;
13555 break;
13556 case MODE_HI:
13557 len+=2;
13558 break;
13559 case MODE_SI:
13560 len+=4;
13561 break;
13562 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
13563 case MODE_DI:
13564 len+=4;
13565 break;
13566 default:
13567 fatal_insn ("unknown insn mode", insn);
13568 }
13569 }
13570 }
13571 return len;
13572 }
13573 /* Compute default value for "length_address" attribute. */
13574 int
13575 ix86_attr_length_address_default (rtx insn)
13576 {
13577 int i;
13578
13579 if (get_attr_type (insn) == TYPE_LEA)
13580 {
13581 rtx set = PATTERN (insn);
13582
13583 if (GET_CODE (set) == PARALLEL)
13584 set = XVECEXP (set, 0, 0);
13585
13586 gcc_assert (GET_CODE (set) == SET);
13587
13588 return memory_address_length (SET_SRC (set));
13589 }
13590
13591 extract_insn_cached (insn);
13592 for (i = recog_data.n_operands - 1; i >= 0; --i)
13593 if (GET_CODE (recog_data.operand[i]) == MEM)
13594 {
13595 return memory_address_length (XEXP (recog_data.operand[i], 0));
13596 break;
13597 }
13598 return 0;
13599 }
13600 \f
13601 /* Return the maximum number of instructions a cpu can issue. */
13602
13603 static int
13604 ix86_issue_rate (void)
13605 {
13606 switch (ix86_tune)
13607 {
13608 case PROCESSOR_PENTIUM:
13609 case PROCESSOR_K6:
13610 return 2;
13611
13612 case PROCESSOR_PENTIUMPRO:
13613 case PROCESSOR_PENTIUM4:
13614 case PROCESSOR_ATHLON:
13615 case PROCESSOR_K8:
13616 case PROCESSOR_NOCONA:
13617 case PROCESSOR_GENERIC32:
13618 case PROCESSOR_GENERIC64:
13619 return 3;
13620
13621 default:
13622 return 1;
13623 }
13624 }
13625
13626 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads the flags
13627 set by DEP_INSN and no other register set by DEP_INSN. */
13628
13629 static int
13630 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
13631 {
13632 rtx set, set2;
13633
13634 /* Simplify the test for uninteresting insns. */
13635 if (insn_type != TYPE_SETCC
13636 && insn_type != TYPE_ICMOV
13637 && insn_type != TYPE_FCMOV
13638 && insn_type != TYPE_IBR)
13639 return 0;
13640
13641 if ((set = single_set (dep_insn)) != 0)
13642 {
13643 set = SET_DEST (set);
13644 set2 = NULL_RTX;
13645 }
13646 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
13647 && XVECLEN (PATTERN (dep_insn), 0) == 2
13648 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
13649 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
13650 {
13651 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
13652 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
13653 }
13654 else
13655 return 0;
13656
13657 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
13658 return 0;
13659
13660 /* This test is true if the dependent insn reads the flags but
13661 not any other potentially set register. */
13662 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
13663 return 0;
13664
13665 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
13666 return 0;
13667
13668 return 1;
13669 }
13670
13671 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
13672 address with operands set by DEP_INSN. */
13673
13674 static int
13675 ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
13676 {
13677 rtx addr;
13678
13679 if (insn_type == TYPE_LEA
13680 && TARGET_PENTIUM)
13681 {
13682 addr = PATTERN (insn);
13683
13684 if (GET_CODE (addr) == PARALLEL)
13685 addr = XVECEXP (addr, 0, 0);
13686
13687 gcc_assert (GET_CODE (addr) == SET);
13688
13689 addr = SET_SRC (addr);
13690 }
13691 else
13692 {
13693 int i;
13694 extract_insn_cached (insn);
13695 for (i = recog_data.n_operands - 1; i >= 0; --i)
13696 if (GET_CODE (recog_data.operand[i]) == MEM)
13697 {
13698 addr = XEXP (recog_data.operand[i], 0);
13699 goto found;
13700 }
13701 return 0;
13702 found:;
13703 }
13704
13705 return modified_in_p (addr, dep_insn);
13706 }
13707
13708 static int
13709 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
13710 {
13711 enum attr_type insn_type, dep_insn_type;
13712 enum attr_memory memory;
13713 rtx set, set2;
13714 int dep_insn_code_number;
13715
13716 /* Anti and output dependencies have zero cost on all CPUs. */
13717 if (REG_NOTE_KIND (link) != 0)
13718 return 0;
13719
13720 dep_insn_code_number = recog_memoized (dep_insn);
13721
13722 /* If we can't recognize the insns, we can't really do anything. */
13723 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
13724 return cost;
13725
13726 insn_type = get_attr_type (insn);
13727 dep_insn_type = get_attr_type (dep_insn);
13728
13729 switch (ix86_tune)
13730 {
13731 case PROCESSOR_PENTIUM:
13732 /* Address Generation Interlock adds a cycle of latency. */
13733 if (ix86_agi_dependent (insn, dep_insn, insn_type))
13734 cost += 1;
13735
13736 /* ??? Compares pair with jump/setcc. */
13737 if (ix86_flags_dependent (insn, dep_insn, insn_type))
13738 cost = 0;
13739
13740 /* Floating point stores require the value to be ready one cycle earlier. */
13741 if (insn_type == TYPE_FMOV
13742 && get_attr_memory (insn) == MEMORY_STORE
13743 && !ix86_agi_dependent (insn, dep_insn, insn_type))
13744 cost += 1;
13745 break;
13746
13747 case PROCESSOR_PENTIUMPRO:
13748 memory = get_attr_memory (insn);
13749
13750 /* INT->FP conversion is expensive. */
13751 if (get_attr_fp_int_src (dep_insn))
13752 cost += 5;
13753
13754 /* There is one cycle extra latency between an FP op and a store. */
13755 if (insn_type == TYPE_FMOV
13756 && (set = single_set (dep_insn)) != NULL_RTX
13757 && (set2 = single_set (insn)) != NULL_RTX
13758 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
13759 && GET_CODE (SET_DEST (set2)) == MEM)
13760 cost += 1;
13761
13762 /* Show the ability of the reorder buffer to hide the latency of a load
13763 by executing it in parallel with the previous instruction when that
13764 instruction is not needed to compute the address. */
13765 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
13766 && !ix86_agi_dependent (insn, dep_insn, insn_type))
13767 {
13768 /* Claim moves to take one cycle, as the core can issue one load
13769 at a time and the next load can start a cycle later. */
13770 if (dep_insn_type == TYPE_IMOV
13771 || dep_insn_type == TYPE_FMOV)
13772 cost = 1;
13773 else if (cost > 1)
13774 cost--;
13775 }
13776 break;
13777
13778 case PROCESSOR_K6:
13779 memory = get_attr_memory (insn);
13780
13781 /* The esp dependency is resolved before the instruction is really
13782 finished. */
13783 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
13784 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
13785 return 1;
13786
13787 /* INT->FP conversion is expensive. */
13788 if (get_attr_fp_int_src (dep_insn))
13789 cost += 5;
13790
13791 /* Show the ability of the reorder buffer to hide the latency of a load
13792 by executing it in parallel with the previous instruction when that
13793 instruction is not needed to compute the address. */
13794 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
13795 && !ix86_agi_dependent (insn, dep_insn, insn_type))
13796 {
13797 /* Claim moves to take one cycle, as the core can issue one load
13798 at a time and the next load can start a cycle later. */
13799 if (dep_insn_type == TYPE_IMOV
13800 || dep_insn_type == TYPE_FMOV)
13801 cost = 1;
13802 else if (cost > 2)
13803 cost -= 2;
13804 else
13805 cost = 1;
13806 }
13807 break;
13808
13809 case PROCESSOR_ATHLON:
13810 case PROCESSOR_K8:
13811 case PROCESSOR_GENERIC32:
13812 case PROCESSOR_GENERIC64:
13813 memory = get_attr_memory (insn);
13814
13815 /* Show the ability of the reorder buffer to hide the latency of a load
13816 by executing it in parallel with the previous instruction when that
13817 instruction is not needed to compute the address. */
13818 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
13819 && !ix86_agi_dependent (insn, dep_insn, insn_type))
13820 {
13821 enum attr_unit unit = get_attr_unit (insn);
13822 int loadcost = 3;
13823
13824 /* Because of the difference between the length of integer and
13825 floating unit pipeline preparation stages, the memory operands
13826 for floating point are cheaper.
13827
13828 ??? For Athlon the difference is most probably 2. */
13829 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
13830 loadcost = 3;
13831 else
13832 loadcost = TARGET_ATHLON ? 2 : 0;
13833
13834 if (cost >= loadcost)
13835 cost -= loadcost;
13836 else
13837 cost = 0;
13838 }
13839
13840 default:
13841 break;
13842 }
13843
13844 return cost;
13845 }
13846
13847 /* How many alternative schedules to try. This should be as wide as the
13848 scheduling freedom in the DFA, but no wider. Making this value too
13849 large results in extra work for the scheduler. */
13850
13851 static int
13852 ia32_multipass_dfa_lookahead (void)
13853 {
13854 if (ix86_tune == PROCESSOR_PENTIUM)
13855 return 2;
13856
13857 if (ix86_tune == PROCESSOR_PENTIUMPRO
13858 || ix86_tune == PROCESSOR_K6)
13859 return 1;
13860
13861 else
13862 return 0;
13863 }
13864
13865 \f
13866 /* Compute the alignment given to a constant that is being placed in memory.
13867 EXP is the constant and ALIGN is the alignment that the object would
13868 ordinarily have.
13869 The value of this function is used instead of that alignment to align
13870 the object. */
13871
13872 int
13873 ix86_constant_alignment (tree exp, int align)
13874 {
13875 if (TREE_CODE (exp) == REAL_CST)
13876 {
13877 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
13878 return 64;
13879 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
13880 return 128;
13881 }
13882 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
13883 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
13884 return BITS_PER_WORD;
13885
13886 return align;
13887 }
13888
13889 /* Compute the alignment for a static variable.
13890 TYPE is the data type, and ALIGN is the alignment that
13891 the object would ordinarily have. The value of this function is used
13892 instead of that alignment to align the object. */
13893
13894 int
13895 ix86_data_alignment (tree type, int align)
13896 {
13897 int max_align = optimize_size ? BITS_PER_WORD : 256;
13898
13899 if (AGGREGATE_TYPE_P (type)
13900 && TYPE_SIZE (type)
13901 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
13902 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
13903 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
13904 && align < max_align)
13905 align = max_align;
13906
13907 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
13908 to a 16-byte boundary. */
13909 if (TARGET_64BIT)
13910 {
13911 if (AGGREGATE_TYPE_P (type)
13912 && TYPE_SIZE (type)
13913 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
13914 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
13915 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
13916 return 128;
13917 }
13918
13919 if (TREE_CODE (type) == ARRAY_TYPE)
13920 {
13921 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
13922 return 64;
13923 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
13924 return 128;
13925 }
13926 else if (TREE_CODE (type) == COMPLEX_TYPE)
13927 {
13928
13929 if (TYPE_MODE (type) == DCmode && align < 64)
13930 return 64;
13931 if (TYPE_MODE (type) == XCmode && align < 128)
13932 return 128;
13933 }
13934 else if ((TREE_CODE (type) == RECORD_TYPE
13935 || TREE_CODE (type) == UNION_TYPE
13936 || TREE_CODE (type) == QUAL_UNION_TYPE)
13937 && TYPE_FIELDS (type))
13938 {
13939 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
13940 return 64;
13941 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
13942 return 128;
13943 }
13944 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
13945 || TREE_CODE (type) == INTEGER_TYPE)
13946 {
13947 if (TYPE_MODE (type) == DFmode && align < 64)
13948 return 64;
13949 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
13950 return 128;
13951 }
13952
13953 return align;
13954 }
13955
13956 /* Compute the alignment for a local variable.
13957 TYPE is the data type, and ALIGN is the alignment that
13958 the object would ordinarily have. The value of this macro is used
13959 instead of that alignment to align the object. */
13960
13961 int
13962 ix86_local_alignment (tree type, int align)
13963 {
13964 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
13965 to a 16-byte boundary. */
13966 if (TARGET_64BIT)
13967 {
13968 if (AGGREGATE_TYPE_P (type)
13969 && TYPE_SIZE (type)
13970 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
13971 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
13972 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
13973 return 128;
13974 }
13975 if (TREE_CODE (type) == ARRAY_TYPE)
13976 {
13977 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
13978 return 64;
13979 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
13980 return 128;
13981 }
13982 else if (TREE_CODE (type) == COMPLEX_TYPE)
13983 {
13984 if (TYPE_MODE (type) == DCmode && align < 64)
13985 return 64;
13986 if (TYPE_MODE (type) == XCmode && align < 128)
13987 return 128;
13988 }
13989 else if ((TREE_CODE (type) == RECORD_TYPE
13990 || TREE_CODE (type) == UNION_TYPE
13991 || TREE_CODE (type) == QUAL_UNION_TYPE)
13992 && TYPE_FIELDS (type))
13993 {
13994 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
13995 return 64;
13996 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
13997 return 128;
13998 }
13999 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
14000 || TREE_CODE (type) == INTEGER_TYPE)
14001 {
14002
14003 if (TYPE_MODE (type) == DFmode && align < 64)
14004 return 64;
14005 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
14006 return 128;
14007 }
14008 return align;
14009 }
14010 \f
14011 /* Emit RTL insns to initialize the variable parts of a trampoline.
14012 FNADDR is an RTX for the address of the function's pure code.
14013 CXT is an RTX for the static chain value for the function. */
14014 void
14015 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
14016 {
14017 if (!TARGET_64BIT)
14018 {
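      /* The 32-bit trampoline is 10 bytes: 0xb9 <cxt> is movl $cxt, %ecx and
	 0xe9 <disp> is jmp rel32, where the displacement is relative to the
	 end of the jmp, i.e. tramp + 10, as computed below.  */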
14019 /* Compute offset from the end of the jmp to the target function. */
14020 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
14021 plus_constant (tramp, 10),
14022 NULL_RTX, 1, OPTAB_DIRECT);
14023 emit_move_insn (gen_rtx_MEM (QImode, tramp),
14024 gen_int_mode (0xb9, QImode));
14025 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
14026 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
14027 gen_int_mode (0xe9, QImode));
14028 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
14029 }
14030 else
14031 {
14032 int offset = 0;
14033 /* Try to load address using shorter movl instead of movabs.
14034 We may want to support movq for kernel mode, but the kernel does not use
14035 trampolines at the moment. */
14036 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
14037 {
14038 fnaddr = copy_to_mode_reg (DImode, fnaddr);
14039 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14040 gen_int_mode (0xbb41, HImode));
14041 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
14042 gen_lowpart (SImode, fnaddr));
14043 offset += 6;
14044 }
14045 else
14046 {
14047 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14048 gen_int_mode (0xbb49, HImode));
14049 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
14050 fnaddr);
14051 offset += 10;
14052 }
14053 /* Load static chain using movabs to r10. */
14054 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14055 gen_int_mode (0xba49, HImode));
14056 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
14057 cxt);
14058 offset += 10;
14059 /* Jump to r11. */
14060 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14061 gen_int_mode (0xff49, HImode));
14062 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
14063 gen_int_mode (0xe3, QImode));
14064 offset += 3;
14065 gcc_assert (offset <= TRAMPOLINE_SIZE);
14066 }
14067
14068 #ifdef ENABLE_EXECUTE_STACK
14069 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
14070 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
14071 #endif
14072 }
14073 \f
14074 /* Codes for all the SSE/MMX builtins. */
14075 enum ix86_builtins
14076 {
14077 IX86_BUILTIN_ADDPS,
14078 IX86_BUILTIN_ADDSS,
14079 IX86_BUILTIN_DIVPS,
14080 IX86_BUILTIN_DIVSS,
14081 IX86_BUILTIN_MULPS,
14082 IX86_BUILTIN_MULSS,
14083 IX86_BUILTIN_SUBPS,
14084 IX86_BUILTIN_SUBSS,
14085
14086 IX86_BUILTIN_CMPEQPS,
14087 IX86_BUILTIN_CMPLTPS,
14088 IX86_BUILTIN_CMPLEPS,
14089 IX86_BUILTIN_CMPGTPS,
14090 IX86_BUILTIN_CMPGEPS,
14091 IX86_BUILTIN_CMPNEQPS,
14092 IX86_BUILTIN_CMPNLTPS,
14093 IX86_BUILTIN_CMPNLEPS,
14094 IX86_BUILTIN_CMPNGTPS,
14095 IX86_BUILTIN_CMPNGEPS,
14096 IX86_BUILTIN_CMPORDPS,
14097 IX86_BUILTIN_CMPUNORDPS,
14098 IX86_BUILTIN_CMPEQSS,
14099 IX86_BUILTIN_CMPLTSS,
14100 IX86_BUILTIN_CMPLESS,
14101 IX86_BUILTIN_CMPNEQSS,
14102 IX86_BUILTIN_CMPNLTSS,
14103 IX86_BUILTIN_CMPNLESS,
14104 IX86_BUILTIN_CMPNGTSS,
14105 IX86_BUILTIN_CMPNGESS,
14106 IX86_BUILTIN_CMPORDSS,
14107 IX86_BUILTIN_CMPUNORDSS,
14108
14109 IX86_BUILTIN_COMIEQSS,
14110 IX86_BUILTIN_COMILTSS,
14111 IX86_BUILTIN_COMILESS,
14112 IX86_BUILTIN_COMIGTSS,
14113 IX86_BUILTIN_COMIGESS,
14114 IX86_BUILTIN_COMINEQSS,
14115 IX86_BUILTIN_UCOMIEQSS,
14116 IX86_BUILTIN_UCOMILTSS,
14117 IX86_BUILTIN_UCOMILESS,
14118 IX86_BUILTIN_UCOMIGTSS,
14119 IX86_BUILTIN_UCOMIGESS,
14120 IX86_BUILTIN_UCOMINEQSS,
14121
14122 IX86_BUILTIN_CVTPI2PS,
14123 IX86_BUILTIN_CVTPS2PI,
14124 IX86_BUILTIN_CVTSI2SS,
14125 IX86_BUILTIN_CVTSI642SS,
14126 IX86_BUILTIN_CVTSS2SI,
14127 IX86_BUILTIN_CVTSS2SI64,
14128 IX86_BUILTIN_CVTTPS2PI,
14129 IX86_BUILTIN_CVTTSS2SI,
14130 IX86_BUILTIN_CVTTSS2SI64,
14131
14132 IX86_BUILTIN_MAXPS,
14133 IX86_BUILTIN_MAXSS,
14134 IX86_BUILTIN_MINPS,
14135 IX86_BUILTIN_MINSS,
14136
14137 IX86_BUILTIN_LOADUPS,
14138 IX86_BUILTIN_STOREUPS,
14139 IX86_BUILTIN_MOVSS,
14140
14141 IX86_BUILTIN_MOVHLPS,
14142 IX86_BUILTIN_MOVLHPS,
14143 IX86_BUILTIN_LOADHPS,
14144 IX86_BUILTIN_LOADLPS,
14145 IX86_BUILTIN_STOREHPS,
14146 IX86_BUILTIN_STORELPS,
14147
14148 IX86_BUILTIN_MASKMOVQ,
14149 IX86_BUILTIN_MOVMSKPS,
14150 IX86_BUILTIN_PMOVMSKB,
14151
14152 IX86_BUILTIN_MOVNTPS,
14153 IX86_BUILTIN_MOVNTQ,
14154
14155 IX86_BUILTIN_LOADDQU,
14156 IX86_BUILTIN_STOREDQU,
14157
14158 IX86_BUILTIN_PACKSSWB,
14159 IX86_BUILTIN_PACKSSDW,
14160 IX86_BUILTIN_PACKUSWB,
14161
14162 IX86_BUILTIN_PADDB,
14163 IX86_BUILTIN_PADDW,
14164 IX86_BUILTIN_PADDD,
14165 IX86_BUILTIN_PADDQ,
14166 IX86_BUILTIN_PADDSB,
14167 IX86_BUILTIN_PADDSW,
14168 IX86_BUILTIN_PADDUSB,
14169 IX86_BUILTIN_PADDUSW,
14170 IX86_BUILTIN_PSUBB,
14171 IX86_BUILTIN_PSUBW,
14172 IX86_BUILTIN_PSUBD,
14173 IX86_BUILTIN_PSUBQ,
14174 IX86_BUILTIN_PSUBSB,
14175 IX86_BUILTIN_PSUBSW,
14176 IX86_BUILTIN_PSUBUSB,
14177 IX86_BUILTIN_PSUBUSW,
14178
14179 IX86_BUILTIN_PAND,
14180 IX86_BUILTIN_PANDN,
14181 IX86_BUILTIN_POR,
14182 IX86_BUILTIN_PXOR,
14183
14184 IX86_BUILTIN_PAVGB,
14185 IX86_BUILTIN_PAVGW,
14186
14187 IX86_BUILTIN_PCMPEQB,
14188 IX86_BUILTIN_PCMPEQW,
14189 IX86_BUILTIN_PCMPEQD,
14190 IX86_BUILTIN_PCMPGTB,
14191 IX86_BUILTIN_PCMPGTW,
14192 IX86_BUILTIN_PCMPGTD,
14193
14194 IX86_BUILTIN_PMADDWD,
14195
14196 IX86_BUILTIN_PMAXSW,
14197 IX86_BUILTIN_PMAXUB,
14198 IX86_BUILTIN_PMINSW,
14199 IX86_BUILTIN_PMINUB,
14200
14201 IX86_BUILTIN_PMULHUW,
14202 IX86_BUILTIN_PMULHW,
14203 IX86_BUILTIN_PMULLW,
14204
14205 IX86_BUILTIN_PSADBW,
14206 IX86_BUILTIN_PSHUFW,
14207
14208 IX86_BUILTIN_PSLLW,
14209 IX86_BUILTIN_PSLLD,
14210 IX86_BUILTIN_PSLLQ,
14211 IX86_BUILTIN_PSRAW,
14212 IX86_BUILTIN_PSRAD,
14213 IX86_BUILTIN_PSRLW,
14214 IX86_BUILTIN_PSRLD,
14215 IX86_BUILTIN_PSRLQ,
14216 IX86_BUILTIN_PSLLWI,
14217 IX86_BUILTIN_PSLLDI,
14218 IX86_BUILTIN_PSLLQI,
14219 IX86_BUILTIN_PSRAWI,
14220 IX86_BUILTIN_PSRADI,
14221 IX86_BUILTIN_PSRLWI,
14222 IX86_BUILTIN_PSRLDI,
14223 IX86_BUILTIN_PSRLQI,
14224
14225 IX86_BUILTIN_PUNPCKHBW,
14226 IX86_BUILTIN_PUNPCKHWD,
14227 IX86_BUILTIN_PUNPCKHDQ,
14228 IX86_BUILTIN_PUNPCKLBW,
14229 IX86_BUILTIN_PUNPCKLWD,
14230 IX86_BUILTIN_PUNPCKLDQ,
14231
14232 IX86_BUILTIN_SHUFPS,
14233
14234 IX86_BUILTIN_RCPPS,
14235 IX86_BUILTIN_RCPSS,
14236 IX86_BUILTIN_RSQRTPS,
14237 IX86_BUILTIN_RSQRTSS,
14238 IX86_BUILTIN_SQRTPS,
14239 IX86_BUILTIN_SQRTSS,
14240
14241 IX86_BUILTIN_UNPCKHPS,
14242 IX86_BUILTIN_UNPCKLPS,
14243
14244 IX86_BUILTIN_ANDPS,
14245 IX86_BUILTIN_ANDNPS,
14246 IX86_BUILTIN_ORPS,
14247 IX86_BUILTIN_XORPS,
14248
14249 IX86_BUILTIN_EMMS,
14250 IX86_BUILTIN_LDMXCSR,
14251 IX86_BUILTIN_STMXCSR,
14252 IX86_BUILTIN_SFENCE,
14253
14254 /* 3DNow! Original */
14255 IX86_BUILTIN_FEMMS,
14256 IX86_BUILTIN_PAVGUSB,
14257 IX86_BUILTIN_PF2ID,
14258 IX86_BUILTIN_PFACC,
14259 IX86_BUILTIN_PFADD,
14260 IX86_BUILTIN_PFCMPEQ,
14261 IX86_BUILTIN_PFCMPGE,
14262 IX86_BUILTIN_PFCMPGT,
14263 IX86_BUILTIN_PFMAX,
14264 IX86_BUILTIN_PFMIN,
14265 IX86_BUILTIN_PFMUL,
14266 IX86_BUILTIN_PFRCP,
14267 IX86_BUILTIN_PFRCPIT1,
14268 IX86_BUILTIN_PFRCPIT2,
14269 IX86_BUILTIN_PFRSQIT1,
14270 IX86_BUILTIN_PFRSQRT,
14271 IX86_BUILTIN_PFSUB,
14272 IX86_BUILTIN_PFSUBR,
14273 IX86_BUILTIN_PI2FD,
14274 IX86_BUILTIN_PMULHRW,
14275
14276 /* 3DNow! Athlon Extensions */
14277 IX86_BUILTIN_PF2IW,
14278 IX86_BUILTIN_PFNACC,
14279 IX86_BUILTIN_PFPNACC,
14280 IX86_BUILTIN_PI2FW,
14281 IX86_BUILTIN_PSWAPDSI,
14282 IX86_BUILTIN_PSWAPDSF,
14283
14284 /* SSE2 */
14285 IX86_BUILTIN_ADDPD,
14286 IX86_BUILTIN_ADDSD,
14287 IX86_BUILTIN_DIVPD,
14288 IX86_BUILTIN_DIVSD,
14289 IX86_BUILTIN_MULPD,
14290 IX86_BUILTIN_MULSD,
14291 IX86_BUILTIN_SUBPD,
14292 IX86_BUILTIN_SUBSD,
14293
14294 IX86_BUILTIN_CMPEQPD,
14295 IX86_BUILTIN_CMPLTPD,
14296 IX86_BUILTIN_CMPLEPD,
14297 IX86_BUILTIN_CMPGTPD,
14298 IX86_BUILTIN_CMPGEPD,
14299 IX86_BUILTIN_CMPNEQPD,
14300 IX86_BUILTIN_CMPNLTPD,
14301 IX86_BUILTIN_CMPNLEPD,
14302 IX86_BUILTIN_CMPNGTPD,
14303 IX86_BUILTIN_CMPNGEPD,
14304 IX86_BUILTIN_CMPORDPD,
14305 IX86_BUILTIN_CMPUNORDPD,
14306 IX86_BUILTIN_CMPNEPD,
14307 IX86_BUILTIN_CMPEQSD,
14308 IX86_BUILTIN_CMPLTSD,
14309 IX86_BUILTIN_CMPLESD,
14310 IX86_BUILTIN_CMPNEQSD,
14311 IX86_BUILTIN_CMPNLTSD,
14312 IX86_BUILTIN_CMPNLESD,
14313 IX86_BUILTIN_CMPORDSD,
14314 IX86_BUILTIN_CMPUNORDSD,
14315 IX86_BUILTIN_CMPNESD,
14316
14317 IX86_BUILTIN_COMIEQSD,
14318 IX86_BUILTIN_COMILTSD,
14319 IX86_BUILTIN_COMILESD,
14320 IX86_BUILTIN_COMIGTSD,
14321 IX86_BUILTIN_COMIGESD,
14322 IX86_BUILTIN_COMINEQSD,
14323 IX86_BUILTIN_UCOMIEQSD,
14324 IX86_BUILTIN_UCOMILTSD,
14325 IX86_BUILTIN_UCOMILESD,
14326 IX86_BUILTIN_UCOMIGTSD,
14327 IX86_BUILTIN_UCOMIGESD,
14328 IX86_BUILTIN_UCOMINEQSD,
14329
14330 IX86_BUILTIN_MAXPD,
14331 IX86_BUILTIN_MAXSD,
14332 IX86_BUILTIN_MINPD,
14333 IX86_BUILTIN_MINSD,
14334
14335 IX86_BUILTIN_ANDPD,
14336 IX86_BUILTIN_ANDNPD,
14337 IX86_BUILTIN_ORPD,
14338 IX86_BUILTIN_XORPD,
14339
14340 IX86_BUILTIN_SQRTPD,
14341 IX86_BUILTIN_SQRTSD,
14342
14343 IX86_BUILTIN_UNPCKHPD,
14344 IX86_BUILTIN_UNPCKLPD,
14345
14346 IX86_BUILTIN_SHUFPD,
14347
14348 IX86_BUILTIN_LOADUPD,
14349 IX86_BUILTIN_STOREUPD,
14350 IX86_BUILTIN_MOVSD,
14351
14352 IX86_BUILTIN_LOADHPD,
14353 IX86_BUILTIN_LOADLPD,
14354
14355 IX86_BUILTIN_CVTDQ2PD,
14356 IX86_BUILTIN_CVTDQ2PS,
14357
14358 IX86_BUILTIN_CVTPD2DQ,
14359 IX86_BUILTIN_CVTPD2PI,
14360 IX86_BUILTIN_CVTPD2PS,
14361 IX86_BUILTIN_CVTTPD2DQ,
14362 IX86_BUILTIN_CVTTPD2PI,
14363
14364 IX86_BUILTIN_CVTPI2PD,
14365 IX86_BUILTIN_CVTSI2SD,
14366 IX86_BUILTIN_CVTSI642SD,
14367
14368 IX86_BUILTIN_CVTSD2SI,
14369 IX86_BUILTIN_CVTSD2SI64,
14370 IX86_BUILTIN_CVTSD2SS,
14371 IX86_BUILTIN_CVTSS2SD,
14372 IX86_BUILTIN_CVTTSD2SI,
14373 IX86_BUILTIN_CVTTSD2SI64,
14374
14375 IX86_BUILTIN_CVTPS2DQ,
14376 IX86_BUILTIN_CVTPS2PD,
14377 IX86_BUILTIN_CVTTPS2DQ,
14378
14379 IX86_BUILTIN_MOVNTI,
14380 IX86_BUILTIN_MOVNTPD,
14381 IX86_BUILTIN_MOVNTDQ,
14382
14383 /* SSE2 MMX */
14384 IX86_BUILTIN_MASKMOVDQU,
14385 IX86_BUILTIN_MOVMSKPD,
14386 IX86_BUILTIN_PMOVMSKB128,
14387
14388 IX86_BUILTIN_PACKSSWB128,
14389 IX86_BUILTIN_PACKSSDW128,
14390 IX86_BUILTIN_PACKUSWB128,
14391
14392 IX86_BUILTIN_PADDB128,
14393 IX86_BUILTIN_PADDW128,
14394 IX86_BUILTIN_PADDD128,
14395 IX86_BUILTIN_PADDQ128,
14396 IX86_BUILTIN_PADDSB128,
14397 IX86_BUILTIN_PADDSW128,
14398 IX86_BUILTIN_PADDUSB128,
14399 IX86_BUILTIN_PADDUSW128,
14400 IX86_BUILTIN_PSUBB128,
14401 IX86_BUILTIN_PSUBW128,
14402 IX86_BUILTIN_PSUBD128,
14403 IX86_BUILTIN_PSUBQ128,
14404 IX86_BUILTIN_PSUBSB128,
14405 IX86_BUILTIN_PSUBSW128,
14406 IX86_BUILTIN_PSUBUSB128,
14407 IX86_BUILTIN_PSUBUSW128,
14408
14409 IX86_BUILTIN_PAND128,
14410 IX86_BUILTIN_PANDN128,
14411 IX86_BUILTIN_POR128,
14412 IX86_BUILTIN_PXOR128,
14413
14414 IX86_BUILTIN_PAVGB128,
14415 IX86_BUILTIN_PAVGW128,
14416
14417 IX86_BUILTIN_PCMPEQB128,
14418 IX86_BUILTIN_PCMPEQW128,
14419 IX86_BUILTIN_PCMPEQD128,
14420 IX86_BUILTIN_PCMPGTB128,
14421 IX86_BUILTIN_PCMPGTW128,
14422 IX86_BUILTIN_PCMPGTD128,
14423
14424 IX86_BUILTIN_PMADDWD128,
14425
14426 IX86_BUILTIN_PMAXSW128,
14427 IX86_BUILTIN_PMAXUB128,
14428 IX86_BUILTIN_PMINSW128,
14429 IX86_BUILTIN_PMINUB128,
14430
14431 IX86_BUILTIN_PMULUDQ,
14432 IX86_BUILTIN_PMULUDQ128,
14433 IX86_BUILTIN_PMULHUW128,
14434 IX86_BUILTIN_PMULHW128,
14435 IX86_BUILTIN_PMULLW128,
14436
14437 IX86_BUILTIN_PSADBW128,
14438 IX86_BUILTIN_PSHUFHW,
14439 IX86_BUILTIN_PSHUFLW,
14440 IX86_BUILTIN_PSHUFD,
14441
14442 IX86_BUILTIN_PSLLW128,
14443 IX86_BUILTIN_PSLLD128,
14444 IX86_BUILTIN_PSLLQ128,
14445 IX86_BUILTIN_PSRAW128,
14446 IX86_BUILTIN_PSRAD128,
14447 IX86_BUILTIN_PSRLW128,
14448 IX86_BUILTIN_PSRLD128,
14449 IX86_BUILTIN_PSRLQ128,
14450 IX86_BUILTIN_PSLLDQI128,
14451 IX86_BUILTIN_PSLLWI128,
14452 IX86_BUILTIN_PSLLDI128,
14453 IX86_BUILTIN_PSLLQI128,
14454 IX86_BUILTIN_PSRAWI128,
14455 IX86_BUILTIN_PSRADI128,
14456 IX86_BUILTIN_PSRLDQI128,
14457 IX86_BUILTIN_PSRLWI128,
14458 IX86_BUILTIN_PSRLDI128,
14459 IX86_BUILTIN_PSRLQI128,
14460
14461 IX86_BUILTIN_PUNPCKHBW128,
14462 IX86_BUILTIN_PUNPCKHWD128,
14463 IX86_BUILTIN_PUNPCKHDQ128,
14464 IX86_BUILTIN_PUNPCKHQDQ128,
14465 IX86_BUILTIN_PUNPCKLBW128,
14466 IX86_BUILTIN_PUNPCKLWD128,
14467 IX86_BUILTIN_PUNPCKLDQ128,
14468 IX86_BUILTIN_PUNPCKLQDQ128,
14469
14470 IX86_BUILTIN_CLFLUSH,
14471 IX86_BUILTIN_MFENCE,
14472 IX86_BUILTIN_LFENCE,
14473
14474 /* Prescott New Instructions. */
14475 IX86_BUILTIN_ADDSUBPS,
14476 IX86_BUILTIN_HADDPS,
14477 IX86_BUILTIN_HSUBPS,
14478 IX86_BUILTIN_MOVSHDUP,
14479 IX86_BUILTIN_MOVSLDUP,
14480 IX86_BUILTIN_ADDSUBPD,
14481 IX86_BUILTIN_HADDPD,
14482 IX86_BUILTIN_HSUBPD,
14483 IX86_BUILTIN_LDDQU,
14484
14485 IX86_BUILTIN_MONITOR,
14486 IX86_BUILTIN_MWAIT,
14487
14488 IX86_BUILTIN_VEC_INIT_V2SI,
14489 IX86_BUILTIN_VEC_INIT_V4HI,
14490 IX86_BUILTIN_VEC_INIT_V8QI,
14491 IX86_BUILTIN_VEC_EXT_V2DF,
14492 IX86_BUILTIN_VEC_EXT_V2DI,
14493 IX86_BUILTIN_VEC_EXT_V4SF,
14494 IX86_BUILTIN_VEC_EXT_V4SI,
14495 IX86_BUILTIN_VEC_EXT_V8HI,
14496 IX86_BUILTIN_VEC_EXT_V2SI,
14497 IX86_BUILTIN_VEC_EXT_V4HI,
14498 IX86_BUILTIN_VEC_SET_V8HI,
14499 IX86_BUILTIN_VEC_SET_V4HI,
14500
14501 IX86_BUILTIN_MAX
14502 };
14503
14504 #define def_builtin(MASK, NAME, TYPE, CODE) \
14505 do { \
14506 if ((MASK) & target_flags \
14507 && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \
14508 lang_hooks.builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
14509 NULL, NULL_TREE); \
14510 } while (0)
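/* A typical use later in this file looks roughly like
     def_builtin (MASK_SSE, "__builtin_ia32_addps",
		  v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ADDPS);
   where the function-type node is built in ix86_init_mmx_sse_builtins.  */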
14511
14512 /* Bits for builtin_description.flag. */
14513
14514 /* Set when we don't support the comparison natively, and should
14515 swap the comparison operands in order to support it. */
14516 #define BUILTIN_DESC_SWAP_OPERANDS 1
14517
14518 struct builtin_description
14519 {
14520 const unsigned int mask;
14521 const enum insn_code icode;
14522 const char *const name;
14523 const enum ix86_builtins code;
14524 const enum rtx_code comparison;
14525 const unsigned int flag;
14526 };
14527
14528 static const struct builtin_description bdesc_comi[] =
14529 {
14530 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
14531 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
14532 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
14533 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
14534 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
14535 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
14536 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
14537 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
14538 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
14539 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
14540 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
14541 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
14542 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
14543 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
14544 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
14545 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
14546 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
14547 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
14548 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
14549 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
14550 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
14551 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
14552 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
14553 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
14554 };
14555
14556 static const struct builtin_description bdesc_2arg[] =
14557 {
14558 /* SSE */
14559 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
14560 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
14561 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
14562 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
14563 { MASK_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
14564 { MASK_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
14565 { MASK_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
14566 { MASK_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
14567
14568 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
14569 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
14570 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
14571 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT,
14572 BUILTIN_DESC_SWAP_OPERANDS },
14573 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE,
14574 BUILTIN_DESC_SWAP_OPERANDS },
14575 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
14576 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 },
14577 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 },
14578 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 },
14579 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE,
14580 BUILTIN_DESC_SWAP_OPERANDS },
14581 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT,
14582 BUILTIN_DESC_SWAP_OPERANDS },
14583 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 },
14584 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
14585 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
14586 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
14587 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
14588 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 },
14589 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 },
14590 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 },
14591 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE,
14592 BUILTIN_DESC_SWAP_OPERANDS },
14593 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT,
14594 BUILTIN_DESC_SWAP_OPERANDS },
14595 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, 0 },
14596
14597 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
14598 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
14599 { MASK_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
14600 { MASK_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
14601
14602 { MASK_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
14603 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
14604 { MASK_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
14605 { MASK_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
14606
14607 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
14608 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
14609 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
14610 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
14611 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
14612
14613 /* MMX */
14614 { MASK_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
14615 { MASK_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
14616 { MASK_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
14617 { MASK_SSE2, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
14618 { MASK_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
14619 { MASK_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
14620 { MASK_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
14621 { MASK_SSE2, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
14622
14623 { MASK_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
14624 { MASK_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
14625 { MASK_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
14626 { MASK_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
14627 { MASK_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
14628 { MASK_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
14629 { MASK_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
14630 { MASK_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
14631
14632 { MASK_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
14633 { MASK_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
14634 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
14635
14636 { MASK_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
14637 { MASK_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
14638 { MASK_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
14639 { MASK_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
14640
14641 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
14642 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
14643
14644 { MASK_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
14645 { MASK_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
14646 { MASK_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
14647 { MASK_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
14648 { MASK_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
14649 { MASK_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
14650
14651 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
14652 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
14653 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
14654 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
14655
14656 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
14657 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
14658 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
14659 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
14660 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
14661 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
14662
14663 /* Special. */
14664 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
14665 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
14666 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
14667
14668 { MASK_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
14669 { MASK_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
14670 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
14671
14672 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
14673 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
14674 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
14675 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
14676 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
14677 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
14678
14679 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
14680 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
14681 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
14682 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
14683 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
14684 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
14685
14686 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
14687 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
14688 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
14689 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
14690
14691 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
14692 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
14693
14694 /* SSE2 */
14695 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
14696 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
14697 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
14698 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
14699 { MASK_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
14700 { MASK_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
14701 { MASK_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
14702 { MASK_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
14703
14704 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
14705 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
14706 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
14707 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT,
14708 BUILTIN_DESC_SWAP_OPERANDS },
14709 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE,
14710 BUILTIN_DESC_SWAP_OPERANDS },
14711 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
14712 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 },
14713 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 },
14714 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 },
14715 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE,
14716 BUILTIN_DESC_SWAP_OPERANDS },
14717 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT,
14718 BUILTIN_DESC_SWAP_OPERANDS },
14719 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 },
14720 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
14721 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
14722 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
14723 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
14724 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 },
14725 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 },
14726 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 },
14727 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 },
14728
14729 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
14730 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
14731 { MASK_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
14732 { MASK_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
14733
14734 { MASK_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
14735 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
14736 { MASK_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
14737 { MASK_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
14738
14739 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
14740 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
14741 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
14742
14743 /* SSE2 MMX */
14744 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
14745 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
14746 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
14747 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
14748 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
14749 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
14750 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
14751 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
14752
14753 { MASK_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
14754 { MASK_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
14755 { MASK_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
14756 { MASK_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
14757 { MASK_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
14758 { MASK_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
14759 { MASK_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
14760 { MASK_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
14761
14762 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
14763 { MASK_SSE2, CODE_FOR_sse2_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
14764
14765 { MASK_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
14766 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
14767 { MASK_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
14768 { MASK_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
14769
14770 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
14771 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
14772
14773 { MASK_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
14774 { MASK_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
14775 { MASK_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
14776 { MASK_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
14777 { MASK_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
14778 { MASK_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
14779
14780 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
14781 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
14782 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
14783 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
14784
14785 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
14786 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
14787 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
14788 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
14789 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
14790 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
14791 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
14792 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
14793
14794 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
14795 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
14796 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
14797
14798 { MASK_SSE2, CODE_FOR_sse2_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
14799 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
14800
14801 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 },
14802 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 },
14803
14804 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
14805 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
14806 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
14807
14808 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
14809 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
14810 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
14811
14812 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
14813 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
14814
14815 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
14816
14817 { MASK_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
14818 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
14819 { MASK_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
14820 { MASK_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
14821
14822 /* SSE3 MMX */
14823 { MASK_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
14824 { MASK_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
14825 { MASK_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
14826 { MASK_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
14827 { MASK_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
14828 { MASK_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }
14829 };
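/* Hedged sketch of how a bdesc_2arg entry is consumed: the intrinsic headers
   wrap the builtin, and ix86_expand_builtin later maps the builtin code back
   to the icode recorded here.  Roughly, as in xmmintrin.h:

     static __inline __m128
     _mm_add_ps (__m128 __A, __m128 __B)
     {
       return (__m128) __builtin_ia32_addps ((__v4sf) __A, (__v4sf) __B);
     }

   which expands through CODE_FOR_addv4sf3 above.  */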
14830
14831 static const struct builtin_description bdesc_1arg[] =
14832 {
14833 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
14834 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
14835
14836 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
14837 { MASK_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
14838 { MASK_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
14839
14840 { MASK_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
14841 { MASK_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
14842 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
14843 { MASK_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
14844 { MASK_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
14845 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
14846
14847 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
14848 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
14849
14850 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
14851
14852 { MASK_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
14853 { MASK_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
14854
14855 { MASK_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
14856 { MASK_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
14857 { MASK_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
14858 { MASK_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
14859 { MASK_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
14860
14861 { MASK_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
14862
14863 { MASK_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
14864 { MASK_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
14865 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
14866 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
14867
14868 { MASK_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
14869 { MASK_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
14870 { MASK_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
14871
14872 /* SSE3 */
14873 { MASK_SSE3, CODE_FOR_sse3_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
14874 { MASK_SSE3, CODE_FOR_sse3_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
14875 };
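/* Hedged note on the one-operand table: entries with a null name are
   registered explicitly in ix86_init_mmx_sse_builtins with a hand-picked
   type, but this table still supplies the icode used at expansion time.  A
   typical user-level entry point, roughly as in xmmintrin.h:

     static __inline __m128
     _mm_sqrt_ps (__m128 __A)
     {
       return (__m128) __builtin_ia32_sqrtps ((__v4sf) __A);
     }
*/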
14876
14877 static void
14878 ix86_init_builtins (void)
14879 {
14880 if (TARGET_MMX)
14881 ix86_init_mmx_sse_builtins ();
14882 }
14883
14884 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
14885 is zero. Otherwise, if TARGET_SSE is not set, only the MMX
14886 builtins are defined. */
14887 static void
14888 ix86_init_mmx_sse_builtins (void)
14889 {
14890 const struct builtin_description * d;
14891 size_t i;
14892
14893 tree V16QI_type_node = build_vector_type_for_mode (intQI_type_node, V16QImode);
14894 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
14895 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
14896 tree V2DI_type_node
14897 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
14898 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
14899 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
14900 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
14901 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
14902 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
14903 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
14904
14905 tree pchar_type_node = build_pointer_type (char_type_node);
14906 tree pcchar_type_node = build_pointer_type (
14907 build_type_variant (char_type_node, 1, 0));
14908 tree pfloat_type_node = build_pointer_type (float_type_node);
14909 tree pcfloat_type_node = build_pointer_type (
14910 build_type_variant (float_type_node, 1, 0));
14911 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
14912 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
14913 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
14914
14915 /* Comparisons. */
14916 tree int_ftype_v4sf_v4sf
14917 = build_function_type_list (integer_type_node,
14918 V4SF_type_node, V4SF_type_node, NULL_TREE);
14919 tree v4si_ftype_v4sf_v4sf
14920 = build_function_type_list (V4SI_type_node,
14921 V4SF_type_node, V4SF_type_node, NULL_TREE);
14922 /* MMX/SSE/integer conversions. */
14923 tree int_ftype_v4sf
14924 = build_function_type_list (integer_type_node,
14925 V4SF_type_node, NULL_TREE);
14926 tree int64_ftype_v4sf
14927 = build_function_type_list (long_long_integer_type_node,
14928 V4SF_type_node, NULL_TREE);
14929 tree int_ftype_v8qi
14930 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
14931 tree v4sf_ftype_v4sf_int
14932 = build_function_type_list (V4SF_type_node,
14933 V4SF_type_node, integer_type_node, NULL_TREE);
14934 tree v4sf_ftype_v4sf_int64
14935 = build_function_type_list (V4SF_type_node,
14936 V4SF_type_node, long_long_integer_type_node,
14937 NULL_TREE);
14938 tree v4sf_ftype_v4sf_v2si
14939 = build_function_type_list (V4SF_type_node,
14940 V4SF_type_node, V2SI_type_node, NULL_TREE);
14941
14942 /* Miscellaneous. */
14943 tree v8qi_ftype_v4hi_v4hi
14944 = build_function_type_list (V8QI_type_node,
14945 V4HI_type_node, V4HI_type_node, NULL_TREE);
14946 tree v4hi_ftype_v2si_v2si
14947 = build_function_type_list (V4HI_type_node,
14948 V2SI_type_node, V2SI_type_node, NULL_TREE);
14949 tree v4sf_ftype_v4sf_v4sf_int
14950 = build_function_type_list (V4SF_type_node,
14951 V4SF_type_node, V4SF_type_node,
14952 integer_type_node, NULL_TREE);
14953 tree v2si_ftype_v4hi_v4hi
14954 = build_function_type_list (V2SI_type_node,
14955 V4HI_type_node, V4HI_type_node, NULL_TREE);
14956 tree v4hi_ftype_v4hi_int
14957 = build_function_type_list (V4HI_type_node,
14958 V4HI_type_node, integer_type_node, NULL_TREE);
14959 tree v4hi_ftype_v4hi_di
14960 = build_function_type_list (V4HI_type_node,
14961 V4HI_type_node, long_long_unsigned_type_node,
14962 NULL_TREE);
14963 tree v2si_ftype_v2si_di
14964 = build_function_type_list (V2SI_type_node,
14965 V2SI_type_node, long_long_unsigned_type_node,
14966 NULL_TREE);
14967 tree void_ftype_void
14968 = build_function_type (void_type_node, void_list_node);
14969 tree void_ftype_unsigned
14970 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
14971 tree void_ftype_unsigned_unsigned
14972 = build_function_type_list (void_type_node, unsigned_type_node,
14973 unsigned_type_node, NULL_TREE);
14974 tree void_ftype_pcvoid_unsigned_unsigned
14975 = build_function_type_list (void_type_node, const_ptr_type_node,
14976 unsigned_type_node, unsigned_type_node,
14977 NULL_TREE);
14978 tree unsigned_ftype_void
14979 = build_function_type (unsigned_type_node, void_list_node);
14980 tree v2si_ftype_v4sf
14981 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
14982 /* Loads/stores. */
14983 tree void_ftype_v8qi_v8qi_pchar
14984 = build_function_type_list (void_type_node,
14985 V8QI_type_node, V8QI_type_node,
14986 pchar_type_node, NULL_TREE);
14987 tree v4sf_ftype_pcfloat
14988 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
14989 /* @@@ the type is bogus */
14990 tree v4sf_ftype_v4sf_pv2si
14991 = build_function_type_list (V4SF_type_node,
14992 V4SF_type_node, pv2si_type_node, NULL_TREE);
14993 tree void_ftype_pv2si_v4sf
14994 = build_function_type_list (void_type_node,
14995 pv2si_type_node, V4SF_type_node, NULL_TREE);
14996 tree void_ftype_pfloat_v4sf
14997 = build_function_type_list (void_type_node,
14998 pfloat_type_node, V4SF_type_node, NULL_TREE);
14999 tree void_ftype_pdi_di
15000 = build_function_type_list (void_type_node,
15001 pdi_type_node, long_long_unsigned_type_node,
15002 NULL_TREE);
15003 tree void_ftype_pv2di_v2di
15004 = build_function_type_list (void_type_node,
15005 pv2di_type_node, V2DI_type_node, NULL_TREE);
15006 /* Normal vector unops. */
15007 tree v4sf_ftype_v4sf
15008 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
15009
15010 /* Normal vector binops. */
15011 tree v4sf_ftype_v4sf_v4sf
15012 = build_function_type_list (V4SF_type_node,
15013 V4SF_type_node, V4SF_type_node, NULL_TREE);
15014 tree v8qi_ftype_v8qi_v8qi
15015 = build_function_type_list (V8QI_type_node,
15016 V8QI_type_node, V8QI_type_node, NULL_TREE);
15017 tree v4hi_ftype_v4hi_v4hi
15018 = build_function_type_list (V4HI_type_node,
15019 V4HI_type_node, V4HI_type_node, NULL_TREE);
15020 tree v2si_ftype_v2si_v2si
15021 = build_function_type_list (V2SI_type_node,
15022 V2SI_type_node, V2SI_type_node, NULL_TREE);
15023 tree di_ftype_di_di
15024 = build_function_type_list (long_long_unsigned_type_node,
15025 long_long_unsigned_type_node,
15026 long_long_unsigned_type_node, NULL_TREE);
15027
15028 tree v2si_ftype_v2sf
15029 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
15030 tree v2sf_ftype_v2si
15031 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
15032 tree v2si_ftype_v2si
15033 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
15034 tree v2sf_ftype_v2sf
15035 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
15036 tree v2sf_ftype_v2sf_v2sf
15037 = build_function_type_list (V2SF_type_node,
15038 V2SF_type_node, V2SF_type_node, NULL_TREE);
15039 tree v2si_ftype_v2sf_v2sf
15040 = build_function_type_list (V2SI_type_node,
15041 V2SF_type_node, V2SF_type_node, NULL_TREE);
15042 tree pint_type_node = build_pointer_type (integer_type_node);
15043 tree pdouble_type_node = build_pointer_type (double_type_node);
15044 tree pcdouble_type_node = build_pointer_type (
15045 build_type_variant (double_type_node, 1, 0));
15046 tree int_ftype_v2df_v2df
15047 = build_function_type_list (integer_type_node,
15048 V2DF_type_node, V2DF_type_node, NULL_TREE);
15049
15050 tree void_ftype_pcvoid
15051 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
15052 tree v4sf_ftype_v4si
15053 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
15054 tree v4si_ftype_v4sf
15055 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
15056 tree v2df_ftype_v4si
15057 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
15058 tree v4si_ftype_v2df
15059 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
15060 tree v2si_ftype_v2df
15061 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
15062 tree v4sf_ftype_v2df
15063 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
15064 tree v2df_ftype_v2si
15065 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
15066 tree v2df_ftype_v4sf
15067 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
15068 tree int_ftype_v2df
15069 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
15070 tree int64_ftype_v2df
15071 = build_function_type_list (long_long_integer_type_node,
15072 V2DF_type_node, NULL_TREE);
15073 tree v2df_ftype_v2df_int
15074 = build_function_type_list (V2DF_type_node,
15075 V2DF_type_node, integer_type_node, NULL_TREE);
15076 tree v2df_ftype_v2df_int64
15077 = build_function_type_list (V2DF_type_node,
15078 V2DF_type_node, long_long_integer_type_node,
15079 NULL_TREE);
15080 tree v4sf_ftype_v4sf_v2df
15081 = build_function_type_list (V4SF_type_node,
15082 V4SF_type_node, V2DF_type_node, NULL_TREE);
15083 tree v2df_ftype_v2df_v4sf
15084 = build_function_type_list (V2DF_type_node,
15085 V2DF_type_node, V4SF_type_node, NULL_TREE);
15086 tree v2df_ftype_v2df_v2df_int
15087 = build_function_type_list (V2DF_type_node,
15088 V2DF_type_node, V2DF_type_node,
15089 integer_type_node,
15090 NULL_TREE);
15091 tree v2df_ftype_v2df_pcdouble
15092 = build_function_type_list (V2DF_type_node,
15093 V2DF_type_node, pcdouble_type_node, NULL_TREE);
15094 tree void_ftype_pdouble_v2df
15095 = build_function_type_list (void_type_node,
15096 pdouble_type_node, V2DF_type_node, NULL_TREE);
15097 tree void_ftype_pint_int
15098 = build_function_type_list (void_type_node,
15099 pint_type_node, integer_type_node, NULL_TREE);
15100 tree void_ftype_v16qi_v16qi_pchar
15101 = build_function_type_list (void_type_node,
15102 V16QI_type_node, V16QI_type_node,
15103 pchar_type_node, NULL_TREE);
15104 tree v2df_ftype_pcdouble
15105 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
15106 tree v2df_ftype_v2df_v2df
15107 = build_function_type_list (V2DF_type_node,
15108 V2DF_type_node, V2DF_type_node, NULL_TREE);
15109 tree v16qi_ftype_v16qi_v16qi
15110 = build_function_type_list (V16QI_type_node,
15111 V16QI_type_node, V16QI_type_node, NULL_TREE);
15112 tree v8hi_ftype_v8hi_v8hi
15113 = build_function_type_list (V8HI_type_node,
15114 V8HI_type_node, V8HI_type_node, NULL_TREE);
15115 tree v4si_ftype_v4si_v4si
15116 = build_function_type_list (V4SI_type_node,
15117 V4SI_type_node, V4SI_type_node, NULL_TREE);
15118 tree v2di_ftype_v2di_v2di
15119 = build_function_type_list (V2DI_type_node,
15120 V2DI_type_node, V2DI_type_node, NULL_TREE);
15121 tree v2di_ftype_v2df_v2df
15122 = build_function_type_list (V2DI_type_node,
15123 V2DF_type_node, V2DF_type_node, NULL_TREE);
15124 tree v2df_ftype_v2df
15125 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
15126 tree v2di_ftype_v2di_int
15127 = build_function_type_list (V2DI_type_node,
15128 V2DI_type_node, integer_type_node, NULL_TREE);
15129 tree v4si_ftype_v4si_int
15130 = build_function_type_list (V4SI_type_node,
15131 V4SI_type_node, integer_type_node, NULL_TREE);
15132 tree v8hi_ftype_v8hi_int
15133 = build_function_type_list (V8HI_type_node,
15134 V8HI_type_node, integer_type_node, NULL_TREE);
15135 tree v8hi_ftype_v8hi_v2di
15136 = build_function_type_list (V8HI_type_node,
15137 V8HI_type_node, V2DI_type_node, NULL_TREE);
15138 tree v4si_ftype_v4si_v2di
15139 = build_function_type_list (V4SI_type_node,
15140 V4SI_type_node, V2DI_type_node, NULL_TREE);
15141 tree v4si_ftype_v8hi_v8hi
15142 = build_function_type_list (V4SI_type_node,
15143 V8HI_type_node, V8HI_type_node, NULL_TREE);
15144 tree di_ftype_v8qi_v8qi
15145 = build_function_type_list (long_long_unsigned_type_node,
15146 V8QI_type_node, V8QI_type_node, NULL_TREE);
15147 tree di_ftype_v2si_v2si
15148 = build_function_type_list (long_long_unsigned_type_node,
15149 V2SI_type_node, V2SI_type_node, NULL_TREE);
15150 tree v2di_ftype_v16qi_v16qi
15151 = build_function_type_list (V2DI_type_node,
15152 V16QI_type_node, V16QI_type_node, NULL_TREE);
15153 tree v2di_ftype_v4si_v4si
15154 = build_function_type_list (V2DI_type_node,
15155 V4SI_type_node, V4SI_type_node, NULL_TREE);
15156 tree int_ftype_v16qi
15157 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
15158 tree v16qi_ftype_pcchar
15159 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
15160 tree void_ftype_pchar_v16qi
15161 = build_function_type_list (void_type_node,
15162 pchar_type_node, V16QI_type_node, NULL_TREE);
15163
15164 tree float80_type;
15165 tree float128_type;
15166 tree ftype;
15167
15168 /* The __float80 type. */
15169 if (TYPE_MODE (long_double_type_node) == XFmode)
15170 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
15171 "__float80");
15172 else
15173 {
15174 /* The __float80 type. */
15175 float80_type = make_node (REAL_TYPE);
15176 TYPE_PRECISION (float80_type) = 80;
15177 layout_type (float80_type);
15178 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
15179 }
15180
15181 if (TARGET_64BIT)
15182 {
15183 float128_type = make_node (REAL_TYPE);
15184 TYPE_PRECISION (float128_type) = 128;
15185 layout_type (float128_type);
15186 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
15187 }
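  /* Hedged usage note: the registrations above only make the type names
     visible to user code.  Assuming an x86 target where long double is the
     80-bit extended type, roughly:

       __float80 ext = 1.0L;
       __float128 quad;     (accepted on 64-bit targets only, per the
                             TARGET_64BIT guard above)

     is then accepted by the C-family front ends.  */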
15188
15189 /* Add all builtins that are more or less simple operations on two
15190 operands. */
15191 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
15192 {
15193 /* Use one of the operands; the target can have a different mode for
15194 mask-generating compares. */
15195 enum machine_mode mode;
15196 tree type;
15197
15198 if (d->name == 0)
15199 continue;
15200 mode = insn_data[d->icode].operand[1].mode;
15201
15202 switch (mode)
15203 {
15204 case V16QImode:
15205 type = v16qi_ftype_v16qi_v16qi;
15206 break;
15207 case V8HImode:
15208 type = v8hi_ftype_v8hi_v8hi;
15209 break;
15210 case V4SImode:
15211 type = v4si_ftype_v4si_v4si;
15212 break;
15213 case V2DImode:
15214 type = v2di_ftype_v2di_v2di;
15215 break;
15216 case V2DFmode:
15217 type = v2df_ftype_v2df_v2df;
15218 break;
15219 case V4SFmode:
15220 type = v4sf_ftype_v4sf_v4sf;
15221 break;
15222 case V8QImode:
15223 type = v8qi_ftype_v8qi_v8qi;
15224 break;
15225 case V4HImode:
15226 type = v4hi_ftype_v4hi_v4hi;
15227 break;
15228 case V2SImode:
15229 type = v2si_ftype_v2si_v2si;
15230 break;
15231 case DImode:
15232 type = di_ftype_di_di;
15233 break;
15234
15235 default:
15236 gcc_unreachable ();
15237 }
15238
15239 /* Override for comparisons. */
15240 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
15241 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
15242 type = v4si_ftype_v4sf_v4sf;
15243
15244 if (d->icode == CODE_FOR_sse2_maskcmpv2df3
15245 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
15246 type = v2di_ftype_v2df_v2df;
15247
15248 def_builtin (d->mask, d->name, type, d->code);
15249 }
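  /* Hedged note on the comparison overrides above: the mask-compare patterns
     produce an all-ones/all-zeros integer vector, so those builtins are
     declared as returning V4SI/V2DI rather than the float vector type.  The
     intrinsic wrappers then cast back, roughly as xmmintrin.h does:

       static __inline __m128
       _mm_cmpeq_ps (__m128 __A, __m128 __B)
       {
         return (__m128) __builtin_ia32_cmpeqps ((__v4sf) __A, (__v4sf) __B);
       }
  */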
15250
15251 /* Add the remaining MMX insns with somewhat more complicated types. */
15252 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
15253 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
15254 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
15255 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
15256
15257 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
15258 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
15259 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
15260
15261 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
15262 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
15263
15264 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
15265 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
15266
15267 /* comi/ucomi insns. */
15268 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
15269 if (d->mask == MASK_SSE2)
15270 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
15271 else
15272 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
15273
15274 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
15275 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
15276 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
15277
15278 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
15279 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
15280 def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
15281 def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
15282 def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
15283 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
15284 def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
15285 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
15286 def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
15287 def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
15288 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
15289
15290 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
15291
15292 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
15293 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
15294
15295 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
15296 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
15297 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
15298 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
15299
15300 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
15301 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
15302 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
15303 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
15304
15305 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
15306
15307 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
15308
15309 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
15310 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
15311 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
15312 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
15313 def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
15314 def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
15315
15316 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
15317
15318 /* Original 3DNow! */
15319 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
15320 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
15321 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
15322 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
15323 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
15324 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
15325 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
15326 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
15327 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
15328 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
15329 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
15330 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
15331 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
15332 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
15333 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
15334 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
15335 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
15336 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
15337 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
15338 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
15339
15340 /* 3DNow! extension as used in the Athlon CPU. */
15341 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
15342 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
15343 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
15344 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
15345 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
15346 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
15347
15348 /* SSE2 */
15349 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
15350
15351 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
15352 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
15353
15354 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
15355 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);
15356
15357 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
15358 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
15359 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
15360 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
15361 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
15362
15363 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
15364 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
15365 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
15366 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
15367
15368 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
15369 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
15370
15371 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
15372
15373 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
15374 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
15375
15376 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
15377 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
15378 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
15379 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
15380 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
15381
15382 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
15383
15384 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
15385 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
15386 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
15387 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
15388
15389 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
15390 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
15391 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
15392
15393 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
15394 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
15395 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
15396 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
15397
15398 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
15399 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
15400 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
15401
15402 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
15403 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
15404
15405 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
15406 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
15407
15408 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
15409 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
15410 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
15411
15412 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
15413 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
15414 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
15415
15416 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
15417 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
15418
15419 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
15420 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
15421 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
15422 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
15423
15424 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
15425 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
15426 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
15427 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
15428
15429 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
15430 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
15431
15432 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
15433
15434 /* Prescott New Instructions. */
15435 def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
15436 void_ftype_pcvoid_unsigned_unsigned,
15437 IX86_BUILTIN_MONITOR);
15438 def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
15439 void_ftype_unsigned_unsigned,
15440 IX86_BUILTIN_MWAIT);
15441 def_builtin (MASK_SSE3, "__builtin_ia32_movshdup",
15442 v4sf_ftype_v4sf,
15443 IX86_BUILTIN_MOVSHDUP);
15444 def_builtin (MASK_SSE3, "__builtin_ia32_movsldup",
15445 v4sf_ftype_v4sf,
15446 IX86_BUILTIN_MOVSLDUP);
15447 def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
15448 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
15449
15450 /* Access to the vec_init patterns. */
15451 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
15452 integer_type_node, NULL_TREE);
15453 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v2si",
15454 ftype, IX86_BUILTIN_VEC_INIT_V2SI);
15455
15456 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
15457 short_integer_type_node,
15458 short_integer_type_node,
15459 short_integer_type_node, NULL_TREE);
15460 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v4hi",
15461 ftype, IX86_BUILTIN_VEC_INIT_V4HI);
15462
15463 ftype = build_function_type_list (V8QI_type_node, char_type_node,
15464 char_type_node, char_type_node,
15465 char_type_node, char_type_node,
15466 char_type_node, char_type_node,
15467 char_type_node, NULL_TREE);
15468 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v8qi",
15469 ftype, IX86_BUILTIN_VEC_INIT_V8QI);
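  /* Hedged sketch: the vec_init builtins let the intrinsic headers build an
     MMX vector from scalars without going through memory or a union.
     Roughly, as in mmintrin.h:

       static __inline __m64
       _mm_set_pi32 (int __i1, int __i0)
       {
         return (__m64) __builtin_ia32_vec_init_v2si (__i0, __i1);
       }
  */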
15470
15471 /* Access to the vec_extract patterns. */
15472 ftype = build_function_type_list (double_type_node, V2DF_type_node,
15473 integer_type_node, NULL_TREE);
15474 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2df",
15475 ftype, IX86_BUILTIN_VEC_EXT_V2DF);
15476
15477 ftype = build_function_type_list (long_long_integer_type_node,
15478 V2DI_type_node, integer_type_node,
15479 NULL_TREE);
15480 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2di",
15481 ftype, IX86_BUILTIN_VEC_EXT_V2DI);
15482
15483 ftype = build_function_type_list (float_type_node, V4SF_type_node,
15484 integer_type_node, NULL_TREE);
15485 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4sf",
15486 ftype, IX86_BUILTIN_VEC_EXT_V4SF);
15487
15488 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
15489 integer_type_node, NULL_TREE);
15490 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4si",
15491 ftype, IX86_BUILTIN_VEC_EXT_V4SI);
15492
15493 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
15494 integer_type_node, NULL_TREE);
15495 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v8hi",
15496 ftype, IX86_BUILTIN_VEC_EXT_V8HI);
15497
15498 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
15499 integer_type_node, NULL_TREE);
15500 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_ext_v4hi",
15501 ftype, IX86_BUILTIN_VEC_EXT_V4HI);
15502
15503 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
15504 integer_type_node, NULL_TREE);
15505 def_builtin (MASK_MMX, "__builtin_ia32_vec_ext_v2si",
15506 ftype, IX86_BUILTIN_VEC_EXT_V2SI);
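  /* Hedged sketch: the vec_ext builtins extract a single element at a
     constant index; the element-extract intrinsics are thin wrappers over
     them.  Roughly, as in emmintrin.h:

       static __inline int
       _mm_extract_epi16 (__m128i __A, int __N)
       {
         return (unsigned short) __builtin_ia32_vec_ext_v8hi ((__v8hi) __A, __N);
       }
  */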
15507
15508 /* Access to the vec_set patterns. */
15509 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
15510 intHI_type_node,
15511 integer_type_node, NULL_TREE);
15512 def_builtin (MASK_SSE, "__builtin_ia32_vec_set_v8hi",
15513 ftype, IX86_BUILTIN_VEC_SET_V8HI);
15514
15515 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
15516 intHI_type_node,
15517 integer_type_node, NULL_TREE);
15518 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_set_v4hi",
15519 ftype, IX86_BUILTIN_VEC_SET_V4HI);
15520 }
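/* Hedged sketch: the vec_set builtins registered at the end of the function
   above back the element-insert intrinsics.  Roughly, as in emmintrin.h:

     static __inline __m128i
     _mm_insert_epi16 (__m128i __A, int __D, int __N)
     {
       return (__m128i) __builtin_ia32_vec_set_v8hi ((__v8hi) __A, __D, __N);
     }
*/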
15521
15522 /* Errors in the source file can cause expand_expr to return const0_rtx
15523 where we expect a vector. To avoid crashing, use one of the vector
15524 clear instructions. */
15525 static rtx
15526 safe_vector_operand (rtx x, enum machine_mode mode)
15527 {
15528 if (x == const0_rtx)
15529 x = CONST0_RTX (mode);
15530 return x;
15531 }
15532
15533 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
15534
15535 static rtx
15536 ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
15537 {
15538 rtx pat, xops[3];
15539 tree arg0 = TREE_VALUE (arglist);
15540 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15541 rtx op0 = expand_normal (arg0);
15542 rtx op1 = expand_normal (arg1);
15543 enum machine_mode tmode = insn_data[icode].operand[0].mode;
15544 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
15545 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
15546
15547 if (VECTOR_MODE_P (mode0))
15548 op0 = safe_vector_operand (op0, mode0);
15549 if (VECTOR_MODE_P (mode1))
15550 op1 = safe_vector_operand (op1, mode1);
15551
15552 if (optimize || !target
15553 || GET_MODE (target) != tmode
15554 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15555 target = gen_reg_rtx (tmode);
15556
15557 if (GET_MODE (op1) == SImode && mode1 == TImode)
15558 {
15559 rtx x = gen_reg_rtx (V4SImode);
15560 emit_insn (gen_sse2_loadd (x, op1));
15561 op1 = gen_lowpart (TImode, x);
15562 }
15563
15564 /* The insn must want input operands in the same modes as the
15565 result. */
15566 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
15567 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
15568
15569 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
15570 op0 = copy_to_mode_reg (mode0, op0);
15571 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
15572 op1 = copy_to_mode_reg (mode1, op1);
15573
15574 /* ??? Using ix86_fixup_binary_operands is problematic when
15575 we've got mismatched modes. Fake it. */
15576
15577 xops[0] = target;
15578 xops[1] = op0;
15579 xops[2] = op1;
15580
15581 if (tmode == mode0 && tmode == mode1)
15582 {
15583 target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
15584 op0 = xops[1];
15585 op1 = xops[2];
15586 }
15587 else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
15588 {
15589 op0 = force_reg (mode0, op0);
15590 op1 = force_reg (mode1, op1);
15591 target = gen_reg_rtx (tmode);
15592 }
15593
15594 pat = GEN_FCN (icode) (target, op0, op1);
15595 if (! pat)
15596 return 0;
15597 emit_insn (pat);
15598 return target;
15599 }
15600
15601 /* Subroutine of ix86_expand_builtin to take care of stores. */
15602
15603 static rtx
15604 ix86_expand_store_builtin (enum insn_code icode, tree arglist)
15605 {
15606 rtx pat;
15607 tree arg0 = TREE_VALUE (arglist);
15608 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15609 rtx op0 = expand_normal (arg0);
15610 rtx op1 = expand_normal (arg1);
15611 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
15612 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
15613
15614 if (VECTOR_MODE_P (mode1))
15615 op1 = safe_vector_operand (op1, mode1);
15616
15617 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
15618 op1 = copy_to_mode_reg (mode1, op1);
15619
15620 pat = GEN_FCN (icode) (op0, op1);
15621 if (pat)
15622 emit_insn (pat);
15623 return 0;
15624 }
15625
15626 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
15627
15628 static rtx
15629 ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
15630 rtx target, int do_load)
15631 {
15632 rtx pat;
15633 tree arg0 = TREE_VALUE (arglist);
15634 rtx op0 = expand_normal (arg0);
15635 enum machine_mode tmode = insn_data[icode].operand[0].mode;
15636 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
15637
15638 if (optimize || !target
15639 || GET_MODE (target) != tmode
15640 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15641 target = gen_reg_rtx (tmode);
15642 if (do_load)
15643 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
15644 else
15645 {
15646 if (VECTOR_MODE_P (mode0))
15647 op0 = safe_vector_operand (op0, mode0);
15648
15649 if ((optimize && !register_operand (op0, mode0))
15650 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15651 op0 = copy_to_mode_reg (mode0, op0);
15652 }
15653
15654 pat = GEN_FCN (icode) (target, op0);
15655 if (! pat)
15656 return 0;
15657 emit_insn (pat);
15658 return target;
15659 }
15660
15661 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
15662 sqrtss, rsqrtss, rcpss. */
15663
15664 static rtx
15665 ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
15666 {
15667 rtx pat;
15668 tree arg0 = TREE_VALUE (arglist);
15669 rtx op1, op0 = expand_normal (arg0);
15670 enum machine_mode tmode = insn_data[icode].operand[0].mode;
15671 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
15672
15673 if (optimize || !target
15674 || GET_MODE (target) != tmode
15675 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15676 target = gen_reg_rtx (tmode);
15677
15678 if (VECTOR_MODE_P (mode0))
15679 op0 = safe_vector_operand (op0, mode0);
15680
15681 if ((optimize && !register_operand (op0, mode0))
15682 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15683 op0 = copy_to_mode_reg (mode0, op0);
15684
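/* The scalar vm* patterns take a second operand whose upper vector elements
   are merged into the result; passing the same register for both (below)
   leaves those elements unchanged, which is what the *ss intrinsics expect.  */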
15685 op1 = op0;
15686 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
15687 op1 = copy_to_mode_reg (mode0, op1);
15688
15689 pat = GEN_FCN (icode) (target, op0, op1);
15690 if (! pat)
15691 return 0;
15692 emit_insn (pat);
15693 return target;
15694 }
15695
15696 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
15697
15698 static rtx
15699 ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
15700 rtx target)
15701 {
15702 rtx pat;
15703 tree arg0 = TREE_VALUE (arglist);
15704 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15705 rtx op0 = expand_normal (arg0);
15706 rtx op1 = expand_normal (arg1);
15707 rtx op2;
15708 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
15709 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
15710 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
15711 enum rtx_code comparison = d->comparison;
15712
15713 if (VECTOR_MODE_P (mode0))
15714 op0 = safe_vector_operand (op0, mode0);
15715 if (VECTOR_MODE_P (mode1))
15716 op1 = safe_vector_operand (op1, mode1);
15717
15718 /* Swap operands if we have a comparison that isn't available in
15719 hardware. */
15720 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
15721 {
15722 rtx tmp = gen_reg_rtx (mode1);
15723 emit_move_insn (tmp, op1);
15724 op1 = op0;
15725 op0 = tmp;
15726 }
15727
15728 if (optimize || !target
15729 || GET_MODE (target) != tmode
15730 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
15731 target = gen_reg_rtx (tmode);
15732
15733 if ((optimize && !register_operand (op0, mode0))
15734 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
15735 op0 = copy_to_mode_reg (mode0, op0);
15736 if ((optimize && !register_operand (op1, mode1))
15737 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
15738 op1 = copy_to_mode_reg (mode1, op1);
15739
15740 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
15741 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
15742 if (! pat)
15743 return 0;
15744 emit_insn (pat);
15745 return target;
15746 }
15747
15748 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
15749
15750 static rtx
15751 ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
15752 rtx target)
15753 {
15754 rtx pat;
15755 tree arg0 = TREE_VALUE (arglist);
15756 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15757 rtx op0 = expand_normal (arg0);
15758 rtx op1 = expand_normal (arg1);
15759 rtx op2;
15760 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
15761 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
15762 enum rtx_code comparison = d->comparison;
15763
15764 if (VECTOR_MODE_P (mode0))
15765 op0 = safe_vector_operand (op0, mode0);
15766 if (VECTOR_MODE_P (mode1))
15767 op1 = safe_vector_operand (op1, mode1);
15768
15769 /* Swap operands if we have a comparison that isn't available in
15770 hardware. */
15771 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
15772 {
15773 rtx tmp = op1;
15774 op1 = op0;
15775 op0 = tmp;
15776 }
15777
15778 target = gen_reg_rtx (SImode);
15779 emit_move_insn (target, const0_rtx);
15780 target = gen_rtx_SUBREG (QImode, target, 0);
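/* The comi insn only sets the flags; below we materialize the boolean by
   storing the flags comparison into the low byte of the zeroed SImode
   pseudo via STRICT_LOW_PART, then return the full SImode register.  */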
15781
15782 if ((optimize && !register_operand (op0, mode0))
15783 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
15784 op0 = copy_to_mode_reg (mode0, op0);
15785 if ((optimize && !register_operand (op1, mode1))
15786 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
15787 op1 = copy_to_mode_reg (mode1, op1);
15788
15789 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
15790 pat = GEN_FCN (d->icode) (op0, op1);
15791 if (! pat)
15792 return 0;
15793 emit_insn (pat);
15794 emit_insn (gen_rtx_SET (VOIDmode,
15795 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
15796 gen_rtx_fmt_ee (comparison, QImode,
15797 SET_DEST (pat),
15798 const0_rtx)));
15799
15800 return SUBREG_REG (target);
15801 }
15802
15803 /* Return the integer constant in ARG. Constrain it to be in the range
15804 of the subparts of VEC_TYPE; issue an error if not. */
15805
15806 static int
15807 get_element_number (tree vec_type, tree arg)
15808 {
15809 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
15810
15811 if (!host_integerp (arg, 1)
15812 || (elt = tree_low_cst (arg, 1), elt > max))
15813 {
15814 error ("selector must be an integer constant in the range 0..%wi", max);
15815 return 0;
15816 }
15817
15818 return elt;
15819 }
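/* Example of the check above: for a V4SF argument TYPE_VECTOR_SUBPARTS is 4,
   so the selector must be a literal in 0..3; a call such as
   __builtin_ia32_vec_ext_v4sf (x, 4) draws the error and the expander
   silently falls back to element 0.  */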
15820
15821 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
15822 ix86_expand_vector_init. We DO have language-level syntax for this, in
15823 the form of (type){ init-list }. Except that since we can't place emms
15824 instructions from inside the compiler, we can't allow the use of MMX
15825 registers unless the user explicitly asks for it. So we do *not* define
15826 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
15827 we have builtins invoked by mmintrin.h that give us license to emit

15828 these sorts of instructions. */
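/* For illustration only (a sketch of the expected consumer; the actual
   wrapper lives in mmintrin.h and may differ in attributes and spelling):

       static __inline __m64
       _mm_set_pi32 (int __i1, int __i0)
       {
         return (__m64) __builtin_ia32_vec_init_v2si (__i0, __i1);
       }

   User code never names the builtin directly; the intrinsic header does,
   and ix86_expand_vec_init_builtin below turns the call into a PARALLEL
   handed to ix86_expand_vector_init.  */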
15829
15830 static rtx
15831 ix86_expand_vec_init_builtin (tree type, tree arglist, rtx target)
15832 {
15833 enum machine_mode tmode = TYPE_MODE (type);
15834 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
15835 int i, n_elt = GET_MODE_NUNITS (tmode);
15836 rtvec v = rtvec_alloc (n_elt);
15837
15838 gcc_assert (VECTOR_MODE_P (tmode));
15839
15840 for (i = 0; i < n_elt; ++i, arglist = TREE_CHAIN (arglist))
15841 {
15842 rtx x = expand_normal (TREE_VALUE (arglist));
15843 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
15844 }
15845
15846 gcc_assert (arglist == NULL);
15847
15848 if (!target || !register_operand (target, tmode))
15849 target = gen_reg_rtx (tmode);
15850
15851 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
15852 return target;
15853 }
15854
15855 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
15856 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
15857 had a language-level syntax for referencing vector elements. */
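/* Illustrative sketch (the real wrapper lives in xmmintrin.h and may differ
   slightly); the extract builtin is expected to be used roughly as:

       static __inline int
       _mm_extract_pi16 (__m64 const __A, int const __N)
       {
         return __builtin_ia32_vec_ext_v4hi ((__v4hi) __A, __N);
       }
*/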
15858
15859 static rtx
15860 ix86_expand_vec_ext_builtin (tree arglist, rtx target)
15861 {
15862 enum machine_mode tmode, mode0;
15863 tree arg0, arg1;
15864 int elt;
15865 rtx op0;
15866
15867 arg0 = TREE_VALUE (arglist);
15868 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15869
15870 op0 = expand_normal (arg0);
15871 elt = get_element_number (TREE_TYPE (arg0), arg1);
15872
15873 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
15874 mode0 = TYPE_MODE (TREE_TYPE (arg0));
15875 gcc_assert (VECTOR_MODE_P (mode0));
15876
15877 op0 = force_reg (mode0, op0);
15878
15879 if (optimize || !target || !register_operand (target, tmode))
15880 target = gen_reg_rtx (tmode);
15881
15882 ix86_expand_vector_extract (true, target, op0, elt);
15883
15884 return target;
15885 }
15886
15887 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
15888 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
15889 a language-level syntax for referencing vector elements. */
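/* Illustrative sketch (again, the real wrapper lives in xmmintrin.h):

       static __inline __m64
       _mm_insert_pi16 (__m64 const __A, int const __D, int const __N)
       {
         return (__m64) __builtin_ia32_vec_set_v4hi ((__v4hi) __A, __D, __N);
       }
*/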
15890
15891 static rtx
15892 ix86_expand_vec_set_builtin (tree arglist)
15893 {
15894 enum machine_mode tmode, mode1;
15895 tree arg0, arg1, arg2;
15896 int elt;
15897 rtx op0, op1;
15898
15899 arg0 = TREE_VALUE (arglist);
15900 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15901 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
15902
15903 tmode = TYPE_MODE (TREE_TYPE (arg0));
15904 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
15905 gcc_assert (VECTOR_MODE_P (tmode));
15906
15907 op0 = expand_expr (arg0, NULL_RTX, tmode, 0);
15908 op1 = expand_expr (arg1, NULL_RTX, mode1, 0);
15909 elt = get_element_number (TREE_TYPE (arg0), arg2);
15910
15911 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
15912 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
15913
15914 op0 = force_reg (tmode, op0);
15915 op1 = force_reg (mode1, op1);
15916
15917 ix86_expand_vector_set (true, op0, op1, elt);
15918
15919 return op0;
15920 }
15921
15922 /* Expand an expression EXP that calls a built-in function,
15923 with result going to TARGET if that's convenient
15924 (and in mode MODE if that's convenient).
15925 SUBTARGET may be used as the target for computing one of EXP's operands.
15926 IGNORE is nonzero if the value is to be ignored. */
15927
15928 static rtx
15929 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
15930 enum machine_mode mode ATTRIBUTE_UNUSED,
15931 int ignore ATTRIBUTE_UNUSED)
15932 {
15933 const struct builtin_description *d;
15934 size_t i;
15935 enum insn_code icode;
15936 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
15937 tree arglist = TREE_OPERAND (exp, 1);
15938 tree arg0, arg1, arg2;
15939 rtx op0, op1, op2, pat;
15940 enum machine_mode tmode, mode0, mode1, mode2;
15941 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
15942
15943 switch (fcode)
15944 {
15945 case IX86_BUILTIN_EMMS:
15946 emit_insn (gen_mmx_emms ());
15947 return 0;
15948
15949 case IX86_BUILTIN_SFENCE:
15950 emit_insn (gen_sse_sfence ());
15951 return 0;
15952
15953 case IX86_BUILTIN_MASKMOVQ:
15954 case IX86_BUILTIN_MASKMOVDQU:
15955 icode = (fcode == IX86_BUILTIN_MASKMOVQ
15956 ? CODE_FOR_mmx_maskmovq
15957 : CODE_FOR_sse2_maskmovdqu);
15958 /* Note the arg order is different from the operand order. */
15959 arg1 = TREE_VALUE (arglist);
15960 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
15961 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
15962 op0 = expand_normal (arg0);
15963 op1 = expand_normal (arg1);
15964 op2 = expand_normal (arg2);
15965 mode0 = insn_data[icode].operand[0].mode;
15966 mode1 = insn_data[icode].operand[1].mode;
15967 mode2 = insn_data[icode].operand[2].mode;
15968
15969 op0 = force_reg (Pmode, op0);
15970 op0 = gen_rtx_MEM (mode1, op0);
15971
15972 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
15973 op0 = copy_to_mode_reg (mode0, op0);
15974 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
15975 op1 = copy_to_mode_reg (mode1, op1);
15976 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
15977 op2 = copy_to_mode_reg (mode2, op2);
15978 pat = GEN_FCN (icode) (op0, op1, op2);
15979 if (! pat)
15980 return 0;
15981 emit_insn (pat);
15982 return 0;
15983
15984 case IX86_BUILTIN_SQRTSS:
15985 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, arglist, target);
15986 case IX86_BUILTIN_RSQRTSS:
15987 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, arglist, target);
15988 case IX86_BUILTIN_RCPSS:
15989 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, arglist, target);
15990
15991 case IX86_BUILTIN_LOADUPS:
15992 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
15993
15994 case IX86_BUILTIN_STOREUPS:
15995 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
15996
15997 case IX86_BUILTIN_LOADHPS:
15998 case IX86_BUILTIN_LOADLPS:
15999 case IX86_BUILTIN_LOADHPD:
16000 case IX86_BUILTIN_LOADLPD:
16001 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
16002 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
16003 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
16004 : CODE_FOR_sse2_loadlpd);
16005 arg0 = TREE_VALUE (arglist);
16006 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16007 op0 = expand_normal (arg0);
16008 op1 = expand_normal (arg1);
16009 tmode = insn_data[icode].operand[0].mode;
16010 mode0 = insn_data[icode].operand[1].mode;
16011 mode1 = insn_data[icode].operand[2].mode;
16012
16013 op0 = force_reg (mode0, op0);
16014 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
16015 if (optimize || target == 0
16016 || GET_MODE (target) != tmode
16017 || !register_operand (target, tmode))
16018 target = gen_reg_rtx (tmode);
16019 pat = GEN_FCN (icode) (target, op0, op1);
16020 if (! pat)
16021 return 0;
16022 emit_insn (pat);
16023 return target;
16024
16025 case IX86_BUILTIN_STOREHPS:
16026 case IX86_BUILTIN_STORELPS:
16027 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
16028 : CODE_FOR_sse_storelps);
16029 arg0 = TREE_VALUE (arglist);
16030 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16031 op0 = expand_normal (arg0);
16032 op1 = expand_normal (arg1);
16033 mode0 = insn_data[icode].operand[0].mode;
16034 mode1 = insn_data[icode].operand[1].mode;
16035
16036 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
16037 op1 = force_reg (mode1, op1);
16038
16039 pat = GEN_FCN (icode) (op0, op1);
16040 if (! pat)
16041 return 0;
16042 emit_insn (pat);
16043 return const0_rtx;
16044
16045 case IX86_BUILTIN_MOVNTPS:
16046 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
16047 case IX86_BUILTIN_MOVNTQ:
16048 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
16049
16050 case IX86_BUILTIN_LDMXCSR:
16051 op0 = expand_normal (TREE_VALUE (arglist));
16052 target = assign_386_stack_local (SImode, SLOT_TEMP);
16053 emit_move_insn (target, op0);
16054 emit_insn (gen_sse_ldmxcsr (target));
16055 return 0;
16056
16057 case IX86_BUILTIN_STMXCSR:
16058 target = assign_386_stack_local (SImode, SLOT_TEMP);
16059 emit_insn (gen_sse_stmxcsr (target));
16060 return copy_to_mode_reg (SImode, target);
16061
16062 case IX86_BUILTIN_SHUFPS:
16063 case IX86_BUILTIN_SHUFPD:
16064 icode = (fcode == IX86_BUILTIN_SHUFPS
16065 ? CODE_FOR_sse_shufps
16066 : CODE_FOR_sse2_shufpd);
16067 arg0 = TREE_VALUE (arglist);
16068 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16069 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16070 op0 = expand_normal (arg0);
16071 op1 = expand_normal (arg1);
16072 op2 = expand_normal (arg2);
16073 tmode = insn_data[icode].operand[0].mode;
16074 mode0 = insn_data[icode].operand[1].mode;
16075 mode1 = insn_data[icode].operand[2].mode;
16076 mode2 = insn_data[icode].operand[3].mode;
16077
16078 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16079 op0 = copy_to_mode_reg (mode0, op0);
16080 if ((optimize && !register_operand (op1, mode1))
16081 || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
16082 op1 = copy_to_mode_reg (mode1, op1);
16083 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
16084 {
16085 /* @@@ better error message */
16086 error ("mask must be an immediate");
16087 return gen_reg_rtx (tmode);
16088 }
16089 if (optimize || target == 0
16090 || GET_MODE (target) != tmode
16091 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16092 target = gen_reg_rtx (tmode);
16093 pat = GEN_FCN (icode) (target, op0, op1, op2);
16094 if (! pat)
16095 return 0;
16096 emit_insn (pat);
16097 return target;
16098
16099 case IX86_BUILTIN_PSHUFW:
16100 case IX86_BUILTIN_PSHUFD:
16101 case IX86_BUILTIN_PSHUFHW:
16102 case IX86_BUILTIN_PSHUFLW:
16103 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
16104 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
16105 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
16106 : CODE_FOR_mmx_pshufw);
16107 arg0 = TREE_VALUE (arglist);
16108 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16109 op0 = expand_normal (arg0);
16110 op1 = expand_normal (arg1);
16111 tmode = insn_data[icode].operand[0].mode;
16112 mode1 = insn_data[icode].operand[1].mode;
16113 mode2 = insn_data[icode].operand[2].mode;
16114
16115 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
16116 op0 = copy_to_mode_reg (mode1, op0);
16117 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
16118 {
16119 /* @@@ better error message */
16120 error ("mask must be an immediate");
16121 return const0_rtx;
16122 }
16123 if (target == 0
16124 || GET_MODE (target) != tmode
16125 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16126 target = gen_reg_rtx (tmode);
16127 pat = GEN_FCN (icode) (target, op0, op1);
16128 if (! pat)
16129 return 0;
16130 emit_insn (pat);
16131 return target;
16132
16133 case IX86_BUILTIN_PSLLDQI128:
16134 case IX86_BUILTIN_PSRLDQI128:
16135 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
16136 : CODE_FOR_sse2_lshrti3);
16137 arg0 = TREE_VALUE (arglist);
16138 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16139 op0 = expand_normal (arg0);
16140 op1 = expand_normal (arg1);
16141 tmode = insn_data[icode].operand[0].mode;
16142 mode1 = insn_data[icode].operand[1].mode;
16143 mode2 = insn_data[icode].operand[2].mode;
16144
16145 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
16146 {
16147 op0 = copy_to_reg (op0);
16148 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
16149 }
16150 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
16151 {
16152 error ("shift must be an immediate");
16153 return const0_rtx;
16154 }
16155 target = gen_reg_rtx (V2DImode);
16156 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
16157 if (! pat)
16158 return 0;
16159 emit_insn (pat);
16160 return target;
16161
16162 case IX86_BUILTIN_FEMMS:
16163 emit_insn (gen_mmx_femms ());
16164 return NULL_RTX;
16165
16166 case IX86_BUILTIN_PAVGUSB:
16167 return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, arglist, target);
16168
16169 case IX86_BUILTIN_PF2ID:
16170 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, arglist, target, 0);
16171
16172 case IX86_BUILTIN_PFACC:
16173 return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, arglist, target);
16174
16175 case IX86_BUILTIN_PFADD:
16176 return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, arglist, target);
16177
16178 case IX86_BUILTIN_PFCMPEQ:
16179 return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, arglist, target);
16180
16181 case IX86_BUILTIN_PFCMPGE:
16182 return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, arglist, target);
16183
16184 case IX86_BUILTIN_PFCMPGT:
16185 return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, arglist, target);
16186
16187 case IX86_BUILTIN_PFMAX:
16188 return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, arglist, target);
16189
16190 case IX86_BUILTIN_PFMIN:
16191 return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, arglist, target);
16192
16193 case IX86_BUILTIN_PFMUL:
16194 return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, arglist, target);
16195
16196 case IX86_BUILTIN_PFRCP:
16197 return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, arglist, target, 0);
16198
16199 case IX86_BUILTIN_PFRCPIT1:
16200 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, arglist, target);
16201
16202 case IX86_BUILTIN_PFRCPIT2:
16203 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, arglist, target);
16204
16205 case IX86_BUILTIN_PFRSQIT1:
16206 return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, arglist, target);
16207
16208 case IX86_BUILTIN_PFRSQRT:
16209 return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, arglist, target, 0);
16210
16211 case IX86_BUILTIN_PFSUB:
16212 return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, arglist, target);
16213
16214 case IX86_BUILTIN_PFSUBR:
16215 return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, arglist, target);
16216
16217 case IX86_BUILTIN_PI2FD:
16218 return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, arglist, target, 0);
16219
16220 case IX86_BUILTIN_PMULHRW:
16221 return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, arglist, target);
16222
16223 case IX86_BUILTIN_PF2IW:
16224 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, arglist, target, 0);
16225
16226 case IX86_BUILTIN_PFNACC:
16227 return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, arglist, target);
16228
16229 case IX86_BUILTIN_PFPNACC:
16230 return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, arglist, target);
16231
16232 case IX86_BUILTIN_PI2FW:
16233 return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, arglist, target, 0);
16234
16235 case IX86_BUILTIN_PSWAPDSI:
16236 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, arglist, target, 0);
16237
16238 case IX86_BUILTIN_PSWAPDSF:
16239 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, arglist, target, 0);
16240
16241 case IX86_BUILTIN_SQRTSD:
16242 return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, arglist, target);
16243 case IX86_BUILTIN_LOADUPD:
16244 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
16245 case IX86_BUILTIN_STOREUPD:
16246 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
16247
16248 case IX86_BUILTIN_MFENCE:
16249 emit_insn (gen_sse2_mfence ());
16250 return 0;
16251 case IX86_BUILTIN_LFENCE:
16252 emit_insn (gen_sse2_lfence ());
16253 return 0;
16254
16255 case IX86_BUILTIN_CLFLUSH:
16256 arg0 = TREE_VALUE (arglist);
16257 op0 = expand_normal (arg0);
16258 icode = CODE_FOR_sse2_clflush;
16259 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
16260 op0 = copy_to_mode_reg (Pmode, op0);
16261
16262 emit_insn (gen_sse2_clflush (op0));
16263 return 0;
16264
16265 case IX86_BUILTIN_MOVNTPD:
16266 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
16267 case IX86_BUILTIN_MOVNTDQ:
16268 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
16269 case IX86_BUILTIN_MOVNTI:
16270 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
16271
16272 case IX86_BUILTIN_LOADDQU:
16273 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
16274 case IX86_BUILTIN_STOREDQU:
16275 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
16276
16277 case IX86_BUILTIN_MONITOR:
16278 arg0 = TREE_VALUE (arglist);
16279 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16280 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16281 op0 = expand_normal (arg0);
16282 op1 = expand_normal (arg1);
16283 op2 = expand_normal (arg2);
16284 if (!REG_P (op0))
16285 op0 = copy_to_mode_reg (Pmode, op0);
16286 if (!REG_P (op1))
16287 op1 = copy_to_mode_reg (SImode, op1);
16288 if (!REG_P (op2))
16289 op2 = copy_to_mode_reg (SImode, op2);
16290 if (!TARGET_64BIT)
16291 emit_insn (gen_sse3_monitor (op0, op1, op2));
16292 else
16293 emit_insn (gen_sse3_monitor64 (op0, op1, op2));
16294 return 0;
16295
16296 case IX86_BUILTIN_MWAIT:
16297 arg0 = TREE_VALUE (arglist);
16298 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16299 op0 = expand_normal (arg0);
16300 op1 = expand_normal (arg1);
16301 if (!REG_P (op0))
16302 op0 = copy_to_mode_reg (SImode, op0);
16303 if (!REG_P (op1))
16304 op1 = copy_to_mode_reg (SImode, op1);
16305 emit_insn (gen_sse3_mwait (op0, op1));
16306 return 0;
16307
16308 case IX86_BUILTIN_LDDQU:
16309 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, arglist,
16310 target, 1);
16311
16312 case IX86_BUILTIN_VEC_INIT_V2SI:
16313 case IX86_BUILTIN_VEC_INIT_V4HI:
16314 case IX86_BUILTIN_VEC_INIT_V8QI:
16315 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), arglist, target);
16316
16317 case IX86_BUILTIN_VEC_EXT_V2DF:
16318 case IX86_BUILTIN_VEC_EXT_V2DI:
16319 case IX86_BUILTIN_VEC_EXT_V4SF:
16320 case IX86_BUILTIN_VEC_EXT_V4SI:
16321 case IX86_BUILTIN_VEC_EXT_V8HI:
16322 case IX86_BUILTIN_VEC_EXT_V2SI:
16323 case IX86_BUILTIN_VEC_EXT_V4HI:
16324 return ix86_expand_vec_ext_builtin (arglist, target);
16325
16326 case IX86_BUILTIN_VEC_SET_V8HI:
16327 case IX86_BUILTIN_VEC_SET_V4HI:
16328 return ix86_expand_vec_set_builtin (arglist);
16329
16330 default:
16331 break;
16332 }
16333
16334 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
16335 if (d->code == fcode)
16336 {
16337 /* Compares are treated specially. */
16338 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
16339 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
16340 || d->icode == CODE_FOR_sse2_maskcmpv2df3
16341 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
16342 return ix86_expand_sse_compare (d, arglist, target);
16343
16344 return ix86_expand_binop_builtin (d->icode, arglist, target);
16345 }
16346
16347 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
16348 if (d->code == fcode)
16349 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
16350
16351 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
16352 if (d->code == fcode)
16353 return ix86_expand_sse_comi (d, arglist, target);
16354
16355 gcc_unreachable ();
16356 }
16357
16358 /* Store OPERAND to the memory after reload is completed. This means
16359 that we can't easily use assign_stack_local. */
16360 rtx
16361 ix86_force_to_memory (enum machine_mode mode, rtx operand)
16362 {
16363 rtx result;
16364
16365 gcc_assert (reload_completed);
16366 if (TARGET_RED_ZONE)
16367 {
16368 result = gen_rtx_MEM (mode,
16369 gen_rtx_PLUS (Pmode,
16370 stack_pointer_rtx,
16371 GEN_INT (-RED_ZONE_SIZE)));
16372 emit_move_insn (result, operand);
16373 }
16374 else if (!TARGET_RED_ZONE && TARGET_64BIT)
16375 {
16376 switch (mode)
16377 {
16378 case HImode:
16379 case SImode:
16380 operand = gen_lowpart (DImode, operand);
16381 /* FALLTHRU */
16382 case DImode:
16383 emit_insn (
16384 gen_rtx_SET (VOIDmode,
16385 gen_rtx_MEM (DImode,
16386 gen_rtx_PRE_DEC (DImode,
16387 stack_pointer_rtx)),
16388 operand));
16389 break;
16390 default:
16391 gcc_unreachable ();
16392 }
16393 result = gen_rtx_MEM (mode, stack_pointer_rtx);
16394 }
16395 else
16396 {
16397 switch (mode)
16398 {
16399 case DImode:
16400 {
16401 rtx operands[2];
16402 split_di (&operand, 1, operands, operands + 1);
16403 emit_insn (
16404 gen_rtx_SET (VOIDmode,
16405 gen_rtx_MEM (SImode,
16406 gen_rtx_PRE_DEC (Pmode,
16407 stack_pointer_rtx)),
16408 operands[1]));
16409 emit_insn (
16410 gen_rtx_SET (VOIDmode,
16411 gen_rtx_MEM (SImode,
16412 gen_rtx_PRE_DEC (Pmode,
16413 stack_pointer_rtx)),
16414 operands[0]));
16415 }
16416 break;
16417 case HImode:
16418 /* Store HImodes as SImodes. */
16419 operand = gen_lowpart (SImode, operand);
16420 /* FALLTHRU */
16421 case SImode:
16422 emit_insn (
16423 gen_rtx_SET (VOIDmode,
16424 gen_rtx_MEM (GET_MODE (operand),
16425 gen_rtx_PRE_DEC (SImode,
16426 stack_pointer_rtx)),
16427 operand));
16428 break;
16429 default:
16430 gcc_unreachable ();
16431 }
16432 result = gen_rtx_MEM (mode, stack_pointer_rtx);
16433 }
16434 return result;
16435 }
16436
16437 /* Free operand from the memory. */
16438 void
16439 ix86_free_from_memory (enum machine_mode mode)
16440 {
16441 if (!TARGET_RED_ZONE)
16442 {
16443 int size;
16444
16445 if (mode == DImode || TARGET_64BIT)
16446 size = 8;
16447 else
16448 size = 4;
16449 /* Use LEA to deallocate stack space. In peephole2 it will be converted
16450 to a pop or add instruction if registers are available. */
16451 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
16452 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
16453 GEN_INT (size))));
16454 }
16455 }
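/* A sketch of how the two helpers above pair up on ia32 (the exact asm
   depends on the peephole2 conversion mentioned above):

       rtx slot = ix86_force_to_memory (SImode, operand);
                              -> pushl <operand>
       ... use "slot" as a memory operand ...
       ix86_free_from_memory (SImode);
                              -> leal 4(%esp), %esp

   On 64-bit targets with a red zone neither call touches %rsp; the slot
   simply lives at -RED_ZONE_SIZE(%rsp).  */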
16456
16457 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
16458 QImode must go into class Q_REGS.
16459 Narrow ALL_REGS to GENERAL_REGS. This allows movsf and
16460 movdf to do mem-to-mem moves through integer regs. */
16461 enum reg_class
16462 ix86_preferred_reload_class (rtx x, enum reg_class class)
16463 {
16464 enum machine_mode mode = GET_MODE (x);
16465
16466 /* We're only allowed to return a subclass of CLASS. Many of the
16467 following checks fail for NO_REGS, so eliminate that early. */
16468 if (class == NO_REGS)
16469 return NO_REGS;
16470
16471 /* All classes can load zeros. */
16472 if (x == CONST0_RTX (mode))
16473 return class;
16474
16475 /* Force constants into memory if we are loading a (nonzero) constant into
16476 an MMX or SSE register. This is because there are no MMX/SSE instructions
16477 to load from a constant. */
16478 if (CONSTANT_P (x)
16479 && (MAYBE_MMX_CLASS_P (class) || MAYBE_SSE_CLASS_P (class)))
16480 return NO_REGS;
16481
16482 /* Prefer SSE regs only, if we can use them for math. */
16483 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
16484 return SSE_CLASS_P (class) ? class : NO_REGS;
16485
16486 /* Floating-point constants need more complex checks. */
16487 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
16488 {
16489 /* General regs can load everything. */
16490 if (reg_class_subset_p (class, GENERAL_REGS))
16491 return class;
16492
16493 /* Floats can load 0 and 1 plus some others. Note that we eliminated
16494 zero above. We only want to wind up preferring 80387 registers if
16495 we plan on doing computation with them. */
16496 if (TARGET_80387
16497 && standard_80387_constant_p (x))
16498 {
16499 /* Limit class to non-sse. */
16500 if (class == FLOAT_SSE_REGS)
16501 return FLOAT_REGS;
16502 if (class == FP_TOP_SSE_REGS)
16503 return FP_TOP_REG;
16504 if (class == FP_SECOND_SSE_REGS)
16505 return FP_SECOND_REG;
16506 if (class == FLOAT_INT_REGS || class == FLOAT_REGS)
16507 return class;
16508 }
16509
16510 return NO_REGS;
16511 }
16512
16513 /* Generally when we see PLUS here, it's the function invariant
16514 (plus soft-fp const_int), which can only be computed into general
16515 regs. */
16516 if (GET_CODE (x) == PLUS)
16517 return reg_class_subset_p (class, GENERAL_REGS) ? class : NO_REGS;
16518
16519 /* QImode constants are easy to load, but non-constant QImode data
16520 must go into Q_REGS. */
16521 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
16522 {
16523 if (reg_class_subset_p (class, Q_REGS))
16524 return class;
16525 if (reg_class_subset_p (Q_REGS, class))
16526 return Q_REGS;
16527 return NO_REGS;
16528 }
16529
16530 return class;
16531 }
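/* For illustration: with 387 math, reloading the DFmode constant 1.0 into
   FLOAT_REGS keeps FLOAT_REGS (standard_80387_constant_p recognizes it as
   fld1), whereas an arbitrary constant such as 3.14 falls through to
   NO_REGS and is forced into the constant pool; any nonzero constant headed
   for an SSE or MMX class is rejected by the CONSTANT_P check near the top.  */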
16532
16533 /* Discourage putting floating-point values in SSE registers unless
16534 SSE math is being used, and likewise for the 387 registers. */
16535 enum reg_class
16536 ix86_preferred_output_reload_class (rtx x, enum reg_class class)
16537 {
16538 enum machine_mode mode = GET_MODE (x);
16539
16540 /* Restrict the output reload class to the register bank that we are doing
16541 math on. If we would like not to return a subset of CLASS, reject this
16542 alternative: if reload cannot do this, it will still use its choice. */
16543 mode = GET_MODE (x);
16544 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
16545 return MAYBE_SSE_CLASS_P (class) ? SSE_REGS : NO_REGS;
16546
16547 if (TARGET_80387 && SCALAR_FLOAT_MODE_P (mode))
16548 {
16549 if (class == FP_TOP_SSE_REGS)
16550 return FP_TOP_REG;
16551 else if (class == FP_SECOND_SSE_REGS)
16552 return FP_SECOND_REG;
16553 else
16554 return FLOAT_CLASS_P (class) ? class : NO_REGS;
16555 }
16556
16557 return class;
16558 }
16559
16560 /* If we are copying between general and FP registers, we need a memory
16561 location. The same is true for SSE and MMX registers.
16562
16563 The macro can't work reliably when one of the CLASSES is a class containing
16564 registers from multiple units (SSE, MMX, integer). We avoid this by never
16565 combining those units in single alternative in the machine description.
16566 Ensure that this constraint holds to avoid unexpected surprises.
16567
16568 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
16569 enforce these sanity checks. */
16570
16571 int
16572 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
16573 enum machine_mode mode, int strict)
16574 {
16575 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
16576 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
16577 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
16578 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
16579 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
16580 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
16581 {
16582 gcc_assert (!strict);
16583 return true;
16584 }
16585
16586 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
16587 return true;
16588
16589 /* ??? This is a lie. We do have moves between mmx/general and between
16590 mmx/sse2. But by saying we need secondary memory we discourage the
16591 register allocator from using the mmx registers unless needed. */
16592 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
16593 return true;
16594
16595 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
16596 {
16597 /* SSE1 doesn't have any direct moves from other classes. */
16598 if (!TARGET_SSE2)
16599 return true;
16600
16601 /* If the target says that inter-unit moves are more expensive
16602 than moving through memory, then don't generate them. */
16603 if (!TARGET_INTER_UNIT_MOVES && !optimize_size)
16604 return true;
16605
16606 /* Between SSE and general, we have moves no larger than word size. */
16607 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
16608 return true;
16609
16610 /* ??? For the cost of one register reformat penalty, we could use
16611 the same instructions to move SFmode and DFmode data, but the
16612 relevant move patterns don't support those alternatives. */
16613 if (mode == SFmode || mode == DFmode)
16614 return true;
16615 }
16616
16617 return false;
16618 }
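/* Example: an SImode copy between SSE_REGS and GENERAL_REGS needs no
   secondary memory on an SSE2 target that allows inter-unit moves, but the
   same copy in DFmode (or anything wider than a word) does, and on plain
   SSE1 every SSE<->integer copy goes through memory.  */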
16619
16620 /* Return true if the registers in CLASS cannot represent the change from
16621 modes FROM to TO. */
16622
16623 bool
16624 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
16625 enum reg_class class)
16626 {
16627 if (from == to)
16628 return false;
16629
16630 /* x87 registers can't do subreg at all, as all values are reformatted
16631 to extended precision. */
16632 if (MAYBE_FLOAT_CLASS_P (class))
16633 return true;
16634
16635 if (MAYBE_SSE_CLASS_P (class) || MAYBE_MMX_CLASS_P (class))
16636 {
16637 /* Vector registers do not support QI or HImode loads. If we don't
16638 disallow a change to these modes, reload will assume it's ok to
16639 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
16640 the vec_dupv4hi pattern. */
16641 if (GET_MODE_SIZE (from) < 4)
16642 return true;
16643
16644 /* Vector registers do not support subreg with nonzero offsets, which
16645 are otherwise valid for integer registers. Since we can't see
16646 whether we have a nonzero offset from here, prohibit all
16647 nonparadoxical subregs changing size. */
16648 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
16649 return true;
16650 }
16651
16652 return false;
16653 }
16654
16655 /* Return the cost of moving data from a register in class CLASS1 to
16656 one in class CLASS2.
16657
16658 It is not required that the cost always equal 2 when FROM is the same as TO;
16659 on some machines it is expensive to move between registers if they are not
16660 general registers. */
16661
16662 int
16663 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
16664 enum reg_class class2)
16665 {
16666 /* In case we require secondary memory, compute the cost of the store
16667 followed by a load. To avoid bad register allocation choices, we need
16668 this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
16669
16670 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
16671 {
16672 int cost = 1;
16673
16674 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
16675 MEMORY_MOVE_COST (mode, class1, 1));
16676 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
16677 MEMORY_MOVE_COST (mode, class2, 1));
16678
16679 /* When copying from a general purpose register we may emit multiple
16680 stores followed by a single load, causing a memory-size-mismatch stall.
16681 Count this as an arbitrarily high cost of 20. */
16682 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
16683 cost += 20;
16684
16685 /* In the case of FP/MMX moves, the registers actually overlap, and we
16686 have to switch modes in order to treat them differently. */
16687 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
16688 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
16689 cost += 20;
16690
16691 return cost;
16692 }
16693
16694 /* Moves between SSE/MMX and integer unit are expensive. */
16695 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
16696 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
16697 return ix86_cost->mmxsse_to_integer;
16698 if (MAYBE_FLOAT_CLASS_P (class1))
16699 return ix86_cost->fp_move;
16700 if (MAYBE_SSE_CLASS_P (class1))
16701 return ix86_cost->sse_move;
16702 if (MAYBE_MMX_CLASS_P (class1))
16703 return ix86_cost->mmx_move;
16704 return 2;
16705 }
16706
16707 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
16708
16709 bool
16710 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
16711 {
16712 /* Flags and only flags can only hold CCmode values. */
16713 if (CC_REGNO_P (regno))
16714 return GET_MODE_CLASS (mode) == MODE_CC;
16715 if (GET_MODE_CLASS (mode) == MODE_CC
16716 || GET_MODE_CLASS (mode) == MODE_RANDOM
16717 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
16718 return 0;
16719 if (FP_REGNO_P (regno))
16720 return VALID_FP_MODE_P (mode);
16721 if (SSE_REGNO_P (regno))
16722 {
16723 /* We implement the move patterns for all vector modes into and
16724 out of SSE registers, even when no operation instructions
16725 are available. */
16726 return (VALID_SSE_REG_MODE (mode)
16727 || VALID_SSE2_REG_MODE (mode)
16728 || VALID_MMX_REG_MODE (mode)
16729 || VALID_MMX_REG_MODE_3DNOW (mode));
16730 }
16731 if (MMX_REGNO_P (regno))
16732 {
16733 /* We implement the move patterns for 3DNOW modes even in MMX mode,
16734 so if the register is available at all, then we can move data of
16735 the given mode into or out of it. */
16736 return (VALID_MMX_REG_MODE (mode)
16737 || VALID_MMX_REG_MODE_3DNOW (mode));
16738 }
16739
16740 if (mode == QImode)
16741 {
16742 /* Take care with QImode values - they can be in non-QI regs,
16743 but then they do cause partial register stalls. */
16744 if (regno < 4 || TARGET_64BIT)
16745 return 1;
16746 if (!TARGET_PARTIAL_REG_STALL)
16747 return 1;
16748 return reload_in_progress || reload_completed;
16749 }
16750 /* We handle both integers and floats in the general purpose registers. */
16751 else if (VALID_INT_MODE_P (mode))
16752 return 1;
16753 else if (VALID_FP_MODE_P (mode))
16754 return 1;
16755 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
16756 on to use that value in smaller contexts, this can easily force a
16757 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
16758 supporting DImode, allow it. */
16759 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
16760 return 1;
16761
16762 return 0;
16763 }
16764
16765 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
16766 tieable integer mode. */
16767
16768 static bool
16769 ix86_tieable_integer_mode_p (enum machine_mode mode)
16770 {
16771 switch (mode)
16772 {
16773 case HImode:
16774 case SImode:
16775 return true;
16776
16777 case QImode:
16778 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
16779
16780 case DImode:
16781 return TARGET_64BIT;
16782
16783 default:
16784 return false;
16785 }
16786 }
16787
16788 /* Return true if MODE1 is accessible in a register that can hold MODE2
16789 without copying. That is, all register classes that can hold MODE2
16790 can also hold MODE1. */
16791
16792 bool
16793 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
16794 {
16795 if (mode1 == mode2)
16796 return true;
16797
16798 if (ix86_tieable_integer_mode_p (mode1)
16799 && ix86_tieable_integer_mode_p (mode2))
16800 return true;
16801
16802 /* MODE2 being XFmode implies fp stack or general regs, which means we
16803 can tie any smaller floating point modes to it. Note that we do not
16804 tie this with TFmode. */
16805 if (mode2 == XFmode)
16806 return mode1 == SFmode || mode1 == DFmode;
16807
16808 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
16809 that we can tie it with SFmode. */
16810 if (mode2 == DFmode)
16811 return mode1 == SFmode;
16812
16813 /* If MODE2 is only appropriate for an SSE register, then tie with
16814 any other mode acceptable to SSE registers. */
16815 if (GET_MODE_SIZE (mode2) >= 8
16816 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
16817 return ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1);
16818
16819 /* If MODE2 is appropriate for an MMX (or SSE) register, then tie
16820 with any other mode acceptable to MMX registers. */
16821 if (GET_MODE_SIZE (mode2) == 8
16822 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
16823 return ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1);
16824
16825 return false;
16826 }
16827
16828 /* Return the cost of moving data of mode M between a
16829 register and memory. A value of 2 is the default; this cost is
16830 relative to those in `REGISTER_MOVE_COST'.
16831
16832 If moving between registers and memory is more expensive than
16833 between two registers, you should define this macro to express the
16834 relative cost.
16835
16836 Also model the increased cost of moving QImode registers in
16837 non-Q_REGS classes.
16838 */
16839 int
16840 ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
16841 {
16842 if (FLOAT_CLASS_P (class))
16843 {
16844 int index;
16845 switch (mode)
16846 {
16847 case SFmode:
16848 index = 0;
16849 break;
16850 case DFmode:
16851 index = 1;
16852 break;
16853 case XFmode:
16854 index = 2;
16855 break;
16856 default:
16857 return 100;
16858 }
16859 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
16860 }
16861 if (SSE_CLASS_P (class))
16862 {
16863 int index;
16864 switch (GET_MODE_SIZE (mode))
16865 {
16866 case 4:
16867 index = 0;
16868 break;
16869 case 8:
16870 index = 1;
16871 break;
16872 case 16:
16873 index = 2;
16874 break;
16875 default:
16876 return 100;
16877 }
16878 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
16879 }
16880 if (MMX_CLASS_P (class))
16881 {
16882 int index;
16883 switch (GET_MODE_SIZE (mode))
16884 {
16885 case 4:
16886 index = 0;
16887 break;
16888 case 8:
16889 index = 1;
16890 break;
16891 default:
16892 return 100;
16893 }
16894 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
16895 }
16896 switch (GET_MODE_SIZE (mode))
16897 {
16898 case 1:
16899 if (in)
16900 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
16901 : ix86_cost->movzbl_load);
16902 else
16903 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
16904 : ix86_cost->int_store[0] + 4);
16905 break;
16906 case 2:
16907 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
16908 default:
16909 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
16910 if (mode == TFmode)
16911 mode = XFmode;
16912 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
16913 * (((int) GET_MODE_SIZE (mode)
16914 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
16915 }
16916 }
16917
16918 /* Compute a (partial) cost for rtx X. Return true if the complete
16919 cost has been computed, and false if subexpressions should be
16920 scanned. In either case, *TOTAL contains the cost result. */
16921
16922 static bool
16923 ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
16924 {
16925 enum machine_mode mode = GET_MODE (x);
16926
16927 switch (code)
16928 {
16929 case CONST_INT:
16930 case CONST:
16931 case LABEL_REF:
16932 case SYMBOL_REF:
16933 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
16934 *total = 3;
16935 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
16936 *total = 2;
16937 else if (flag_pic && SYMBOLIC_CONST (x)
16938 && (!TARGET_64BIT
16939 || (GET_CODE (x) != LABEL_REF
16940 && (GET_CODE (x) != SYMBOL_REF
16941 || !SYMBOL_REF_LOCAL_P (x)))))
16942 *total = 1;
16943 else
16944 *total = 0;
16945 return true;
16946
16947 case CONST_DOUBLE:
16948 if (mode == VOIDmode)
16949 *total = 0;
16950 else
16951 switch (standard_80387_constant_p (x))
16952 {
16953 case 1: /* 0.0 */
16954 *total = 1;
16955 break;
16956 default: /* Other constants */
16957 *total = 2;
16958 break;
16959 case 0:
16960 case -1:
16961 /* Start with (MEM (SYMBOL_REF)), since that's where
16962 it'll probably end up. Add a penalty for size. */
16963 *total = (COSTS_N_INSNS (1)
16964 + (flag_pic != 0 && !TARGET_64BIT)
16965 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
16966 break;
16967 }
16968 return true;
16969
16970 case ZERO_EXTEND:
16971 /* Zero extension is often completely free on x86_64, so make
16972 it as cheap as possible. */
16973 if (TARGET_64BIT && mode == DImode
16974 && GET_MODE (XEXP (x, 0)) == SImode)
16975 *total = 1;
16976 else if (TARGET_ZERO_EXTEND_WITH_AND)
16977 *total = ix86_cost->add;
16978 else
16979 *total = ix86_cost->movzx;
16980 return false;
16981
16982 case SIGN_EXTEND:
16983 *total = ix86_cost->movsx;
16984 return false;
16985
16986 case ASHIFT:
16987 if (GET_CODE (XEXP (x, 1)) == CONST_INT
16988 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
16989 {
16990 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
16991 if (value == 1)
16992 {
16993 *total = ix86_cost->add;
16994 return false;
16995 }
16996 if ((value == 2 || value == 3)
16997 && ix86_cost->lea <= ix86_cost->shift_const)
16998 {
16999 *total = ix86_cost->lea;
17000 return false;
17001 }
17002 }
17003 /* FALLTHRU */
17004
17005 case ROTATE:
17006 case ASHIFTRT:
17007 case LSHIFTRT:
17008 case ROTATERT:
17009 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
17010 {
17011 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
17012 {
17013 if (INTVAL (XEXP (x, 1)) > 32)
17014 *total = ix86_cost->shift_const + COSTS_N_INSNS (2);
17015 else
17016 *total = ix86_cost->shift_const * 2;
17017 }
17018 else
17019 {
17020 if (GET_CODE (XEXP (x, 1)) == AND)
17021 *total = ix86_cost->shift_var * 2;
17022 else
17023 *total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
17024 }
17025 }
17026 else
17027 {
17028 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
17029 *total = ix86_cost->shift_const;
17030 else
17031 *total = ix86_cost->shift_var;
17032 }
17033 return false;
17034
17035 case MULT:
17036 if (FLOAT_MODE_P (mode))
17037 {
17038 *total = ix86_cost->fmul;
17039 return false;
17040 }
17041 else
17042 {
17043 rtx op0 = XEXP (x, 0);
17044 rtx op1 = XEXP (x, 1);
17045 int nbits;
17046 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
17047 {
17048 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
17049 for (nbits = 0; value != 0; value &= value - 1)
17050 nbits++;
17051 }
17052 else
17053 /* This is arbitrary. */
17054 nbits = 7;
17055
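/* The CONST_INT loop above is Kernighan's popcount: e.g. a multiplier of
   0x22 has two bits set, so nbits == 2 and the bit-set term below adds
   2 * mult_bit to the cost.  */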
17056 /* Compute costs correctly for widening multiplication. */
17057 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
17058 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
17059 == GET_MODE_SIZE (mode))
17060 {
17061 int is_mulwiden = 0;
17062 enum machine_mode inner_mode = GET_MODE (op0);
17063
17064 if (GET_CODE (op0) == GET_CODE (op1))
17065 is_mulwiden = 1, op1 = XEXP (op1, 0);
17066 else if (GET_CODE (op1) == CONST_INT)
17067 {
17068 if (GET_CODE (op0) == SIGN_EXTEND)
17069 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
17070 == INTVAL (op1);
17071 else
17072 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
17073 }
17074
17075 if (is_mulwiden)
17076 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
17077 }
17078
17079 *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
17080 + nbits * ix86_cost->mult_bit
17081 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));
17082
17083 return true;
17084 }
17085
17086 case DIV:
17087 case UDIV:
17088 case MOD:
17089 case UMOD:
17090 if (FLOAT_MODE_P (mode))
17091 *total = ix86_cost->fdiv;
17092 else
17093 *total = ix86_cost->divide[MODE_INDEX (mode)];
17094 return false;
17095
17096 case PLUS:
17097 if (FLOAT_MODE_P (mode))
17098 *total = ix86_cost->fadd;
17099 else if (GET_MODE_CLASS (mode) == MODE_INT
17100 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
17101 {
17102 if (GET_CODE (XEXP (x, 0)) == PLUS
17103 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
17104 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
17105 && CONSTANT_P (XEXP (x, 1)))
17106 {
17107 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
17108 if (val == 2 || val == 4 || val == 8)
17109 {
17110 *total = ix86_cost->lea;
17111 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
17112 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
17113 outer_code);
17114 *total += rtx_cost (XEXP (x, 1), outer_code);
17115 return true;
17116 }
17117 }
17118 else if (GET_CODE (XEXP (x, 0)) == MULT
17119 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
17120 {
17121 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
17122 if (val == 2 || val == 4 || val == 8)
17123 {
17124 *total = ix86_cost->lea;
17125 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
17126 *total += rtx_cost (XEXP (x, 1), outer_code);
17127 return true;
17128 }
17129 }
17130 else if (GET_CODE (XEXP (x, 0)) == PLUS)
17131 {
17132 *total = ix86_cost->lea;
17133 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
17134 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
17135 *total += rtx_cost (XEXP (x, 1), outer_code);
17136 return true;
17137 }
17138 }
17139 /* FALLTHRU */
17140
17141 case MINUS:
17142 if (FLOAT_MODE_P (mode))
17143 {
17144 *total = ix86_cost->fadd;
17145 return false;
17146 }
17147 /* FALLTHRU */
17148
17149 case AND:
17150 case IOR:
17151 case XOR:
17152 if (!TARGET_64BIT && mode == DImode)
17153 {
17154 *total = (ix86_cost->add * 2
17155 + (rtx_cost (XEXP (x, 0), outer_code)
17156 << (GET_MODE (XEXP (x, 0)) != DImode))
17157 + (rtx_cost (XEXP (x, 1), outer_code)
17158 << (GET_MODE (XEXP (x, 1)) != DImode)));
17159 return true;
17160 }
17161 /* FALLTHRU */
17162
17163 case NEG:
17164 if (FLOAT_MODE_P (mode))
17165 {
17166 *total = ix86_cost->fchs;
17167 return false;
17168 }
17169 /* FALLTHRU */
17170
17171 case NOT:
17172 if (!TARGET_64BIT && mode == DImode)
17173 *total = ix86_cost->add * 2;
17174 else
17175 *total = ix86_cost->add;
17176 return false;
17177
17178 case COMPARE:
17179 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
17180 && XEXP (XEXP (x, 0), 1) == const1_rtx
17181 && GET_CODE (XEXP (XEXP (x, 0), 2)) == CONST_INT
17182 && XEXP (x, 1) == const0_rtx)
17183 {
17184 /* This kind of construct is implemented using test[bwl].
17185 Treat it as if we had an AND. */
17186 *total = (ix86_cost->add
17187 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
17188 + rtx_cost (const1_rtx, outer_code));
17189 return true;
17190 }
17191 return false;
17192
17193 case FLOAT_EXTEND:
17194 if (!TARGET_SSE_MATH
17195 || mode == XFmode
17196 || (mode == DFmode && !TARGET_SSE2))
17197 /* For standard 80387 constants, raise the cost to prevent
17198 compress_float_constant() from generating a load from memory. */
17199 switch (standard_80387_constant_p (XEXP (x, 0)))
17200 {
17201 case -1:
17202 case 0:
17203 *total = 0;
17204 break;
17205 case 1: /* 0.0 */
17206 *total = 1;
17207 break;
17208 default:
17209 *total = (x86_ext_80387_constants & TUNEMASK
17210 || optimize_size
17211 ? 1 : 0);
17212 }
17213 return false;
17214
17215 case ABS:
17216 if (FLOAT_MODE_P (mode))
17217 *total = ix86_cost->fabs;
17218 return false;
17219
17220 case SQRT:
17221 if (FLOAT_MODE_P (mode))
17222 *total = ix86_cost->fsqrt;
17223 return false;
17224
17225 case UNSPEC:
17226 if (XINT (x, 1) == UNSPEC_TP)
17227 *total = 0;
17228 return false;
17229
17230 default:
17231 return false;
17232 }
17233 }
17234
17235 #if TARGET_MACHO
17236
17237 static int current_machopic_label_num;
17238
17239 /* Given a symbol name and its associated stub, write out the
17240 definition of the stub. */
17241
17242 void
17243 machopic_output_stub (FILE *file, const char *symb, const char *stub)
17244 {
17245 unsigned int length;
17246 char *binder_name, *symbol_name, lazy_ptr_name[32];
17247 int label = ++current_machopic_label_num;
17248
17249 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
17250 symb = (*targetm.strip_name_encoding) (symb);
17251
17252 length = strlen (stub);
17253 binder_name = alloca (length + 32);
17254 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
17255
17256 length = strlen (symb);
17257 symbol_name = alloca (length + 32);
17258 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
17259
17260 sprintf (lazy_ptr_name, "L%d$lz", label);
17261
17262 if (MACHOPIC_PURE)
17263 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
17264 else
17265 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
17266
17267 fprintf (file, "%s:\n", stub);
17268 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
17269
17270 if (MACHOPIC_PURE)
17271 {
17272 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
17273 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
17274 fprintf (file, "\tjmp\t*%%edx\n");
17275 }
17276 else
17277 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
17278
17279 fprintf (file, "%s:\n", binder_name);
17280
17281 if (MACHOPIC_PURE)
17282 {
17283 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
17284 fprintf (file, "\tpushl\t%%eax\n");
17285 }
17286 else
17287 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
17288
17289 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
17290
17291 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
17292 fprintf (file, "%s:\n", lazy_ptr_name);
17293 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
17294 fprintf (file, "\t.long %s\n", binder_name);
17295 }
17296
17297 void
17298 darwin_x86_file_end (void)
17299 {
17300 darwin_file_end ();
17301 ix86_file_end ();
17302 }
17303 #endif /* TARGET_MACHO */
17304
17305 /* Order the registers for the register allocator. */
17306
17307 void
17308 x86_order_regs_for_local_alloc (void)
17309 {
17310 int pos = 0;
17311 int i;
17312
17313 /* First allocate the local general purpose registers. */
17314 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
17315 if (GENERAL_REGNO_P (i) && call_used_regs[i])
17316 reg_alloc_order [pos++] = i;
17317
17318 /* Global general purpose registers. */
17319 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
17320 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
17321 reg_alloc_order [pos++] = i;
17322
17323 /* x87 registers come first in case we are doing FP math
17324 using them. */
17325 if (!TARGET_SSE_MATH)
17326 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
17327 reg_alloc_order [pos++] = i;
17328
17329 /* SSE registers. */
17330 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
17331 reg_alloc_order [pos++] = i;
17332 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
17333 reg_alloc_order [pos++] = i;
17334
17335 /* x87 registers. */
17336 if (TARGET_SSE_MATH)
17337 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
17338 reg_alloc_order [pos++] = i;
17339
17340 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
17341 reg_alloc_order [pos++] = i;
17342
17343 /* Initialize the rest of the array as we do not allocate some registers
17344 at all. */
17345 while (pos < FIRST_PSEUDO_REGISTER)
17346 reg_alloc_order [pos++] = 0;
17347 }
17348
17349 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
17350 struct attribute_spec.handler. */
17351 static tree
17352 ix86_handle_struct_attribute (tree *node, tree name,
17353 tree args ATTRIBUTE_UNUSED,
17354 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
17355 {
17356 tree *type = NULL;
17357 if (DECL_P (*node))
17358 {
17359 if (TREE_CODE (*node) == TYPE_DECL)
17360 type = &TREE_TYPE (*node);
17361 }
17362 else
17363 type = node;
17364
17365 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
17366 || TREE_CODE (*type) == UNION_TYPE)))
17367 {
17368 warning (OPT_Wattributes, "%qs attribute ignored",
17369 IDENTIFIER_POINTER (name));
17370 *no_add_attrs = true;
17371 }
17372
17373 else if ((is_attribute_p ("ms_struct", name)
17374 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
17375 || ((is_attribute_p ("gcc_struct", name)
17376 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
17377 {
17378 warning (OPT_Wattributes, "%qs incompatible attribute ignored",
17379 IDENTIFIER_POINTER (name));
17380 *no_add_attrs = true;
17381 }
17382
17383 return NULL_TREE;
17384 }
17385
17386 static bool
17387 ix86_ms_bitfield_layout_p (tree record_type)
17388 {
17389 return (TARGET_MS_BITFIELD_LAYOUT &&
17390 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
17391 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
17392 }
17393
17394 /* Returns an expression indicating where the this parameter is
17395 located on entry to the FUNCTION. */
17396
17397 static rtx
17398 x86_this_parameter (tree function)
17399 {
17400 tree type = TREE_TYPE (function);
17401
17402 if (TARGET_64BIT)
17403 {
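      /* If the function returns its aggregate value in memory, %rdi holds the
	 hidden return-slot pointer, so THIS is in the second integer argument
	 register instead of the first.  */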
17404 int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
17405 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
17406 }
17407
17408 if (ix86_function_regparm (type, function) > 0)
17409 {
17410 tree parm;
17411
17412 parm = TYPE_ARG_TYPES (type);
17413 /* Figure out whether or not the function has a variable number of
17414 arguments. */
17415 for (; parm; parm = TREE_CHAIN (parm))
17416 if (TREE_VALUE (parm) == void_type_node)
17417 break;
17418 /* If not, the this parameter is in the first argument. */
17419 if (parm)
17420 {
17421 int regno = 0;
17422 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
17423 regno = 2;
17424 return gen_rtx_REG (SImode, regno);
17425 }
17426 }
17427
17428 if (aggregate_value_p (TREE_TYPE (type), type))
17429 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
17430 else
17431 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
17432 }
17433
17434 /* Determine whether x86_output_mi_thunk can succeed. */
17435
17436 static bool
17437 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
17438 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
17439 HOST_WIDE_INT vcall_offset, tree function)
17440 {
17441 /* 64-bit can handle anything. */
17442 if (TARGET_64BIT)
17443 return true;
17444
17445 /* For 32-bit, everything's fine if we have one free register. */
17446 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
17447 return true;
17448
17449 /* Need a free register for vcall_offset. */
17450 if (vcall_offset)
17451 return false;
17452
17453 /* Need a free register for GOT references. */
17454 if (flag_pic && !(*targetm.binds_local_p) (function))
17455 return false;
17456
17457 /* Otherwise ok. */
17458 return true;
17459 }
17460
17461 /* Output the assembler code for a thunk function. THUNK_DECL is the
17462 declaration for the thunk function itself, FUNCTION is the decl for
17463 the target function. DELTA is an immediate constant offset to be
17464 added to THIS. If VCALL_OFFSET is nonzero, the word at
17465 *(*this + vcall_offset) should be added to THIS. */
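/* For instance, on 32-bit with THIS passed on the stack, a nonzero DELTA
   and no VCALL_OFFSET, the emitted thunk is roughly

	addl	$DELTA, 4(%esp)
	jmp	FUNCTION

   (an illustrative sketch; the exact sequence depends on regparm, PIC and
   the vcall offset handling below).  */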
17466
17467 static void
17468 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
17469 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
17470 HOST_WIDE_INT vcall_offset, tree function)
17471 {
17472 rtx xops[3];
17473 rtx this = x86_this_parameter (function);
17474 rtx this_reg, tmp;
17475
17476 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
17477 pull it in now and let DELTA benefit. */
17478 if (REG_P (this))
17479 this_reg = this;
17480 else if (vcall_offset)
17481 {
17482 /* Put the this parameter into %eax. */
17483 xops[0] = this;
17484 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
17485 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
17486 }
17487 else
17488 this_reg = NULL_RTX;
17489
17490 /* Adjust the this parameter by a fixed constant. */
17491 if (delta)
17492 {
17493 xops[0] = GEN_INT (delta);
17494 xops[1] = this_reg ? this_reg : this;
17495 if (TARGET_64BIT)
17496 {
17497 if (!x86_64_general_operand (xops[0], DImode))
17498 {
17499 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
17500 xops[1] = tmp;
17501 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
17502 xops[0] = tmp;
17503 xops[1] = this;
17504 }
17505 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
17506 }
17507 else
17508 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
17509 }
17510
17511 /* Adjust the this parameter by a value stored in the vtable. */
17512 if (vcall_offset)
17513 {
17514 if (TARGET_64BIT)
17515 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
17516 else
17517 {
17518 int tmp_regno = 2 /* ECX */;
17519 if (lookup_attribute ("fastcall",
17520 TYPE_ATTRIBUTES (TREE_TYPE (function))))
17521 tmp_regno = 0 /* EAX */;
17522 tmp = gen_rtx_REG (SImode, tmp_regno);
17523 }
17524
17525 xops[0] = gen_rtx_MEM (Pmode, this_reg);
17526 xops[1] = tmp;
17527 if (TARGET_64BIT)
17528 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
17529 else
17530 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
17531
17532 /* Adjust the this parameter. */
17533 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
17534 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
17535 {
17536 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
17537 xops[0] = GEN_INT (vcall_offset);
17538 xops[1] = tmp2;
17539 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
17540 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
17541 }
17542 xops[1] = this_reg;
17543 if (TARGET_64BIT)
17544 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
17545 else
17546 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
17547 }
17548
17549 /* If necessary, drop THIS back to its stack slot. */
17550 if (this_reg && this_reg != this)
17551 {
17552 xops[0] = this_reg;
17553 xops[1] = this;
17554 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
17555 }
17556
17557 xops[0] = XEXP (DECL_RTL (function), 0);
17558 if (TARGET_64BIT)
17559 {
17560 if (!flag_pic || (*targetm.binds_local_p) (function))
17561 output_asm_insn ("jmp\t%P0", xops);
17562 else
17563 {
17564 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
17565 tmp = gen_rtx_CONST (Pmode, tmp);
17566 tmp = gen_rtx_MEM (QImode, tmp);
17567 xops[0] = tmp;
17568 output_asm_insn ("jmp\t%A0", xops);
17569 }
17570 }
17571 else
17572 {
17573 if (!flag_pic || (*targetm.binds_local_p) (function))
17574 output_asm_insn ("jmp\t%P0", xops);
17575 else
17576 #if TARGET_MACHO
17577 if (TARGET_MACHO)
17578 {
17579 rtx sym_ref = XEXP (DECL_RTL (function), 0);
17580 tmp = (gen_rtx_SYMBOL_REF
17581 (Pmode,
17582 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
17583 tmp = gen_rtx_MEM (QImode, tmp);
17584 xops[0] = tmp;
17585 output_asm_insn ("jmp\t%0", xops);
17586 }
17587 else
17588 #endif /* TARGET_MACHO */
17589 {
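	  /* 32-bit PIC: materialize the GOT pointer in %ecx, load FUNCTION's
	     address from its GOT slot, and tail call through the register.  */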
17590 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
17591 output_set_got (tmp, NULL_RTX);
17592
17593 xops[1] = tmp;
17594 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
17595 output_asm_insn ("jmp\t{*}%1", xops);
17596 }
17597 }
17598 }
17599
17600 static void
17601 x86_file_start (void)
17602 {
17603 default_file_start ();
17604 #if TARGET_MACHO
17605 darwin_file_start ();
17606 #endif
17607 if (X86_FILE_START_VERSION_DIRECTIVE)
17608 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
17609 if (X86_FILE_START_FLTUSED)
17610 fputs ("\t.global\t__fltused\n", asm_out_file);
17611 if (ix86_asm_dialect == ASM_INTEL)
17612 fputs ("\t.intel_syntax\n", asm_out_file);
17613 }
17614
17615 int
17616 x86_field_alignment (tree field, int computed)
17617 {
17618 enum machine_mode mode;
17619 tree type = TREE_TYPE (field);
17620
17621 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
17622 return computed;
17623 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
17624 ? get_inner_array_type (type) : type);
17625 if (mode == DFmode || mode == DCmode
17626 || GET_MODE_CLASS (mode) == MODE_INT
17627 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
17628 return MIN (32, computed);
17629 return computed;
17630 }
17631
17632 /* Output assembler code to FILE to increment profiler label # LABELNO
17633 for profiling a function entry. */
17634 void
17635 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
17636 {
17637 if (TARGET_64BIT)
17638 if (flag_pic)
17639 {
17640 #ifndef NO_PROFILE_COUNTERS
17641 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
17642 #endif
17643 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
17644 }
17645 else
17646 {
17647 #ifndef NO_PROFILE_COUNTERS
17648 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
17649 #endif
17650 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
17651 }
17652 else if (flag_pic)
17653 {
17654 #ifndef NO_PROFILE_COUNTERS
17655 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
17656 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
17657 #endif
17658 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
17659 }
17660 else
17661 {
17662 #ifndef NO_PROFILE_COUNTERS
17663 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
17664 PROFILE_COUNT_REGISTER);
17665 #endif
17666 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
17667 }
17668 }
17669
17670 /* We don't have exact information about the insn sizes, but we may assume
17671 quite safely that we are informed about all 1 byte insns and memory
17672 address sizes. This is enough to eliminate unnecessary padding in
17673 99% of cases. */
17674
17675 static int
17676 min_insn_size (rtx insn)
17677 {
17678 int l = 0;
17679
17680 if (!INSN_P (insn) || !active_insn_p (insn))
17681 return 0;
17682
17683 /* Discard alignments we've emitted and jump instructions. */
17684 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
17685 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
17686 return 0;
17687 if (GET_CODE (insn) == JUMP_INSN
17688 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
17689 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
17690 return 0;
17691
17692 /* Important case - calls are always 5 bytes.
17693 It is common to have many calls in a row. */
17694 if (GET_CODE (insn) == CALL_INSN
17695 && symbolic_reference_mentioned_p (PATTERN (insn))
17696 && !SIBLING_CALL_P (insn))
17697 return 5;
17698 if (get_attr_length (insn) <= 1)
17699 return 1;
17700
17701 /* For normal instructions we may rely on the sizes of addresses
17702 and the presence of a symbol to require 4 bytes of encoding.
17703 This is not the case for jumps, where references are PC relative. */
17704 if (GET_CODE (insn) != JUMP_INSN)
17705 {
17706 l = get_attr_length_address (insn);
17707 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
17708 l = 4;
17709 }
17710 if (l)
17711 return 1+l;
17712 else
17713 return 2;
17714 }
17715
17716 /* The AMD K8 core mispredicts jumps when there are more than 3 jumps in a
17717 16-byte window. */
17718
17719 static void
17720 ix86_avoid_jump_misspredicts (void)
17721 {
17722 rtx insn, start = get_insns ();
17723 int nbytes = 0, njumps = 0;
17724 int isjump = 0;
17725
17726 /* Look for all minimal intervals of instructions containing 4 jumps.
17727 The intervals are bounded by START and INSN. NBYTES is the total
17728 size of instructions in the interval including INSN and not including
17729 START. When the NBYTES is smaller than 16 bytes, it is possible
17730 that the end of START and INSN ends up in the same 16byte page.
17731
17732 The smallest offset in the page INSN can start is the case where START
17733 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
17734 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
17735 */
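/* In other words: whenever the jump or call just scanned is the fourth one
   that could fall within a single 16-byte window, padding is emitted in
   front of it so that at most three jumps share the window.  */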
17736 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
17737 {
17738
17739 nbytes += min_insn_size (insn);
17740 if (dump_file)
17741 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
17742 INSN_UID (insn), min_insn_size (insn));
17743 if ((GET_CODE (insn) == JUMP_INSN
17744 && GET_CODE (PATTERN (insn)) != ADDR_VEC
17745 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
17746 || GET_CODE (insn) == CALL_INSN)
17747 njumps++;
17748 else
17749 continue;
17750
17751 while (njumps > 3)
17752 {
17753 start = NEXT_INSN (start);
17754 if ((GET_CODE (start) == JUMP_INSN
17755 && GET_CODE (PATTERN (start)) != ADDR_VEC
17756 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
17757 || GET_CODE (start) == CALL_INSN)
17758 njumps--, isjump = 1;
17759 else
17760 isjump = 0;
17761 nbytes -= min_insn_size (start);
17762 }
17763 gcc_assert (njumps >= 0);
17764 if (dump_file)
17765 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
17766 INSN_UID (start), INSN_UID (insn), nbytes);
17767
17768 if (njumps == 3 && isjump && nbytes < 16)
17769 {
17770 int padsize = 15 - nbytes + min_insn_size (insn);
17771
17772 if (dump_file)
17773 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
17774 INSN_UID (insn), padsize);
17775 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
17776 }
17777 }
17778 }
17779
17780 /* The AMD Athlon works faster
17781 when RET is not the destination of a conditional jump or directly preceded
17782 by another jump instruction. We avoid the penalty by inserting a NOP just
17783 before such RET instructions. */
17784 static void
17785 ix86_pad_returns (void)
17786 {
17787 edge e;
17788 edge_iterator ei;
17789
17790 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
17791 {
17792 basic_block bb = e->src;
17793 rtx ret = BB_END (bb);
17794 rtx prev;
17795 bool replace = false;
17796
17797 if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
17798 || !maybe_hot_bb_p (bb))
17799 continue;
17800 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
17801 if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
17802 break;
17803 if (prev && GET_CODE (prev) == CODE_LABEL)
17804 {
17805 edge e;
17806 edge_iterator ei;
17807
17808 FOR_EACH_EDGE (e, ei, bb->preds)
17809 if (EDGE_FREQUENCY (e) && e->src->index >= 0
17810 && !(e->flags & EDGE_FALLTHRU))
17811 replace = true;
17812 }
17813 if (!replace)
17814 {
17815 prev = prev_active_insn (ret);
17816 if (prev
17817 && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
17818 || GET_CODE (prev) == CALL_INSN))
17819 replace = true;
17820 /* Empty functions get a branch mispredict even when the jump destination
17821 is not visible to us. */
17822 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
17823 replace = true;
17824 }
17825 if (replace)
17826 {
17827 emit_insn_before (gen_return_internal_long (), ret);
17828 delete_insn (ret);
17829 }
17830 }
17831 }
17832
17833 /* Implement machine-specific optimizations. We implement padding of returns
17834 for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte window. */
17835 static void
17836 ix86_reorg (void)
17837 {
17838 if (TARGET_PAD_RETURNS && optimize && !optimize_size)
17839 ix86_pad_returns ();
17840 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
17841 ix86_avoid_jump_misspredicts ();
17842 }
17843
17844 /* Return nonzero when a QImode register that must be represented via a REX
17845 prefix is used. */
17846 bool
17847 x86_extended_QIreg_mentioned_p (rtx insn)
17848 {
17849 int i;
17850 extract_insn_cached (insn);
17851 for (i = 0; i < recog_data.n_operands; i++)
17852 if (REG_P (recog_data.operand[i])
17853 && REGNO (recog_data.operand[i]) >= 4)
17854 return true;
17855 return false;
17856 }
17857
17858 /* Return nonzero when P points to a register encoded via a REX prefix.
17859 Called via for_each_rtx. */
17860 static int
17861 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
17862 {
17863 unsigned int regno;
17864 if (!REG_P (*p))
17865 return 0;
17866 regno = REGNO (*p);
17867 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
17868 }
17869
17870 /* Return true when INSN mentions a register that must be encoded using a
17871 REX prefix. */
17872 bool
17873 x86_extended_reg_mentioned_p (rtx insn)
17874 {
17875 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
17876 }
17877
17878 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
17879 optabs would emit if we didn't have TFmode patterns. */
17880
17881 void
17882 x86_emit_floatuns (rtx operands[2])
17883 {
17884 rtx neglab, donelab, i0, i1, f0, in, out;
17885 enum machine_mode mode, inmode;
17886
17887 inmode = GET_MODE (operands[1]);
17888 gcc_assert (inmode == SImode || inmode == DImode);
17889
17890 out = operands[0];
17891 in = force_reg (inmode, operands[1]);
17892 mode = GET_MODE (out);
17893 neglab = gen_label_rtx ();
17894 donelab = gen_label_rtx ();
17895 i1 = gen_reg_rtx (Pmode);
17896 f0 = gen_reg_rtx (mode);
17897
17898 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
17899
17900 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
17901 emit_jump_insn (gen_jump (donelab));
17902 emit_barrier ();
17903
17904 emit_label (neglab);
17905
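  /* IN has its sign bit set, so it cannot be converted directly as a signed
     value.  Instead compute (IN >> 1) | (IN & 1), convert that, and double
     the result; OR-ing the low bit back in preserves correct rounding.  */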
17906 i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
17907 i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
17908 i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
17909 expand_float (f0, i0, 0);
17910 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
17911
17912 emit_label (donelab);
17913 }
17914 \f
17915 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
17916 with all elements equal to VAL. Return true if successful. */
17917
17918 static bool
17919 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
17920 rtx target, rtx val)
17921 {
17922 enum machine_mode smode, wsmode, wvmode;
17923 rtx x;
17924
17925 switch (mode)
17926 {
17927 case V2SImode:
17928 case V2SFmode:
17929 if (!mmx_ok && !TARGET_SSE)
17930 return false;
17931 /* FALLTHRU */
17932
17933 case V2DFmode:
17934 case V2DImode:
17935 case V4SFmode:
17936 case V4SImode:
17937 val = force_reg (GET_MODE_INNER (mode), val);
17938 x = gen_rtx_VEC_DUPLICATE (mode, val);
17939 emit_insn (gen_rtx_SET (VOIDmode, target, x));
17940 return true;
17941
17942 case V4HImode:
17943 if (!mmx_ok)
17944 return false;
17945 if (TARGET_SSE || TARGET_3DNOW_A)
17946 {
17947 val = gen_lowpart (SImode, val);
17948 x = gen_rtx_TRUNCATE (HImode, val);
17949 x = gen_rtx_VEC_DUPLICATE (mode, x);
17950 emit_insn (gen_rtx_SET (VOIDmode, target, x));
17951 return true;
17952 }
17953 else
17954 {
17955 smode = HImode;
17956 wsmode = SImode;
17957 wvmode = V2SImode;
17958 goto widen;
17959 }
17960
17961 case V8QImode:
17962 if (!mmx_ok)
17963 return false;
17964 smode = QImode;
17965 wsmode = HImode;
17966 wvmode = V4HImode;
17967 goto widen;
17968 case V8HImode:
17969 if (TARGET_SSE2)
17970 {
17971 rtx tmp1, tmp2;
17972 /* Extend HImode to SImode using a paradoxical SUBREG. */
17973 tmp1 = gen_reg_rtx (SImode);
17974 emit_move_insn (tmp1, gen_lowpart (SImode, val));
17975 /* Insert the SImode value as low element of V4SImode vector. */
17976 tmp2 = gen_reg_rtx (V4SImode);
17977 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
17978 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
17979 CONST0_RTX (V4SImode),
17980 const1_rtx);
17981 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
17982 /* Cast the V4SImode vector back to a V8HImode vector. */
17983 tmp1 = gen_reg_rtx (V8HImode);
17984 emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
17985 /* Duplicate the low short through the whole low SImode word. */
17986 emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
17987 /* Cast the V8HImode vector back to a V4SImode vector. */
17988 tmp2 = gen_reg_rtx (V4SImode);
17989 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
17990 /* Replicate the low element of the V4SImode vector. */
17991 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
17992 /* Cast the V4SImode vector back to V8HImode, and store in target. */
17993 emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
17994 return true;
17995 }
17996 smode = HImode;
17997 wsmode = SImode;
17998 wvmode = V4SImode;
17999 goto widen;
18000 case V16QImode:
18001 if (TARGET_SSE2)
18002 {
18003 rtx tmp1, tmp2;
18004 /* Extend QImode to SImode using a paradoxical SUBREG. */
18005 tmp1 = gen_reg_rtx (SImode);
18006 emit_move_insn (tmp1, gen_lowpart (SImode, val));
18007 /* Insert the SImode value as low element of V4SImode vector. */
18008 tmp2 = gen_reg_rtx (V4SImode);
18009 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
18010 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
18011 CONST0_RTX (V4SImode),
18012 const1_rtx);
18013 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
18014 /* Cast the V4SImode vector back to a V16QImode vector. */
18015 tmp1 = gen_reg_rtx (V16QImode);
18016 emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
18017 /* Duplicate the low byte through the whole low SImode word. */
18018 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
18019 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
18020 /* Cast the V16QImode vector back to a V4SImode vector. */
18021 tmp2 = gen_reg_rtx (V4SImode);
18022 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
18023 /* Replicate the low element of the V4SImode vector. */
18024 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
18025 /* Cast the V4SImode vector back to V16QImode, and store in target. */
18026 emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
18027 return true;
18028 }
18029 smode = QImode;
18030 wsmode = HImode;
18031 wvmode = V8HImode;
18032 goto widen;
18033 widen:
18034 /* Replicate the value once into the next wider mode and recurse. */
18035 val = convert_modes (wsmode, smode, val, true);
18036 x = expand_simple_binop (wsmode, ASHIFT, val,
18037 GEN_INT (GET_MODE_BITSIZE (smode)),
18038 NULL_RTX, 1, OPTAB_LIB_WIDEN);
18039 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
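      /* E.g. a QImode value B has now become the HImode value (B << 8) | B,
	 which the recursive call below broadcasts across the wider vector.  */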
18040
18041 x = gen_reg_rtx (wvmode);
18042 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
18043 gcc_unreachable ();
18044 emit_move_insn (target, gen_lowpart (mode, x));
18045 return true;
18046
18047 default:
18048 return false;
18049 }
18050 }
18051
18052 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
18053 whose ONE_VAR element is VAR, and other elements are zero. Return true
18054 if successful. */
18055
18056 static bool
18057 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
18058 rtx target, rtx var, int one_var)
18059 {
18060 enum machine_mode vsimode;
18061 rtx new_target;
18062 rtx x, tmp;
18063
18064 switch (mode)
18065 {
18066 case V2SFmode:
18067 case V2SImode:
18068 if (!mmx_ok && !TARGET_SSE)
18069 return false;
18070 /* FALLTHRU */
18071
18072 case V2DFmode:
18073 case V2DImode:
18074 if (one_var != 0)
18075 return false;
18076 var = force_reg (GET_MODE_INNER (mode), var);
18077 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
18078 emit_insn (gen_rtx_SET (VOIDmode, target, x));
18079 return true;
18080
18081 case V4SFmode:
18082 case V4SImode:
18083 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
18084 new_target = gen_reg_rtx (mode);
18085 else
18086 new_target = target;
18087 var = force_reg (GET_MODE_INNER (mode), var);
18088 x = gen_rtx_VEC_DUPLICATE (mode, var);
18089 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
18090 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
18091 if (one_var != 0)
18092 {
18093 /* We need to shuffle the value to the correct position, so
18094 create a new pseudo to store the intermediate result. */
18095
18096 /* With SSE2, we can use the integer shuffle insns. */
18097 if (mode != V4SFmode && TARGET_SSE2)
18098 {
18099 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
18100 GEN_INT (1),
18101 GEN_INT (one_var == 1 ? 0 : 1),
18102 GEN_INT (one_var == 2 ? 0 : 1),
18103 GEN_INT (one_var == 3 ? 0 : 1)));
18104 if (target != new_target)
18105 emit_move_insn (target, new_target);
18106 return true;
18107 }
18108
18109 /* Otherwise convert the intermediate result to V4SFmode and
18110 use the SSE1 shuffle instructions. */
18111 if (mode != V4SFmode)
18112 {
18113 tmp = gen_reg_rtx (V4SFmode);
18114 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
18115 }
18116 else
18117 tmp = new_target;
18118
18119 emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp,
18120 GEN_INT (1),
18121 GEN_INT (one_var == 1 ? 0 : 1),
18122 GEN_INT (one_var == 2 ? 0+4 : 1+4),
18123 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
18124
18125 if (mode != V4SFmode)
18126 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
18127 else if (tmp != target)
18128 emit_move_insn (target, tmp);
18129 }
18130 else if (target != new_target)
18131 emit_move_insn (target, new_target);
18132 return true;
18133
18134 case V8HImode:
18135 case V16QImode:
18136 vsimode = V4SImode;
18137 goto widen;
18138 case V4HImode:
18139 case V8QImode:
18140 if (!mmx_ok)
18141 return false;
18142 vsimode = V2SImode;
18143 goto widen;
18144 widen:
18145 if (one_var != 0)
18146 return false;
18147
18148 /* Zero extend the variable element to SImode and recurse. */
18149 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
18150
18151 x = gen_reg_rtx (vsimode);
18152 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
18153 var, one_var))
18154 gcc_unreachable ();
18155
18156 emit_move_insn (target, gen_lowpart (mode, x));
18157 return true;
18158
18159 default:
18160 return false;
18161 }
18162 }
18163
18164 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
18165 consisting of the values in VALS. It is known that all elements
18166 except ONE_VAR are constants. Return true if successful. */
18167
18168 static bool
18169 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
18170 rtx target, rtx vals, int one_var)
18171 {
18172 rtx var = XVECEXP (vals, 0, one_var);
18173 enum machine_mode wmode;
18174 rtx const_vec, x;
18175
18176 const_vec = copy_rtx (vals);
18177 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
18178 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
18179
18180 switch (mode)
18181 {
18182 case V2DFmode:
18183 case V2DImode:
18184 case V2SFmode:
18185 case V2SImode:
18186 /* For the two element vectors, it's just as easy to use
18187 the general case. */
18188 return false;
18189
18190 case V4SFmode:
18191 case V4SImode:
18192 case V8HImode:
18193 case V4HImode:
18194 break;
18195
18196 case V16QImode:
18197 wmode = V8HImode;
18198 goto widen;
18199 case V8QImode:
18200 wmode = V4HImode;
18201 goto widen;
18202 widen:
18203 /* There's no way to set one QImode entry easily. Combine
18204 the variable value with its adjacent constant value, and
18205 promote to an HImode set. */
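      /* For example, to change byte 5 of a V16QImode vector, bytes 4 and 5
	 are combined into one HImode value and element 2 of the V8HImode
	 view of the vector is set instead.  */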
18206 x = XVECEXP (vals, 0, one_var ^ 1);
18207 if (one_var & 1)
18208 {
18209 var = convert_modes (HImode, QImode, var, true);
18210 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
18211 NULL_RTX, 1, OPTAB_LIB_WIDEN);
18212 x = GEN_INT (INTVAL (x) & 0xff);
18213 }
18214 else
18215 {
18216 var = convert_modes (HImode, QImode, var, true);
18217 x = gen_int_mode (INTVAL (x) << 8, HImode);
18218 }
18219 if (x != const0_rtx)
18220 var = expand_simple_binop (HImode, IOR, var, x, var,
18221 1, OPTAB_LIB_WIDEN);
18222
18223 x = gen_reg_rtx (wmode);
18224 emit_move_insn (x, gen_lowpart (wmode, const_vec));
18225 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
18226
18227 emit_move_insn (target, gen_lowpart (mode, x));
18228 return true;
18229
18230 default:
18231 return false;
18232 }
18233
18234 emit_move_insn (target, const_vec);
18235 ix86_expand_vector_set (mmx_ok, target, var, one_var);
18236 return true;
18237 }
18238
18239 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
18240 all values variable, and none identical. */
18241
18242 static void
18243 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
18244 rtx target, rtx vals)
18245 {
18246 enum machine_mode half_mode = GET_MODE_INNER (mode);
18247 rtx op0 = NULL, op1 = NULL;
18248 bool use_vec_concat = false;
18249
18250 switch (mode)
18251 {
18252 case V2SFmode:
18253 case V2SImode:
18254 if (!mmx_ok && !TARGET_SSE)
18255 break;
18256 /* FALLTHRU */
18257
18258 case V2DFmode:
18259 case V2DImode:
18260 /* For the two element vectors, we always implement VEC_CONCAT. */
18261 op0 = XVECEXP (vals, 0, 0);
18262 op1 = XVECEXP (vals, 0, 1);
18263 use_vec_concat = true;
18264 break;
18265
18266 case V4SFmode:
18267 half_mode = V2SFmode;
18268 goto half;
18269 case V4SImode:
18270 half_mode = V2SImode;
18271 goto half;
18272 half:
18273 {
18274 rtvec v;
18275
18276 /* For V4SF and V4SI, we implement a concat of two V2 vectors.
18277 Recurse to load the two halves. */
18278
18279 op0 = gen_reg_rtx (half_mode);
18280 v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
18281 ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));
18282
18283 op1 = gen_reg_rtx (half_mode);
18284 v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
18285 ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));
18286
18287 use_vec_concat = true;
18288 }
18289 break;
18290
18291 case V8HImode:
18292 case V16QImode:
18293 case V4HImode:
18294 case V8QImode:
18295 break;
18296
18297 default:
18298 gcc_unreachable ();
18299 }
18300
18301 if (use_vec_concat)
18302 {
18303 if (!register_operand (op0, half_mode))
18304 op0 = force_reg (half_mode, op0);
18305 if (!register_operand (op1, half_mode))
18306 op1 = force_reg (half_mode, op1);
18307
18308 emit_insn (gen_rtx_SET (VOIDmode, target,
18309 gen_rtx_VEC_CONCAT (mode, op0, op1)));
18310 }
18311 else
18312 {
18313 int i, j, n_elts, n_words, n_elt_per_word;
18314 enum machine_mode inner_mode;
18315 rtx words[4], shift;
18316
18317 inner_mode = GET_MODE_INNER (mode);
18318 n_elts = GET_MODE_NUNITS (mode);
18319 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
18320 n_elt_per_word = n_elts / n_words;
18321 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
18322
18323 for (i = 0; i < n_words; ++i)
18324 {
18325 rtx word = NULL_RTX;
18326
18327 for (j = 0; j < n_elt_per_word; ++j)
18328 {
18329 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
18330 elt = convert_modes (word_mode, inner_mode, elt, true);
18331
18332 if (j == 0)
18333 word = elt;
18334 else
18335 {
18336 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
18337 word, 1, OPTAB_LIB_WIDEN);
18338 word = expand_simple_binop (word_mode, IOR, word, elt,
18339 word, 1, OPTAB_LIB_WIDEN);
18340 }
18341 }
18342
18343 words[i] = word;
18344 }
18345
18346 if (n_words == 1)
18347 emit_move_insn (target, gen_lowpart (mode, words[0]));
18348 else if (n_words == 2)
18349 {
18350 rtx tmp = gen_reg_rtx (mode);
18351 emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
18352 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
18353 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
18354 emit_move_insn (target, tmp);
18355 }
18356 else if (n_words == 4)
18357 {
18358 rtx tmp = gen_reg_rtx (V4SImode);
18359 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
18360 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
18361 emit_move_insn (target, gen_lowpart (mode, tmp));
18362 }
18363 else
18364 gcc_unreachable ();
18365 }
18366 }
18367
18368 /* Initialize vector TARGET via VALS. Suppress the use of MMX
18369 instructions unless MMX_OK is true. */
18370
18371 void
18372 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
18373 {
18374 enum machine_mode mode = GET_MODE (target);
18375 enum machine_mode inner_mode = GET_MODE_INNER (mode);
18376 int n_elts = GET_MODE_NUNITS (mode);
18377 int n_var = 0, one_var = -1;
18378 bool all_same = true, all_const_zero = true;
18379 int i;
18380 rtx x;
18381
18382 for (i = 0; i < n_elts; ++i)
18383 {
18384 x = XVECEXP (vals, 0, i);
18385 if (!CONSTANT_P (x))
18386 n_var++, one_var = i;
18387 else if (x != CONST0_RTX (inner_mode))
18388 all_const_zero = false;
18389 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
18390 all_same = false;
18391 }
18392
18393 /* Constants are best loaded from the constant pool. */
18394 if (n_var == 0)
18395 {
18396 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
18397 return;
18398 }
18399
18400 /* If all values are identical, broadcast the value. */
18401 if (all_same
18402 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
18403 XVECEXP (vals, 0, 0)))
18404 return;
18405
18406 /* Values where only one field is non-constant are best loaded from
18407 the pool and overwritten via move later. */
18408 if (n_var == 1)
18409 {
18410 if (all_const_zero
18411 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
18412 XVECEXP (vals, 0, one_var),
18413 one_var))
18414 return;
18415
18416 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
18417 return;
18418 }
18419
18420 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
18421 }
18422
18423 void
18424 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
18425 {
18426 enum machine_mode mode = GET_MODE (target);
18427 enum machine_mode inner_mode = GET_MODE_INNER (mode);
18428 bool use_vec_merge = false;
18429 rtx tmp;
18430
18431 switch (mode)
18432 {
18433 case V2SFmode:
18434 case V2SImode:
18435 if (mmx_ok)
18436 {
18437 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
18438 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
18439 if (elt == 0)
18440 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
18441 else
18442 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
18443 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18444 return;
18445 }
18446 break;
18447
18448 case V2DFmode:
18449 case V2DImode:
18450 {
18451 rtx op0, op1;
18452
18453 /* For the two element vectors, we implement a VEC_CONCAT with
18454 the extraction of the other element. */
18455
18456 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
18457 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
18458
18459 if (elt == 0)
18460 op0 = val, op1 = tmp;
18461 else
18462 op0 = tmp, op1 = val;
18463
18464 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
18465 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18466 }
18467 return;
18468
18469 case V4SFmode:
18470 switch (elt)
18471 {
18472 case 0:
18473 use_vec_merge = true;
18474 break;
18475
18476 case 1:
18477 /* tmp = target = A B C D */
18478 tmp = copy_to_reg (target);
18479 /* target = A A B B */
18480 emit_insn (gen_sse_unpcklps (target, target, target));
18481 /* target = X A B B */
18482 ix86_expand_vector_set (false, target, val, 0);
18483 /* target = A X C D */
18484 emit_insn (gen_sse_shufps_1 (target, target, tmp,
18485 GEN_INT (1), GEN_INT (0),
18486 GEN_INT (2+4), GEN_INT (3+4)));
18487 return;
18488
18489 case 2:
18490 /* tmp = target = A B C D */
18491 tmp = copy_to_reg (target);
18492 /* tmp = X B C D */
18493 ix86_expand_vector_set (false, tmp, val, 0);
18494 /* target = A B X D */
18495 emit_insn (gen_sse_shufps_1 (target, target, tmp,
18496 GEN_INT (0), GEN_INT (1),
18497 GEN_INT (0+4), GEN_INT (3+4)));
18498 return;
18499
18500 case 3:
18501 /* tmp = target = A B C D */
18502 tmp = copy_to_reg (target);
18503 /* tmp = X B C D */
18504 ix86_expand_vector_set (false, tmp, val, 0);
18505 /* target = A B X D */
18506 emit_insn (gen_sse_shufps_1 (target, target, tmp,
18507 GEN_INT (0), GEN_INT (1),
18508 GEN_INT (2+4), GEN_INT (0+4)));
18509 return;
18510
18511 default:
18512 gcc_unreachable ();
18513 }
18514 break;
18515
18516 case V4SImode:
18517 /* Element 0 handled by vec_merge below. */
18518 if (elt == 0)
18519 {
18520 use_vec_merge = true;
18521 break;
18522 }
18523
18524 if (TARGET_SSE2)
18525 {
18526 /* With SSE2, use integer shuffles to swap element 0 and ELT,
18527 store into element 0, then shuffle them back. */
18528
18529 rtx order[4];
18530
18531 order[0] = GEN_INT (elt);
18532 order[1] = const1_rtx;
18533 order[2] = const2_rtx;
18534 order[3] = GEN_INT (3);
18535 order[elt] = const0_rtx;
18536
18537 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
18538 order[1], order[2], order[3]));
18539
18540 ix86_expand_vector_set (false, target, val, 0);
18541
18542 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
18543 order[1], order[2], order[3]));
18544 }
18545 else
18546 {
18547 /* For SSE1, we have to reuse the V4SF code. */
18548 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
18549 gen_lowpart (SFmode, val), elt);
18550 }
18551 return;
18552
18553 case V8HImode:
18554 use_vec_merge = TARGET_SSE2;
18555 break;
18556 case V4HImode:
18557 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
18558 break;
18559
18560 case V16QImode:
18561 case V8QImode:
18562 default:
18563 break;
18564 }
18565
18566 if (use_vec_merge)
18567 {
18568 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
18569 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
18570 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18571 }
18572 else
18573 {
18574 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
18575
18576 emit_move_insn (mem, target);
18577
18578 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
18579 emit_move_insn (tmp, val);
18580
18581 emit_move_insn (target, mem);
18582 }
18583 }
18584
18585 void
18586 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
18587 {
18588 enum machine_mode mode = GET_MODE (vec);
18589 enum machine_mode inner_mode = GET_MODE_INNER (mode);
18590 bool use_vec_extr = false;
18591 rtx tmp;
18592
18593 switch (mode)
18594 {
18595 case V2SImode:
18596 case V2SFmode:
18597 if (!mmx_ok)
18598 break;
18599 /* FALLTHRU */
18600
18601 case V2DFmode:
18602 case V2DImode:
18603 use_vec_extr = true;
18604 break;
18605
18606 case V4SFmode:
18607 switch (elt)
18608 {
18609 case 0:
18610 tmp = vec;
18611 break;
18612
18613 case 1:
18614 case 3:
18615 tmp = gen_reg_rtx (mode);
18616 emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
18617 GEN_INT (elt), GEN_INT (elt),
18618 GEN_INT (elt+4), GEN_INT (elt+4)));
18619 break;
18620
18621 case 2:
18622 tmp = gen_reg_rtx (mode);
18623 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
18624 break;
18625
18626 default:
18627 gcc_unreachable ();
18628 }
18629 vec = tmp;
18630 use_vec_extr = true;
18631 elt = 0;
18632 break;
18633
18634 case V4SImode:
18635 if (TARGET_SSE2)
18636 {
18637 switch (elt)
18638 {
18639 case 0:
18640 tmp = vec;
18641 break;
18642
18643 case 1:
18644 case 3:
18645 tmp = gen_reg_rtx (mode);
18646 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
18647 GEN_INT (elt), GEN_INT (elt),
18648 GEN_INT (elt), GEN_INT (elt)));
18649 break;
18650
18651 case 2:
18652 tmp = gen_reg_rtx (mode);
18653 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
18654 break;
18655
18656 default:
18657 gcc_unreachable ();
18658 }
18659 vec = tmp;
18660 use_vec_extr = true;
18661 elt = 0;
18662 }
18663 else
18664 {
18665 /* For SSE1, we have to reuse the V4SF code. */
18666 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
18667 gen_lowpart (V4SFmode, vec), elt);
18668 return;
18669 }
18670 break;
18671
18672 case V8HImode:
18673 use_vec_extr = TARGET_SSE2;
18674 break;
18675 case V4HImode:
18676 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
18677 break;
18678
18679 case V16QImode:
18680 case V8QImode:
18681 /* ??? Could extract the appropriate HImode element and shift. */
18682 default:
18683 break;
18684 }
18685
18686 if (use_vec_extr)
18687 {
18688 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
18689 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
18690
18691 /* Let the rtl optimizers know about the zero extension performed. */
18692 if (inner_mode == HImode)
18693 {
18694 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
18695 target = gen_lowpart (SImode, target);
18696 }
18697
18698 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18699 }
18700 else
18701 {
18702 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
18703
18704 emit_move_insn (mem, vec);
18705
18706 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
18707 emit_move_insn (target, tmp);
18708 }
18709 }
18710
18711 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
18712 pattern to reduce; DEST is the destination; IN is the input vector. */
18713
18714 void
18715 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
18716 {
18717 rtx tmp1, tmp2, tmp3;
18718
18719 tmp1 = gen_reg_rtx (V4SFmode);
18720 tmp2 = gen_reg_rtx (V4SFmode);
18721 tmp3 = gen_reg_rtx (V4SFmode);
18722
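  /* With IN = { a, b, c, d }: movhlps gives tmp1 = { c, d, c, d };
     FN (tmp2, tmp1, IN) leaves FN (c, a) in element 0 and FN (d, b) in
     element 1; the shufps broadcasts element 1 into tmp3; and the final
     FN combines the two partial results into element 0 of DEST.  */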
18723 emit_insn (gen_sse_movhlps (tmp1, in, in));
18724 emit_insn (fn (tmp2, tmp1, in));
18725
18726 emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
18727 GEN_INT (1), GEN_INT (1),
18728 GEN_INT (1+4), GEN_INT (1+4)));
18729 emit_insn (fn (dest, tmp2, tmp3));
18730 }
18731 \f
18732 /* Target hook for scalar_mode_supported_p. */
18733 static bool
18734 ix86_scalar_mode_supported_p (enum machine_mode mode)
18735 {
18736 if (DECIMAL_FLOAT_MODE_P (mode))
18737 return true;
18738 else
18739 return default_scalar_mode_supported_p (mode);
18740 }
18741
18742 /* Implements target hook vector_mode_supported_p. */
18743 static bool
18744 ix86_vector_mode_supported_p (enum machine_mode mode)
18745 {
18746 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
18747 return true;
18748 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
18749 return true;
18750 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
18751 return true;
18752 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
18753 return true;
18754 return false;
18755 }
18756
18757 /* Worker function for TARGET_MD_ASM_CLOBBERS.
18758
18759 We do this in the new i386 backend to maintain source compatibility
18760 with the old cc0-based compiler. */
18761
18762 static tree
18763 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
18764 tree inputs ATTRIBUTE_UNUSED,
18765 tree clobbers)
18766 {
18767 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
18768 clobbers);
18769 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
18770 clobbers);
18771 clobbers = tree_cons (NULL_TREE, build_string (7, "dirflag"),
18772 clobbers);
18773 return clobbers;
18774 }
18775
18776 /* Return true if this goes in large data/bss. */
18777
18778 static bool
18779 ix86_in_large_data_p (tree exp)
18780 {
18781 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
18782 return false;
18783
18784 /* Functions are never large data. */
18785 if (TREE_CODE (exp) == FUNCTION_DECL)
18786 return false;
18787
18788 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
18789 {
18790 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
18791 if (strcmp (section, ".ldata") == 0
18792 || strcmp (section, ".lbss") == 0)
18793 return true;
18794 return false;
18795 }
18796 else
18797 {
18798 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
18799
18800 /* If this is an incomplete type with size 0, then we can't put it
18801 in data because it might be too big when completed. */
18802 if (!size || size > ix86_section_threshold)
18803 return true;
18804 }
18805
18806 return false;
18807 }
18808 static void
18809 ix86_encode_section_info (tree decl, rtx rtl, int first)
18810 {
18811 default_encode_section_info (decl, rtl, first);
18812
18813 if (TREE_CODE (decl) == VAR_DECL
18814 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
18815 && ix86_in_large_data_p (decl))
18816 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
18817 }
18818
18819 /* Worker function for REVERSE_CONDITION. */
18820
18821 enum rtx_code
18822 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
18823 {
18824 return (mode != CCFPmode && mode != CCFPUmode
18825 ? reverse_condition (code)
18826 : reverse_condition_maybe_unordered (code));
18827 }
18828
18829 /* Output code to perform an x87 FP register move, from OPERANDS[1]
18830 to OPERANDS[0]. */
18831
18832 const char *
18833 output_387_reg_move (rtx insn, rtx *operands)
18834 {
18835 if (REG_P (operands[1])
18836 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
18837 {
18838 if (REGNO (operands[0]) == FIRST_STACK_REG)
18839 return output_387_ffreep (operands, 0);
18840 return "fstp\t%y0";
18841 }
18842 if (STACK_TOP_P (operands[0]))
18843 return "fld%z1\t%y1";
18844 return "fst\t%y0";
18845 }
18846
18847 /* Output code to perform a conditional jump to LABEL if the C2 flag in
18848 the FP status register is set. */
18849
18850 void
18851 ix86_emit_fp_unordered_jump (rtx label)
18852 {
18853 rtx reg = gen_reg_rtx (HImode);
18854 rtx temp;
18855
18856 emit_insn (gen_x86_fnstsw_1 (reg));
18857
18858 if (TARGET_USE_SAHF)
18859 {
18860 emit_insn (gen_x86_sahf_1 (reg));
18861
18862 temp = gen_rtx_REG (CCmode, FLAGS_REG);
18863 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
18864 }
18865 else
18866 {
18867 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
18868
18869 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
18870 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
18871 }
18872
18873 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
18874 gen_rtx_LABEL_REF (VOIDmode, label),
18875 pc_rtx);
18876 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
18877 emit_jump_insn (temp);
18878 }
18879
18880 /* Output code to perform a log1p XFmode calculation. */
18881
18882 void ix86_emit_i387_log1p (rtx op0, rtx op1)
18883 {
18884 rtx label1 = gen_label_rtx ();
18885 rtx label2 = gen_label_rtx ();
18886
18887 rtx tmp = gen_reg_rtx (XFmode);
18888 rtx tmp2 = gen_reg_rtx (XFmode);
18889
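  /* fyl2xp1 only gives accurate results for |op1| below 1 - sqrt(2)/2
     (about 0.2929, the constant compared against below); for larger
     values fall back to computing log2 (1 + op1) with fyl2x.  */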
18890 emit_insn (gen_absxf2 (tmp, op1));
18891 emit_insn (gen_cmpxf (tmp,
18892 CONST_DOUBLE_FROM_REAL_VALUE (
18893 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
18894 XFmode)));
18895 emit_jump_insn (gen_bge (label1));
18896
18897 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
18898 emit_insn (gen_fyl2xp1_xf3 (op0, tmp2, op1));
18899 emit_jump (label2);
18900
18901 emit_label (label1);
18902 emit_move_insn (tmp, CONST1_RTX (XFmode));
18903 emit_insn (gen_addxf3 (tmp, op1, tmp));
18904 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
18905 emit_insn (gen_fyl2x_xf3 (op0, tmp2, tmp));
18906
18907 emit_label (label2);
18908 }
18909
18910 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
18911
18912 static void
18913 i386_solaris_elf_named_section (const char *name, unsigned int flags,
18914 tree decl)
18915 {
18916 /* With Binutils 2.15, the "@unwind" marker must be specified on
18917 every occurrence of the ".eh_frame" section, not just the first
18918 one. */
18919 if (TARGET_64BIT
18920 && strcmp (name, ".eh_frame") == 0)
18921 {
18922 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
18923 flags & SECTION_WRITE ? "aw" : "a");
18924 return;
18925 }
18926 default_elf_asm_named_section (name, flags, decl);
18927 }
18928
18929 /* Return the mangling of TYPE if it is an extended fundamental type. */
18930
18931 static const char *
18932 ix86_mangle_fundamental_type (tree type)
18933 {
18934 switch (TYPE_MODE (type))
18935 {
18936 case TFmode:
18937 /* __float128 is "g". */
18938 return "g";
18939 case XFmode:
18940 /* "long double" or __float80 is "e". */
18941 return "e";
18942 default:
18943 return NULL;
18944 }
18945 }
18946
18947 /* For 32-bit code we can save the PIC register setup by using the
18948 __stack_chk_fail_local hidden function instead of calling
18949 __stack_chk_fail directly. 64-bit code doesn't need to set up any PIC
18950 register, so it is better to call __stack_chk_fail directly. */
18951
18952 static tree
18953 ix86_stack_protect_fail (void)
18954 {
18955 return TARGET_64BIT
18956 ? default_external_stack_protect_fail ()
18957 : default_hidden_stack_protect_fail ();
18958 }
18959
18960 /* Select a format to encode pointers in exception handling data. CODE
18961 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
18962 true if the symbol may be affected by dynamic relocations.
18963
18964 ??? All x86 object file formats are capable of representing this.
18965 After all, the relocation needed is the same as for the call insn.
18966 Whether or not a particular assembler allows us to enter such, I
18967 guess we'll have to see. */
18968 int
18969 asm_preferred_eh_data_format (int code, int global)
18970 {
18971 if (flag_pic)
18972 {
18973 int type = DW_EH_PE_sdata8;
18974 if (!TARGET_64BIT
18975 || ix86_cmodel == CM_SMALL_PIC
18976 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
18977 type = DW_EH_PE_sdata4;
18978 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
18979 }
18980 if (ix86_cmodel == CM_SMALL
18981 || (ix86_cmodel == CM_MEDIUM && code))
18982 return DW_EH_PE_udata4;
18983 return DW_EH_PE_absptr;
18984 }
18985
18986 #include "gt-i386.h"