1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20 Boston, MA 02110-1301, USA. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "tm_p.h"
29 #include "regs.h"
30 #include "hard-reg-set.h"
31 #include "real.h"
32 #include "insn-config.h"
33 #include "conditions.h"
34 #include "output.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
37 #include "flags.h"
38 #include "except.h"
39 #include "function.h"
40 #include "recog.h"
41 #include "expr.h"
42 #include "optabs.h"
43 #include "toplev.h"
44 #include "basic-block.h"
45 #include "ggc.h"
46 #include "target.h"
47 #include "target-def.h"
48 #include "langhooks.h"
49 #include "cgraph.h"
50 #include "tree-gimple.h"
51 #include "dwarf2.h"
52 #include "df.h"
53 #include "tm-constrs.h"
54 #include "params.h"
55
56 #ifndef CHECK_STACK_LIMIT
57 #define CHECK_STACK_LIMIT (-1)
58 #endif
59
60 /* Return index of given mode in mult and division cost tables. */
61 #define MODE_INDEX(mode) \
62 ((mode) == QImode ? 0 \
63 : (mode) == HImode ? 1 \
64 : (mode) == SImode ? 2 \
65 : (mode) == DImode ? 3 \
66 : 4)
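
/* Editor's note: a minimal usage sketch, not from the original sources.
   MODE_INDEX selects the row of the per-mode arrays in struct
   processor_costs; the field name mult_init below is assumed from the
   declaration in i386.h, and ix86_cost is the active cost table declared
   further down in this file.

     int qi_mul_cost = ix86_cost->mult_init[MODE_INDEX (QImode)];
     int si_mul_cost = ix86_cost->mult_init[MODE_INDEX (SImode)];

   QImode maps to index 0 and SImode to index 2; any mode other than
   QI/HI/SI/DImode falls through to the "other" slot at index 4.  */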
67
68 /* Processor costs (relative to an add) */
69 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
70 #define COSTS_N_BYTES(N) ((N) * 2)
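
/* Editor's note: a worked example of the size-cost scale, assuming
   COSTS_N_INSNS (N) expands to (N) * 4 as stated above.  Then
   COSTS_N_BYTES (2) == 4 == COSTS_N_INSNS (1), so the 2-byte add in
   size_cost below is charged like a single "average" instruction, and
   COSTS_N_BYTES (3) == 6 lands between one and two instructions.  */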
71
72 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
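
/* Editor's note: a sketch of how to read the stringop_algs initializers in
   the cost tables below; the field layout is assumed from the
   stringop_algs declaration in i386.h.  Each table ends with two such
   groups (memcpy first, then memset), each holding a 32-bit and a 64-bit
   variant.  A variant starts with the algorithm used when the block size
   is unknown, followed by {max_size, algorithm} pairs where -1 means "no
   upper bound".  For example, the 32-bit memset entry of k8_cost below,

     {libcall, {{8, loop}, {24, unrolled_loop},
		{2048, rep_prefix_4_byte}, {-1, libcall}}},

   reads: unknown size -> libcall, up to 8 bytes -> simple loop, up to 24
   -> unrolled loop, up to 2048 -> rep prefix, anything larger -> libcall.
   DUMMY_STRINGOP_ALGS is just a placeholder for the variant a given tuning
   never uses.  */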
73
74 static const
75 struct processor_costs size_cost = { /* costs for tuning for size */
76 COSTS_N_BYTES (2), /* cost of an add instruction */
77 COSTS_N_BYTES (3), /* cost of a lea instruction */
78 COSTS_N_BYTES (2), /* variable shift costs */
79 COSTS_N_BYTES (3), /* constant shift costs */
80 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
81 COSTS_N_BYTES (3), /* HI */
82 COSTS_N_BYTES (3), /* SI */
83 COSTS_N_BYTES (3), /* DI */
84 COSTS_N_BYTES (5)}, /* other */
85 0, /* cost of multiply per each bit set */
86 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
87 COSTS_N_BYTES (3), /* HI */
88 COSTS_N_BYTES (3), /* SI */
89 COSTS_N_BYTES (3), /* DI */
90 COSTS_N_BYTES (5)}, /* other */
91 COSTS_N_BYTES (3), /* cost of movsx */
92 COSTS_N_BYTES (3), /* cost of movzx */
93 0, /* "large" insn */
94 2, /* MOVE_RATIO */
95 2, /* cost for loading QImode using movzbl */
96 {2, 2, 2}, /* cost of loading integer registers
97 in QImode, HImode and SImode.
98 Relative to reg-reg move (2). */
99 {2, 2, 2}, /* cost of storing integer registers */
100 2, /* cost of reg,reg fld/fst */
101 {2, 2, 2}, /* cost of loading fp registers
102 in SFmode, DFmode and XFmode */
103 {2, 2, 2}, /* cost of storing fp registers
104 in SFmode, DFmode and XFmode */
105 3, /* cost of moving MMX register */
106 {3, 3}, /* cost of loading MMX registers
107 in SImode and DImode */
108 {3, 3}, /* cost of storing MMX registers
109 in SImode and DImode */
110 3, /* cost of moving SSE register */
111 {3, 3, 3}, /* cost of loading SSE registers
112 in SImode, DImode and TImode */
113 {3, 3, 3}, /* cost of storing SSE registers
114 in SImode, DImode and TImode */
115 3, /* MMX or SSE register to integer */
116 0, /* size of prefetch block */
117 0, /* number of parallel prefetches */
118 2, /* Branch cost */
119 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
120 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
121 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
122 COSTS_N_BYTES (2), /* cost of FABS instruction. */
123 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
124 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
125 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
126 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
127 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
128 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}}
129 };
130
131 /* Processor costs (relative to an add) */
132 static const
133 struct processor_costs i386_cost = { /* 386 specific costs */
134 COSTS_N_INSNS (1), /* cost of an add instruction */
135 COSTS_N_INSNS (1), /* cost of a lea instruction */
136 COSTS_N_INSNS (3), /* variable shift costs */
137 COSTS_N_INSNS (2), /* constant shift costs */
138 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
139 COSTS_N_INSNS (6), /* HI */
140 COSTS_N_INSNS (6), /* SI */
141 COSTS_N_INSNS (6), /* DI */
142 COSTS_N_INSNS (6)}, /* other */
143 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
144 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
145 COSTS_N_INSNS (23), /* HI */
146 COSTS_N_INSNS (23), /* SI */
147 COSTS_N_INSNS (23), /* DI */
148 COSTS_N_INSNS (23)}, /* other */
149 COSTS_N_INSNS (3), /* cost of movsx */
150 COSTS_N_INSNS (2), /* cost of movzx */
151 15, /* "large" insn */
152 3, /* MOVE_RATIO */
153 4, /* cost for loading QImode using movzbl */
154 {2, 4, 2}, /* cost of loading integer registers
155 in QImode, HImode and SImode.
156 Relative to reg-reg move (2). */
157 {2, 4, 2}, /* cost of storing integer registers */
158 2, /* cost of reg,reg fld/fst */
159 {8, 8, 8}, /* cost of loading fp registers
160 in SFmode, DFmode and XFmode */
161 {8, 8, 8}, /* cost of storing fp registers
162 in SFmode, DFmode and XFmode */
163 2, /* cost of moving MMX register */
164 {4, 8}, /* cost of loading MMX registers
165 in SImode and DImode */
166 {4, 8}, /* cost of storing MMX registers
167 in SImode and DImode */
168 2, /* cost of moving SSE register */
169 {4, 8, 16}, /* cost of loading SSE registers
170 in SImode, DImode and TImode */
171 {4, 8, 16}, /* cost of storing SSE registers
172 in SImode, DImode and TImode */
173 3, /* MMX or SSE register to integer */
174 0, /* size of prefetch block */
175 0, /* number of parallel prefetches */
176 1, /* Branch cost */
177 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
178 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
179 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
180 COSTS_N_INSNS (22), /* cost of FABS instruction. */
181 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
182 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
183 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
184 DUMMY_STRINGOP_ALGS},
185 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
186 DUMMY_STRINGOP_ALGS},
187 };
188
189 static const
190 struct processor_costs i486_cost = { /* 486 specific costs */
191 COSTS_N_INSNS (1), /* cost of an add instruction */
192 COSTS_N_INSNS (1), /* cost of a lea instruction */
193 COSTS_N_INSNS (3), /* variable shift costs */
194 COSTS_N_INSNS (2), /* constant shift costs */
195 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
196 COSTS_N_INSNS (12), /* HI */
197 COSTS_N_INSNS (12), /* SI */
198 COSTS_N_INSNS (12), /* DI */
199 COSTS_N_INSNS (12)}, /* other */
200 1, /* cost of multiply per each bit set */
201 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
202 COSTS_N_INSNS (40), /* HI */
203 COSTS_N_INSNS (40), /* SI */
204 COSTS_N_INSNS (40), /* DI */
205 COSTS_N_INSNS (40)}, /* other */
206 COSTS_N_INSNS (3), /* cost of movsx */
207 COSTS_N_INSNS (2), /* cost of movzx */
208 15, /* "large" insn */
209 3, /* MOVE_RATIO */
210 4, /* cost for loading QImode using movzbl */
211 {2, 4, 2}, /* cost of loading integer registers
212 in QImode, HImode and SImode.
213 Relative to reg-reg move (2). */
214 {2, 4, 2}, /* cost of storing integer registers */
215 2, /* cost of reg,reg fld/fst */
216 {8, 8, 8}, /* cost of loading fp registers
217 in SFmode, DFmode and XFmode */
218 {8, 8, 8}, /* cost of storing fp registers
219 in SFmode, DFmode and XFmode */
220 2, /* cost of moving MMX register */
221 {4, 8}, /* cost of loading MMX registers
222 in SImode and DImode */
223 {4, 8}, /* cost of storing MMX registers
224 in SImode and DImode */
225 2, /* cost of moving SSE register */
226 {4, 8, 16}, /* cost of loading SSE registers
227 in SImode, DImode and TImode */
228 {4, 8, 16}, /* cost of storing SSE registers
229 in SImode, DImode and TImode */
230 3, /* MMX or SSE register to integer */
231 0, /* size of prefetch block */
232 0, /* number of parallel prefetches */
233 1, /* Branch cost */
234 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
235 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
236 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
237 COSTS_N_INSNS (3), /* cost of FABS instruction. */
238 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
239 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
240 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
241 DUMMY_STRINGOP_ALGS},
242 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
243 DUMMY_STRINGOP_ALGS}
244 };
245
246 static const
247 struct processor_costs pentium_cost = {
248 COSTS_N_INSNS (1), /* cost of an add instruction */
249 COSTS_N_INSNS (1), /* cost of a lea instruction */
250 COSTS_N_INSNS (4), /* variable shift costs */
251 COSTS_N_INSNS (1), /* constant shift costs */
252 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
253 COSTS_N_INSNS (11), /* HI */
254 COSTS_N_INSNS (11), /* SI */
255 COSTS_N_INSNS (11), /* DI */
256 COSTS_N_INSNS (11)}, /* other */
257 0, /* cost of multiply per each bit set */
258 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
259 COSTS_N_INSNS (25), /* HI */
260 COSTS_N_INSNS (25), /* SI */
261 COSTS_N_INSNS (25), /* DI */
262 COSTS_N_INSNS (25)}, /* other */
263 COSTS_N_INSNS (3), /* cost of movsx */
264 COSTS_N_INSNS (2), /* cost of movzx */
265 8, /* "large" insn */
266 6, /* MOVE_RATIO */
267 6, /* cost for loading QImode using movzbl */
268 {2, 4, 2}, /* cost of loading integer registers
269 in QImode, HImode and SImode.
270 Relative to reg-reg move (2). */
271 {2, 4, 2}, /* cost of storing integer registers */
272 2, /* cost of reg,reg fld/fst */
273 {2, 2, 6}, /* cost of loading fp registers
274 in SFmode, DFmode and XFmode */
275 {4, 4, 6}, /* cost of storing fp registers
276 in SFmode, DFmode and XFmode */
277 8, /* cost of moving MMX register */
278 {8, 8}, /* cost of loading MMX registers
279 in SImode and DImode */
280 {8, 8}, /* cost of storing MMX registers
281 in SImode and DImode */
282 2, /* cost of moving SSE register */
283 {4, 8, 16}, /* cost of loading SSE registers
284 in SImode, DImode and TImode */
285 {4, 8, 16}, /* cost of storing SSE registers
286 in SImode, DImode and TImode */
287 3, /* MMX or SSE register to integer */
288 0, /* size of prefetch block */
289 0, /* number of parallel prefetches */
290 2, /* Branch cost */
291 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
292 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
293 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
294 COSTS_N_INSNS (1), /* cost of FABS instruction. */
295 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
296 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
297 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
298 DUMMY_STRINGOP_ALGS},
299 {{libcall, {{-1, rep_prefix_4_byte}}},
300 DUMMY_STRINGOP_ALGS}
301 };
302
303 static const
304 struct processor_costs pentiumpro_cost = {
305 COSTS_N_INSNS (1), /* cost of an add instruction */
306 COSTS_N_INSNS (1), /* cost of a lea instruction */
307 COSTS_N_INSNS (1), /* variable shift costs */
308 COSTS_N_INSNS (1), /* constant shift costs */
309 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
310 COSTS_N_INSNS (4), /* HI */
311 COSTS_N_INSNS (4), /* SI */
312 COSTS_N_INSNS (4), /* DI */
313 COSTS_N_INSNS (4)}, /* other */
314 0, /* cost of multiply per each bit set */
315 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
316 COSTS_N_INSNS (17), /* HI */
317 COSTS_N_INSNS (17), /* SI */
318 COSTS_N_INSNS (17), /* DI */
319 COSTS_N_INSNS (17)}, /* other */
320 COSTS_N_INSNS (1), /* cost of movsx */
321 COSTS_N_INSNS (1), /* cost of movzx */
322 8, /* "large" insn */
323 6, /* MOVE_RATIO */
324 2, /* cost for loading QImode using movzbl */
325 {4, 4, 4}, /* cost of loading integer registers
326 in QImode, HImode and SImode.
327 Relative to reg-reg move (2). */
328 {2, 2, 2}, /* cost of storing integer registers */
329 2, /* cost of reg,reg fld/fst */
330 {2, 2, 6}, /* cost of loading fp registers
331 in SFmode, DFmode and XFmode */
332 {4, 4, 6}, /* cost of storing fp registers
333 in SFmode, DFmode and XFmode */
334 2, /* cost of moving MMX register */
335 {2, 2}, /* cost of loading MMX registers
336 in SImode and DImode */
337 {2, 2}, /* cost of storing MMX registers
338 in SImode and DImode */
339 2, /* cost of moving SSE register */
340 {2, 2, 8}, /* cost of loading SSE registers
341 in SImode, DImode and TImode */
342 {2, 2, 8}, /* cost of storing SSE registers
343 in SImode, DImode and TImode */
344 3, /* MMX or SSE register to integer */
345 32, /* size of prefetch block */
346 6, /* number of parallel prefetches */
347 2, /* Branch cost */
348 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
349 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
350 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
351 COSTS_N_INSNS (2), /* cost of FABS instruction. */
352 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
353 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
354 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes (we ensure
355 the alignment). For small blocks an inline loop is still a noticeable win; for bigger
356 blocks either rep movsl or rep movsb is the way to go. Rep movsb apparently has a
357 more expensive startup time in the CPU, but after 4K the difference is down in the noise.
358 */
359 {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
360 {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
361 DUMMY_STRINGOP_ALGS},
362 {{rep_prefix_4_byte, {{1024, unrolled_loop},
363 {8192, rep_prefix_4_byte}, {-1, libcall}}},
364 DUMMY_STRINGOP_ALGS}
365 };
366
367 static const
368 struct processor_costs geode_cost = {
369 COSTS_N_INSNS (1), /* cost of an add instruction */
370 COSTS_N_INSNS (1), /* cost of a lea instruction */
371 COSTS_N_INSNS (2), /* variable shift costs */
372 COSTS_N_INSNS (1), /* constant shift costs */
373 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
374 COSTS_N_INSNS (4), /* HI */
375 COSTS_N_INSNS (7), /* SI */
376 COSTS_N_INSNS (7), /* DI */
377 COSTS_N_INSNS (7)}, /* other */
378 0, /* cost of multiply per each bit set */
379 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
380 COSTS_N_INSNS (23), /* HI */
381 COSTS_N_INSNS (39), /* SI */
382 COSTS_N_INSNS (39), /* DI */
383 COSTS_N_INSNS (39)}, /* other */
384 COSTS_N_INSNS (1), /* cost of movsx */
385 COSTS_N_INSNS (1), /* cost of movzx */
386 8, /* "large" insn */
387 4, /* MOVE_RATIO */
388 1, /* cost for loading QImode using movzbl */
389 {1, 1, 1}, /* cost of loading integer registers
390 in QImode, HImode and SImode.
391 Relative to reg-reg move (2). */
392 {1, 1, 1}, /* cost of storing integer registers */
393 1, /* cost of reg,reg fld/fst */
394 {1, 1, 1}, /* cost of loading fp registers
395 in SFmode, DFmode and XFmode */
396 {4, 6, 6}, /* cost of storing fp registers
397 in SFmode, DFmode and XFmode */
398
399 1, /* cost of moving MMX register */
400 {1, 1}, /* cost of loading MMX registers
401 in SImode and DImode */
402 {1, 1}, /* cost of storing MMX registers
403 in SImode and DImode */
404 1, /* cost of moving SSE register */
405 {1, 1, 1}, /* cost of loading SSE registers
406 in SImode, DImode and TImode */
407 {1, 1, 1}, /* cost of storing SSE registers
408 in SImode, DImode and TImode */
409 1, /* MMX or SSE register to integer */
410 32, /* size of prefetch block */
411 1, /* number of parallel prefetches */
412 1, /* Branch cost */
413 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
414 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
415 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
416 COSTS_N_INSNS (1), /* cost of FABS instruction. */
417 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
418 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
419 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
420 DUMMY_STRINGOP_ALGS},
421 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
422 DUMMY_STRINGOP_ALGS}
423 };
424
425 static const
426 struct processor_costs k6_cost = {
427 COSTS_N_INSNS (1), /* cost of an add instruction */
428 COSTS_N_INSNS (2), /* cost of a lea instruction */
429 COSTS_N_INSNS (1), /* variable shift costs */
430 COSTS_N_INSNS (1), /* constant shift costs */
431 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
432 COSTS_N_INSNS (3), /* HI */
433 COSTS_N_INSNS (3), /* SI */
434 COSTS_N_INSNS (3), /* DI */
435 COSTS_N_INSNS (3)}, /* other */
436 0, /* cost of multiply per each bit set */
437 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
438 COSTS_N_INSNS (18), /* HI */
439 COSTS_N_INSNS (18), /* SI */
440 COSTS_N_INSNS (18), /* DI */
441 COSTS_N_INSNS (18)}, /* other */
442 COSTS_N_INSNS (2), /* cost of movsx */
443 COSTS_N_INSNS (2), /* cost of movzx */
444 8, /* "large" insn */
445 4, /* MOVE_RATIO */
446 3, /* cost for loading QImode using movzbl */
447 {4, 5, 4}, /* cost of loading integer registers
448 in QImode, HImode and SImode.
449 Relative to reg-reg move (2). */
450 {2, 3, 2}, /* cost of storing integer registers */
451 4, /* cost of reg,reg fld/fst */
452 {6, 6, 6}, /* cost of loading fp registers
453 in SFmode, DFmode and XFmode */
454 {4, 4, 4}, /* cost of storing fp registers
455 in SFmode, DFmode and XFmode */
456 2, /* cost of moving MMX register */
457 {2, 2}, /* cost of loading MMX registers
458 in SImode and DImode */
459 {2, 2}, /* cost of storing MMX registers
460 in SImode and DImode */
461 2, /* cost of moving SSE register */
462 {2, 2, 8}, /* cost of loading SSE registers
463 in SImode, DImode and TImode */
464 {2, 2, 8}, /* cost of storing SSE registers
465 in SImode, DImode and TImode */
466 6, /* MMX or SSE register to integer */
467 32, /* size of prefetch block */
468 1, /* number of parallel prefetches */
469 1, /* Branch cost */
470 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
471 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
472 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
473 COSTS_N_INSNS (2), /* cost of FABS instruction. */
474 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
475 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
476 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
477 DUMMY_STRINGOP_ALGS},
478 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
479 DUMMY_STRINGOP_ALGS}
480 };
481
482 static const
483 struct processor_costs athlon_cost = {
484 COSTS_N_INSNS (1), /* cost of an add instruction */
485 COSTS_N_INSNS (2), /* cost of a lea instruction */
486 COSTS_N_INSNS (1), /* variable shift costs */
487 COSTS_N_INSNS (1), /* constant shift costs */
488 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
489 COSTS_N_INSNS (5), /* HI */
490 COSTS_N_INSNS (5), /* SI */
491 COSTS_N_INSNS (5), /* DI */
492 COSTS_N_INSNS (5)}, /* other */
493 0, /* cost of multiply per each bit set */
494 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
495 COSTS_N_INSNS (26), /* HI */
496 COSTS_N_INSNS (42), /* SI */
497 COSTS_N_INSNS (74), /* DI */
498 COSTS_N_INSNS (74)}, /* other */
499 COSTS_N_INSNS (1), /* cost of movsx */
500 COSTS_N_INSNS (1), /* cost of movzx */
501 8, /* "large" insn */
502 9, /* MOVE_RATIO */
503 4, /* cost for loading QImode using movzbl */
504 {3, 4, 3}, /* cost of loading integer registers
505 in QImode, HImode and SImode.
506 Relative to reg-reg move (2). */
507 {3, 4, 3}, /* cost of storing integer registers */
508 4, /* cost of reg,reg fld/fst */
509 {4, 4, 12}, /* cost of loading fp registers
510 in SFmode, DFmode and XFmode */
511 {6, 6, 8}, /* cost of storing fp registers
512 in SFmode, DFmode and XFmode */
513 2, /* cost of moving MMX register */
514 {4, 4}, /* cost of loading MMX registers
515 in SImode and DImode */
516 {4, 4}, /* cost of storing MMX registers
517 in SImode and DImode */
518 2, /* cost of moving SSE register */
519 {4, 4, 6}, /* cost of loading SSE registers
520 in SImode, DImode and TImode */
521 {4, 4, 5}, /* cost of storing SSE registers
522 in SImode, DImode and TImode */
523 5, /* MMX or SSE register to integer */
524 64, /* size of prefetch block */
525 6, /* number of parallel prefetches */
526 5, /* Branch cost */
527 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
528 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
529 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
530 COSTS_N_INSNS (2), /* cost of FABS instruction. */
531 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
532 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
533 /* For some reason, Athlon deals better with the REP prefix (relative to loops)
534 than K8 does. Alignment becomes important after 8 bytes for memcpy and
535 128 bytes for memset. */
536 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
537 DUMMY_STRINGOP_ALGS},
538 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
539 DUMMY_STRINGOP_ALGS}
540 };
541
542 static const
543 struct processor_costs k8_cost = {
544 COSTS_N_INSNS (1), /* cost of an add instruction */
545 COSTS_N_INSNS (2), /* cost of a lea instruction */
546 COSTS_N_INSNS (1), /* variable shift costs */
547 COSTS_N_INSNS (1), /* constant shift costs */
548 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
549 COSTS_N_INSNS (4), /* HI */
550 COSTS_N_INSNS (3), /* SI */
551 COSTS_N_INSNS (4), /* DI */
552 COSTS_N_INSNS (5)}, /* other */
553 0, /* cost of multiply per each bit set */
554 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
555 COSTS_N_INSNS (26), /* HI */
556 COSTS_N_INSNS (42), /* SI */
557 COSTS_N_INSNS (74), /* DI */
558 COSTS_N_INSNS (74)}, /* other */
559 COSTS_N_INSNS (1), /* cost of movsx */
560 COSTS_N_INSNS (1), /* cost of movzx */
561 8, /* "large" insn */
562 9, /* MOVE_RATIO */
563 4, /* cost for loading QImode using movzbl */
564 {3, 4, 3}, /* cost of loading integer registers
565 in QImode, HImode and SImode.
566 Relative to reg-reg move (2). */
567 {3, 4, 3}, /* cost of storing integer registers */
568 4, /* cost of reg,reg fld/fst */
569 {4, 4, 12}, /* cost of loading fp registers
570 in SFmode, DFmode and XFmode */
571 {6, 6, 8}, /* cost of storing fp registers
572 in SFmode, DFmode and XFmode */
573 2, /* cost of moving MMX register */
574 {3, 3}, /* cost of loading MMX registers
575 in SImode and DImode */
576 {4, 4}, /* cost of storing MMX registers
577 in SImode and DImode */
578 2, /* cost of moving SSE register */
579 {4, 3, 6}, /* cost of loading SSE registers
580 in SImode, DImode and TImode */
581 {4, 4, 5}, /* cost of storing SSE registers
582 in SImode, DImode and TImode */
583 5, /* MMX or SSE register to integer */
584 64, /* size of prefetch block */
585 /* New AMD processors never drop prefetches; if they cannot be performed
586 immediately, they are queued. We set the number of simultaneous prefetches
587 to a large constant to reflect this (it is probably not a good idea to leave
588 the number of prefetches completely unlimited, as their execution also takes
589 some time). */
590 100, /* number of parallel prefetches */
591 5, /* Branch cost */
592 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
593 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
594 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
595 COSTS_N_INSNS (2), /* cost of FABS instruction. */
596 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
597 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
598 /* K8 has an optimized REP instruction for medium-sized blocks, but for very small
599 blocks it is better to use a loop. For large blocks, a libcall can do
600 nontemporal accesses and beat inline code considerably. */
601 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
602 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
603 {{libcall, {{8, loop}, {24, unrolled_loop},
604 {2048, rep_prefix_4_byte}, {-1, libcall}}},
605 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
606 };
607
608 struct processor_costs amdfam10_cost = {
609 COSTS_N_INSNS (1), /* cost of an add instruction */
610 COSTS_N_INSNS (2), /* cost of a lea instruction */
611 COSTS_N_INSNS (1), /* variable shift costs */
612 COSTS_N_INSNS (1), /* constant shift costs */
613 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
614 COSTS_N_INSNS (4), /* HI */
615 COSTS_N_INSNS (3), /* SI */
616 COSTS_N_INSNS (4), /* DI */
617 COSTS_N_INSNS (5)}, /* other */
618 0, /* cost of multiply per each bit set */
619 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
620 COSTS_N_INSNS (35), /* HI */
621 COSTS_N_INSNS (51), /* SI */
622 COSTS_N_INSNS (83), /* DI */
623 COSTS_N_INSNS (83)}, /* other */
624 COSTS_N_INSNS (1), /* cost of movsx */
625 COSTS_N_INSNS (1), /* cost of movzx */
626 8, /* "large" insn */
627 9, /* MOVE_RATIO */
628 4, /* cost for loading QImode using movzbl */
629 {3, 4, 3}, /* cost of loading integer registers
630 in QImode, HImode and SImode.
631 Relative to reg-reg move (2). */
632 {3, 4, 3}, /* cost of storing integer registers */
633 4, /* cost of reg,reg fld/fst */
634 {4, 4, 12}, /* cost of loading fp registers
635 in SFmode, DFmode and XFmode */
636 {6, 6, 8}, /* cost of storing fp registers
637 in SFmode, DFmode and XFmode */
638 2, /* cost of moving MMX register */
639 {3, 3}, /* cost of loading MMX registers
640 in SImode and DImode */
641 {4, 4}, /* cost of storing MMX registers
642 in SImode and DImode */
643 2, /* cost of moving SSE register */
644 {4, 4, 3}, /* cost of loading SSE registers
645 in SImode, DImode and TImode */
646 {4, 4, 5}, /* cost of storing SSE registers
647 in SImode, DImode and TImode */
648 3, /* MMX or SSE register to integer */
649 /* On K8
650 MOVD reg64, xmmreg Double FSTORE 4
651 MOVD reg32, xmmreg Double FSTORE 4
652 On AMDFAM10
653 MOVD reg64, xmmreg Double FADD 3
654 1/1 1/1
655 MOVD reg32, xmmreg Double FADD 3
656 1/1 1/1 */
657 64, /* size of prefetch block */
658 /* New AMD processors never drop prefetches; if they cannot be performed
659 immediately, they are queued. We set the number of simultaneous prefetches
660 to a large constant to reflect this (it is probably not a good idea to leave
661 the number of prefetches completely unlimited, as their execution also takes
662 some time). */
663 100, /* number of parallel prefetches */
664 5, /* Branch cost */
665 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
666 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
667 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
668 COSTS_N_INSNS (2), /* cost of FABS instruction. */
669 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
670 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
671
672 /* AMDFAM10 has an optimized REP instruction for medium-sized blocks, but for
673 very small blocks it is better to use a loop. For large blocks, a libcall can
674 do nontemporal accesses and beat inline code considerably. */
675 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
676 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
677 {{libcall, {{8, loop}, {24, unrolled_loop},
678 {2048, rep_prefix_4_byte}, {-1, libcall}}},
679 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
680 };
681
682 static const
683 struct processor_costs pentium4_cost = {
684 COSTS_N_INSNS (1), /* cost of an add instruction */
685 COSTS_N_INSNS (3), /* cost of a lea instruction */
686 COSTS_N_INSNS (4), /* variable shift costs */
687 COSTS_N_INSNS (4), /* constant shift costs */
688 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
689 COSTS_N_INSNS (15), /* HI */
690 COSTS_N_INSNS (15), /* SI */
691 COSTS_N_INSNS (15), /* DI */
692 COSTS_N_INSNS (15)}, /* other */
693 0, /* cost of multiply per each bit set */
694 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
695 COSTS_N_INSNS (56), /* HI */
696 COSTS_N_INSNS (56), /* SI */
697 COSTS_N_INSNS (56), /* DI */
698 COSTS_N_INSNS (56)}, /* other */
699 COSTS_N_INSNS (1), /* cost of movsx */
700 COSTS_N_INSNS (1), /* cost of movzx */
701 16, /* "large" insn */
702 6, /* MOVE_RATIO */
703 2, /* cost for loading QImode using movzbl */
704 {4, 5, 4}, /* cost of loading integer registers
705 in QImode, HImode and SImode.
706 Relative to reg-reg move (2). */
707 {2, 3, 2}, /* cost of storing integer registers */
708 2, /* cost of reg,reg fld/fst */
709 {2, 2, 6}, /* cost of loading fp registers
710 in SFmode, DFmode and XFmode */
711 {4, 4, 6}, /* cost of storing fp registers
712 in SFmode, DFmode and XFmode */
713 2, /* cost of moving MMX register */
714 {2, 2}, /* cost of loading MMX registers
715 in SImode and DImode */
716 {2, 2}, /* cost of storing MMX registers
717 in SImode and DImode */
718 12, /* cost of moving SSE register */
719 {12, 12, 12}, /* cost of loading SSE registers
720 in SImode, DImode and TImode */
721 {2, 2, 8}, /* cost of storing SSE registers
722 in SImode, DImode and TImode */
723 10, /* MMX or SSE register to integer */
724 64, /* size of prefetch block */
725 6, /* number of parallel prefetches */
726 2, /* Branch cost */
727 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
728 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
729 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
730 COSTS_N_INSNS (2), /* cost of FABS instruction. */
731 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
732 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
733 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
734 DUMMY_STRINGOP_ALGS},
735 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
736 {-1, libcall}}},
737 DUMMY_STRINGOP_ALGS},
738 };
739
740 static const
741 struct processor_costs nocona_cost = {
742 COSTS_N_INSNS (1), /* cost of an add instruction */
743 COSTS_N_INSNS (1), /* cost of a lea instruction */
744 COSTS_N_INSNS (1), /* variable shift costs */
745 COSTS_N_INSNS (1), /* constant shift costs */
746 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
747 COSTS_N_INSNS (10), /* HI */
748 COSTS_N_INSNS (10), /* SI */
749 COSTS_N_INSNS (10), /* DI */
750 COSTS_N_INSNS (10)}, /* other */
751 0, /* cost of multiply per each bit set */
752 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
753 COSTS_N_INSNS (66), /* HI */
754 COSTS_N_INSNS (66), /* SI */
755 COSTS_N_INSNS (66), /* DI */
756 COSTS_N_INSNS (66)}, /* other */
757 COSTS_N_INSNS (1), /* cost of movsx */
758 COSTS_N_INSNS (1), /* cost of movzx */
759 16, /* "large" insn */
760 17, /* MOVE_RATIO */
761 4, /* cost for loading QImode using movzbl */
762 {4, 4, 4}, /* cost of loading integer registers
763 in QImode, HImode and SImode.
764 Relative to reg-reg move (2). */
765 {4, 4, 4}, /* cost of storing integer registers */
766 3, /* cost of reg,reg fld/fst */
767 {12, 12, 12}, /* cost of loading fp registers
768 in SFmode, DFmode and XFmode */
769 {4, 4, 4}, /* cost of storing fp registers
770 in SFmode, DFmode and XFmode */
771 6, /* cost of moving MMX register */
772 {12, 12}, /* cost of loading MMX registers
773 in SImode and DImode */
774 {12, 12}, /* cost of storing MMX registers
775 in SImode and DImode */
776 6, /* cost of moving SSE register */
777 {12, 12, 12}, /* cost of loading SSE registers
778 in SImode, DImode and TImode */
779 {12, 12, 12}, /* cost of storing SSE registers
780 in SImode, DImode and TImode */
781 8, /* MMX or SSE register to integer */
782 128, /* size of prefetch block */
783 8, /* number of parallel prefetches */
784 1, /* Branch cost */
785 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
786 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
787 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
788 COSTS_N_INSNS (3), /* cost of FABS instruction. */
789 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
790 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
791 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
792 {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
793 {100000, unrolled_loop}, {-1, libcall}}}},
794 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
795 {-1, libcall}}},
796 {libcall, {{24, loop}, {64, unrolled_loop},
797 {8192, rep_prefix_8_byte}, {-1, libcall}}}}
798 };
799
800 static const
801 struct processor_costs core2_cost = {
802 COSTS_N_INSNS (1), /* cost of an add instruction */
803 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
804 COSTS_N_INSNS (1), /* variable shift costs */
805 COSTS_N_INSNS (1), /* constant shift costs */
806 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
807 COSTS_N_INSNS (3), /* HI */
808 COSTS_N_INSNS (3), /* SI */
809 COSTS_N_INSNS (3), /* DI */
810 COSTS_N_INSNS (3)}, /* other */
811 0, /* cost of multiply per each bit set */
812 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
813 COSTS_N_INSNS (22), /* HI */
814 COSTS_N_INSNS (22), /* SI */
815 COSTS_N_INSNS (22), /* DI */
816 COSTS_N_INSNS (22)}, /* other */
817 COSTS_N_INSNS (1), /* cost of movsx */
818 COSTS_N_INSNS (1), /* cost of movzx */
819 8, /* "large" insn */
820 16, /* MOVE_RATIO */
821 2, /* cost for loading QImode using movzbl */
822 {6, 6, 6}, /* cost of loading integer registers
823 in QImode, HImode and SImode.
824 Relative to reg-reg move (2). */
825 {4, 4, 4}, /* cost of storing integer registers */
826 2, /* cost of reg,reg fld/fst */
827 {6, 6, 6}, /* cost of loading fp registers
828 in SFmode, DFmode and XFmode */
829 {4, 4, 4}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
830 2, /* cost of moving MMX register */
831 {6, 6}, /* cost of loading MMX registers
832 in SImode and DImode */
833 {4, 4}, /* cost of storing MMX registers
834 in SImode and DImode */
835 2, /* cost of moving SSE register */
836 {6, 6, 6}, /* cost of loading SSE registers
837 in SImode, DImode and TImode */
838 {4, 4, 4}, /* cost of storing SSE registers
839 in SImode, DImode and TImode */
840 2, /* MMX or SSE register to integer */
841 128, /* size of prefetch block */
842 8, /* number of parallel prefetches */
843 3, /* Branch cost */
844 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
845 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
846 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
847 COSTS_N_INSNS (1), /* cost of FABS instruction. */
848 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
849 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
850 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
851 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
852 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
853 {{libcall, {{8, loop}, {15, unrolled_loop},
854 {2048, rep_prefix_4_byte}, {-1, libcall}}},
855 {libcall, {{24, loop}, {32, unrolled_loop},
856 {8192, rep_prefix_8_byte}, {-1, libcall}}}}
857 };
858
859 /* Generic64 should produce code tuned for Nocona and K8. */
860 static const
861 struct processor_costs generic64_cost = {
862 COSTS_N_INSNS (1), /* cost of an add instruction */
863 /* On all chips taken into consideration, lea takes 2 cycles or more. With
864 this cost, however, our current implementation of synth_mult results in the
865 use of unnecessary temporary registers, causing a regression on several
866 SPECfp benchmarks. */
867 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
868 COSTS_N_INSNS (1), /* variable shift costs */
869 COSTS_N_INSNS (1), /* constant shift costs */
870 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
871 COSTS_N_INSNS (4), /* HI */
872 COSTS_N_INSNS (3), /* SI */
873 COSTS_N_INSNS (4), /* DI */
874 COSTS_N_INSNS (2)}, /* other */
875 0, /* cost of multiply per each bit set */
876 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
877 COSTS_N_INSNS (26), /* HI */
878 COSTS_N_INSNS (42), /* SI */
879 COSTS_N_INSNS (74), /* DI */
880 COSTS_N_INSNS (74)}, /* other */
881 COSTS_N_INSNS (1), /* cost of movsx */
882 COSTS_N_INSNS (1), /* cost of movzx */
883 8, /* "large" insn */
884 17, /* MOVE_RATIO */
885 4, /* cost for loading QImode using movzbl */
886 {4, 4, 4}, /* cost of loading integer registers
887 in QImode, HImode and SImode.
888 Relative to reg-reg move (2). */
889 {4, 4, 4}, /* cost of storing integer registers */
890 4, /* cost of reg,reg fld/fst */
891 {12, 12, 12}, /* cost of loading fp registers
892 in SFmode, DFmode and XFmode */
893 {6, 6, 8}, /* cost of storing fp registers
894 in SFmode, DFmode and XFmode */
895 2, /* cost of moving MMX register */
896 {8, 8}, /* cost of loading MMX registers
897 in SImode and DImode */
898 {8, 8}, /* cost of storing MMX registers
899 in SImode and DImode */
900 2, /* cost of moving SSE register */
901 {8, 8, 8}, /* cost of loading SSE registers
902 in SImode, DImode and TImode */
903 {8, 8, 8}, /* cost of storing SSE registers
904 in SImode, DImode and TImode */
905 5, /* MMX or SSE register to integer */
906 64, /* size of prefetch block */
907 6, /* number of parallel prefetches */
908 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this value
909 is increased to the perhaps more appropriate value of 5. */
910 3, /* Branch cost */
911 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
912 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
913 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
914 COSTS_N_INSNS (8), /* cost of FABS instruction. */
915 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
916 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
917 {DUMMY_STRINGOP_ALGS,
918 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
919 {DUMMY_STRINGOP_ALGS,
920 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
921 };
922
923 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
924 static const
925 struct processor_costs generic32_cost = {
926 COSTS_N_INSNS (1), /* cost of an add instruction */
927 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
928 COSTS_N_INSNS (1), /* variable shift costs */
929 COSTS_N_INSNS (1), /* constant shift costs */
930 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
931 COSTS_N_INSNS (4), /* HI */
932 COSTS_N_INSNS (3), /* SI */
933 COSTS_N_INSNS (4), /* DI */
934 COSTS_N_INSNS (2)}, /* other */
935 0, /* cost of multiply per each bit set */
936 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
937 COSTS_N_INSNS (26), /* HI */
938 COSTS_N_INSNS (42), /* SI */
939 COSTS_N_INSNS (74), /* DI */
940 COSTS_N_INSNS (74)}, /* other */
941 COSTS_N_INSNS (1), /* cost of movsx */
942 COSTS_N_INSNS (1), /* cost of movzx */
943 8, /* "large" insn */
944 17, /* MOVE_RATIO */
945 4, /* cost for loading QImode using movzbl */
946 {4, 4, 4}, /* cost of loading integer registers
947 in QImode, HImode and SImode.
948 Relative to reg-reg move (2). */
949 {4, 4, 4}, /* cost of storing integer registers */
950 4, /* cost of reg,reg fld/fst */
951 {12, 12, 12}, /* cost of loading fp registers
952 in SFmode, DFmode and XFmode */
953 {6, 6, 8}, /* cost of storing fp registers
954 in SFmode, DFmode and XFmode */
955 2, /* cost of moving MMX register */
956 {8, 8}, /* cost of loading MMX registers
957 in SImode and DImode */
958 {8, 8}, /* cost of storing MMX registers
959 in SImode and DImode */
960 2, /* cost of moving SSE register */
961 {8, 8, 8}, /* cost of loading SSE registers
962 in SImode, DImode and TImode */
963 {8, 8, 8}, /* cost of storing SSE registers
964 in SImode, DImode and TImode */
965 5, /* MMX or SSE register to integer */
966 64, /* size of prefetch block */
967 6, /* number of parallel prefetches */
968 3, /* Branch cost */
969 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
970 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
971 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
972 COSTS_N_INSNS (8), /* cost of FABS instruction. */
973 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
974 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
975 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
976 DUMMY_STRINGOP_ALGS},
977 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
978 DUMMY_STRINGOP_ALGS},
979 };
980
981 const struct processor_costs *ix86_cost = &pentium_cost;
982
983 /* Processor feature/optimization bitmasks. */
984 #define m_386 (1<<PROCESSOR_I386)
985 #define m_486 (1<<PROCESSOR_I486)
986 #define m_PENT (1<<PROCESSOR_PENTIUM)
987 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
988 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
989 #define m_NOCONA (1<<PROCESSOR_NOCONA)
990 #define m_CORE2 (1<<PROCESSOR_CORE2)
991
992 #define m_GEODE (1<<PROCESSOR_GEODE)
993 #define m_K6 (1<<PROCESSOR_K6)
994 #define m_K6_GEODE (m_K6 | m_GEODE)
995 #define m_K8 (1<<PROCESSOR_K8)
996 #define m_ATHLON (1<<PROCESSOR_ATHLON)
997 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
998 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
999 #define m_ATHLON_K8_AMDFAM10 (m_K8 | m_ATHLON | m_AMDFAM10)
1000
1001 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1002 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1003
1004 /* Generic instruction choice should be common subset of supported CPUs
1005 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1006 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
1007
1008 /* Feature tests against the various tunings. */
1009 unsigned int ix86_tune_features[X86_TUNE_LAST] = {
1010 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1011 negatively, so enabling it for Generic64 seems like a good code size
1012 tradeoff. We can't enable it for 32-bit generic because it does not
1013 work well with PPro-based chips. */
1014 m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC64,
1015
1016 /* X86_TUNE_PUSH_MEMORY */
1017 m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4
1018 | m_NOCONA | m_CORE2 | m_GENERIC,
1019
1020 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1021 m_486 | m_PENT,
1022
1023 /* X86_TUNE_USE_BIT_TEST */
1024 m_386,
1025
1026 /* X86_TUNE_UNROLL_STRLEN */
1027 m_486 | m_PENT | m_PPRO | m_ATHLON_K8_AMDFAM10 | m_K6 | m_CORE2 | m_GENERIC,
1028
1029 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1030 m_PPRO | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_GENERIC,
1031
1032 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1033 on simulation results. But after P4 was made, no performance benefit
1034 was observed with branch hints; they also increase code size.
1035 As a result, icc never generates branch hints. */
1036 0,
1037
1038 /* X86_TUNE_DOUBLE_WITH_ADD */
1039 ~m_386,
1040
1041 /* X86_TUNE_USE_SAHF */
1042 m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
1043 | m_NOCONA | m_CORE2 | m_GENERIC,
1044
1045 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1046 partial dependencies. */
1047 m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA
1048 | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1049
1050 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1051 register stalls on the Generic32 compilation setting as well. However,
1052 in the current implementation partial register stalls are not eliminated
1053 very well - they can be introduced via subregs synthesized by combine
1054 and can happen in caller/callee saving sequences. Because this option
1055 pays back little on PPro-based chips and conflicts with the partial reg
1056 dependencies used by Athlon/P4-based chips, it is better to leave it off
1057 for generic32 for now. */
1058 m_PPRO,
1059
1060 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1061 m_CORE2 | m_GENERIC,
1062
1063 /* X86_TUNE_USE_HIMODE_FIOP */
1064 m_386 | m_486 | m_K6_GEODE,
1065
1066 /* X86_TUNE_USE_SIMODE_FIOP */
1067 ~(m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT | m_CORE2 | m_GENERIC),
1068
1069 /* X86_TUNE_USE_MOV0 */
1070 m_K6,
1071
1072 /* X86_TUNE_USE_CLTD */
1073 ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC),
1074
1075 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1076 m_PENT4,
1077
1078 /* X86_TUNE_SPLIT_LONG_MOVES */
1079 m_PPRO,
1080
1081 /* X86_TUNE_READ_MODIFY_WRITE */
1082 ~m_PENT,
1083
1084 /* X86_TUNE_READ_MODIFY */
1085 ~(m_PENT | m_PPRO),
1086
1087 /* X86_TUNE_PROMOTE_QIMODE */
1088 m_K6_GEODE | m_PENT | m_386 | m_486 | m_ATHLON_K8_AMDFAM10 | m_CORE2
1089 | m_GENERIC /* | m_PENT4 ? */,
1090
1091 /* X86_TUNE_FAST_PREFIX */
1092 ~(m_PENT | m_486 | m_386),
1093
1094 /* X86_TUNE_SINGLE_STRINGOP */
1095 m_386 | m_PENT4 | m_NOCONA,
1096
1097 /* X86_TUNE_QIMODE_MATH */
1098 ~0,
1099
1100 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1101 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL, this option
1102 might be considered for Generic32 if our scheme for avoiding partial
1103 stalls were more effective. */
1104 ~m_PPRO,
1105
1106 /* X86_TUNE_PROMOTE_QI_REGS */
1107 0,
1108
1109 /* X86_TUNE_PROMOTE_HI_REGS */
1110 m_PPRO,
1111
1112 /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop. */
1113 m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1114
1115 /* X86_TUNE_ADD_ESP_8 */
1116 m_ATHLON_K8_AMDFAM10 | m_PPRO | m_K6_GEODE | m_386
1117 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1118
1119 /* X86_TUNE_SUB_ESP_4 */
1120 m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1121
1122 /* X86_TUNE_SUB_ESP_8 */
1123 m_ATHLON_K8_AMDFAM10 | m_PPRO | m_386 | m_486
1124 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1125
1126 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1127 for DFmode copies */
1128 ~(m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1129 | m_GENERIC | m_GEODE),
1130
1131 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1132 m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1133
1134 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1135 conflict here between PPro/Pentium4-based chips that treat 128-bit
1136 SSE registers as single units and K8-based chips that divide SSE
1137 registers into two 64-bit halves. This knob promotes all store destinations
1138 to 128 bits to allow register renaming on 128-bit SSE units, but usually
1139 results in one extra micro-op on 64-bit SSE units. Experimental results
1140 show that disabling this option on P4 brings over a 20% SPECfp regression,
1141 while enabling it on K8 brings roughly a 2.4% regression that can be partly
1142 masked by careful scheduling of moves. */
1143 m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC | m_AMDFAM10,
1144
1145 /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
1146 m_AMDFAM10,
1147
1148 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1149 are resolved on SSE register parts instead of whole registers, so we may
1150 maintain just lower part of scalar values in proper format leaving the
1151 upper part undefined. */
1152 m_ATHLON_K8,
1153
1154 /* X86_TUNE_SSE_TYPELESS_STORES */
1155 m_ATHLON_K8_AMDFAM10,
1156
1157 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1158 m_PPRO | m_PENT4 | m_NOCONA,
1159
1160 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1161 m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1162
1163 /* X86_TUNE_PROLOGUE_USING_MOVE */
1164 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1165
1166 /* X86_TUNE_EPILOGUE_USING_MOVE */
1167 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1168
1169 /* X86_TUNE_SHIFT1 */
1170 ~m_486,
1171
1172 /* X86_TUNE_USE_FFREEP */
1173 m_ATHLON_K8_AMDFAM10,
1174
1175 /* X86_TUNE_INTER_UNIT_MOVES */
1176 ~(m_ATHLON_K8_AMDFAM10 | m_GENERIC),
1177
1178 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1179 than 4 branch instructions in a 16-byte window. */
1180 m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1181
1182 /* X86_TUNE_SCHEDULE */
1183 m_PPRO | m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT | m_CORE2 | m_GENERIC,
1184
1185 /* X86_TUNE_USE_BT */
1186 m_ATHLON_K8_AMDFAM10,
1187
1188 /* X86_TUNE_USE_INCDEC */
1189 ~(m_PENT4 | m_NOCONA | m_GENERIC),
1190
1191 /* X86_TUNE_PAD_RETURNS */
1192 m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC,
1193
1194 /* X86_TUNE_EXT_80387_CONSTANTS */
1195 m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC,
1196
1197 /* X86_TUNE_SHORTEN_X87_SSE */
1198 ~m_K8,
1199
1200 /* X86_TUNE_AVOID_VECTOR_DECODE */
1201 m_K8 | m_GENERIC64,
1202
1203 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have the same latency for HImode
1204 and SImode multiply, but the 386 and 486 do HImode multiply faster. */
1205 ~(m_386 | m_486),
1206
1207 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
1208 vector path on AMD machines. */
1209 m_K8 | m_GENERIC64 | m_AMDFAM10,
1210
1211 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
1212 machines. */
1213 m_K8 | m_GENERIC64 | m_AMDFAM10,
1214
1215 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
1216 than a MOV. */
1217 m_PENT,
1218
1219 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1220 but one byte longer. */
1221 m_PENT,
1222
1223 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with a memory
1224 operand that cannot be represented using a modRM byte. The XOR
1225 replacement is long decoded, so this split helps here as well. */
1226 m_K6,
1227 };
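
/* Editor's note: an illustrative sketch, not part of the original file, of
   how the per-CPU masks above are meant to be consulted; the helper and its
   tune_mask parameter are hypothetical, the real tests being done through
   the TARGET_* macros in i386.h.

     static inline int
     example_tune_p (unsigned int feature_mask, unsigned int tune_mask)
     {
       return (feature_mask & tune_mask) != 0;
     }

   For instance, example_tune_p (ix86_tune_features[X86_TUNE_USE_INCDEC],
   m_K8) is nonzero, while the same test with m_PENT4 is zero, matching the
   ~(m_PENT4 | m_NOCONA | m_GENERIC) entry above.  */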
1228
1229 /* Feature tests against the various architecture variations. */
1230 unsigned int ix86_arch_features[X86_ARCH_LAST] = {
1231 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
1232 ~(m_386 | m_486 | m_PENT | m_K6),
1233
1234 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1235 ~m_386,
1236
1237 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1238 ~(m_386 | m_486),
1239
1240 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1241 ~m_386,
1242
1243 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
1244 ~m_386,
1245 };
1246
1247 static const unsigned int x86_accumulate_outgoing_args
1248 = m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
1249
1250 static const unsigned int x86_arch_always_fancy_math_387
1251 = m_PENT | m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT4
1252 | m_NOCONA | m_CORE2 | m_GENERIC;
1253
1254 static enum stringop_alg stringop_alg = no_stringop;
1255
1256 /* If the average insn count for a single function invocation is
1257 lower than this constant, emit fast (but longer) prologue and
1258 epilogue code. */
1259 #define FAST_PROLOGUE_INSN_COUNT 20
1260
1261 /* Names for the 8-bit (low), 8-bit (high), and 16-bit registers, respectively. */
1262 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1263 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1264 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1265
1266 /* Array of the smallest class containing reg number REGNO, indexed by
1267 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1268
1269 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1270 {
1271 /* ax, dx, cx, bx */
1272 AREG, DREG, CREG, BREG,
1273 /* si, di, bp, sp */
1274 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
1275 /* FP registers */
1276 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1277 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1278 /* arg pointer */
1279 NON_Q_REGS,
1280 /* flags, fpsr, fpcr, frame */
1281 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
1282 /* SSE registers */
1283 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1284 SSE_REGS, SSE_REGS,
1285 /* MMX registers */
1286 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
1287 MMX_REGS, MMX_REGS,
1288 /* REX registers */
1289 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1290 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1291 /* SSE REX registers */
1292 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1293 SSE_REGS, SSE_REGS,
1294 };
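
/* Editor's note: usage sketch, assuming the usual definition
   REGNO_REG_CLASS (REGNO) == regclass_map[(REGNO)] in i386.h (the macro is
   named in the comment above).  REGNO_REG_CLASS (0) yields AREG for %eax,
   and REGNO_REG_CLASS (7) yields NON_Q_REGS for %esp, per the table above.  */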
1295
1296 /* The "default" register map used in 32bit mode. */
1297
1298 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1299 {
1300 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1301 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1302 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1303 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1304 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1305 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1306 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1307 };
1308
1309 static int const x86_64_int_parameter_registers[6] =
1310 {
1311 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
1312 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
1313 };
1314
1315 static int const x86_64_ms_abi_int_parameter_registers[4] =
1316 {
1317 2 /*RCX*/, 1 /*RDX*/,
1318 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
1319 };
1320
1321 static int const x86_64_int_return_registers[4] =
1322 {
1323 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
1324 };
1325
1326 /* The "default" register map used in 64bit mode. */
1327 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1328 {
1329 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1330 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1331 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1332 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1333 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1334 8,9,10,11,12,13,14,15, /* extended integer registers */
1335 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1336 };
1337
1338 /* Define the register numbers to be used in Dwarf debugging information.
1339 The SVR4 reference port C compiler uses the following register numbers
1340 in its Dwarf output code:
1341 0 for %eax (gcc regno = 0)
1342 1 for %ecx (gcc regno = 2)
1343 2 for %edx (gcc regno = 1)
1344 3 for %ebx (gcc regno = 3)
1345 4 for %esp (gcc regno = 7)
1346 5 for %ebp (gcc regno = 6)
1347 6 for %esi (gcc regno = 4)
1348 7 for %edi (gcc regno = 5)
1349 The following three DWARF register numbers are never generated by
1350 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1351 believes these numbers have these meanings.
1352 8 for %eip (no gcc equivalent)
1353 9 for %eflags (gcc regno = 17)
1354 10 for %trapno (no gcc equivalent)
1355 It is not at all clear how we should number the FP stack registers
1356 for the x86 architecture. If the version of SDB on x86/svr4 were
1357 a bit less brain dead with respect to floating-point then we would
1358 have a precedent to follow with respect to DWARF register numbers
1359 for x86 FP registers, but the SDB on x86/svr4 is so completely
1360 broken with respect to FP registers that it is hardly worth thinking
1361 of it as something to strive for compatibility with.
1362 The version of x86/svr4 SDB I have at the moment does (partially)
1363 seem to believe that DWARF register number 11 is associated with
1364 the x86 register %st(0), but that's about all. Higher DWARF
1365 register numbers don't seem to be associated with anything in
1366 particular, and even for DWARF regno 11, SDB only seems to under-
1367 stand that it should say that a variable lives in %st(0) (when
1368 asked via an `=' command) if we said it was in DWARF regno 11,
1369 but SDB still prints garbage when asked for the value of the
1370 variable in question (via a `/' command).
1371 (Also note that the labels SDB prints for various FP stack regs
1372 when doing an `x' command are all wrong.)
1373 Note that these problems generally don't affect the native SVR4
1374 C compiler because it doesn't allow the use of -O with -g and
1375 because when it is *not* optimizing, it allocates a memory
1376 location for each floating-point variable, and the memory
1377 location is what gets described in the DWARF AT_location
1378 attribute for the variable in question.
1379 Regardless of the severe mental illness of the x86/svr4 SDB, we
1380 do something sensible here and we use the following DWARF
1381 register numbers. Note that these are all stack-top-relative
1382 numbers.
1383 11 for %st(0) (gcc regno = 8)
1384 12 for %st(1) (gcc regno = 9)
1385 13 for %st(2) (gcc regno = 10)
1386 14 for %st(3) (gcc regno = 11)
1387 15 for %st(4) (gcc regno = 12)
1388 16 for %st(5) (gcc regno = 13)
1389 17 for %st(6) (gcc regno = 14)
1390 18 for %st(7) (gcc regno = 15)
1391 */
1392 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1393 {
1394 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1395 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1396 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1397 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1398 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1399 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1400 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1401 };
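/* A worked example of the mapping above (illustrative only): a variable
   living in %esi (gcc regno 4) is described to the debugger as DWARF
   register 6, and one on the top of the FP stack, %st(0) (gcc regno 8),
   as DWARF register 11, exactly as listed in the comment preceding the
   table.  */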
1402
1403 /* Test and compare insns in i386.md store the information needed to
1404 generate branch and scc insns here. */
1405
1406 rtx ix86_compare_op0 = NULL_RTX;
1407 rtx ix86_compare_op1 = NULL_RTX;
1408 rtx ix86_compare_emitted = NULL_RTX;
1409
1410 /* Size of the register save area. */
1411 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
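/* Illustrative arithmetic, assuming the usual 64-bit values of
   REGPARM_MAX == 6, SSE_REGPARM_MAX == 8 and UNITS_PER_WORD == 8:
   6 * 8 + 8 * 16 = 176 bytes, which matches the register save area
   the x86-64 psABI lays out for varargs functions.  */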
1412
1413 /* Define the structure for the machine field in struct function. */
1414
1415 struct stack_local_entry GTY(())
1416 {
1417 unsigned short mode;
1418 unsigned short n;
1419 rtx rtl;
1420 struct stack_local_entry *next;
1421 };
1422
1423 /* Structure describing stack frame layout.
1424 Stack grows downward:
1425
1426 [arguments]
1427 <- ARG_POINTER
1428 saved pc
1429
1430 saved frame pointer if frame_pointer_needed
1431 <- HARD_FRAME_POINTER
1432 [saved regs]
1433
1434 [padding1] \
1435 )
1436 [va_arg registers] (
1437 > to_allocate <- FRAME_POINTER
1438 [frame] (
1439 )
1440 [padding2] /
1441 */
1442 struct ix86_frame
1443 {
1444 int nregs;
1445 int padding1;
1446 int va_arg_size;
1447 HOST_WIDE_INT frame;
1448 int padding2;
1449 int outgoing_arguments_size;
1450 int red_zone_size;
1451
1452 HOST_WIDE_INT to_allocate;
1453 /* The offsets relative to ARG_POINTER. */
1454 HOST_WIDE_INT frame_pointer_offset;
1455 HOST_WIDE_INT hard_frame_pointer_offset;
1456 HOST_WIDE_INT stack_pointer_offset;
1457
1458 /* When save_regs_using_mov is set, emit prologue using
1459 move instead of push instructions. */
1460 bool save_regs_using_mov;
1461 };
1462
1463 /* Code model option. */
1464 enum cmodel ix86_cmodel;
1465 /* Asm dialect. */
1466 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1467 /* TLS dialects. */
1468 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1469
1470 /* Which unit we are generating floating point math for. */
1471 enum fpmath_unit ix86_fpmath;
1472
1473 /* Which cpu are we scheduling for. */
1474 enum processor_type ix86_tune;
1475
1476 /* Which instruction set architecture to use. */
1477 enum processor_type ix86_arch;
1478
1479 /* True if the SSE prefetch instruction is not a NOP. */
1480 int x86_prefetch_sse;
1481
1482 /* ix86_regparm_string as a number */
1483 static int ix86_regparm;
1484
1485 /* -mstackrealign option */
1486 extern int ix86_force_align_arg_pointer;
1487 static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer";
1488
1489 /* Preferred alignment for stack boundary in bits. */
1490 unsigned int ix86_preferred_stack_boundary;
1491
1492 /* Values 1-5: see jump.c */
1493 int ix86_branch_cost;
1494
1495 /* Variables which are this size or smaller are put in the data/bss
1496 or ldata/lbss sections. */
1497
1498 int ix86_section_threshold = 65536;
1499
1500 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1501 char internal_label_prefix[16];
1502 int internal_label_prefix_len;
1503
1504 /* Fence to use after loop using movnt. */
1505 tree x86_mfence;
1506
1507 /* Register class used for passing a given 64-bit part of the argument.
1508 These represent classes as documented by the psABI, with the exception
1509 of the SSESF and SSEDF classes, which are basically the SSE class, except
1510 that gcc will use an SFmode or DFmode move instead of DImode to avoid reformatting penalties.
1511 
1512 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1513 whenever possible (the upper half is just padding). */
1514 enum x86_64_reg_class
1515 {
1516 X86_64_NO_CLASS,
1517 X86_64_INTEGER_CLASS,
1518 X86_64_INTEGERSI_CLASS,
1519 X86_64_SSE_CLASS,
1520 X86_64_SSESF_CLASS,
1521 X86_64_SSEDF_CLASS,
1522 X86_64_SSEUP_CLASS,
1523 X86_64_X87_CLASS,
1524 X86_64_X87UP_CLASS,
1525 X86_64_COMPLEX_X87_CLASS,
1526 X86_64_MEMORY_CLASS
1527 };
1528 static const char * const x86_64_reg_class_name[] =
1529 {
1530 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1531 "sseup", "x87", "x87up", "cplx87", "no"
1532 };
1533
1534 #define MAX_CLASSES 4
1535
1536 /* Table of constants used by fldpi, fldln2, etc.... */
1537 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1538 static bool ext_80387_constants_init = 0;
1539
1540 \f
1541 static struct machine_function * ix86_init_machine_status (void);
1542 static rtx ix86_function_value (tree, tree, bool);
1543 static int ix86_function_regparm (tree, tree);
1544 static void ix86_compute_frame_layout (struct ix86_frame *);
1545 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
1546 rtx, rtx, int);
1547
1548 \f
1549 /* The svr4 ABI for the i386 says that records and unions are returned
1550 in memory. */
1551 #ifndef DEFAULT_PCC_STRUCT_RETURN
1552 #define DEFAULT_PCC_STRUCT_RETURN 1
1553 #endif
1554
1555 /* Bit flags that specify the ISA we are compiling for. */
1556 int ix86_isa_flags = TARGET_64BIT_DEFAULT | TARGET_SUBTARGET_ISA_DEFAULT;
1557
1558 /* A mask of ix86_isa_flags that includes bit X if X
1559 was set or cleared on the command line. */
1560 static int ix86_isa_flags_explicit;
1561
1562 /* Define a set of ISAs which aren't available for a given ISA. MMX
1563 and SSE ISAs are handled separately. */
1564
1565 #define OPTION_MASK_ISA_MMX_UNSET \
1566 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_UNSET)
1567 #define OPTION_MASK_ISA_3DNOW_UNSET OPTION_MASK_ISA_3DNOW_A
1568
1569 #define OPTION_MASK_ISA_SSE_UNSET \
1570 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE2_UNSET)
1571 #define OPTION_MASK_ISA_SSE2_UNSET \
1572 (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE3_UNSET)
1573 #define OPTION_MASK_ISA_SSE3_UNSET \
1574 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSSE3_UNSET)
1575 #define OPTION_MASK_ISA_SSSE3_UNSET \
1576 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_1_UNSET)
1577 #define OPTION_MASK_ISA_SSE4_1_UNSET \
1578 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_2_UNSET)
1579 #define OPTION_MASK_ISA_SSE4_2_UNSET OPTION_MASK_ISA_SSE4A
1580
1581 /* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
1582 as -msse4.1 -msse4.2. -mno-sse4 should be the same as -mno-sse4.1. */
1583 #define OPTION_MASK_ISA_SSE4 \
1584 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2)
1585 #define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
1586
1587 #define OPTION_MASK_ISA_SSE4A_UNSET OPTION_MASK_ISA_SSE4
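/* Illustrative expansion of the chain above: OPTION_MASK_ISA_SSE2_UNSET
   unfolds to SSE3 | SSSE3 | SSE4_1 | SSE4_2 | SSE4A, so -mno-sse2 also
   turns off SSE3, SSSE3, SSE4.1, SSE4.2 and SSE4A, since each *_UNSET
   mask pulls in the next level's mask down the chain.  */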
1588
1589 /* Implement TARGET_HANDLE_OPTION. */
1590
1591 static bool
1592 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
1593 {
1594 switch (code)
1595 {
1596 case OPT_mmmx:
1597 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX;
1598 if (!value)
1599 {
1600 ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
1601 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
1602 }
1603 return true;
1604
1605 case OPT_m3dnow:
1606 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW;
1607 if (!value)
1608 {
1609 ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
1610 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
1611 }
1612 return true;
1613
1614 case OPT_m3dnowa:
1615 return false;
1616
1617 case OPT_msse:
1618 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE;
1619 if (!value)
1620 {
1621 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
1622 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
1623 }
1624 return true;
1625
1626 case OPT_msse2:
1627 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2;
1628 if (!value)
1629 {
1630 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
1631 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
1632 }
1633 return true;
1634
1635 case OPT_msse3:
1636 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3;
1637 if (!value)
1638 {
1639 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
1640 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
1641 }
1642 return true;
1643
1644 case OPT_mssse3:
1645 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3;
1646 if (!value)
1647 {
1648 ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
1649 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
1650 }
1651 return true;
1652
1653 case OPT_msse4_1:
1654 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1;
1655 if (!value)
1656 {
1657 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
1658 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
1659 }
1660 return true;
1661
1662 case OPT_msse4_2:
1663 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2;
1664 if (!value)
1665 {
1666 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
1667 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
1668 }
1669 return true;
1670
1671 case OPT_msse4:
1672 ix86_isa_flags |= OPTION_MASK_ISA_SSE4;
1673 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4;
1674 return true;
1675
1676 case OPT_mno_sse4:
1677 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
1678 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
1679 return true;
1680
1681 case OPT_msse4a:
1682 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A;
1683 if (!value)
1684 {
1685 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
1686 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
1687 }
1688 return true;
1689
1690 default:
1691 return true;
1692 }
1693 }
1694
1695 /* Sometimes certain combinations of command options do not make
1696 sense on a particular target machine. You can define a macro
1697 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1698 defined, is executed once just after all the command options have
1699 been parsed.
1700
1701 Don't use this macro to turn on various extra optimizations for
1702 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1703
1704 void
1705 override_options (void)
1706 {
1707 int i;
1708 int ix86_tune_defaulted = 0;
1709 int ix86_arch_specified = 0;
1710 unsigned int ix86_arch_mask, ix86_tune_mask;
1711
1712 /* Comes from final.c -- no real reason to change it. */
1713 #define MAX_CODE_ALIGN 16
1714
1715 static struct ptt
1716 {
1717 const struct processor_costs *cost; /* Processor costs */
1718 const int align_loop; /* Default alignments. */
1719 const int align_loop_max_skip;
1720 const int align_jump;
1721 const int align_jump_max_skip;
1722 const int align_func;
1723 }
1724 const processor_target_table[PROCESSOR_max] =
1725 {
1726 {&i386_cost, 4, 3, 4, 3, 4},
1727 {&i486_cost, 16, 15, 16, 15, 16},
1728 {&pentium_cost, 16, 7, 16, 7, 16},
1729 {&pentiumpro_cost, 16, 15, 16, 7, 16},
1730 {&geode_cost, 0, 0, 0, 0, 0},
1731 {&k6_cost, 32, 7, 32, 7, 32},
1732 {&athlon_cost, 16, 7, 16, 7, 16},
1733 {&pentium4_cost, 0, 0, 0, 0, 0},
1734 {&k8_cost, 16, 7, 16, 7, 16},
1735 {&nocona_cost, 0, 0, 0, 0, 0},
1736 {&core2_cost, 16, 7, 16, 7, 16},
1737 {&generic32_cost, 16, 7, 16, 7, 16},
1738 {&generic64_cost, 16, 7, 16, 7, 16},
1739 {&amdfam10_cost, 32, 24, 32, 7, 32}
1740 };
1741
1742 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1743 enum pta_flags
1744 {
1745 PTA_SSE = 1 << 0,
1746 PTA_SSE2 = 1 << 1,
1747 PTA_SSE3 = 1 << 2,
1748 PTA_MMX = 1 << 3,
1749 PTA_PREFETCH_SSE = 1 << 4,
1750 PTA_3DNOW = 1 << 5,
1751 PTA_3DNOW_A = 1 << 6,
1752 PTA_64BIT = 1 << 7,
1753 PTA_SSSE3 = 1 << 8,
1754 PTA_CX16 = 1 << 9,
1755 PTA_POPCNT = 1 << 10,
1756 PTA_ABM = 1 << 11,
1757 PTA_SSE4A = 1 << 12,
1758 PTA_NO_SAHF = 1 << 13,
1759 PTA_SSE4_1 = 1 << 14,
1760 PTA_SSE4_2 = 1 << 15
1761 };
1762
1763 static struct pta
1764 {
1765 const char *const name; /* processor name or nickname. */
1766 const enum processor_type processor;
1767 const unsigned /*enum pta_flags*/ flags;
1768 }
1769 const processor_alias_table[] =
1770 {
1771 {"i386", PROCESSOR_I386, 0},
1772 {"i486", PROCESSOR_I486, 0},
1773 {"i586", PROCESSOR_PENTIUM, 0},
1774 {"pentium", PROCESSOR_PENTIUM, 0},
1775 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1776 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1777 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1778 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1779 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
1780 {"i686", PROCESSOR_PENTIUMPRO, 0},
1781 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1782 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1783 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
1784 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
1785 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_SSE2},
1786 {"pentium4", PROCESSOR_PENTIUM4, PTA_MMX | PTA_SSE | PTA_SSE2},
1787 {"pentium4m", PROCESSOR_PENTIUM4, PTA_MMX | PTA_SSE | PTA_SSE2},
1788 {"prescott", PROCESSOR_NOCONA, PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
1789 {"nocona", PROCESSOR_NOCONA, (PTA_64BIT
1790 | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
1791 | PTA_CX16 | PTA_NO_SAHF)},
1792 {"core2", PROCESSOR_CORE2, (PTA_64BIT
1793 | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
1794 | PTA_SSSE3
1795 | PTA_CX16)},
1796 {"geode", PROCESSOR_GEODE, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1797 | PTA_PREFETCH_SSE)},
1798 {"k6", PROCESSOR_K6, PTA_MMX},
1799 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1800 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1801 {"athlon", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1802 | PTA_PREFETCH_SSE)},
1803 {"athlon-tbird", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1804 | PTA_PREFETCH_SSE)},
1805 {"athlon-4", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1806 | PTA_SSE)},
1807 {"athlon-xp", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1808 | PTA_SSE)},
1809 {"athlon-mp", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1810 | PTA_SSE)},
1811 {"x86-64", PROCESSOR_K8, (PTA_64BIT
1812 | PTA_MMX | PTA_SSE | PTA_SSE2
1813 | PTA_NO_SAHF)},
1814 {"k8", PROCESSOR_K8, (PTA_64BIT
1815 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1816 | PTA_SSE | PTA_SSE2
1817 | PTA_NO_SAHF)},
1818 {"k8-sse3", PROCESSOR_K8, (PTA_64BIT
1819 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1820 | PTA_SSE | PTA_SSE2 | PTA_SSE3
1821 | PTA_NO_SAHF)},
1822 {"opteron", PROCESSOR_K8, (PTA_64BIT
1823 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1824 | PTA_SSE | PTA_SSE2
1825 | PTA_NO_SAHF)},
1826 {"opteron-sse3", PROCESSOR_K8, (PTA_64BIT
1827 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1828 | PTA_SSE | PTA_SSE2 | PTA_SSE3
1829 | PTA_NO_SAHF)},
1830 {"athlon64", PROCESSOR_K8, (PTA_64BIT
1831 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1832 | PTA_SSE | PTA_SSE2
1833 | PTA_NO_SAHF)},
1834 {"athlon64-sse3", PROCESSOR_K8, (PTA_64BIT
1835 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1836 | PTA_SSE | PTA_SSE2 | PTA_SSE3
1837 | PTA_NO_SAHF)},
1838 {"athlon-fx", PROCESSOR_K8, (PTA_64BIT
1839 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1840 | PTA_SSE | PTA_SSE2
1841 | PTA_NO_SAHF)},
1842 {"amdfam10", PROCESSOR_AMDFAM10, (PTA_64BIT
1843 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1844 | PTA_SSE | PTA_SSE2 | PTA_SSE3
1845 | PTA_SSE4A
1846 | PTA_CX16 | PTA_ABM)},
1847 {"barcelona", PROCESSOR_AMDFAM10, (PTA_64BIT
1848 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1849 | PTA_SSE | PTA_SSE2 | PTA_SSE3
1850 | PTA_SSE4A
1851 | PTA_CX16 | PTA_ABM)},
1852 {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch. */ },
1853 {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ },
1854 };
1855
1856 int const pta_size = ARRAY_SIZE (processor_alias_table);
1857
1858 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1859 SUBTARGET_OVERRIDE_OPTIONS;
1860 #endif
1861
1862 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
1863 SUBSUBTARGET_OVERRIDE_OPTIONS;
1864 #endif
1865
1866 /* -fPIC is the default for 64-bit Mach-O (Darwin). */
1867 if (TARGET_MACHO && TARGET_64BIT)
1868 flag_pic = 2;
1869
1870 /* Set the default values for switches whose default depends on TARGET_64BIT
1871 in case they weren't overwritten by command line options. */
1872 if (TARGET_64BIT)
1873 {
1874 /* Mach-O doesn't support omitting the frame pointer for now. */
1875 if (flag_omit_frame_pointer == 2)
1876 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
1877 if (flag_asynchronous_unwind_tables == 2)
1878 flag_asynchronous_unwind_tables = 1;
1879 if (flag_pcc_struct_return == 2)
1880 flag_pcc_struct_return = 0;
1881 }
1882 else
1883 {
1884 if (flag_omit_frame_pointer == 2)
1885 flag_omit_frame_pointer = 0;
1886 if (flag_asynchronous_unwind_tables == 2)
1887 flag_asynchronous_unwind_tables = 0;
1888 if (flag_pcc_struct_return == 2)
1889 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1890 }
1891
1892 /* Need to check -mtune=generic first. */
1893 if (ix86_tune_string)
1894 {
1895 if (!strcmp (ix86_tune_string, "generic")
1896 || !strcmp (ix86_tune_string, "i686")
1897 /* As special support for cross compilers we read -mtune=native
1898 as -mtune=generic. With native compilers we won't see the
1899 -mtune=native, as it was changed by the driver. */
1900 || !strcmp (ix86_tune_string, "native"))
1901 {
1902 if (TARGET_64BIT)
1903 ix86_tune_string = "generic64";
1904 else
1905 ix86_tune_string = "generic32";
1906 }
1907 else if (!strncmp (ix86_tune_string, "generic", 7))
1908 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1909 }
1910 else
1911 {
1912 if (ix86_arch_string)
1913 ix86_tune_string = ix86_arch_string;
1914 if (!ix86_tune_string)
1915 {
1916 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1917 ix86_tune_defaulted = 1;
1918 }
1919
1920 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
1921 need to use a sensible tune option. */
1922 if (!strcmp (ix86_tune_string, "generic")
1923 || !strcmp (ix86_tune_string, "x86-64")
1924 || !strcmp (ix86_tune_string, "i686"))
1925 {
1926 if (TARGET_64BIT)
1927 ix86_tune_string = "generic64";
1928 else
1929 ix86_tune_string = "generic32";
1930 }
1931 }
1932 if (ix86_stringop_string)
1933 {
1934 if (!strcmp (ix86_stringop_string, "rep_byte"))
1935 stringop_alg = rep_prefix_1_byte;
1936 else if (!strcmp (ix86_stringop_string, "libcall"))
1937 stringop_alg = libcall;
1938 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
1939 stringop_alg = rep_prefix_4_byte;
1940 else if (!strcmp (ix86_stringop_string, "rep_8byte"))
1941 stringop_alg = rep_prefix_8_byte;
1942 else if (!strcmp (ix86_stringop_string, "byte_loop"))
1943 stringop_alg = loop_1_byte;
1944 else if (!strcmp (ix86_stringop_string, "loop"))
1945 stringop_alg = loop;
1946 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
1947 stringop_alg = unrolled_loop;
1948 else
1949 error ("bad value (%s) for -mstringop-strategy= switch", ix86_stringop_string);
1950 }
1951 if (!strcmp (ix86_tune_string, "x86-64"))
1952 warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
1953 "-mtune=generic instead as appropriate.");
1954
1955 if (!ix86_arch_string)
1956 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
1957 else
1958 ix86_arch_specified = 1;
1959
1960 if (!strcmp (ix86_arch_string, "generic"))
1961 error ("generic CPU can be used only for -mtune= switch");
1962 if (!strncmp (ix86_arch_string, "generic", 7))
1963 error ("bad value (%s) for -march= switch", ix86_arch_string);
1964
1965 if (ix86_cmodel_string != 0)
1966 {
1967 if (!strcmp (ix86_cmodel_string, "small"))
1968 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1969 else if (!strcmp (ix86_cmodel_string, "medium"))
1970 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
1971 else if (!strcmp (ix86_cmodel_string, "large"))
1972 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
1973 else if (flag_pic)
1974 error ("code model %s does not support PIC mode", ix86_cmodel_string);
1975 else if (!strcmp (ix86_cmodel_string, "32"))
1976 ix86_cmodel = CM_32;
1977 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1978 ix86_cmodel = CM_KERNEL;
1979 else
1980 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1981 }
1982 else
1983 {
1984 /* For TARGET_64BIT_MS_ABI, force pic on, in order to enable the
1985 use of rip-relative addressing. This eliminates fixups that
1986 would otherwise be needed if this object is to be placed in a
1987 DLL, and is essentially just as efficient as direct addressing. */
1988 if (TARGET_64BIT_MS_ABI)
1989 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
1990 else if (TARGET_64BIT)
1991 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1992 else
1993 ix86_cmodel = CM_32;
1994 }
1995 if (ix86_asm_string != 0)
1996 {
1997 if (! TARGET_MACHO
1998 && !strcmp (ix86_asm_string, "intel"))
1999 ix86_asm_dialect = ASM_INTEL;
2000 else if (!strcmp (ix86_asm_string, "att"))
2001 ix86_asm_dialect = ASM_ATT;
2002 else
2003 error ("bad value (%s) for -masm= switch", ix86_asm_string);
2004 }
2005 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
2006 error ("code model %qs not supported in the %s bit mode",
2007 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
2008 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
2009 sorry ("%i-bit mode not compiled in",
2010 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
2011
2012 for (i = 0; i < pta_size; i++)
2013 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
2014 {
2015 ix86_arch = processor_alias_table[i].processor;
2016 /* Default cpu tuning to the architecture. */
2017 ix86_tune = ix86_arch;
2018
2019 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2020 error ("CPU you selected does not support x86-64 "
2021 "instruction set");
2022
2023 if (processor_alias_table[i].flags & PTA_MMX
2024 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
2025 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
2026 if (processor_alias_table[i].flags & PTA_3DNOW
2027 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
2028 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
2029 if (processor_alias_table[i].flags & PTA_3DNOW_A
2030 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
2031 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
2032 if (processor_alias_table[i].flags & PTA_SSE
2033 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
2034 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
2035 if (processor_alias_table[i].flags & PTA_SSE2
2036 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
2037 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
2038 if (processor_alias_table[i].flags & PTA_SSE3
2039 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
2040 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
2041 if (processor_alias_table[i].flags & PTA_SSSE3
2042 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
2043 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
2044 if (processor_alias_table[i].flags & PTA_SSE4_1
2045 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
2046 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
2047 if (processor_alias_table[i].flags & PTA_SSE4_2
2048 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
2049 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
2050 if (processor_alias_table[i].flags & PTA_SSE4A
2051 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
2052 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
2053
2054 if (processor_alias_table[i].flags & PTA_ABM)
2055 x86_abm = true;
2056 if (processor_alias_table[i].flags & PTA_CX16)
2057 x86_cmpxchg16b = true;
2058 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM))
2059 x86_popcnt = true;
2060 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
2061 x86_prefetch_sse = true;
2062 if ((processor_alias_table[i].flags & PTA_NO_SAHF) && !TARGET_64BIT)
2063 x86_sahf = true;
2064
2065 break;
2066 }
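/* Illustrative effect of the loop above: "-march=core2" selects
   PROCESSOR_CORE2, defaults -mtune to the same processor, and enables
   MMX, SSE, SSE2, SSE3, SSSE3 and CMPXCHG16B per the table entry, except
   for any ISA the user explicitly enabled or disabled on the command
   line (tracked in ix86_isa_flags_explicit).  */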
2067
2068 if (i == pta_size)
2069 error ("bad value (%s) for -march= switch", ix86_arch_string);
2070
2071 ix86_arch_mask = 1u << ix86_arch;
2072 for (i = 0; i < X86_ARCH_LAST; ++i)
2073 ix86_arch_features[i] &= ix86_arch_mask;
2074
2075 for (i = 0; i < pta_size; i++)
2076 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
2077 {
2078 ix86_tune = processor_alias_table[i].processor;
2079 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2080 {
2081 if (ix86_tune_defaulted)
2082 {
2083 ix86_tune_string = "x86-64";
2084 for (i = 0; i < pta_size; i++)
2085 if (! strcmp (ix86_tune_string,
2086 processor_alias_table[i].name))
2087 break;
2088 ix86_tune = processor_alias_table[i].processor;
2089 }
2090 else
2091 error ("CPU you selected does not support x86-64 "
2092 "instruction set");
2093 }
2094 /* Intel CPUs have always interpreted SSE prefetch instructions as
2095 NOPs; so, we can enable SSE prefetch instructions even when
2096 -mtune (rather than -march) points us to a processor that has them.
2097 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
2098 higher processors. */
2099 if (TARGET_CMOVE
2100 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
2101 x86_prefetch_sse = true;
2102 break;
2103 }
2104 if (i == pta_size)
2105 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
2106
2107 ix86_tune_mask = 1u << ix86_tune;
2108 for (i = 0; i < X86_TUNE_LAST; ++i)
2109 ix86_tune_features[i] &= ix86_tune_mask;
2110
2111 if (optimize_size)
2112 ix86_cost = &size_cost;
2113 else
2114 ix86_cost = processor_target_table[ix86_tune].cost;
2115
2116 /* Arrange to set up i386_stack_locals for all functions. */
2117 init_machine_status = ix86_init_machine_status;
2118
2119 /* Validate -mregparm= value. */
2120 if (ix86_regparm_string)
2121 {
2122 if (TARGET_64BIT)
2123 warning (0, "-mregparm is ignored in 64-bit mode");
2124 i = atoi (ix86_regparm_string);
2125 if (i < 0 || i > REGPARM_MAX)
2126 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
2127 else
2128 ix86_regparm = i;
2129 }
2130 if (TARGET_64BIT)
2131 ix86_regparm = REGPARM_MAX;
2132
2133 /* If the user has provided any of the -malign-* options,
2134 warn and use that value only if -falign-* is not set.
2135 Remove this code in GCC 3.2 or later. */
2136 if (ix86_align_loops_string)
2137 {
2138 warning (0, "-malign-loops is obsolete, use -falign-loops");
2139 if (align_loops == 0)
2140 {
2141 i = atoi (ix86_align_loops_string);
2142 if (i < 0 || i > MAX_CODE_ALIGN)
2143 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2144 else
2145 align_loops = 1 << i;
2146 }
2147 }
2148
2149 if (ix86_align_jumps_string)
2150 {
2151 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
2152 if (align_jumps == 0)
2153 {
2154 i = atoi (ix86_align_jumps_string);
2155 if (i < 0 || i > MAX_CODE_ALIGN)
2156 error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2157 else
2158 align_jumps = 1 << i;
2159 }
2160 }
2161
2162 if (ix86_align_funcs_string)
2163 {
2164 warning (0, "-malign-functions is obsolete, use -falign-functions");
2165 if (align_functions == 0)
2166 {
2167 i = atoi (ix86_align_funcs_string);
2168 if (i < 0 || i > MAX_CODE_ALIGN)
2169 error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2170 else
2171 align_functions = 1 << i;
2172 }
2173 }
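/* Illustrative: with these obsolete switches the argument is a power of
   two, so e.g. -malign-loops=4 ends up as align_loops = 1 << 4 = 16,
   which is what -falign-loops=16 would request directly.  */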
2174
2175 /* Default align_* from the processor table. */
2176 if (align_loops == 0)
2177 {
2178 align_loops = processor_target_table[ix86_tune].align_loop;
2179 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
2180 }
2181 if (align_jumps == 0)
2182 {
2183 align_jumps = processor_target_table[ix86_tune].align_jump;
2184 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
2185 }
2186 if (align_functions == 0)
2187 {
2188 align_functions = processor_target_table[ix86_tune].align_func;
2189 }
2190
2191 /* Validate -mbranch-cost= value, or provide default. */
2192 ix86_branch_cost = ix86_cost->branch_cost;
2193 if (ix86_branch_cost_string)
2194 {
2195 i = atoi (ix86_branch_cost_string);
2196 if (i < 0 || i > 5)
2197 error ("-mbranch-cost=%d is not between 0 and 5", i);
2198 else
2199 ix86_branch_cost = i;
2200 }
2201 if (ix86_section_threshold_string)
2202 {
2203 i = atoi (ix86_section_threshold_string);
2204 if (i < 0)
2205 error ("-mlarge-data-threshold=%d is negative", i);
2206 else
2207 ix86_section_threshold = i;
2208 }
2209
2210 if (ix86_tls_dialect_string)
2211 {
2212 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
2213 ix86_tls_dialect = TLS_DIALECT_GNU;
2214 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
2215 ix86_tls_dialect = TLS_DIALECT_GNU2;
2216 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
2217 ix86_tls_dialect = TLS_DIALECT_SUN;
2218 else
2219 error ("bad value (%s) for -mtls-dialect= switch",
2220 ix86_tls_dialect_string);
2221 }
2222
2223 if (ix87_precision_string)
2224 {
2225 i = atoi (ix87_precision_string);
2226 if (i != 32 && i != 64 && i != 80)
2227 error ("pc%d is not a valid precision setting (32, 64 or 80)", i);
2228 }
2229
2230 if (TARGET_64BIT)
2231 {
2232 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
2233
2234 /* Enable by default the SSE and MMX builtins. Do allow the user to
2235 explicitly disable any of these. In particular, disabling SSE and
2236 MMX for kernel code is extremely useful. */
2237 if (!ix86_arch_specified)
2238 ix86_isa_flags
2239 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
2240 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
2241
2242 if (TARGET_RTD)
2243 warning (0, "-mrtd is ignored in 64bit mode");
2244 }
2245 else
2246 {
2247 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
2248
2249 if (!ix86_arch_specified)
2250 ix86_isa_flags
2251 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
2252
2253 /* The i386 ABI does not specify a red zone. It still makes sense to use one
2254 when the programmer takes care to keep the stack from being clobbered. */
2255 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
2256 target_flags |= MASK_NO_RED_ZONE;
2257 }
2258
2259 /* Keep nonleaf frame pointers. */
2260 if (flag_omit_frame_pointer)
2261 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
2262 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
2263 flag_omit_frame_pointer = 1;
2264
2265 /* If we're doing fast math, we don't care about comparison order
2266 wrt NaNs. This lets us use a shorter comparison sequence. */
2267 if (flag_finite_math_only)
2268 target_flags &= ~MASK_IEEE_FP;
2269
2270 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
2271 since the insns won't need emulation. */
2272 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
2273 target_flags &= ~MASK_NO_FANCY_MATH_387;
2274
2275 /* Likewise, if the target doesn't have a 387, or we've specified
2276 software floating point, don't use 387 inline intrinsics. */
2277 if (!TARGET_80387)
2278 target_flags |= MASK_NO_FANCY_MATH_387;
2279
2280 /* Turn on SSE4.1 builtins for -msse4.2. */
2281 if (TARGET_SSE4_2)
2282 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
2283
2284 /* Turn on SSSE3 builtins for -msse4.1. */
2285 if (TARGET_SSE4_1)
2286 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
2287
2288 /* Turn on SSE3 builtins for -mssse3. */
2289 if (TARGET_SSSE3)
2290 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
2291
2292 /* Turn on SSE3 builtins for -msse4a. */
2293 if (TARGET_SSE4A)
2294 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
2295
2296 /* Turn on SSE2 builtins for -msse3. */
2297 if (TARGET_SSE3)
2298 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
2299
2300 /* Turn on SSE builtins for -msse2. */
2301 if (TARGET_SSE2)
2302 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
2303
2304 /* Turn on MMX builtins for -msse. */
2305 if (TARGET_SSE)
2306 {
2307 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
2308 x86_prefetch_sse = true;
2309 }
2310
2311 /* Turn on MMX builtins for 3Dnow. */
2312 if (TARGET_3DNOW)
2313 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
2314
2315 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
2316 if (TARGET_SSE4_2 || TARGET_ABM)
2317 x86_popcnt = true;
2318
2319 /* Validate -mpreferred-stack-boundary= value, or provide default.
2320 The default of 128 bits is for Pentium III's SSE __m128. We can't
2321 change it because of optimize_size. Otherwise, we can't mix object
2322 files compiled with -Os and -On. */
2323 ix86_preferred_stack_boundary = 128;
2324 if (ix86_preferred_stack_boundary_string)
2325 {
2326 i = atoi (ix86_preferred_stack_boundary_string);
2327 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
2328 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
2329 TARGET_64BIT ? 4 : 2);
2330 else
2331 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
2332 }
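/* Illustrative: -mpreferred-stack-boundary=4 gives (1 << 4) * BITS_PER_UNIT
   = 128 bits, i.e. a 16-byte aligned stack; the 64-bit lower bound of 4
   therefore corresponds to the 16-byte alignment the psABI requires.  */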
2333
2334 /* Accept -msseregparm only if at least SSE support is enabled. */
2335 if (TARGET_SSEREGPARM
2336 && ! TARGET_SSE)
2337 error ("-msseregparm used without SSE enabled");
2338
2339 ix86_fpmath = TARGET_FPMATH_DEFAULT;
2340 if (ix86_fpmath_string != 0)
2341 {
2342 if (! strcmp (ix86_fpmath_string, "387"))
2343 ix86_fpmath = FPMATH_387;
2344 else if (! strcmp (ix86_fpmath_string, "sse"))
2345 {
2346 if (!TARGET_SSE)
2347 {
2348 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2349 ix86_fpmath = FPMATH_387;
2350 }
2351 else
2352 ix86_fpmath = FPMATH_SSE;
2353 }
2354 else if (! strcmp (ix86_fpmath_string, "387,sse")
2355 || ! strcmp (ix86_fpmath_string, "sse,387"))
2356 {
2357 if (!TARGET_SSE)
2358 {
2359 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2360 ix86_fpmath = FPMATH_387;
2361 }
2362 else if (!TARGET_80387)
2363 {
2364 warning (0, "387 instruction set disabled, using SSE arithmetics");
2365 ix86_fpmath = FPMATH_SSE;
2366 }
2367 else
2368 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
2369 }
2370 else
2371 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
2372 }
2373
2374 /* If the i387 is disabled, then do not return values in it. */
2375 if (!TARGET_80387)
2376 target_flags &= ~MASK_FLOAT_RETURNS;
2377
2378 if ((x86_accumulate_outgoing_args & ix86_tune_mask)
2379 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2380 && !optimize_size)
2381 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2382
2383 /* ??? Unwind info is not correct around the CFG unless either a frame
2384 pointer is present or M_A_O_A is set. Fixing this requires rewriting
2385 unwind info generation to be aware of the CFG and propagating states
2386 around edges. */
2387 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
2388 || flag_exceptions || flag_non_call_exceptions)
2389 && flag_omit_frame_pointer
2390 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
2391 {
2392 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2393 warning (0, "unwind tables currently require either a frame pointer "
2394 "or -maccumulate-outgoing-args for correctness");
2395 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2396 }
2397
2398 /* For sane SSE instruction set generation we need fcomi instruction.
2399 It is safe to enable all CMOVE instructions. */
2400 if (TARGET_SSE)
2401 TARGET_CMOVE = 1;
2402
2403 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
2404 {
2405 char *p;
2406 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
2407 p = strchr (internal_label_prefix, 'X');
2408 internal_label_prefix_len = p - internal_label_prefix;
2409 *p = '\0';
2410 }
2411
2412 /* When scheduling description is not available, disable scheduler pass
2413 so it won't slow down the compilation and make x87 code slower. */
2414 if (!TARGET_SCHEDULE)
2415 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
2416
2417 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
2418 set_param_value ("simultaneous-prefetches",
2419 ix86_cost->simultaneous_prefetches);
2420 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
2421 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
2422 }
2423 \f
2424 /* Return true if this goes in large data/bss. */
2425
2426 static bool
2427 ix86_in_large_data_p (tree exp)
2428 {
2429 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
2430 return false;
2431
2432 /* Functions are never large data. */
2433 if (TREE_CODE (exp) == FUNCTION_DECL)
2434 return false;
2435
2436 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
2437 {
2438 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
2439 if (strcmp (section, ".ldata") == 0
2440 || strcmp (section, ".lbss") == 0)
2441 return true;
2442 return false;
2443 }
2444 else
2445 {
2446 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
2447
2448 /* If this is an incomplete type with size 0, then we can't put it
2449 in data because it might be too big when completed. */
2450 if (!size || size > ix86_section_threshold)
2451 return true;
2452 }
2453
2454 return false;
2455 }
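/* For example (illustrative only): when compiling with -mcmodel=medium,
   a definition such as

     static char big_buffer[1 << 20];

   exceeds the default 64K ix86_section_threshold, so ix86_in_large_data_p
   returns true and the object is routed to .lbss/.ldata by the section
   selection hooks below.  */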
2456
2457 /* Switch to the appropriate section for output of DECL.
2458 DECL is either a `VAR_DECL' node or a constant of some sort.
2459 RELOC indicates whether forming the initial value of DECL requires
2460 link-time relocations. */
2461
2462 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
2463 ATTRIBUTE_UNUSED;
2464
2465 static section *
2466 x86_64_elf_select_section (tree decl, int reloc,
2467 unsigned HOST_WIDE_INT align)
2468 {
2469 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2470 && ix86_in_large_data_p (decl))
2471 {
2472 const char *sname = NULL;
2473 unsigned int flags = SECTION_WRITE;
2474 switch (categorize_decl_for_section (decl, reloc))
2475 {
2476 case SECCAT_DATA:
2477 sname = ".ldata";
2478 break;
2479 case SECCAT_DATA_REL:
2480 sname = ".ldata.rel";
2481 break;
2482 case SECCAT_DATA_REL_LOCAL:
2483 sname = ".ldata.rel.local";
2484 break;
2485 case SECCAT_DATA_REL_RO:
2486 sname = ".ldata.rel.ro";
2487 break;
2488 case SECCAT_DATA_REL_RO_LOCAL:
2489 sname = ".ldata.rel.ro.local";
2490 break;
2491 case SECCAT_BSS:
2492 sname = ".lbss";
2493 flags |= SECTION_BSS;
2494 break;
2495 case SECCAT_RODATA:
2496 case SECCAT_RODATA_MERGE_STR:
2497 case SECCAT_RODATA_MERGE_STR_INIT:
2498 case SECCAT_RODATA_MERGE_CONST:
2499 sname = ".lrodata";
2500 flags = 0;
2501 break;
2502 case SECCAT_SRODATA:
2503 case SECCAT_SDATA:
2504 case SECCAT_SBSS:
2505 gcc_unreachable ();
2506 case SECCAT_TEXT:
2507 case SECCAT_TDATA:
2508 case SECCAT_TBSS:
2509 /* We don't split these for the medium model. Place them into
2510 default sections and hope for the best. */
2511 break;
2512 }
2513 if (sname)
2514 {
2515 /* We might get called with string constants, but get_named_section
2516 doesn't like them as they are not DECLs. Also, we need to set
2517 flags in that case. */
2518 if (!DECL_P (decl))
2519 return get_section (sname, flags, NULL);
2520 return get_named_section (decl, sname, reloc);
2521 }
2522 }
2523 return default_elf_select_section (decl, reloc, align);
2524 }
2525
2526 /* Build up a unique section name, expressed as a
2527 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2528 RELOC indicates whether the initial value of EXP requires
2529 link-time relocations. */
2530
2531 static void ATTRIBUTE_UNUSED
2532 x86_64_elf_unique_section (tree decl, int reloc)
2533 {
2534 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2535 && ix86_in_large_data_p (decl))
2536 {
2537 const char *prefix = NULL;
2538 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2539 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
2540
2541 switch (categorize_decl_for_section (decl, reloc))
2542 {
2543 case SECCAT_DATA:
2544 case SECCAT_DATA_REL:
2545 case SECCAT_DATA_REL_LOCAL:
2546 case SECCAT_DATA_REL_RO:
2547 case SECCAT_DATA_REL_RO_LOCAL:
2548 prefix = one_only ? ".gnu.linkonce.ld." : ".ldata.";
2549 break;
2550 case SECCAT_BSS:
2551 prefix = one_only ? ".gnu.linkonce.lb." : ".lbss.";
2552 break;
2553 case SECCAT_RODATA:
2554 case SECCAT_RODATA_MERGE_STR:
2555 case SECCAT_RODATA_MERGE_STR_INIT:
2556 case SECCAT_RODATA_MERGE_CONST:
2557 prefix = one_only ? ".gnu.linkonce.lr." : ".lrodata.";
2558 break;
2559 case SECCAT_SRODATA:
2560 case SECCAT_SDATA:
2561 case SECCAT_SBSS:
2562 gcc_unreachable ();
2563 case SECCAT_TEXT:
2564 case SECCAT_TDATA:
2565 case SECCAT_TBSS:
2566 /* We don't split these for the medium model. Place them into
2567 default sections and hope for the best. */
2568 break;
2569 }
2570 if (prefix)
2571 {
2572 const char *name;
2573 size_t nlen, plen;
2574 char *string;
2575 plen = strlen (prefix);
2576
2577 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
2578 name = targetm.strip_name_encoding (name);
2579 nlen = strlen (name);
2580
2581 string = (char *) alloca (nlen + plen + 1);
2582 memcpy (string, prefix, plen);
2583 memcpy (string + plen, name, nlen + 1);
2584
2585 DECL_SECTION_NAME (decl) = build_string (nlen + plen, string);
2586 return;
2587 }
2588 }
2589 default_unique_section (decl, reloc);
2590 }
2591
2592 #ifdef COMMON_ASM_OP
2593 /* This says how to output assembler code to declare an
2594 uninitialized external linkage data object.
2595
2596 For medium model x86-64 we need to use the .largecomm directive for
2597 large objects. */
2598 void
2599 x86_elf_aligned_common (FILE *file,
2600 const char *name, unsigned HOST_WIDE_INT size,
2601 int align)
2602 {
2603 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2604 && size > (unsigned int)ix86_section_threshold)
2605 fprintf (file, ".largecomm\t");
2606 else
2607 fprintf (file, "%s", COMMON_ASM_OP);
2608 assemble_name (file, name);
2609 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
2610 size, align / BITS_PER_UNIT);
2611 }
2612 #endif
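/* Roughly, for a medium-model common symbol larger than the threshold the
   function above emits something like

     .largecomm	big_buffer,1048576,32

   (name, size in bytes, alignment in bytes), whereas small objects keep
   using the ordinary COMMON_ASM_OP directive.  */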
2613
2614 /* Utility function for targets to use in implementing
2615 ASM_OUTPUT_ALIGNED_BSS. */
2616
2617 void
2618 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
2619 const char *name, unsigned HOST_WIDE_INT size,
2620 int align)
2621 {
2622 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2623 && size > (unsigned int)ix86_section_threshold)
2624 switch_to_section (get_named_section (decl, ".lbss", 0));
2625 else
2626 switch_to_section (bss_section);
2627 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
2628 #ifdef ASM_DECLARE_OBJECT_NAME
2629 last_assemble_variable_decl = decl;
2630 ASM_DECLARE_OBJECT_NAME (file, name, decl);
2631 #else
2632 /* Standard thing is to just output a label for the object. */
2633 ASM_OUTPUT_LABEL (file, name);
2634 #endif /* ASM_DECLARE_OBJECT_NAME */
2635 ASM_OUTPUT_SKIP (file, size ? size : 1);
2636 }
2637 \f
2638 void
2639 optimization_options (int level, int size ATTRIBUTE_UNUSED)
2640 {
2641 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
2642 make the problem with not enough registers even worse. */
2643 #ifdef INSN_SCHEDULING
2644 if (level > 1)
2645 flag_schedule_insns = 0;
2646 #endif
2647
2648 if (TARGET_MACHO)
2649 /* The Darwin libraries never set errno, so we might as well
2650 avoid calling them when that's the only reason we would. */
2651 flag_errno_math = 0;
2652
2653 /* The default values of these switches depend on TARGET_64BIT,
2654 which is not known at this moment. Mark these values with 2 and
2655 let the user override them. If there is no command line option
2656 specifying them, we will set the defaults in override_options. */
2657 if (optimize >= 1)
2658 flag_omit_frame_pointer = 2;
2659 flag_pcc_struct_return = 2;
2660 flag_asynchronous_unwind_tables = 2;
2661 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
2662 SUBTARGET_OPTIMIZATION_OPTIONS;
2663 #endif
2664 }
2665 \f
2666 /* Decide whether we can make a sibling call to a function. DECL is the
2667 declaration of the function being targeted by the call and EXP is the
2668 CALL_EXPR representing the call. */
2669
2670 static bool
2671 ix86_function_ok_for_sibcall (tree decl, tree exp)
2672 {
2673 tree func;
2674 rtx a, b;
2675
2676 /* If we are generating position-independent code, we cannot sibcall
2677 optimize any indirect call, or a direct call to a global function,
2678 as the PLT requires %ebx be live. */
2679 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
2680 return false;
2681
2682 if (decl)
2683 func = decl;
2684 else
2685 {
2686 func = TREE_TYPE (CALL_EXPR_FN (exp));
2687 if (POINTER_TYPE_P (func))
2688 func = TREE_TYPE (func);
2689 }
2690
2691 /* Check that the return value locations are the same. For example,
2692 if we are returning floats on the 80387 register stack, we cannot
2693 make a sibcall from a function that doesn't return a float to a
2694 function that does or, conversely, from a function that does return
2695 a float to a function that doesn't; the necessary stack adjustment
2696 would not be executed. This is also the place we notice
2697 differences in the return value ABI. Note that it is ok for one
2698 of the functions to have void return type as long as the return
2699 value of the other is passed in a register. */
2700 a = ix86_function_value (TREE_TYPE (exp), func, false);
2701 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
2702 cfun->decl, false);
2703 if (STACK_REG_P (a) || STACK_REG_P (b))
2704 {
2705 if (!rtx_equal_p (a, b))
2706 return false;
2707 }
2708 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
2709 ;
2710 else if (!rtx_equal_p (a, b))
2711 return false;
2712
2713 /* If this call is indirect, we'll need to be able to use a call-clobbered
2714 register for the address of the target function. Make sure that all
2715 such registers are not used for passing parameters. */
2716 if (!decl && !TARGET_64BIT)
2717 {
2718 tree type;
2719
2720 /* We're looking at the CALL_EXPR, we need the type of the function. */
2721 type = CALL_EXPR_FN (exp); /* pointer expression */
2722 type = TREE_TYPE (type); /* pointer type */
2723 type = TREE_TYPE (type); /* function type */
2724
2725 if (ix86_function_regparm (type, NULL) >= 3)
2726 {
2727 /* ??? Need to count the actual number of registers to be used,
2728 not the possible number of registers. Fix later. */
2729 return false;
2730 }
2731 }
2732
2733 /* Dllimport'd functions are also called indirectly. */
2734 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
2735 && decl && DECL_DLLIMPORT_P (decl)
2736 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
2737 return false;
2738
2739 /* If we force-aligned the stack, then sibcalling would unalign the
2740 stack, which may break the called function. */
2741 if (cfun->machine->force_align_arg_pointer)
2742 return false;
2743
2744 /* Otherwise okay. That also includes certain types of indirect calls. */
2745 return true;
2746 }
2747
2748 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
2749 calling convention attributes;
2750 arguments as in struct attribute_spec.handler. */
2751
2752 static tree
2753 ix86_handle_cconv_attribute (tree *node, tree name,
2754 tree args,
2755 int flags ATTRIBUTE_UNUSED,
2756 bool *no_add_attrs)
2757 {
2758 if (TREE_CODE (*node) != FUNCTION_TYPE
2759 && TREE_CODE (*node) != METHOD_TYPE
2760 && TREE_CODE (*node) != FIELD_DECL
2761 && TREE_CODE (*node) != TYPE_DECL)
2762 {
2763 warning (OPT_Wattributes, "%qs attribute only applies to functions",
2764 IDENTIFIER_POINTER (name));
2765 *no_add_attrs = true;
2766 return NULL_TREE;
2767 }
2768
2769 /* Can combine regparm with all attributes but fastcall. */
2770 if (is_attribute_p ("regparm", name))
2771 {
2772 tree cst;
2773
2774 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2775 {
2776 error ("fastcall and regparm attributes are not compatible");
2777 }
2778
2779 cst = TREE_VALUE (args);
2780 if (TREE_CODE (cst) != INTEGER_CST)
2781 {
2782 warning (OPT_Wattributes,
2783 "%qs attribute requires an integer constant argument",
2784 IDENTIFIER_POINTER (name));
2785 *no_add_attrs = true;
2786 }
2787 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
2788 {
2789 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
2790 IDENTIFIER_POINTER (name), REGPARM_MAX);
2791 *no_add_attrs = true;
2792 }
2793
2794 if (!TARGET_64BIT
2795 && lookup_attribute (ix86_force_align_arg_pointer_string,
2796 TYPE_ATTRIBUTES (*node))
2797 && compare_tree_int (cst, REGPARM_MAX-1))
2798 {
2799 error ("%s functions limited to %d register parameters",
2800 ix86_force_align_arg_pointer_string, REGPARM_MAX-1);
2801 }
2802
2803 return NULL_TREE;
2804 }
2805
2806 if (TARGET_64BIT)
2807 {
2808 /* Do not warn when emulating the MS ABI. */
2809 if (!TARGET_64BIT_MS_ABI)
2810 warning (OPT_Wattributes, "%qs attribute ignored",
2811 IDENTIFIER_POINTER (name));
2812 *no_add_attrs = true;
2813 return NULL_TREE;
2814 }
2815
2816 /* fastcall cannot be combined with cdecl, stdcall or regparm; it can be combined with sseregparm. */
2817 if (is_attribute_p ("fastcall", name))
2818 {
2819 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2820 {
2821 error ("fastcall and cdecl attributes are not compatible");
2822 }
2823 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2824 {
2825 error ("fastcall and stdcall attributes are not compatible");
2826 }
2827 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
2828 {
2829 error ("fastcall and regparm attributes are not compatible");
2830 }
2831 }
2832
2833 /* Can combine stdcall with regparm and sseregparm; stdcall and
2834 fastcall (or cdecl) are not compatible. */
2835 else if (is_attribute_p ("stdcall", name))
2836 {
2837 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2838 {
2839 error ("stdcall and cdecl attributes are not compatible");
2840 }
2841 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2842 {
2843 error ("stdcall and fastcall attributes are not compatible");
2844 }
2845 }
2846
2847 /* Can combine cdecl with regparm and sseregparm. */
2848 else if (is_attribute_p ("cdecl", name))
2849 {
2850 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2851 {
2852 error ("stdcall and cdecl attributes are not compatible");
2853 }
2854 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2855 {
2856 error ("fastcall and cdecl attributes are not compatible");
2857 }
2858 }
2859
2860 /* Can combine sseregparm with all attributes. */
2861
2862 return NULL_TREE;
2863 }
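/* Illustrative user-level consequences of the checks above: a declaration
   such as

     int __attribute__ ((stdcall, regparm (2))) f (int, int);

   is accepted, because stdcall combines with regparm, whereas

     int __attribute__ ((fastcall, regparm (2))) g (int, int);

   is rejected with "fastcall and regparm attributes are not compatible".  */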
2864
2865 /* Return 0 if the attributes for two types are incompatible, 1 if they
2866 are compatible, and 2 if they are nearly compatible (which causes a
2867 warning to be generated). */
2868
2869 static int
2870 ix86_comp_type_attributes (tree type1, tree type2)
2871 {
2872 /* Check for mismatch of non-default calling convention. */
2873 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
2874
2875 if (TREE_CODE (type1) != FUNCTION_TYPE)
2876 return 1;
2877
2878 /* Check for mismatched fastcall/regparm types. */
2879 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
2880 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
2881 || (ix86_function_regparm (type1, NULL)
2882 != ix86_function_regparm (type2, NULL)))
2883 return 0;
2884
2885 /* Check for mismatched sseregparm types. */
2886 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
2887 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
2888 return 0;
2889
2890 /* Check for mismatched return types (cdecl vs stdcall). */
2891 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
2892 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
2893 return 0;
2894
2895 return 1;
2896 }
2897 \f
2898 /* Return the regparm value for a function with the indicated TYPE and DECL.
2899 DECL may be NULL when calling function indirectly
2900 or considering a libcall. */
2901
2902 static int
2903 ix86_function_regparm (tree type, tree decl)
2904 {
2905 tree attr;
2906 int regparm = ix86_regparm;
2907
2908 if (TARGET_64BIT)
2909 return regparm;
2910
2911 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
2912 if (attr)
2913 return TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
2914
2915 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
2916 return 2;
2917
2918 /* Use register calling convention for local functions when possible. */
2919 if (decl && TREE_CODE (decl) == FUNCTION_DECL
2920 && flag_unit_at_a_time && !profile_flag)
2921 {
2922 struct cgraph_local_info *i = cgraph_local_info (decl);
2923 if (i && i->local)
2924 {
2925 int local_regparm, globals = 0, regno;
2926 struct function *f;
2927
2928 /* Make sure no regparm register is taken by a
2929 global register variable. */
2930 for (local_regparm = 0; local_regparm < 3; local_regparm++)
2931 if (global_regs[local_regparm])
2932 break;
2933
2934 /* We can't use regparm(3) for nested functions as these use
2935 static chain pointer in third argument. */
2936 if (local_regparm == 3
2937 && (decl_function_context (decl)
2938 || ix86_force_align_arg_pointer)
2939 && !DECL_NO_STATIC_CHAIN (decl))
2940 local_regparm = 2;
2941
2942 /* If the function realigns its stack pointer, the prologue will
2943 clobber %ecx. If we've already generated code for the callee,
2944 the callee DECL_STRUCT_FUNCTION is gone, so we fall back to
2945 scanning the attributes for the self-realigning property. */
2946 f = DECL_STRUCT_FUNCTION (decl);
2947 if (local_regparm == 3
2948 && (f ? !!f->machine->force_align_arg_pointer
2949 : !!lookup_attribute (ix86_force_align_arg_pointer_string,
2950 TYPE_ATTRIBUTES (TREE_TYPE (decl)))))
2951 local_regparm = 2;
2952
2953 /* Each global register variable increases register pressure,
2954 so the more global reg vars there are, the less the regparm
2955 optimization can be used, unless requested by the user explicitly. */
2956 for (regno = 0; regno < 6; regno++)
2957 if (global_regs[regno])
2958 globals++;
2959 local_regparm
2960 = globals < local_regparm ? local_regparm - globals : 0;
2961
2962 if (local_regparm > regparm)
2963 regparm = local_regparm;
2964 }
2965 }
2966
2967 return regparm;
2968 }
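/* As a reminder of the 32-bit convention this implements (illustrative):
   regparm (3) passes the first three integer arguments in %eax, %edx and
   %ecx, while fastcall (the "return 2" case above) uses only %ecx and
   %edx.  The local-function heuristic above may bump regparm up to 3 for
   static functions whose callers are all known.  */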
2969
2970 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
2971 DFmode (2) arguments in SSE registers for a function with the
2972 indicated TYPE and DECL. DECL may be NULL when calling function
2973 indirectly or considering a libcall. Otherwise return 0. */
2974
2975 static int
2976 ix86_function_sseregparm (tree type, tree decl)
2977 {
2978 gcc_assert (!TARGET_64BIT);
2979
2980 /* Use SSE registers to pass SFmode and DFmode arguments if requested
2981 by the sseregparm attribute. */
2982 if (TARGET_SSEREGPARM
2983 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
2984 {
2985 if (!TARGET_SSE)
2986 {
2987 if (decl)
2988 error ("Calling %qD with attribute sseregparm without "
2989 "SSE/SSE2 enabled", decl);
2990 else
2991 error ("Calling %qT with attribute sseregparm without "
2992 "SSE/SSE2 enabled", type);
2993 return 0;
2994 }
2995
2996 return 2;
2997 }
2998
2999 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
3000 (and DFmode for SSE2) arguments in SSE registers. */
3001 if (decl && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
3002 {
3003 struct cgraph_local_info *i = cgraph_local_info (decl);
3004 if (i && i->local)
3005 return TARGET_SSE2 ? 2 : 1;
3006 }
3007
3008 return 0;
3009 }
3010
3011 /* Return true if EAX is live at the start of the function. Used by
3012 ix86_expand_prologue to determine if we need special help before
3013 calling allocate_stack_worker. */
3014
3015 static bool
3016 ix86_eax_live_at_start_p (void)
3017 {
3018 /* Cheat. Don't bother working forward from ix86_function_regparm
3019 to the function type to whether an actual argument is located in
3020 eax. Instead just look at cfg info, which is still close enough
3021 to correct at this point. This gives false positives for broken
3022 functions that might use uninitialized data that happens to be
3023 allocated in eax, but who cares? */
3024 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
3025 }
3026
3027 /* Return true if TYPE has a variable argument list. */
3028
3029 static bool
3030 type_has_variadic_args_p (tree type)
3031 {
3032 tree n, t = TYPE_ARG_TYPES (type);
3033
3034 if (t == NULL)
3035 return false;
3036
3037 while ((n = TREE_CHAIN (t)) != NULL)
3038 t = n;
3039
3040 return TREE_VALUE (t) != void_type_node;
3041 }
3042
3043 /* Value is the number of bytes of arguments automatically
3044 popped when returning from a subroutine call.
3045 FUNDECL is the declaration node of the function (as a tree),
3046 FUNTYPE is the data type of the function (as a tree),
3047 or for a library call it is an identifier node for the subroutine name.
3048 SIZE is the number of bytes of arguments passed on the stack.
3049
3050 On the 80386, the RTD insn may be used to pop them if the number
3051 of args is fixed, but if the number is variable then the caller
3052 must pop them all. RTD can't be used for library calls now
3053 because the library is compiled with the Unix compiler.
3054 Use of RTD is a selectable option, since it is incompatible with
3055 standard Unix calling sequences. If the option is not selected,
3056 the caller must always pop the args.
3057
3058 The attribute stdcall is equivalent to RTD on a per module basis. */
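/* Concretely (illustrative): for

     int __attribute__ ((stdcall)) f (int a, int b);

   this hook returns 8, so the callee pops its 8 bytes of arguments (e.g.
   with "ret $8"), while a cdecl or variadic function returns 0 and the
   caller is responsible for popping.  */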
3059
3060 int
3061 ix86_return_pops_args (tree fundecl, tree funtype, int size)
3062 {
3063 int rtd;
3064
3065 /* None of the 64-bit ABIs pop arguments. */
3066 if (TARGET_64BIT)
3067 return 0;
3068
3069 rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
3070
3071 /* Cdecl functions override -mrtd, and never pop the stack. */
3072 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
3073 {
3074 /* Stdcall and fastcall functions will pop the stack if not
3075 variable args. */
3076 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
3077 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
3078 rtd = 1;
3079
3080 if (rtd && ! type_has_variadic_args_p (funtype))
3081 return size;
3082 }
3083
3084 /* Lose any fake structure return argument if it is passed on the stack. */
3085 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
3086 && !KEEP_AGGREGATE_RETURN_POINTER)
3087 {
3088 int nregs = ix86_function_regparm (funtype, fundecl);
3089 if (nregs == 0)
3090 return GET_MODE_SIZE (Pmode);
3091 }
3092
3093 return 0;
3094 }
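/* Worked example for the rule above (illustrative comment only, not part
   of the original file): on a 32-bit target with

       int __attribute__ ((stdcall)) f (int a, int b);

   the argument list is fixed, so ix86_return_pops_args returns 8 and the
   callee pops both arguments (typically via "ret $8"), whereas a cdecl or
   variadic function returns 0 and leaves the cleanup to the caller.  */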
3095 \f
3096 /* Argument support functions. */
3097
3098 /* Return true when REGNO may be used to pass function parameters. */
3099 bool
3100 ix86_function_arg_regno_p (int regno)
3101 {
3102 int i;
3103 const int *parm_regs;
3104
3105 if (!TARGET_64BIT)
3106 {
3107 if (TARGET_MACHO)
3108 return (regno < REGPARM_MAX
3109 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
3110 else
3111 return (regno < REGPARM_MAX
3112 || (TARGET_MMX && MMX_REGNO_P (regno)
3113 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
3114 || (TARGET_SSE && SSE_REGNO_P (regno)
3115 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
3116 }
3117
3118 if (TARGET_MACHO)
3119 {
3120 if (SSE_REGNO_P (regno) && TARGET_SSE)
3121 return true;
3122 }
3123 else
3124 {
3125 if (TARGET_SSE && SSE_REGNO_P (regno)
3126 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
3127 return true;
3128 }
3129
3130 /* RAX is used as a hidden argument to va_arg functions. */
3131 if (!TARGET_64BIT_MS_ABI && regno == 0)
3132 return true;
3133
3134 if (TARGET_64BIT_MS_ABI)
3135 parm_regs = x86_64_ms_abi_int_parameter_registers;
3136 else
3137 parm_regs = x86_64_int_parameter_registers;
3138 for (i = 0; i < REGPARM_MAX; i++)
3139 if (regno == parm_regs[i])
3140 return true;
3141 return false;
3142 }
3143
3144 /* Return true if we do not know how to pass TYPE solely in registers. */
3145
3146 static bool
3147 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
3148 {
3149 if (must_pass_in_stack_var_size_or_pad (mode, type))
3150 return true;
3151
3152 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
3153 The layout_type routine is crafty and tries to trick us into passing
3154 currently unsupported vector types on the stack by using TImode. */
3155 return (!TARGET_64BIT && mode == TImode
3156 && type && TREE_CODE (type) != VECTOR_TYPE);
3157 }
3158
3159 /* Initialize a variable CUM of type CUMULATIVE_ARGS
3160 for a call to a function whose data type is FNTYPE.
3161 For a library call, FNTYPE is 0. */
3162
3163 void
3164 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
3165 tree fntype, /* tree ptr for function decl */
3166 rtx libname, /* SYMBOL_REF of library name or 0 */
3167 tree fndecl)
3168 {
3169 memset (cum, 0, sizeof (*cum));
3170
3171 /* Set up the number of registers to use for passing arguments. */
3172 cum->nregs = ix86_regparm;
3173 if (TARGET_SSE)
3174 cum->sse_nregs = SSE_REGPARM_MAX;
3175 if (TARGET_MMX)
3176 cum->mmx_nregs = MMX_REGPARM_MAX;
3177 cum->warn_sse = true;
3178 cum->warn_mmx = true;
3179 cum->maybe_vaarg = (fntype
3180 ? (!TYPE_ARG_TYPES (fntype)
3181 || type_has_variadic_args_p (fntype))
3182 : !libname);
3183
3184 if (!TARGET_64BIT)
3185 {
3186 /* If there are variable arguments, then we won't pass anything
3187 in registers in 32-bit mode. */
3188 if (cum->maybe_vaarg)
3189 {
3190 cum->nregs = 0;
3191 cum->sse_nregs = 0;
3192 cum->mmx_nregs = 0;
3193 cum->warn_sse = 0;
3194 cum->warn_mmx = 0;
3195 return;
3196 }
3197
3198 /* Use ecx and edx registers if function has fastcall attribute,
3199 else look for regparm information. */
3200 if (fntype)
3201 {
3202 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
3203 {
3204 cum->nregs = 2;
3205 cum->fastcall = 1;
3206 }
3207 else
3208 cum->nregs = ix86_function_regparm (fntype, fndecl);
3209 }
3210
3211 /* Set up the number of SSE registers used for passing SFmode
3212 and DFmode arguments. Warn for mismatching ABI. */
3213 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl);
3214 }
3215 }
3216
3217 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
3218 But in the case of vector types, it is some vector mode.
3219
3220 When we have only some of our vector isa extensions enabled, then there
3221 are some modes for which vector_mode_supported_p is false. For these
3222 modes, the generic vector support in gcc will choose some non-vector mode
3223 in order to implement the type. By computing the natural mode, we'll
3224 select the proper ABI location for the operand and not depend on whatever
3225 the middle-end decides to do with these vector types. */
3226
3227 static enum machine_mode
3228 type_natural_mode (tree type)
3229 {
3230 enum machine_mode mode = TYPE_MODE (type);
3231
3232 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
3233 {
3234 HOST_WIDE_INT size = int_size_in_bytes (type);
3235 if ((size == 8 || size == 16)
3236 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
3237 && TYPE_VECTOR_SUBPARTS (type) > 1)
3238 {
3239 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
3240
3241 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
3242 mode = MIN_MODE_VECTOR_FLOAT;
3243 else
3244 mode = MIN_MODE_VECTOR_INT;
3245
3246 /* Get the mode which has this inner mode and number of units. */
3247 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
3248 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
3249 && GET_MODE_INNER (mode) == innermode)
3250 return mode;
3251
3252 gcc_unreachable ();
3253 }
3254 }
3255
3256 return mode;
3257 }
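/* Example of the effect (an illustrative comment, not from the original
   source): a GNU C "int __attribute__ ((vector_size (8)))" type may be
   laid out by the middle end with a plain integer mode when MMX support
   is disabled, yet type_natural_mode still reports V2SImode, so the
   argument-passing code keeps the same ABI slot regardless of -mmmx.  */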
3258
3259 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
3260 this may not agree with the mode that the type system has chosen for the
3261 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
3262 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
3263
3264 static rtx
3265 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
3266 unsigned int regno)
3267 {
3268 rtx tmp;
3269
3270 if (orig_mode != BLKmode)
3271 tmp = gen_rtx_REG (orig_mode, regno);
3272 else
3273 {
3274 tmp = gen_rtx_REG (mode, regno);
3275 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
3276 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
3277 }
3278
3279 return tmp;
3280 }
3281
3282 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
3283 of this code is to classify each 8 bytes of an incoming argument by the register
3284 class and assign registers accordingly. */
3285
3286 /* Return the union class of CLASS1 and CLASS2.
3287 See the x86-64 PS ABI for details. */
3288
3289 static enum x86_64_reg_class
3290 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
3291 {
3292 /* Rule #1: If both classes are equal, this is the resulting class. */
3293 if (class1 == class2)
3294 return class1;
3295
3296 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
3297 the other class. */
3298 if (class1 == X86_64_NO_CLASS)
3299 return class2;
3300 if (class2 == X86_64_NO_CLASS)
3301 return class1;
3302
3303 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
3304 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
3305 return X86_64_MEMORY_CLASS;
3306
3307 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
3308 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
3309 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
3310 return X86_64_INTEGERSI_CLASS;
3311 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
3312 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
3313 return X86_64_INTEGER_CLASS;
3314
3315 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
3316 MEMORY is used. */
3317 if (class1 == X86_64_X87_CLASS
3318 || class1 == X86_64_X87UP_CLASS
3319 || class1 == X86_64_COMPLEX_X87_CLASS
3320 || class2 == X86_64_X87_CLASS
3321 || class2 == X86_64_X87UP_CLASS
3322 || class2 == X86_64_COMPLEX_X87_CLASS)
3323 return X86_64_MEMORY_CLASS;
3324
3325 /* Rule #6: Otherwise class SSE is used. */
3326 return X86_64_SSE_CLASS;
3327 }
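/* Worked example (for illustration; not part of the original source):
   for "union { int i; float f; }" the single eightbyte is classified as
   INTEGERSI for the int member and SSESF for the float member; rule #4
   above merges the pair to INTEGERSI, so the union travels in the low
   half of an integer register.  */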
3328
3329 /* Classify the argument of type TYPE and mode MODE.
3330 CLASSES will be filled by the register class used to pass each word
3331 of the operand. The number of words is returned. In case the parameter
3332 should be passed in memory, 0 is returned. As a special case for zero
3333 sized containers, classes[0] will be NO_CLASS and 1 is returned.
3334
3335 BIT_OFFSET is used internally for handling records and specifies the
3336 offset in bits modulo 256 to avoid overflow cases.
3337
3338 See the x86-64 PS ABI for details.
3339 */
3340
3341 static int
3342 classify_argument (enum machine_mode mode, tree type,
3343 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
3344 {
3345 HOST_WIDE_INT bytes =
3346 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3347 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3348
3349 /* Variable sized entities are always passed/returned in memory. */
3350 if (bytes < 0)
3351 return 0;
3352
3353 if (mode != VOIDmode
3354 && targetm.calls.must_pass_in_stack (mode, type))
3355 return 0;
3356
3357 if (type && AGGREGATE_TYPE_P (type))
3358 {
3359 int i;
3360 tree field;
3361 enum x86_64_reg_class subclasses[MAX_CLASSES];
3362
3363 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
3364 if (bytes > 16)
3365 return 0;
3366
3367 for (i = 0; i < words; i++)
3368 classes[i] = X86_64_NO_CLASS;
3369
3370 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
3371 signal the memory class, so handle it as a special case. */
3372 if (!words)
3373 {
3374 classes[0] = X86_64_NO_CLASS;
3375 return 1;
3376 }
3377
3378 /* Classify each field of record and merge classes. */
3379 switch (TREE_CODE (type))
3380 {
3381 case RECORD_TYPE:
3382 /* And now merge the fields of structure. */
3383 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3384 {
3385 if (TREE_CODE (field) == FIELD_DECL)
3386 {
3387 int num;
3388
3389 if (TREE_TYPE (field) == error_mark_node)
3390 continue;
3391
3392 /* Bitfields are always classified as integer. Handle them
3393 early, since later code would consider them to be
3394 misaligned integers. */
3395 if (DECL_BIT_FIELD (field))
3396 {
3397 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3398 i < ((int_bit_position (field) + (bit_offset % 64))
3399 + tree_low_cst (DECL_SIZE (field), 0)
3400 + 63) / 8 / 8; i++)
3401 classes[i] =
3402 merge_classes (X86_64_INTEGER_CLASS,
3403 classes[i]);
3404 }
3405 else
3406 {
3407 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3408 TREE_TYPE (field), subclasses,
3409 (int_bit_position (field)
3410 + bit_offset) % 256);
3411 if (!num)
3412 return 0;
3413 for (i = 0; i < num; i++)
3414 {
3415 int pos =
3416 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3417 classes[i + pos] =
3418 merge_classes (subclasses[i], classes[i + pos]);
3419 }
3420 }
3421 }
3422 }
3423 break;
3424
3425 case ARRAY_TYPE:
3426 /* Arrays are handled as small records. */
3427 {
3428 int num;
3429 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
3430 TREE_TYPE (type), subclasses, bit_offset);
3431 if (!num)
3432 return 0;
3433
3434 /* The partial classes are now full classes. */
3435 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
3436 subclasses[0] = X86_64_SSE_CLASS;
3437 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
3438 subclasses[0] = X86_64_INTEGER_CLASS;
3439
3440 for (i = 0; i < words; i++)
3441 classes[i] = subclasses[i % num];
3442
3443 break;
3444 }
3445 case UNION_TYPE:
3446 case QUAL_UNION_TYPE:
3447 /* Unions are similar to RECORD_TYPE but the offset is always 0. */
3449 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3450 {
3451 if (TREE_CODE (field) == FIELD_DECL)
3452 {
3453 int num;
3454
3455 if (TREE_TYPE (field) == error_mark_node)
3456 continue;
3457
3458 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3459 TREE_TYPE (field), subclasses,
3460 bit_offset);
3461 if (!num)
3462 return 0;
3463 for (i = 0; i < num; i++)
3464 classes[i] = merge_classes (subclasses[i], classes[i]);
3465 }
3466 }
3467 break;
3468
3469 default:
3470 gcc_unreachable ();
3471 }
3472
3473 /* Final merger cleanup. */
3474 for (i = 0; i < words; i++)
3475 {
3476 /* If one class is MEMORY, everything should be passed in
3477 memory. */
3478 if (classes[i] == X86_64_MEMORY_CLASS)
3479 return 0;
3480
3481 /* The X86_64_SSEUP_CLASS should always be preceded by
3482 X86_64_SSE_CLASS. */
3483 if (classes[i] == X86_64_SSEUP_CLASS
3484 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
3485 classes[i] = X86_64_SSE_CLASS;
3486
3487 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
3488 if (classes[i] == X86_64_X87UP_CLASS
3489 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
3490 classes[i] = X86_64_SSE_CLASS;
3491 }
3492 return words;
3493 }
3494
3495 /* Compute the alignment needed. We align all types to natural boundaries,
3496 with the exception of XFmode, which is aligned to 64 bits. */
3497 if (mode != VOIDmode && mode != BLKmode)
3498 {
3499 int mode_alignment = GET_MODE_BITSIZE (mode);
3500
3501 if (mode == XFmode)
3502 mode_alignment = 128;
3503 else if (mode == XCmode)
3504 mode_alignment = 256;
3505 if (COMPLEX_MODE_P (mode))
3506 mode_alignment /= 2;
3507 /* Misaligned fields are always returned in memory. */
3508 if (bit_offset % mode_alignment)
3509 return 0;
3510 }
3511
3512 /* For V1xx modes, just use the base mode. */
3513 if (VECTOR_MODE_P (mode)
3514 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
3515 mode = GET_MODE_INNER (mode);
3516
3517 /* Classification of atomic types. */
3518 switch (mode)
3519 {
3520 case SDmode:
3521 case DDmode:
3522 classes[0] = X86_64_SSE_CLASS;
3523 return 1;
3524 case TDmode:
3525 classes[0] = X86_64_SSE_CLASS;
3526 classes[1] = X86_64_SSEUP_CLASS;
3527 return 2;
3528 case DImode:
3529 case SImode:
3530 case HImode:
3531 case QImode:
3532 case CSImode:
3533 case CHImode:
3534 case CQImode:
3535 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3536 classes[0] = X86_64_INTEGERSI_CLASS;
3537 else
3538 classes[0] = X86_64_INTEGER_CLASS;
3539 return 1;
3540 case CDImode:
3541 case TImode:
3542 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
3543 return 2;
3544 case CTImode:
3545 return 0;
3546 case SFmode:
3547 if (!(bit_offset % 64))
3548 classes[0] = X86_64_SSESF_CLASS;
3549 else
3550 classes[0] = X86_64_SSE_CLASS;
3551 return 1;
3552 case DFmode:
3553 classes[0] = X86_64_SSEDF_CLASS;
3554 return 1;
3555 case XFmode:
3556 classes[0] = X86_64_X87_CLASS;
3557 classes[1] = X86_64_X87UP_CLASS;
3558 return 2;
3559 case TFmode:
3560 classes[0] = X86_64_SSE_CLASS;
3561 classes[1] = X86_64_SSEUP_CLASS;
3562 return 2;
3563 case SCmode:
3564 classes[0] = X86_64_SSE_CLASS;
3565 return 1;
3566 case DCmode:
3567 classes[0] = X86_64_SSEDF_CLASS;
3568 classes[1] = X86_64_SSEDF_CLASS;
3569 return 2;
3570 case XCmode:
3571 classes[0] = X86_64_COMPLEX_X87_CLASS;
3572 return 1;
3573 case TCmode:
3574 /* This mode is larger than 16 bytes. */
3575 return 0;
3576 case V4SFmode:
3577 case V4SImode:
3578 case V16QImode:
3579 case V8HImode:
3580 case V2DFmode:
3581 case V2DImode:
3582 classes[0] = X86_64_SSE_CLASS;
3583 classes[1] = X86_64_SSEUP_CLASS;
3584 return 2;
3585 case V2SFmode:
3586 case V2SImode:
3587 case V4HImode:
3588 case V8QImode:
3589 classes[0] = X86_64_SSE_CLASS;
3590 return 1;
3591 case BLKmode:
3592 case VOIDmode:
3593 return 0;
3594 default:
3595 gcc_assert (VECTOR_MODE_P (mode));
3596
3597 if (bytes > 16)
3598 return 0;
3599
3600 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
3601
3602 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3603 classes[0] = X86_64_INTEGERSI_CLASS;
3604 else
3605 classes[0] = X86_64_INTEGER_CLASS;
3606 classes[1] = X86_64_INTEGER_CLASS;
3607 return 1 + (bytes > 8);
3608 }
3609 }
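/* Worked classification example (illustrative comment only, not in the
   original file): "struct { double d; int i; }" occupies 16 bytes, so two
   eightbytes are classified.  The double makes eightbyte 0 SSEDF and the
   int makes eightbyte 1 INTEGER, so classify_argument returns 2 with
   classes[] = { X86_64_SSEDF_CLASS, X86_64_INTEGER_CLASS };
   construct_container below then places the first eightbyte in an SSE
   register and the second in an integer register.  */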
3610
3611 /* Examine the argument and return the number of registers required in each
3612 class. Return 0 iff the parameter should be passed in memory. */
3613 static int
3614 examine_argument (enum machine_mode mode, tree type, int in_return,
3615 int *int_nregs, int *sse_nregs)
3616 {
3617 enum x86_64_reg_class regclass[MAX_CLASSES];
3618 int n = classify_argument (mode, type, regclass, 0);
3619
3620 *int_nregs = 0;
3621 *sse_nregs = 0;
3622 if (!n)
3623 return 0;
3624 for (n--; n >= 0; n--)
3625 switch (regclass[n])
3626 {
3627 case X86_64_INTEGER_CLASS:
3628 case X86_64_INTEGERSI_CLASS:
3629 (*int_nregs)++;
3630 break;
3631 case X86_64_SSE_CLASS:
3632 case X86_64_SSESF_CLASS:
3633 case X86_64_SSEDF_CLASS:
3634 (*sse_nregs)++;
3635 break;
3636 case X86_64_NO_CLASS:
3637 case X86_64_SSEUP_CLASS:
3638 break;
3639 case X86_64_X87_CLASS:
3640 case X86_64_X87UP_CLASS:
3641 if (!in_return)
3642 return 0;
3643 break;
3644 case X86_64_COMPLEX_X87_CLASS:
3645 return in_return ? 2 : 0;
3646 case X86_64_MEMORY_CLASS:
3647 gcc_unreachable ();
3648 }
3649 return 1;
3650 }
3651
3652 /* Construct the container for the argument used by the GCC interface. See
3653 FUNCTION_ARG for the detailed description. */
3654
3655 static rtx
3656 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
3657 tree type, int in_return, int nintregs, int nsseregs,
3658 const int *intreg, int sse_regno)
3659 {
3660 /* The following variables hold the static issued_error state. */
3661 static bool issued_sse_arg_error;
3662 static bool issued_sse_ret_error;
3663 static bool issued_x87_ret_error;
3664
3665 enum machine_mode tmpmode;
3666 int bytes =
3667 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3668 enum x86_64_reg_class regclass[MAX_CLASSES];
3669 int n;
3670 int i;
3671 int nexps = 0;
3672 int needed_sseregs, needed_intregs;
3673 rtx exp[MAX_CLASSES];
3674 rtx ret;
3675
3676 n = classify_argument (mode, type, regclass, 0);
3677 if (!n)
3678 return NULL;
3679 if (!examine_argument (mode, type, in_return, &needed_intregs,
3680 &needed_sseregs))
3681 return NULL;
3682 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
3683 return NULL;
3684
3685 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
3686 some less clueful developer tries to use floating-point anyway. */
3687 if (needed_sseregs && !TARGET_SSE)
3688 {
3689 if (in_return)
3690 {
3691 if (!issued_sse_ret_error)
3692 {
3693 error ("SSE register return with SSE disabled");
3694 issued_sse_ret_error = true;
3695 }
3696 }
3697 else if (!issued_sse_arg_error)
3698 {
3699 error ("SSE register argument with SSE disabled");
3700 issued_sse_arg_error = true;
3701 }
3702 return NULL;
3703 }
3704
3705 /* Likewise, error if the ABI requires us to return values in the
3706 x87 registers and the user specified -mno-80387. */
3707 if (!TARGET_80387 && in_return)
3708 for (i = 0; i < n; i++)
3709 if (regclass[i] == X86_64_X87_CLASS
3710 || regclass[i] == X86_64_X87UP_CLASS
3711 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
3712 {
3713 if (!issued_x87_ret_error)
3714 {
3715 error ("x87 register return with x87 disabled");
3716 issued_x87_ret_error = true;
3717 }
3718 return NULL;
3719 }
3720
3721 /* First construct simple cases. Avoid SCmode, since we want to use
3722 a single register to pass this type. */
3723 if (n == 1 && mode != SCmode)
3724 switch (regclass[0])
3725 {
3726 case X86_64_INTEGER_CLASS:
3727 case X86_64_INTEGERSI_CLASS:
3728 return gen_rtx_REG (mode, intreg[0]);
3729 case X86_64_SSE_CLASS:
3730 case X86_64_SSESF_CLASS:
3731 case X86_64_SSEDF_CLASS:
3732 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
3733 case X86_64_X87_CLASS:
3734 case X86_64_COMPLEX_X87_CLASS:
3735 return gen_rtx_REG (mode, FIRST_STACK_REG);
3736 case X86_64_NO_CLASS:
3737 /* Zero sized array, struct or class. */
3738 return NULL;
3739 default:
3740 gcc_unreachable ();
3741 }
3742 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
3743 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
3744 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
3745
3746 if (n == 2
3747 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
3748 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
3749 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
3750 && regclass[1] == X86_64_INTEGER_CLASS
3751 && (mode == CDImode || mode == TImode || mode == TFmode)
3752 && intreg[0] + 1 == intreg[1])
3753 return gen_rtx_REG (mode, intreg[0]);
3754
3755 /* Otherwise figure out the entries of the PARALLEL. */
3756 for (i = 0; i < n; i++)
3757 {
3758 switch (regclass[i])
3759 {
3760 case X86_64_NO_CLASS:
3761 break;
3762 case X86_64_INTEGER_CLASS:
3763 case X86_64_INTEGERSI_CLASS:
3764 /* Merge TImodes on aligned occasions here too. */
3765 if (i * 8 + 8 > bytes)
3766 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
3767 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
3768 tmpmode = SImode;
3769 else
3770 tmpmode = DImode;
3771 /* We've requested a size (e.g. 24 bits) for which there is no integer mode; use DImode. */
3772 if (tmpmode == BLKmode)
3773 tmpmode = DImode;
3774 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3775 gen_rtx_REG (tmpmode, *intreg),
3776 GEN_INT (i*8));
3777 intreg++;
3778 break;
3779 case X86_64_SSESF_CLASS:
3780 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3781 gen_rtx_REG (SFmode,
3782 SSE_REGNO (sse_regno)),
3783 GEN_INT (i*8));
3784 sse_regno++;
3785 break;
3786 case X86_64_SSEDF_CLASS:
3787 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3788 gen_rtx_REG (DFmode,
3789 SSE_REGNO (sse_regno)),
3790 GEN_INT (i*8));
3791 sse_regno++;
3792 break;
3793 case X86_64_SSE_CLASS:
3794 if (i < n - 1 && regclass[i + 1] == X86_64_SSEUP_CLASS)
3795 tmpmode = TImode;
3796 else
3797 tmpmode = DImode;
3798 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3799 gen_rtx_REG (tmpmode,
3800 SSE_REGNO (sse_regno)),
3801 GEN_INT (i*8));
3802 if (tmpmode == TImode)
3803 i++;
3804 sse_regno++;
3805 break;
3806 default:
3807 gcc_unreachable ();
3808 }
3809 }
3810
3811 /* Empty aligned struct, union or class. */
3812 if (nexps == 0)
3813 return NULL;
3814
3815 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
3816 for (i = 0; i < nexps; i++)
3817 XVECEXP (ret, 0, i) = exp [i];
3818 return ret;
3819 }
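/* Illustrative note (added for exposition, not original): for a single
   __m128 argument the classification is SSE followed by SSEUP, so the
   "n == 2" shortcut above returns a plain (reg:V4SF xmm) rather than a
   PARALLEL; only mixed or multi-register aggregates fall through to the
   PARALLEL-building loop.  */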
3820
3821 /* Update the data in CUM to advance over an argument of mode MODE
3822 and data type TYPE. (TYPE is null for libcalls where that information
3823 may not be available.) */
3824
3825 static void
3826 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3827 tree type, HOST_WIDE_INT bytes, HOST_WIDE_INT words)
3828 {
3829 switch (mode)
3830 {
3831 default:
3832 break;
3833
3834 case BLKmode:
3835 if (bytes < 0)
3836 break;
3837 /* FALLTHRU */
3838
3839 case DImode:
3840 case SImode:
3841 case HImode:
3842 case QImode:
3843 cum->words += words;
3844 cum->nregs -= words;
3845 cum->regno += words;
3846
3847 if (cum->nregs <= 0)
3848 {
3849 cum->nregs = 0;
3850 cum->regno = 0;
3851 }
3852 break;
3853
3854 case DFmode:
3855 if (cum->float_in_sse < 2)
3856 break;
3857 case SFmode:
3858 if (cum->float_in_sse < 1)
3859 break;
3860 /* FALLTHRU */
3861
3862 case TImode:
3863 case V16QImode:
3864 case V8HImode:
3865 case V4SImode:
3866 case V2DImode:
3867 case V4SFmode:
3868 case V2DFmode:
3869 if (!type || !AGGREGATE_TYPE_P (type))
3870 {
3871 cum->sse_words += words;
3872 cum->sse_nregs -= 1;
3873 cum->sse_regno += 1;
3874 if (cum->sse_nregs <= 0)
3875 {
3876 cum->sse_nregs = 0;
3877 cum->sse_regno = 0;
3878 }
3879 }
3880 break;
3881
3882 case V8QImode:
3883 case V4HImode:
3884 case V2SImode:
3885 case V2SFmode:
3886 if (!type || !AGGREGATE_TYPE_P (type))
3887 {
3888 cum->mmx_words += words;
3889 cum->mmx_nregs -= 1;
3890 cum->mmx_regno += 1;
3891 if (cum->mmx_nregs <= 0)
3892 {
3893 cum->mmx_nregs = 0;
3894 cum->mmx_regno = 0;
3895 }
3896 }
3897 break;
3898 }
3899 }
3900
3901 static void
3902 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3903 tree type, HOST_WIDE_INT words)
3904 {
3905 int int_nregs, sse_nregs;
3906
3907 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
3908 cum->words += words;
3909 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
3910 {
3911 cum->nregs -= int_nregs;
3912 cum->sse_nregs -= sse_nregs;
3913 cum->regno += int_nregs;
3914 cum->sse_regno += sse_nregs;
3915 }
3916 else
3917 cum->words += words;
3918 }
3919
3920 static void
3921 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
3922 HOST_WIDE_INT words)
3923 {
3924 /* Anything else should have been passed indirectly; see ix86_pass_by_reference. */
3925 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
3926
3927 cum->words += words;
3928 if (cum->nregs > 0)
3929 {
3930 cum->nregs -= 1;
3931 cum->regno += 1;
3932 }
3933 }
3934
3935 void
3936 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3937 tree type, int named ATTRIBUTE_UNUSED)
3938 {
3939 HOST_WIDE_INT bytes, words;
3940
3941 if (mode == BLKmode)
3942 bytes = int_size_in_bytes (type);
3943 else
3944 bytes = GET_MODE_SIZE (mode);
3945 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3946
3947 if (type)
3948 mode = type_natural_mode (type);
3949
3950 if (TARGET_64BIT_MS_ABI)
3951 function_arg_advance_ms_64 (cum, bytes, words);
3952 else if (TARGET_64BIT)
3953 function_arg_advance_64 (cum, mode, type, words);
3954 else
3955 function_arg_advance_32 (cum, mode, type, bytes, words);
3956 }
3957
3958 /* Define where to put the arguments to a function.
3959 Value is zero to push the argument on the stack,
3960 or a hard register in which to store the argument.
3961
3962 MODE is the argument's machine mode.
3963 TYPE is the data type of the argument (as a tree).
3964 This is null for libcalls where that information may
3965 not be available.
3966 CUM is a variable of type CUMULATIVE_ARGS which gives info about
3967 the preceding args and about the function being called.
3968 NAMED is nonzero if this argument is a named parameter
3969 (otherwise it is an extra parameter matching an ellipsis). */
3970
3971 static rtx
3972 function_arg_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3973 enum machine_mode orig_mode, tree type,
3974 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
3975 {
3976 static bool warnedsse, warnedmmx;
3977
3978 /* Avoid the AL settings for the Unix64 ABI. */
3979 if (mode == VOIDmode)
3980 return constm1_rtx;
3981
3982 switch (mode)
3983 {
3984 default:
3985 break;
3986
3987 case BLKmode:
3988 if (bytes < 0)
3989 break;
3990 /* FALLTHRU */
3991 case DImode:
3992 case SImode:
3993 case HImode:
3994 case QImode:
3995 if (words <= cum->nregs)
3996 {
3997 int regno = cum->regno;
3998
3999 /* Fastcall allocates the first two DWORD (SImode) or
4000 smaller arguments to ECX and EDX. */
4001 if (cum->fastcall)
4002 {
4003 if (mode == BLKmode || mode == DImode)
4004 break;
4005
4006 /* ECX, not EAX, is the first allocated register. */
4007 if (regno == 0)
4008 regno = 2;
4009 }
4010 return gen_rtx_REG (mode, regno);
4011 }
4012 break;
4013
4014 case DFmode:
4015 if (cum->float_in_sse < 2)
4016 break;
4017 case SFmode:
4018 if (cum->float_in_sse < 1)
4019 break;
4020 /* FALLTHRU */
4021 case TImode:
4022 case V16QImode:
4023 case V8HImode:
4024 case V4SImode:
4025 case V2DImode:
4026 case V4SFmode:
4027 case V2DFmode:
4028 if (!type || !AGGREGATE_TYPE_P (type))
4029 {
4030 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
4031 {
4032 warnedsse = true;
4033 warning (0, "SSE vector argument without SSE enabled "
4034 "changes the ABI");
4035 }
4036 if (cum->sse_nregs)
4037 return gen_reg_or_parallel (mode, orig_mode,
4038 cum->sse_regno + FIRST_SSE_REG);
4039 }
4040 break;
4041
4042 case V8QImode:
4043 case V4HImode:
4044 case V2SImode:
4045 case V2SFmode:
4046 if (!type || !AGGREGATE_TYPE_P (type))
4047 {
4048 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
4049 {
4050 warnedmmx = true;
4051 warning (0, "MMX vector argument without MMX enabled "
4052 "changes the ABI");
4053 }
4054 if (cum->mmx_nregs)
4055 return gen_reg_or_parallel (mode, orig_mode,
4056 cum->mmx_regno + FIRST_MMX_REG);
4057 }
4058 break;
4059 }
4060
4061 return NULL_RTX;
4062 }
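/* Usage example for the 32-bit conventions above (illustrative only; the
   declarations are not part of this file):

       int __attribute__ ((fastcall)) f (int a, int b, int c);
       int __attribute__ ((regparm (3))) g (int a, int b, int c);

   For F, A goes in %ecx (the regno == 0 -> 2 adjustment above), B in %edx
   and C on the stack; for G the three arguments go in %eax, %edx and
   %ecx.  */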
4063
4064 static rtx
4065 function_arg_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4066 enum machine_mode orig_mode, tree type)
4067 {
4068 /* Handle a hidden AL argument containing the number of SSE registers
4069 for varargs x86-64 functions. */
4070 if (mode == VOIDmode)
4071 return GEN_INT (cum->maybe_vaarg
4072 ? (cum->sse_nregs < 0
4073 ? SSE_REGPARM_MAX
4074 : cum->sse_regno)
4075 : -1);
4076
4077 return construct_container (mode, orig_mode, type, 0, cum->nregs,
4078 cum->sse_nregs,
4079 &x86_64_int_parameter_registers [cum->regno],
4080 cum->sse_regno);
4081 }
4082
4083 static rtx
4084 function_arg_ms_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4085 enum machine_mode orig_mode, int named)
4086 {
4087 unsigned int regno;
4088
4089 /* Avoid the AL settings for the Unix64 ABI. */
4090 if (mode == VOIDmode)
4091 return constm1_rtx;
4092
4093 /* If we've run out of registers, it goes on the stack. */
4094 if (cum->nregs == 0)
4095 return NULL_RTX;
4096
4097 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
4098
4099 /* Only floating point modes are passed in anything but integer regs. */
4100 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
4101 {
4102 if (named)
4103 regno = cum->regno + FIRST_SSE_REG;
4104 else
4105 {
4106 rtx t1, t2;
4107
4108 /* Unnamed floating parameters are passed in both the
4109 SSE and integer registers. */
4110 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
4111 t2 = gen_rtx_REG (mode, regno);
4112 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
4113 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
4114 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
4115 }
4116 }
4117
4118 return gen_reg_or_parallel (mode, orig_mode, regno);
4119 }
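/* Illustrative summary of the convention implemented above (comment only,
   not from the original source): the first four arguments occupy the
   fixed slots RCX, RDX, R8 and R9 (x86_64_ms_abi_int_parameter_registers);
   a named float or double instead uses the XMM register of the same slot
   (XMM0-XMM3), while an unnamed floating argument is duplicated into both
   the XMM and the integer register, as the PARALLEL above shows.  */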
4120
4121 rtx
4122 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
4123 tree type, int named)
4124 {
4125 enum machine_mode mode = omode;
4126 HOST_WIDE_INT bytes, words;
4127
4128 if (mode == BLKmode)
4129 bytes = int_size_in_bytes (type);
4130 else
4131 bytes = GET_MODE_SIZE (mode);
4132 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4133
4134 /* To simplify the code below, represent vector types with a vector mode
4135 even if MMX/SSE are not active. */
4136 if (type && TREE_CODE (type) == VECTOR_TYPE)
4137 mode = type_natural_mode (type);
4138
4139 if (TARGET_64BIT_MS_ABI)
4140 return function_arg_ms_64 (cum, mode, omode, named);
4141 else if (TARGET_64BIT)
4142 return function_arg_64 (cum, mode, omode, type);
4143 else
4144 return function_arg_32 (cum, mode, omode, type, bytes, words);
4145 }
4146
4147 /* A C expression that indicates when an argument must be passed by
4148 reference. If nonzero for an argument, a copy of that argument is
4149 made in memory and a pointer to the argument is passed instead of
4150 the argument itself. The pointer is passed in whatever way is
4151 appropriate for passing a pointer to that type. */
4152
4153 static bool
4154 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
4155 enum machine_mode mode ATTRIBUTE_UNUSED,
4156 tree type, bool named ATTRIBUTE_UNUSED)
4157 {
4158 if (TARGET_64BIT_MS_ABI)
4159 {
4160 if (type)
4161 {
4162 /* Arrays are passed by reference. */
4163 if (TREE_CODE (type) == ARRAY_TYPE)
4164 return true;
4165
4166 if (AGGREGATE_TYPE_P (type))
4167 {
4168 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
4169 are passed by reference. */
4170 int el2 = exact_log2 (int_size_in_bytes (type));
4171 return !(el2 >= 0 && el2 <= 3);
4172 }
4173 }
4174
4175 /* __m128 is passed by reference. */
4176 /* ??? How to handle complex? For now treat them as structs,
4177 and pass them by reference if they're too large. */
4178 if (GET_MODE_SIZE (mode) > 8)
4179 return true;
4180 }
4181 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
4182 return 1;
4183
4184 return 0;
4185 }
4186
4187 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
4188 ABI. Only called if TARGET_SSE. */
4189 static bool
4190 contains_128bit_aligned_vector_p (tree type)
4191 {
4192 enum machine_mode mode = TYPE_MODE (type);
4193 if (SSE_REG_MODE_P (mode)
4194 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
4195 return true;
4196 if (TYPE_ALIGN (type) < 128)
4197 return false;
4198
4199 if (AGGREGATE_TYPE_P (type))
4200 {
4201 /* Walk the aggregates recursively. */
4202 switch (TREE_CODE (type))
4203 {
4204 case RECORD_TYPE:
4205 case UNION_TYPE:
4206 case QUAL_UNION_TYPE:
4207 {
4208 tree field;
4209
4210 /* Walk all the structure fields. */
4211 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
4212 {
4213 if (TREE_CODE (field) == FIELD_DECL
4214 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
4215 return true;
4216 }
4217 break;
4218 }
4219
4220 case ARRAY_TYPE:
4221 /* Just for use if some language passes arrays by value. */
4222 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
4223 return true;
4224 break;
4225
4226 default:
4227 gcc_unreachable ();
4228 }
4229 }
4230 return false;
4231 }
4232
4233 /* Gives the alignment boundary, in bits, of an argument with the
4234 specified mode and type. */
4235
4236 int
4237 ix86_function_arg_boundary (enum machine_mode mode, tree type)
4238 {
4239 int align;
4240 if (type)
4241 align = TYPE_ALIGN (type);
4242 else
4243 align = GET_MODE_ALIGNMENT (mode);
4244 if (align < PARM_BOUNDARY)
4245 align = PARM_BOUNDARY;
4246 if (!TARGET_64BIT)
4247 {
4248 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
4249 make an exception for SSE modes since these require 128bit
4250 alignment.
4251
4252 The handling here differs from field_alignment. ICC aligns MMX
4253 arguments to 4 byte boundaries, while structure fields are aligned
4254 to 8 byte boundaries. */
4255 if (!TARGET_SSE)
4256 align = PARM_BOUNDARY;
4257 else if (!type)
4258 {
4259 if (!SSE_REG_MODE_P (mode))
4260 align = PARM_BOUNDARY;
4261 }
4262 else
4263 {
4264 if (!contains_128bit_aligned_vector_p (type))
4265 align = PARM_BOUNDARY;
4266 }
4267 }
4268 if (align > 128)
4269 align = 128;
4270 return align;
4271 }
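/* Example of the resulting boundaries (illustrative, not part of the
   original file): on a 32-bit target a plain "double" argument stays at
   the 32-bit PARM_BOUNDARY, while an "__m128" argument, or a structure
   containing one, is aligned to 128 bits; 64-bit targets simply use the
   type's natural alignment, capped at 128 bits.  */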
4272
4273 /* Return true if N is a possible register number of function value. */
4274
4275 bool
4276 ix86_function_value_regno_p (int regno)
4277 {
4278 switch (regno)
4279 {
4280 case 0:
4281 return true;
4282
4283 case FIRST_FLOAT_REG:
4284 if (TARGET_64BIT_MS_ABI)
4285 return false;
4286 return TARGET_FLOAT_RETURNS_IN_80387;
4287
4288 case FIRST_SSE_REG:
4289 return TARGET_SSE;
4290
4291 case FIRST_MMX_REG:
4292 if (TARGET_MACHO || TARGET_64BIT)
4293 return false;
4294 return TARGET_MMX;
4295 }
4296
4297 return false;
4298 }
4299
4300 /* Define how to find the value returned by a function.
4301 VALTYPE is the data type of the value (as a tree).
4302 If the precise function being called is known, FUNC is its FUNCTION_DECL;
4303 otherwise, FUNC is 0. */
4304
4305 static rtx
4306 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
4307 tree fntype, tree fn)
4308 {
4309 unsigned int regno;
4310
4311 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
4312 we normally prevent this case when mmx is not available. However
4313 some ABIs may require the result to be returned like DImode. */
4314 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4315 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
4316
4317 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
4318 we prevent this case when sse is not available. However some ABIs
4319 may require the result to be returned like integer TImode. */
4320 else if (mode == TImode
4321 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4322 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
4323
4324 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
4325 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
4326 regno = FIRST_FLOAT_REG;
4327 else
4328 /* Most things go in %eax. */
4329 regno = 0;
4330
4331 /* Override FP return register with %xmm0 for local functions when
4332 SSE math is enabled or for functions with sseregparm attribute. */
4333 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
4334 {
4335 int sse_level = ix86_function_sseregparm (fntype, fn);
4336 if ((sse_level >= 1 && mode == SFmode)
4337 || (sse_level == 2 && mode == DFmode))
4338 regno = FIRST_SSE_REG;
4339 }
4340
4341 return gen_rtx_REG (orig_mode, regno);
4342 }
4343
4344 static rtx
4345 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
4346 tree valtype)
4347 {
4348 rtx ret;
4349
4350 /* Handle libcalls, which don't provide a type node. */
4351 if (valtype == NULL)
4352 {
4353 switch (mode)
4354 {
4355 case SFmode:
4356 case SCmode:
4357 case DFmode:
4358 case DCmode:
4359 case TFmode:
4360 case SDmode:
4361 case DDmode:
4362 case TDmode:
4363 return gen_rtx_REG (mode, FIRST_SSE_REG);
4364 case XFmode:
4365 case XCmode:
4366 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
4367 case TCmode:
4368 return NULL;
4369 default:
4370 return gen_rtx_REG (mode, 0);
4371 }
4372 }
4373
4374 ret = construct_container (mode, orig_mode, valtype, 1,
4375 REGPARM_MAX, SSE_REGPARM_MAX,
4376 x86_64_int_return_registers, 0);
4377
4378 /* For zero sized structures, construct_container returns NULL, but we
4379 need to keep the rest of the compiler happy by returning a meaningful value. */
4380 if (!ret)
4381 ret = gen_rtx_REG (orig_mode, 0);
4382
4383 return ret;
4384 }
4385
4386 static rtx
4387 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
4388 {
4389 unsigned int regno = 0;
4390
4391 if (TARGET_SSE)
4392 {
4393 if (mode == SFmode || mode == DFmode)
4394 regno = FIRST_SSE_REG;
4395 else if (VECTOR_MODE_P (mode) || GET_MODE_SIZE (mode) == 16)
4396 regno = FIRST_SSE_REG;
4397 }
4398
4399 return gen_rtx_REG (orig_mode, regno);
4400 }
4401
4402 static rtx
4403 ix86_function_value_1 (tree valtype, tree fntype_or_decl,
4404 enum machine_mode orig_mode, enum machine_mode mode)
4405 {
4406 tree fn, fntype;
4407
4408 fn = NULL_TREE;
4409 if (fntype_or_decl && DECL_P (fntype_or_decl))
4410 fn = fntype_or_decl;
4411 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
4412
4413 if (TARGET_64BIT_MS_ABI)
4414 return function_value_ms_64 (orig_mode, mode);
4415 else if (TARGET_64BIT)
4416 return function_value_64 (orig_mode, mode, valtype);
4417 else
4418 return function_value_32 (orig_mode, mode, fntype, fn);
4419 }
4420
4421 static rtx
4422 ix86_function_value (tree valtype, tree fntype_or_decl,
4423 bool outgoing ATTRIBUTE_UNUSED)
4424 {
4425 enum machine_mode mode, orig_mode;
4426
4427 orig_mode = TYPE_MODE (valtype);
4428 mode = type_natural_mode (valtype);
4429 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
4430 }
4431
4432 rtx
4433 ix86_libcall_value (enum machine_mode mode)
4434 {
4435 return ix86_function_value_1 (NULL, NULL, mode, mode);
4436 }
4437
4438 /* Return true iff type is returned in memory. */
4439
4440 static int
4441 return_in_memory_32 (tree type, enum machine_mode mode)
4442 {
4443 HOST_WIDE_INT size;
4444
4445 if (mode == BLKmode)
4446 return 1;
4447
4448 size = int_size_in_bytes (type);
4449
4450 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
4451 return 0;
4452
4453 if (VECTOR_MODE_P (mode) || mode == TImode)
4454 {
4455 /* User-created vectors small enough to fit in EAX. */
4456 if (size < 8)
4457 return 0;
4458
4459 /* MMX/3dNow values are returned in MM0,
4460 except when it doesn't exist. */
4461 if (size == 8)
4462 return (TARGET_MMX ? 0 : 1);
4463
4464 /* SSE values are returned in XMM0, except when it doesn't exist. */
4465 if (size == 16)
4466 return (TARGET_SSE ? 0 : 1);
4467 }
4468
4469 if (mode == XFmode)
4470 return 0;
4471
4472 if (mode == TDmode)
4473 return 1;
4474
4475 if (size > 12)
4476 return 1;
4477 return 0;
4478 }
4479
4480 static int
4481 return_in_memory_64 (tree type, enum machine_mode mode)
4482 {
4483 int needed_intregs, needed_sseregs;
4484 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
4485 }
4486
4487 static int
4488 return_in_memory_ms_64 (tree type, enum machine_mode mode)
4489 {
4490 HOST_WIDE_INT size = int_size_in_bytes (type);
4491
4492 /* __m128 and friends are returned in xmm0. */
4493 if (size == 16 && VECTOR_MODE_P (mode))
4494 return 0;
4495
4496 /* Otherwise, the size must be exactly 1, 2, 4 or 8 bytes. */
4497 return (size != 1 && size != 2 && size != 4 && size != 8);
4498 }
4499
4500 int
4501 ix86_return_in_memory (tree type)
4502 {
4503 enum machine_mode mode = type_natural_mode (type);
4504
4505 if (TARGET_64BIT_MS_ABI)
4506 return return_in_memory_ms_64 (type, mode);
4507 else if (TARGET_64BIT)
4508 return return_in_memory_64 (type, mode);
4509 else
4510 return return_in_memory_32 (type, mode);
4511 }
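/* Illustrative consequences of the rules above (comment added for
   exposition, not original): on x86-64 a 16-byte "struct { long a, b; }"
   is returned in registers because examine_argument finds two INTEGER
   eightbytes, while a 32-byte struct fails classification and is
   returned in memory; under the MS ABI only sizes of 1, 2, 4 or 8 bytes
   (or a 16-byte vector) avoid the memory return.  */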
4512
4513 /* Return true iff TYPE is returned in memory. This version is used
4514 on Solaris 10. It is similar to the generic ix86_return_in_memory,
4515 but differs notably in that when MMX is available, 8-byte vectors
4516 are returned in memory, rather than in MMX registers. */
4517
4518 int
4519 ix86_sol10_return_in_memory (tree type)
4520 {
4521 int size;
4522 enum machine_mode mode = type_natural_mode (type);
4523
4524 if (TARGET_64BIT)
4525 return return_in_memory_64 (type, mode);
4526
4527 if (mode == BLKmode)
4528 return 1;
4529
4530 size = int_size_in_bytes (type);
4531
4532 if (VECTOR_MODE_P (mode))
4533 {
4534 /* Return in memory only if MMX registers *are* available. This
4535 seems backwards, but it is consistent with the existing
4536 Solaris x86 ABI. */
4537 if (size == 8)
4538 return TARGET_MMX;
4539 if (size == 16)
4540 return !TARGET_SSE;
4541 }
4542 else if (mode == TImode)
4543 return !TARGET_SSE;
4544 else if (mode == XFmode)
4545 return 0;
4546
4547 return size > 12;
4548 }
4549
4550 /* When returning SSE vector types, we have a choice of either
4551 (1) being ABI incompatible with a -march switch, or
4552 (2) generating an error.
4553 Given no good solution, I think the safest thing is one warning.
4554 The user won't be able to use -Werror, but....
4555
4556 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
4557 called in response to actually generating a caller or callee that
4558 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
4559 via aggregate_value_p for general type probing from tree-ssa. */
4560
4561 static rtx
4562 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
4563 {
4564 static bool warnedsse, warnedmmx;
4565
4566 if (!TARGET_64BIT && type)
4567 {
4568 /* Look at the return type of the function, not the function type. */
4569 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
4570
4571 if (!TARGET_SSE && !warnedsse)
4572 {
4573 if (mode == TImode
4574 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4575 {
4576 warnedsse = true;
4577 warning (0, "SSE vector return without SSE enabled "
4578 "changes the ABI");
4579 }
4580 }
4581
4582 if (!TARGET_MMX && !warnedmmx)
4583 {
4584 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4585 {
4586 warnedmmx = true;
4587 warning (0, "MMX vector return without MMX enabled "
4588 "changes the ABI");
4589 }
4590 }
4591 }
4592
4593 return NULL;
4594 }
4595
4596 \f
4597 /* Create the va_list data type. */
4598
4599 static tree
4600 ix86_build_builtin_va_list (void)
4601 {
4602 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
4603
4604 /* For i386 we use a plain pointer to the argument area. */
4605 if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
4606 return build_pointer_type (char_type_node);
4607
4608 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
4609 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
4610
4611 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
4612 unsigned_type_node);
4613 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
4614 unsigned_type_node);
4615 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
4616 ptr_type_node);
4617 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
4618 ptr_type_node);
4619
4620 va_list_gpr_counter_field = f_gpr;
4621 va_list_fpr_counter_field = f_fpr;
4622
4623 DECL_FIELD_CONTEXT (f_gpr) = record;
4624 DECL_FIELD_CONTEXT (f_fpr) = record;
4625 DECL_FIELD_CONTEXT (f_ovf) = record;
4626 DECL_FIELD_CONTEXT (f_sav) = record;
4627
4628 TREE_CHAIN (record) = type_decl;
4629 TYPE_NAME (record) = type_decl;
4630 TYPE_FIELDS (record) = f_gpr;
4631 TREE_CHAIN (f_gpr) = f_fpr;
4632 TREE_CHAIN (f_fpr) = f_ovf;
4633 TREE_CHAIN (f_ovf) = f_sav;
4634
4635 layout_type (record);
4636
4637 /* The correct type is an array type of one element. */
4638 return build_array_type (record, build_index_type (size_zero_node));
4639 }
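/* The record built above corresponds to the following C declaration
   (shown for illustration; the typedef itself is not part of this file):

       typedef struct __va_list_tag
       {
         unsigned int gp_offset;
         unsigned int fp_offset;
         void *overflow_arg_area;
         void *reg_save_area;
       } __va_list_tag;

       typedef __va_list_tag va_list[1];  */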
4640
4641 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
4642
4643 static void
4644 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
4645 {
4646 rtx save_area, mem;
4647 rtx label;
4648 rtx label_ref;
4649 rtx tmp_reg;
4650 rtx nsse_reg;
4651 int set;
4652 int i;
4653
4654 if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
4655 return;
4656
4657 /* Indicate that space for the varargs save area should be allocated on the stack. */
4658 ix86_save_varrargs_registers = 1;
4659 cfun->stack_alignment_needed = 128;
4660
4661 save_area = frame_pointer_rtx;
4662 set = get_varargs_alias_set ();
4663
4664 for (i = cum->regno;
4665 i < ix86_regparm
4666 && i < cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
4667 i++)
4668 {
4669 mem = gen_rtx_MEM (Pmode,
4670 plus_constant (save_area, i * UNITS_PER_WORD));
4671 MEM_NOTRAP_P (mem) = 1;
4672 set_mem_alias_set (mem, set);
4673 emit_move_insn (mem, gen_rtx_REG (Pmode,
4674 x86_64_int_parameter_registers[i]));
4675 }
4676
4677 if (cum->sse_nregs && cfun->va_list_fpr_size)
4678 {
4679 /* Now emit code to save SSE registers. The AX parameter contains the
4680 number of SSE parameter registers used to call this function. We use
4681 the sse_prologue_save insn template, which produces a computed jump
4682 across the SSE saves. We need some preparation work to get this working. */
4683
4684 label = gen_label_rtx ();
4685 label_ref = gen_rtx_LABEL_REF (Pmode, label);
4686
4687 /* Compute the address to jump to:
4688 label - eax*4 + nnamed_sse_arguments*4. */
4689 tmp_reg = gen_reg_rtx (Pmode);
4690 nsse_reg = gen_reg_rtx (Pmode);
4691 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
4692 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4693 gen_rtx_MULT (Pmode, nsse_reg,
4694 GEN_INT (4))));
4695 if (cum->sse_regno)
4696 emit_move_insn
4697 (nsse_reg,
4698 gen_rtx_CONST (DImode,
4699 gen_rtx_PLUS (DImode,
4700 label_ref,
4701 GEN_INT (cum->sse_regno * 4))));
4702 else
4703 emit_move_insn (nsse_reg, label_ref);
4704 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
4705
4706 /* Compute the address of the memory block we save into. We always use a
4707 pointer pointing 127 bytes after the first byte to store; this is needed
4708 to keep the instruction size limited to 4 bytes. */
4709 tmp_reg = gen_reg_rtx (Pmode);
4710 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4711 plus_constant (save_area,
4712 8 * REGPARM_MAX + 127)));
4713 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
4714 MEM_NOTRAP_P (mem) = 1;
4715 set_mem_alias_set (mem, set);
4716 set_mem_align (mem, BITS_PER_WORD);
4717
4718 /* And finally do the dirty job! */
4719 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
4720 GEN_INT (cum->sse_regno), label));
4721 }
4722 }
4723
4724 static void
4725 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
4726 {
4727 int set = get_varargs_alias_set ();
4728 int i;
4729
4730 for (i = cum->regno; i < REGPARM_MAX; i++)
4731 {
4732 rtx reg, mem;
4733
4734 mem = gen_rtx_MEM (Pmode,
4735 plus_constant (virtual_incoming_args_rtx,
4736 i * UNITS_PER_WORD));
4737 MEM_NOTRAP_P (mem) = 1;
4738 set_mem_alias_set (mem, set);
4739
4740 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
4741 emit_move_insn (mem, reg);
4742 }
4743 }
4744
4745 static void
4746 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4747 tree type, int *pretend_size ATTRIBUTE_UNUSED,
4748 int no_rtl)
4749 {
4750 CUMULATIVE_ARGS next_cum;
4751 tree fntype;
4752 int stdarg_p;
4753
4754 /* This argument doesn't appear to be used anymore. Which is good,
4755 because the old code here didn't suppress rtl generation. */
4756 gcc_assert (!no_rtl);
4757
4758 if (!TARGET_64BIT)
4759 return;
4760
4761 fntype = TREE_TYPE (current_function_decl);
4762 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
4763 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
4764 != void_type_node));
4765
4766 /* For varargs, we do not want to skip the dummy va_dcl argument.
4767 For stdargs, we do want to skip the last named argument. */
4768 next_cum = *cum;
4769 if (stdarg_p)
4770 function_arg_advance (&next_cum, mode, type, 1);
4771
4772 if (TARGET_64BIT_MS_ABI)
4773 setup_incoming_varargs_ms_64 (&next_cum);
4774 else
4775 setup_incoming_varargs_64 (&next_cum);
4776 }
4777
4778 /* Implement va_start. */
4779
4780 void
4781 ix86_va_start (tree valist, rtx nextarg)
4782 {
4783 HOST_WIDE_INT words, n_gpr, n_fpr;
4784 tree f_gpr, f_fpr, f_ovf, f_sav;
4785 tree gpr, fpr, ovf, sav, t;
4786 tree type;
4787
4788 /* Only the 64-bit target needs something special. */
4789 if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
4790 {
4791 std_expand_builtin_va_start (valist, nextarg);
4792 return;
4793 }
4794
4795 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4796 f_fpr = TREE_CHAIN (f_gpr);
4797 f_ovf = TREE_CHAIN (f_fpr);
4798 f_sav = TREE_CHAIN (f_ovf);
4799
4800 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
4801 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4802 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4803 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4804 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4805
4806 /* Count number of gp and fp argument registers used. */
4807 words = current_function_args_info.words;
4808 n_gpr = current_function_args_info.regno;
4809 n_fpr = current_function_args_info.sse_regno;
4810
4811 if (cfun->va_list_gpr_size)
4812 {
4813 type = TREE_TYPE (gpr);
4814 t = build2 (GIMPLE_MODIFY_STMT, type, gpr,
4815 build_int_cst (type, n_gpr * 8));
4816 TREE_SIDE_EFFECTS (t) = 1;
4817 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4818 }
4819
4820 if (cfun->va_list_fpr_size)
4821 {
4822 type = TREE_TYPE (fpr);
4823 t = build2 (GIMPLE_MODIFY_STMT, type, fpr,
4824 build_int_cst (type, n_fpr * 16 + 8*REGPARM_MAX));
4825 TREE_SIDE_EFFECTS (t) = 1;
4826 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4827 }
4828
4829 /* Find the overflow area. */
4830 type = TREE_TYPE (ovf);
4831 t = make_tree (type, virtual_incoming_args_rtx);
4832 if (words != 0)
4833 t = build2 (PLUS_EXPR, type, t,
4834 build_int_cst (type, words * UNITS_PER_WORD));
4835 t = build2 (GIMPLE_MODIFY_STMT, type, ovf, t);
4836 TREE_SIDE_EFFECTS (t) = 1;
4837 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4838
4839 if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
4840 {
4841 /* Find the register save area.
4842 The function prologue saves it right above the stack frame. */
4843 type = TREE_TYPE (sav);
4844 t = make_tree (type, frame_pointer_rtx);
4845 t = build2 (GIMPLE_MODIFY_STMT, type, sav, t);
4846 TREE_SIDE_EFFECTS (t) = 1;
4847 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4848 }
4849 }
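/* Illustrative values (comment added for exposition, not original): for
   "int f (int a, double b, ...)" the named arguments consume one GP and
   one SSE register, so va_start stores gp_offset = 8 and
   fp_offset = REGPARM_MAX*8 + 16 = 64.  Once gp_offset reaches 48
   (REGPARM_MAX*8) or fp_offset reaches 176 (REGPARM_MAX*8 +
   SSE_REGPARM_MAX*16), va_arg fetches the next argument of that kind
   from the overflow area instead (see ix86_gimplify_va_arg below).  */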
4850
4851 /* Implement va_arg. */
4852
4853 static tree
4854 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
4855 {
4856 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
4857 tree f_gpr, f_fpr, f_ovf, f_sav;
4858 tree gpr, fpr, ovf, sav, t;
4859 int size, rsize;
4860 tree lab_false, lab_over = NULL_TREE;
4861 tree addr, t2;
4862 rtx container;
4863 int indirect_p = 0;
4864 tree ptrtype;
4865 enum machine_mode nat_mode;
4866
4867 /* Only the 64-bit target needs something special. */
4868 if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
4869 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
4870
4871 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4872 f_fpr = TREE_CHAIN (f_gpr);
4873 f_ovf = TREE_CHAIN (f_fpr);
4874 f_sav = TREE_CHAIN (f_ovf);
4875
4876 valist = build_va_arg_indirect_ref (valist);
4877 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4878 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4879 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4880 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4881
4882 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
4883 if (indirect_p)
4884 type = build_pointer_type (type);
4885 size = int_size_in_bytes (type);
4886 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4887
4888 nat_mode = type_natural_mode (type);
4889 container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
4890 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
4891
4892 /* Pull the value out of the saved registers. */
4893
4894 addr = create_tmp_var (ptr_type_node, "addr");
4895 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
4896
4897 if (container)
4898 {
4899 int needed_intregs, needed_sseregs;
4900 bool need_temp;
4901 tree int_addr, sse_addr;
4902
4903 lab_false = create_artificial_label ();
4904 lab_over = create_artificial_label ();
4905
4906 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
4907
4908 need_temp = (!REG_P (container)
4909 && ((needed_intregs && TYPE_ALIGN (type) > 64)
4910 || TYPE_ALIGN (type) > 128));
4911
4912 /* In case we are passing a structure, verify that it is a consecutive
4913 block in the register save area. If not, we need to do moves. */
4914 if (!need_temp && !REG_P (container))
4915 {
4916 /* Verify that all registers are strictly consecutive. */
4917 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
4918 {
4919 int i;
4920
4921 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4922 {
4923 rtx slot = XVECEXP (container, 0, i);
4924 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
4925 || INTVAL (XEXP (slot, 1)) != i * 16)
4926 need_temp = 1;
4927 }
4928 }
4929 else
4930 {
4931 int i;
4932
4933 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4934 {
4935 rtx slot = XVECEXP (container, 0, i);
4936 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
4937 || INTVAL (XEXP (slot, 1)) != i * 8)
4938 need_temp = 1;
4939 }
4940 }
4941 }
4942 if (!need_temp)
4943 {
4944 int_addr = addr;
4945 sse_addr = addr;
4946 }
4947 else
4948 {
4949 int_addr = create_tmp_var (ptr_type_node, "int_addr");
4950 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
4951 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
4952 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
4953 }
4954
4955 /* First ensure that we fit completely in registers. */
4956 if (needed_intregs)
4957 {
4958 t = build_int_cst (TREE_TYPE (gpr),
4959 (REGPARM_MAX - needed_intregs + 1) * 8);
4960 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
4961 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4962 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4963 gimplify_and_add (t, pre_p);
4964 }
4965 if (needed_sseregs)
4966 {
4967 t = build_int_cst (TREE_TYPE (fpr),
4968 (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
4969 + REGPARM_MAX * 8);
4970 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
4971 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4972 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4973 gimplify_and_add (t, pre_p);
4974 }
4975
4976 /* Compute index to start of area used for integer regs. */
4977 if (needed_intregs)
4978 {
4979 /* int_addr = gpr + sav; */
4980 t = fold_convert (ptr_type_node, fold_convert (size_type_node, gpr));
4981 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4982 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, int_addr, t);
4983 gimplify_and_add (t, pre_p);
4984 }
4985 if (needed_sseregs)
4986 {
4987 /* sse_addr = fpr + sav; */
4988 t = fold_convert (ptr_type_node, fold_convert (size_type_node, fpr));
4989 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4990 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, sse_addr, t);
4991 gimplify_and_add (t, pre_p);
4992 }
4993 if (need_temp)
4994 {
4995 int i;
4996 tree temp = create_tmp_var (type, "va_arg_tmp");
4997
4998 /* addr = &temp; */
4999 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
5000 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
5001 gimplify_and_add (t, pre_p);
5002
5003 for (i = 0; i < XVECLEN (container, 0); i++)
5004 {
5005 rtx slot = XVECEXP (container, 0, i);
5006 rtx reg = XEXP (slot, 0);
5007 enum machine_mode mode = GET_MODE (reg);
5008 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
5009 tree addr_type = build_pointer_type (piece_type);
5010 tree src_addr, src;
5011 int src_offset;
5012 tree dest_addr, dest;
5013
5014 if (SSE_REGNO_P (REGNO (reg)))
5015 {
5016 src_addr = sse_addr;
5017 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
5018 }
5019 else
5020 {
5021 src_addr = int_addr;
5022 src_offset = REGNO (reg) * 8;
5023 }
5024 src_addr = fold_convert (addr_type, src_addr);
5025 src_addr = fold_build2 (PLUS_EXPR, addr_type, src_addr,
5026 build_int_cst (addr_type, src_offset));
5027 src = build_va_arg_indirect_ref (src_addr);
5028
5029 dest_addr = fold_convert (addr_type, addr);
5030 dest_addr = fold_build2 (PLUS_EXPR, addr_type, dest_addr,
5031 build_int_cst (addr_type, INTVAL (XEXP (slot, 1))));
5032 dest = build_va_arg_indirect_ref (dest_addr);
5033
5034 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, dest, src);
5035 gimplify_and_add (t, pre_p);
5036 }
5037 }
5038
5039 if (needed_intregs)
5040 {
5041 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
5042 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
5043 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (gpr), gpr, t);
5044 gimplify_and_add (t, pre_p);
5045 }
5046 if (needed_sseregs)
5047 {
5048 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
5049 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
5050 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (fpr), fpr, t);
5051 gimplify_and_add (t, pre_p);
5052 }
5053
5054 t = build1 (GOTO_EXPR, void_type_node, lab_over);
5055 gimplify_and_add (t, pre_p);
5056
5057 t = build1 (LABEL_EXPR, void_type_node, lab_false);
5058 append_to_statement_list (t, pre_p);
5059 }
5060
5061 /* ... otherwise out of the overflow area. */
5062
5063 /* Care for on-stack alignment if needed. */
5064 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64
5065 || integer_zerop (TYPE_SIZE (type)))
5066 t = ovf;
5067 else
5068 {
5069 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
5070 t = build2 (PLUS_EXPR, TREE_TYPE (ovf), ovf,
5071 build_int_cst (TREE_TYPE (ovf), align - 1));
5072 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5073 build_int_cst (TREE_TYPE (t), -align));
5074 }
5075 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
5076
5077 t2 = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
5078 gimplify_and_add (t2, pre_p);
5079
5080 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
5081 build_int_cst (TREE_TYPE (t), rsize * UNITS_PER_WORD));
5082 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (ovf), ovf, t);
5083 gimplify_and_add (t, pre_p);
5084
5085 if (container)
5086 {
5087 t = build1 (LABEL_EXPR, void_type_node, lab_over);
5088 append_to_statement_list (t, pre_p);
5089 }
5090
5091 ptrtype = build_pointer_type (type);
5092 addr = fold_convert (ptrtype, addr);
5093
5094 if (indirect_p)
5095 addr = build_va_arg_indirect_ref (addr);
5096 return build_va_arg_indirect_ref (addr);
5097 }
5098 \f
5099 /* Return nonzero if OPNUM's MEM should be matched
5100 in movabs* patterns. */
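/* Illustrative note (an assumption about the i386.md patterns, not
   stated here): the movabs* patterns cover the x86-64 forms that move
   between the accumulator and a 64-bit absolute address, e.g.

	movabs	foo, %rax

   This predicate lets such a MEM match only when it is not volatile,
   unless volatile_ok permits volatile operands.  */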
5101
5102 int
5103 ix86_check_movabs (rtx insn, int opnum)
5104 {
5105 rtx set, mem;
5106
5107 set = PATTERN (insn);
5108 if (GET_CODE (set) == PARALLEL)
5109 set = XVECEXP (set, 0, 0);
5110 gcc_assert (GET_CODE (set) == SET);
5111 mem = XEXP (set, opnum);
5112 while (GET_CODE (mem) == SUBREG)
5113 mem = SUBREG_REG (mem);
5114 gcc_assert (MEM_P (mem));
5115 return (volatile_ok || !MEM_VOLATILE_P (mem));
5116 }
5117 \f
5118 /* Initialize the table of extra 80387 mathematical constants. */
5119
5120 static void
5121 init_ext_80387_constants (void)
5122 {
5123 static const char * cst[5] =
5124 {
5125 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
5126 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
5127 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
5128 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
5129 "3.1415926535897932385128089594061862044", /* 4: fldpi */
5130 };
5131 int i;
5132
5133 for (i = 0; i < 5; i++)
5134 {
5135 real_from_string (&ext_80387_constants_table[i], cst[i]);
5136 /* Ensure each constant is rounded to XFmode precision. */
5137 real_convert (&ext_80387_constants_table[i],
5138 XFmode, &ext_80387_constants_table[i]);
5139 }
5140
5141 ext_80387_constants_init = 1;
5142 }
5143
5144 /* Return a nonzero index if the constant X can be loaded with a special
5145 80387 instruction, 0 if it cannot, or -1 if X is not an 80387 constant. */
5146
5147 int
5148 standard_80387_constant_p (rtx x)
5149 {
5150 enum machine_mode mode = GET_MODE (x);
5151
5152 REAL_VALUE_TYPE r;
5153
5154 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
5155 return -1;
5156
5157 if (x == CONST0_RTX (mode))
5158 return 1;
5159 if (x == CONST1_RTX (mode))
5160 return 2;
5161
5162 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5163
5164 /* For XFmode constants, try to find a special 80387 instruction when
5165 optimizing for size or on those CPUs that benefit from them. */
5166 if (mode == XFmode
5167 && (optimize_size || TARGET_EXT_80387_CONSTANTS))
5168 {
5169 int i;
5170
5171 if (! ext_80387_constants_init)
5172 init_ext_80387_constants ();
5173
5174 for (i = 0; i < 5; i++)
5175 if (real_identical (&r, &ext_80387_constants_table[i]))
5176 return i + 3;
5177 }
5178
5179 /* A load of the constant -0.0 or -1.0 will be split into an
5180 fldz;fchs or fld1;fchs sequence. */
5181 if (real_isnegzero (&r))
5182 return 8;
5183 if (real_identical (&r, &dconstm1))
5184 return 9;
5185
5186 return 0;
5187 }
5188
5189 /* Return the opcode of the special instruction to be used to load
5190 the constant X. */
5191
5192 const char *
5193 standard_80387_constant_opcode (rtx x)
5194 {
5195 switch (standard_80387_constant_p (x))
5196 {
5197 case 1:
5198 return "fldz";
5199 case 2:
5200 return "fld1";
5201 case 3:
5202 return "fldlg2";
5203 case 4:
5204 return "fldln2";
5205 case 5:
5206 return "fldl2e";
5207 case 6:
5208 return "fldl2t";
5209 case 7:
5210 return "fldpi";
5211 case 8:
5212 case 9:
5213 return "#";
5214 default:
5215 gcc_unreachable ();
5216 }
5217 }
5218
5219 /* Return the CONST_DOUBLE representing the 80387 constant that is
5220 loaded by the specified special instruction. The argument IDX
5221 matches the return value from standard_80387_constant_p. */
5222
5223 rtx
5224 standard_80387_constant_rtx (int idx)
5225 {
5226 int i;
5227
5228 if (! ext_80387_constants_init)
5229 init_ext_80387_constants ();
5230
5231 switch (idx)
5232 {
5233 case 3:
5234 case 4:
5235 case 5:
5236 case 6:
5237 case 7:
5238 i = idx - 3;
5239 break;
5240
5241 default:
5242 gcc_unreachable ();
5243 }
5244
5245 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
5246 XFmode);
5247 }
5248
5249 /* Return 1 if MODE is a valid mode for SSE. */
5250 static int
5251 standard_sse_mode_p (enum machine_mode mode)
5252 {
5253 switch (mode)
5254 {
5255 case V16QImode:
5256 case V8HImode:
5257 case V4SImode:
5258 case V2DImode:
5259 case V4SFmode:
5260 case V2DFmode:
5261 return 1;
5262
5263 default:
5264 return 0;
5265 }
5266 }
5267
5268 /* Return nonzero if X is an FP constant that we can load into an SSE
5269 register without using memory. */
5270 int
5271 standard_sse_constant_p (rtx x)
5272 {
5273 enum machine_mode mode = GET_MODE (x);
5274
5275 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
5276 return 1;
5277 if (vector_all_ones_operand (x, mode)
5278 && standard_sse_mode_p (mode))
5279 return TARGET_SSE2 ? 2 : -1;
5280
5281 return 0;
5282 }
5283
5284 /* Return the opcode of the special instruction to be used to load
5285 the constant X. */
5286
5287 const char *
5288 standard_sse_constant_opcode (rtx insn, rtx x)
5289 {
5290 switch (standard_sse_constant_p (x))
5291 {
5292 case 1:
5293 if (get_attr_mode (insn) == MODE_V4SF)
5294 return "xorps\t%0, %0";
5295 else if (get_attr_mode (insn) == MODE_V2DF)
5296 return "xorpd\t%0, %0";
5297 else
5298 return "pxor\t%0, %0";
5299 case 2:
5300 return "pcmpeqd\t%0, %0";
5301 }
5302 gcc_unreachable ();
5303 }
5304
5305 /* Returns 1 if OP contains a symbol reference */
5306
5307 int
5308 symbolic_reference_mentioned_p (rtx op)
5309 {
5310 const char *fmt;
5311 int i;
5312
5313 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
5314 return 1;
5315
5316 fmt = GET_RTX_FORMAT (GET_CODE (op));
5317 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
5318 {
5319 if (fmt[i] == 'E')
5320 {
5321 int j;
5322
5323 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
5324 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
5325 return 1;
5326 }
5327
5328 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
5329 return 1;
5330 }
5331
5332 return 0;
5333 }
5334
5335 /* Return 1 if it is appropriate to emit `ret' instructions in the
5336 body of a function. Do this only if the epilogue is simple, needing a
5337 couple of insns. Prior to reloading, we can't tell how many registers
5338 must be saved, so return 0 then. Return 0 if there is no frame
5339 marker to de-allocate. */
5340
5341 int
5342 ix86_can_use_return_insn_p (void)
5343 {
5344 struct ix86_frame frame;
5345
5346 if (! reload_completed || frame_pointer_needed)
5347 return 0;
5348
5349 /* Don't allow more than 32768 bytes of arguments to be popped, since
5350 that's all we can do with one instruction. */
5351 if (current_function_pops_args
5352 && current_function_args_size >= 32768)
5353 return 0;
5354
5355 ix86_compute_frame_layout (&frame);
5356 return frame.to_allocate == 0 && frame.nregs == 0;
5357 }
5358 \f
5359 /* Value should be nonzero if functions must have frame pointers.
5360 Zero means the frame pointer need not be set up (and parms may
5361 be accessed via the stack pointer) in functions that seem suitable. */
5362
5363 int
5364 ix86_frame_pointer_required (void)
5365 {
5366 /* If we accessed previous frames, then the generated code expects
5367 to be able to access the saved ebp value in our frame. */
5368 if (cfun->machine->accesses_prev_frame)
5369 return 1;
5370
5371 /* Several x86 OSes need a frame pointer for other reasons,
5372 usually pertaining to setjmp. */
5373 if (SUBTARGET_FRAME_POINTER_REQUIRED)
5374 return 1;
5375
5376 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
5377 the frame pointer by default. Turn it back on now if we've not
5378 got a leaf function. */
5379 if (TARGET_OMIT_LEAF_FRAME_POINTER
5380 && (!current_function_is_leaf
5381 || ix86_current_function_calls_tls_descriptor))
5382 return 1;
5383
5384 if (current_function_profile)
5385 return 1;
5386
5387 return 0;
5388 }
5389
5390 /* Record that the current function accesses previous call frames. */
5391
5392 void
5393 ix86_setup_frame_addresses (void)
5394 {
5395 cfun->machine->accesses_prev_frame = 1;
5396 }
5397 \f
5398 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
5399 # define USE_HIDDEN_LINKONCE 1
5400 #else
5401 # define USE_HIDDEN_LINKONCE 0
5402 #endif
5403
5404 static int pic_labels_used;
5405
5406 /* Fills in the label name that should be used for a pc thunk for
5407 the given register. */
5408
5409 static void
5410 get_pc_thunk_name (char name[32], unsigned int regno)
5411 {
5412 gcc_assert (!TARGET_64BIT);
5413
5414 if (USE_HIDDEN_LINKONCE)
5415 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
5416 else
5417 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
5418 }
5419
5420
5421 /* At the end of the file, emit the pc thunks used for -fpic.  Each thunk
5422 loads its register with the return address of the caller and then returns. */
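/* Illustrative sketch, not from the original sources: with hidden
   linkonce support the thunk emitted below for %ebx is expected to look
   roughly like

	__i686.get_pc_thunk.bx:
		movl	(%esp), %ebx
		ret

   i.e. it copies the caller's return address (the address of the
   instruction following the call) into the chosen register.  */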
5423
5424 void
5425 ix86_file_end (void)
5426 {
5427 rtx xops[2];
5428 int regno;
5429
5430 for (regno = 0; regno < 8; ++regno)
5431 {
5432 char name[32];
5433
5434 if (! ((pic_labels_used >> regno) & 1))
5435 continue;
5436
5437 get_pc_thunk_name (name, regno);
5438
5439 #if TARGET_MACHO
5440 if (TARGET_MACHO)
5441 {
5442 switch_to_section (darwin_sections[text_coal_section]);
5443 fputs ("\t.weak_definition\t", asm_out_file);
5444 assemble_name (asm_out_file, name);
5445 fputs ("\n\t.private_extern\t", asm_out_file);
5446 assemble_name (asm_out_file, name);
5447 fputs ("\n", asm_out_file);
5448 ASM_OUTPUT_LABEL (asm_out_file, name);
5449 }
5450 else
5451 #endif
5452 if (USE_HIDDEN_LINKONCE)
5453 {
5454 tree decl;
5455
5456 decl = build_decl (FUNCTION_DECL, get_identifier (name),
5457 error_mark_node);
5458 TREE_PUBLIC (decl) = 1;
5459 TREE_STATIC (decl) = 1;
5460 DECL_ONE_ONLY (decl) = 1;
5461
5462 (*targetm.asm_out.unique_section) (decl, 0);
5463 switch_to_section (get_named_section (decl, NULL, 0));
5464
5465 (*targetm.asm_out.globalize_label) (asm_out_file, name);
5466 fputs ("\t.hidden\t", asm_out_file);
5467 assemble_name (asm_out_file, name);
5468 fputc ('\n', asm_out_file);
5469 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
5470 }
5471 else
5472 {
5473 switch_to_section (text_section);
5474 ASM_OUTPUT_LABEL (asm_out_file, name);
5475 }
5476
5477 xops[0] = gen_rtx_REG (SImode, regno);
5478 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
5479 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
5480 output_asm_insn ("ret", xops);
5481 }
5482
5483 if (NEED_INDICATE_EXEC_STACK)
5484 file_end_indicate_exec_stack ();
5485 }
5486
5487 /* Emit code for the SET_GOT patterns. */
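/* Illustrative sketch, not from the original sources: for the usual
   ELF/PIC case with TARGET_DEEP_BRANCH_PREDICTION the code produced
   below is expected to look roughly like

	call	__i686.get_pc_thunk.bx
	addl	$_GLOBAL_OFFSET_TABLE_, %ebx

   whereas without deep branch prediction it is a call to an immediately
   following label, a pop of the return address into the destination
   register, and a corresponding add of _GLOBAL_OFFSET_TABLE_.  */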
5488
5489 const char *
5490 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
5491 {
5492 rtx xops[3];
5493
5494 xops[0] = dest;
5495
5496 if (TARGET_VXWORKS_RTP && flag_pic)
5497 {
5498 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
5499 xops[2] = gen_rtx_MEM (Pmode,
5500 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
5501 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5502
5503 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
5504 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
5505 an unadorned address. */
5506 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
5507 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
5508 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
5509 return "";
5510 }
5511
5512 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
5513
5514 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
5515 {
5516 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
5517
5518 if (!flag_pic)
5519 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5520 else
5521 output_asm_insn ("call\t%a2", xops);
5522
5523 #if TARGET_MACHO
5524 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5525 is what will be referenced by the Mach-O PIC subsystem. */
5526 if (!label)
5527 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5528 #endif
5529
5530 (*targetm.asm_out.internal_label) (asm_out_file, "L",
5531 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
5532
5533 if (flag_pic)
5534 output_asm_insn ("pop{l}\t%0", xops);
5535 }
5536 else
5537 {
5538 char name[32];
5539 get_pc_thunk_name (name, REGNO (dest));
5540 pic_labels_used |= 1 << REGNO (dest);
5541
5542 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
5543 xops[2] = gen_rtx_MEM (QImode, xops[2]);
5544 output_asm_insn ("call\t%X2", xops);
5545 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5546 is what will be referenced by the Mach-O PIC subsystem. */
5547 #if TARGET_MACHO
5548 if (!label)
5549 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5550 else
5551 targetm.asm_out.internal_label (asm_out_file, "L",
5552 CODE_LABEL_NUMBER (label));
5553 #endif
5554 }
5555
5556 if (TARGET_MACHO)
5557 return "";
5558
5559 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
5560 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
5561 else
5562 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
5563
5564 return "";
5565 }
5566
5567 /* Generate a "push" pattern for input ARG. */
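/* For illustration (not from the original sources), the RTL produced
   here has the shape

	(set (mem:SI (pre_dec:SI (reg:SI sp))) ARG)

   with Pmode selecting SImode or DImode as appropriate.  */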
5568
5569 static rtx
5570 gen_push (rtx arg)
5571 {
5572 return gen_rtx_SET (VOIDmode,
5573 gen_rtx_MEM (Pmode,
5574 gen_rtx_PRE_DEC (Pmode,
5575 stack_pointer_rtx)),
5576 arg);
5577 }
5578
5579 /* Return >= 0 if there is an unused call-clobbered register available
5580 for the entire function. */
5581
5582 static unsigned int
5583 ix86_select_alt_pic_regnum (void)
5584 {
5585 if (current_function_is_leaf && !current_function_profile
5586 && !ix86_current_function_calls_tls_descriptor)
5587 {
5588 int i;
5589 for (i = 2; i >= 0; --i)
5590 if (!df_regs_ever_live_p (i))
5591 return i;
5592 }
5593
5594 return INVALID_REGNUM;
5595 }
5596
5597 /* Return 1 if we need to save REGNO. */
5598 static int
5599 ix86_save_reg (unsigned int regno, int maybe_eh_return)
5600 {
5601 if (pic_offset_table_rtx
5602 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
5603 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
5604 || current_function_profile
5605 || current_function_calls_eh_return
5606 || current_function_uses_const_pool))
5607 {
5608 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
5609 return 0;
5610 return 1;
5611 }
5612
5613 if (current_function_calls_eh_return && maybe_eh_return)
5614 {
5615 unsigned i;
5616 for (i = 0; ; i++)
5617 {
5618 unsigned test = EH_RETURN_DATA_REGNO (i);
5619 if (test == INVALID_REGNUM)
5620 break;
5621 if (test == regno)
5622 return 1;
5623 }
5624 }
5625
5626 if (cfun->machine->force_align_arg_pointer
5627 && regno == REGNO (cfun->machine->force_align_arg_pointer))
5628 return 1;
5629
5630 return (df_regs_ever_live_p (regno)
5631 && !call_used_regs[regno]
5632 && !fixed_regs[regno]
5633 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
5634 }
5635
5636 /* Return number of registers to be saved on the stack. */
5637
5638 static int
5639 ix86_nsaved_regs (void)
5640 {
5641 int nregs = 0;
5642 int regno;
5643
5644 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5645 if (ix86_save_reg (regno, true))
5646 nregs++;
5647 return nregs;
5648 }
5649
5650 /* Return the offset between two registers, one to be eliminated, and the other
5651 its replacement, at the start of a routine. */
5652
5653 HOST_WIDE_INT
5654 ix86_initial_elimination_offset (int from, int to)
5655 {
5656 struct ix86_frame frame;
5657 ix86_compute_frame_layout (&frame);
5658
5659 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
5660 return frame.hard_frame_pointer_offset;
5661 else if (from == FRAME_POINTER_REGNUM
5662 && to == HARD_FRAME_POINTER_REGNUM)
5663 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
5664 else
5665 {
5666 gcc_assert (to == STACK_POINTER_REGNUM);
5667
5668 if (from == ARG_POINTER_REGNUM)
5669 return frame.stack_pointer_offset;
5670
5671 gcc_assert (from == FRAME_POINTER_REGNUM);
5672 return frame.stack_pointer_offset - frame.frame_pointer_offset;
5673 }
5674 }
5675
5676 /* Fill in the ix86_frame structure describing the frame of the current function. */
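/* A rough sketch, not part of the original sources: from higher to lower
   addresses the layout computed below is

	[incoming arguments]
	return address
	saved frame pointer	(if frame_pointer_needed)
	saved registers		(frame->nregs words)
	va-arg save area	(if ix86_save_varrargs_registers)
	padding1		(to stack_alignment_needed)
	local variables		(get_frame_size () bytes)
	outgoing arguments	(if ACCUMULATE_OUTGOING_ARGS)
	padding2		(to preferred_alignment)

   The hard_frame_pointer_offset, frame_pointer_offset and
   stack_pointer_offset fields record byte distances from the end of the
   incoming argument area (just above the return address) down to the
   corresponding points, and to_allocate is roughly the amount the
   prologue still subtracts from the stack pointer after the pushes.  */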
5677
5678 static void
5679 ix86_compute_frame_layout (struct ix86_frame *frame)
5680 {
5681 HOST_WIDE_INT total_size;
5682 unsigned int stack_alignment_needed;
5683 HOST_WIDE_INT offset;
5684 unsigned int preferred_alignment;
5685 HOST_WIDE_INT size = get_frame_size ();
5686
5687 frame->nregs = ix86_nsaved_regs ();
5688 total_size = size;
5689
5690 stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
5691 preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
5692
5693 /* During reload iterations the number of registers saved can change.
5694 Recompute the value as needed.  Do not recompute it when the number of
5695 registers did not change, as reload makes multiple calls to this function
5696 and does not expect the decision to change within a single iteration. */
5697 if (!optimize_size
5698 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
5699 {
5700 int count = frame->nregs;
5701
5702 cfun->machine->use_fast_prologue_epilogue_nregs = count;
5703 /* The fast prologue uses move instead of push to save registers. This
5704 is significantly longer, but also executes faster as modern hardware
5705 can execute the moves in parallel, but can't do that for push/pop.
5706
5707 Be careful about choosing which prologue to emit: when the function takes
5708 many instructions to execute, we may as well use the slow version, and
5709 likewise when the function is known to be outside a hot spot (this is known
5710 with feedback only).  Weight the size of the function by the number of
5711 registers to save, as it is cheap to use one or two push instructions but
5712 very slow to use many of them. */
5713 if (count)
5714 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
5715 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
5716 || (flag_branch_probabilities
5717 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
5718 cfun->machine->use_fast_prologue_epilogue = false;
5719 else
5720 cfun->machine->use_fast_prologue_epilogue
5721 = !expensive_function_p (count);
5722 }
5723 if (TARGET_PROLOGUE_USING_MOVE
5724 && cfun->machine->use_fast_prologue_epilogue)
5725 frame->save_regs_using_mov = true;
5726 else
5727 frame->save_regs_using_mov = false;
5728
5729
5730 /* Skip return address and saved base pointer. */
5731 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
5732
5733 frame->hard_frame_pointer_offset = offset;
5734
5735 /* Do some sanity checking of stack_alignment_needed and
5736 preferred_alignment, since the i386 port is the only one using these
5737 features, which may break easily. */
5738
5739 gcc_assert (!size || stack_alignment_needed);
5740 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
5741 gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5742 gcc_assert (stack_alignment_needed
5743 <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5744
5745 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
5746 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
5747
5748 /* Register save area */
5749 offset += frame->nregs * UNITS_PER_WORD;
5750
5751 /* Va-arg area */
5752 if (ix86_save_varrargs_registers)
5753 {
5754 offset += X86_64_VARARGS_SIZE;
5755 frame->va_arg_size = X86_64_VARARGS_SIZE;
5756 }
5757 else
5758 frame->va_arg_size = 0;
5759
5760 /* Align start of frame for local function. */
5761 frame->padding1 = ((offset + stack_alignment_needed - 1)
5762 & -stack_alignment_needed) - offset;
5763
5764 offset += frame->padding1;
5765
5766 /* Frame pointer points here. */
5767 frame->frame_pointer_offset = offset;
5768
5769 offset += size;
5770
5771 /* Add the outgoing arguments area.  It can be skipped if we eliminated
5772 all the function calls as dead code.
5773 Skipping is however impossible when the function calls alloca, since the
5774 alloca expander assumes that the last current_function_outgoing_args_size
5775 bytes of the stack frame are unused. */
5776 if (ACCUMULATE_OUTGOING_ARGS
5777 && (!current_function_is_leaf || current_function_calls_alloca
5778 || ix86_current_function_calls_tls_descriptor))
5779 {
5780 offset += current_function_outgoing_args_size;
5781 frame->outgoing_arguments_size = current_function_outgoing_args_size;
5782 }
5783 else
5784 frame->outgoing_arguments_size = 0;
5785
5786 /* Align stack boundary. Only needed if we're calling another function
5787 or using alloca. */
5788 if (!current_function_is_leaf || current_function_calls_alloca
5789 || ix86_current_function_calls_tls_descriptor)
5790 frame->padding2 = ((offset + preferred_alignment - 1)
5791 & -preferred_alignment) - offset;
5792 else
5793 frame->padding2 = 0;
5794
5795 offset += frame->padding2;
5796
5797 /* We've reached end of stack frame. */
5798 frame->stack_pointer_offset = offset;
5799
5800 /* Size prologue needs to allocate. */
5801 frame->to_allocate =
5802 (size + frame->padding1 + frame->padding2
5803 + frame->outgoing_arguments_size + frame->va_arg_size);
5804
5805 if ((!frame->to_allocate && frame->nregs <= 1)
5806 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
5807 frame->save_regs_using_mov = false;
5808
5809 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
5810 && current_function_is_leaf
5811 && !ix86_current_function_calls_tls_descriptor)
5812 {
5813 frame->red_zone_size = frame->to_allocate;
5814 if (frame->save_regs_using_mov)
5815 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
5816 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
5817 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
5818 }
5819 else
5820 frame->red_zone_size = 0;
5821 frame->to_allocate -= frame->red_zone_size;
5822 frame->stack_pointer_offset -= frame->red_zone_size;
5823 #if 0
5824 fprintf (stderr, "\n");
5825 fprintf (stderr, "nregs: %ld\n", (long)frame->nregs);
5826 fprintf (stderr, "size: %ld\n", (long)size);
5827 fprintf (stderr, "alignment1: %ld\n", (long)stack_alignment_needed);
5828 fprintf (stderr, "padding1: %ld\n", (long)frame->padding1);
5829 fprintf (stderr, "va_arg: %ld\n", (long)frame->va_arg_size);
5830 fprintf (stderr, "padding2: %ld\n", (long)frame->padding2);
5831 fprintf (stderr, "to_allocate: %ld\n", (long)frame->to_allocate);
5832 fprintf (stderr, "red_zone_size: %ld\n", (long)frame->red_zone_size);
5833 fprintf (stderr, "frame_pointer_offset: %ld\n", (long)frame->frame_pointer_offset);
5834 fprintf (stderr, "hard_frame_pointer_offset: %ld\n",
5835 (long)frame->hard_frame_pointer_offset);
5836 fprintf (stderr, "stack_pointer_offset: %ld\n", (long)frame->stack_pointer_offset);
5837 fprintf (stderr, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf);
5838 fprintf (stderr, "current_function_calls_alloca: %ld\n", (long)current_function_calls_alloca);
5839 fprintf (stderr, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor);
5840 #endif
5841 }
5842
5843 /* Emit code to save registers in the prologue. */
5844
5845 static void
5846 ix86_emit_save_regs (void)
5847 {
5848 unsigned int regno;
5849 rtx insn;
5850
5851 for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
5852 if (ix86_save_reg (regno, true))
5853 {
5854 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5855 RTX_FRAME_RELATED_P (insn) = 1;
5856 }
5857 }
5858
5859 /* Emit code to save registers using MOV insns.  The first register
5860 is saved at POINTER + OFFSET. */
5861 static void
5862 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
5863 {
5864 unsigned int regno;
5865 rtx insn;
5866
5867 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5868 if (ix86_save_reg (regno, true))
5869 {
5870 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5871 Pmode, offset),
5872 gen_rtx_REG (Pmode, regno));
5873 RTX_FRAME_RELATED_P (insn) = 1;
5874 offset += UNITS_PER_WORD;
5875 }
5876 }
5877
5878 /* Expand prologue or epilogue stack adjustment.
5879 The pattern exists to put a dependency on all ebp-based memory accesses.
5880 STYLE should be negative if instructions should be marked as frame related,
5881 zero if the %r11 register is live and cannot be freely used, and positive
5882 otherwise. */
5883
5884 static void
5885 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
5886 {
5887 rtx insn;
5888
5889 if (! TARGET_64BIT)
5890 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
5891 else if (x86_64_immediate_operand (offset, DImode))
5892 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
5893 else
5894 {
5895 rtx r11;
5896 /* r11 is used by indirect sibcall return as well, set before the
5897 epilogue and used after the epilogue.  At the moment, an indirect sibcall
5898 shouldn't be used together with huge frame sizes in one
5899 function because of the frame_size check in sibcall.c. */
5900 gcc_assert (style);
5901 r11 = gen_rtx_REG (DImode, R11_REG);
5902 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
5903 if (style < 0)
5904 RTX_FRAME_RELATED_P (insn) = 1;
5905 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
5906 offset));
5907 }
5908 if (style < 0)
5909 RTX_FRAME_RELATED_P (insn) = 1;
5910 }
5911
5912 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
5913
5914 static rtx
5915 ix86_internal_arg_pointer (void)
5916 {
5917 bool has_force_align_arg_pointer =
5918 (0 != lookup_attribute (ix86_force_align_arg_pointer_string,
5919 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))));
5920 if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
5921 && DECL_NAME (current_function_decl)
5922 && MAIN_NAME_P (DECL_NAME (current_function_decl))
5923 && DECL_FILE_SCOPE_P (current_function_decl))
5924 || ix86_force_align_arg_pointer
5925 || has_force_align_arg_pointer)
5926 {
5927 /* Nested functions can't realign the stack due to a register
5928 conflict. */
5929 if (DECL_CONTEXT (current_function_decl)
5930 && TREE_CODE (DECL_CONTEXT (current_function_decl)) == FUNCTION_DECL)
5931 {
5932 if (ix86_force_align_arg_pointer)
5933 warning (0, "-mstackrealign ignored for nested functions");
5934 if (has_force_align_arg_pointer)
5935 error ("%s not supported for nested functions",
5936 ix86_force_align_arg_pointer_string);
5937 return virtual_incoming_args_rtx;
5938 }
5939 cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, 2);
5940 return copy_to_reg (cfun->machine->force_align_arg_pointer);
5941 }
5942 else
5943 return virtual_incoming_args_rtx;
5944 }
5945
5946 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
5947 This is called from dwarf2out.c to emit call frame instructions
5948 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
5949 static void
5950 ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
5951 {
5952 rtx unspec = SET_SRC (pattern);
5953 gcc_assert (GET_CODE (unspec) == UNSPEC);
5954
5955 switch (index)
5956 {
5957 case UNSPEC_REG_SAVE:
5958 dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
5959 SET_DEST (pattern));
5960 break;
5961 case UNSPEC_DEF_CFA:
5962 dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
5963 INTVAL (XVECEXP (unspec, 0, 0)));
5964 break;
5965 default:
5966 gcc_unreachable ();
5967 }
5968 }
5969
5970 /* Expand the prologue into a bunch of separate insns. */
5971
5972 void
5973 ix86_expand_prologue (void)
5974 {
5975 rtx insn;
5976 bool pic_reg_used;
5977 struct ix86_frame frame;
5978 HOST_WIDE_INT allocate;
5979
5980 ix86_compute_frame_layout (&frame);
5981
5982 if (cfun->machine->force_align_arg_pointer)
5983 {
5984 rtx x, y;
5985
5986 /* Grab the argument pointer. */
5987 x = plus_constant (stack_pointer_rtx, 4);
5988 y = cfun->machine->force_align_arg_pointer;
5989 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
5990 RTX_FRAME_RELATED_P (insn) = 1;
5991
5992 /* The unwind info consists of two parts: install the fafp as the cfa,
5993 and record the fafp as the "save register" of the stack pointer.
5994 The latter is there so that the unwinder can see where it
5995 should restore the stack pointer across the `and' insn. */
5996 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA);
5997 x = gen_rtx_SET (VOIDmode, y, x);
5998 RTX_FRAME_RELATED_P (x) = 1;
5999 y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx),
6000 UNSPEC_REG_SAVE);
6001 y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y);
6002 RTX_FRAME_RELATED_P (y) = 1;
6003 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y));
6004 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
6005 REG_NOTES (insn) = x;
6006
6007 /* Align the stack. */
6008 emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx,
6009 GEN_INT (-16)));
6010
6011 /* And here we cheat like madmen with the unwind info. We force the
6012 cfa register back to sp+4, which is exactly what it was at the
6013 start of the function.  Re-pushing the return address results in
6014 the return address being at the same spot relative to the cfa, and thus
6015 is correct wrt the unwind info. */
6016 x = cfun->machine->force_align_arg_pointer;
6017 x = gen_frame_mem (Pmode, plus_constant (x, -4));
6018 insn = emit_insn (gen_push (x));
6019 RTX_FRAME_RELATED_P (insn) = 1;
6020
6021 x = GEN_INT (4);
6022 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA);
6023 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
6024 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
6025 REG_NOTES (insn) = x;
6026 }
6027
6028 /* Note: AT&T enter does NOT have reversed args. Enter is probably
6029 slower on all targets. Also sdb doesn't like it. */
6030
6031 if (frame_pointer_needed)
6032 {
6033 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
6034 RTX_FRAME_RELATED_P (insn) = 1;
6035
6036 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
6037 RTX_FRAME_RELATED_P (insn) = 1;
6038 }
6039
6040 allocate = frame.to_allocate;
6041
6042 if (!frame.save_regs_using_mov)
6043 ix86_emit_save_regs ();
6044 else
6045 allocate += frame.nregs * UNITS_PER_WORD;
6046
6047 /* When using the red zone we may start register saving before allocating
6048 the stack frame, saving one cycle of the prologue. */
6049 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
6050 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
6051 : stack_pointer_rtx,
6052 -frame.nregs * UNITS_PER_WORD);
6053
6054 if (allocate == 0)
6055 ;
6056 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
6057 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6058 GEN_INT (-allocate), -1);
6059 else
6060 {
6061 /* Only valid for Win32 and Win64 (MS ABI) targets. */
6062 rtx eax = gen_rtx_REG (Pmode, 0);
6063 bool eax_live;
6064 rtx t;
6065
6066 gcc_assert (!TARGET_64BIT || TARGET_64BIT_MS_ABI);
6067
6068 if (TARGET_64BIT_MS_ABI)
6069 eax_live = false;
6070 else
6071 eax_live = ix86_eax_live_at_start_p ();
6072
6073 if (eax_live)
6074 {
6075 emit_insn (gen_push (eax));
6076 allocate -= UNITS_PER_WORD;
6077 }
6078
6079 emit_move_insn (eax, GEN_INT (allocate));
6080
6081 if (TARGET_64BIT)
6082 insn = gen_allocate_stack_worker_64 (eax);
6083 else
6084 insn = gen_allocate_stack_worker_32 (eax);
6085 insn = emit_insn (insn);
6086 RTX_FRAME_RELATED_P (insn) = 1;
6087 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
6088 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
6089 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
6090 t, REG_NOTES (insn));
6091
6092 if (eax_live)
6093 {
6094 if (frame_pointer_needed)
6095 t = plus_constant (hard_frame_pointer_rtx,
6096 allocate
6097 - frame.to_allocate
6098 - frame.nregs * UNITS_PER_WORD);
6099 else
6100 t = plus_constant (stack_pointer_rtx, allocate);
6101 emit_move_insn (eax, gen_rtx_MEM (Pmode, t));
6102 }
6103 }
6104
6105 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
6106 {
6107 if (!frame_pointer_needed || !frame.to_allocate)
6108 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
6109 else
6110 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
6111 -frame.nregs * UNITS_PER_WORD);
6112 }
6113
6114 pic_reg_used = false;
6115 if (pic_offset_table_rtx
6116 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
6117 || current_function_profile))
6118 {
6119 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
6120
6121 if (alt_pic_reg_used != INVALID_REGNUM)
6122 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
6123
6124 pic_reg_used = true;
6125 }
6126
6127 if (pic_reg_used)
6128 {
6129 if (TARGET_64BIT)
6130 {
6131 if (ix86_cmodel == CM_LARGE_PIC)
6132 {
6133 rtx tmp_reg = gen_rtx_REG (DImode,
6134 FIRST_REX_INT_REG + 3 /* R11 */);
6135 rtx label = gen_label_rtx ();
6136 emit_label (label);
6137 LABEL_PRESERVE_P (label) = 1;
6138 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
6139 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
6140 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
6141 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
6142 pic_offset_table_rtx, tmp_reg));
6143 }
6144 else
6145 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
6146 }
6147 else
6148 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
6149 }
6150
6151 /* Prevent function calls from being scheduled before the call to mcount.
6152 In the pic_reg_used case, make sure that the got load isn't deleted. */
6153 if (current_function_profile)
6154 {
6155 if (pic_reg_used)
6156 emit_insn (gen_prologue_use (pic_offset_table_rtx));
6157 emit_insn (gen_blockage ());
6158 }
6159 }
6160
6161 /* Emit code to restore saved registers using MOV insns. First register
6162 is restored from POINTER + OFFSET. */
6163 static void
6164 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
6165 int maybe_eh_return)
6166 {
6167 int regno;
6168 rtx base_address = gen_rtx_MEM (Pmode, pointer);
6169
6170 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6171 if (ix86_save_reg (regno, maybe_eh_return))
6172 {
6173 /* Ensure that adjust_address won't be forced to produce a pointer
6174 outside the range allowed by the x86-64 instruction set. */
6175 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
6176 {
6177 rtx r11;
6178
6179 r11 = gen_rtx_REG (DImode, R11_REG);
6180 emit_move_insn (r11, GEN_INT (offset));
6181 emit_insn (gen_adddi3 (r11, r11, pointer));
6182 base_address = gen_rtx_MEM (Pmode, r11);
6183 offset = 0;
6184 }
6185 emit_move_insn (gen_rtx_REG (Pmode, regno),
6186 adjust_address (base_address, Pmode, offset));
6187 offset += UNITS_PER_WORD;
6188 }
6189 }
6190
6191 /* Restore function stack, frame, and registers. */
6192
6193 void
6194 ix86_expand_epilogue (int style)
6195 {
6196 int regno;
6197 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
6198 struct ix86_frame frame;
6199 HOST_WIDE_INT offset;
6200
6201 ix86_compute_frame_layout (&frame);
6202
6203 /* Calculate start of saved registers relative to ebp. Special care
6204 must be taken for the normal return case of a function using
6205 eh_return: the eax and edx registers are marked as saved, but not
6206 restored along this path. */
6207 offset = frame.nregs;
6208 if (current_function_calls_eh_return && style != 2)
6209 offset -= 2;
6210 offset *= -UNITS_PER_WORD;
6211
6212 /* If we're only restoring one register and sp is not valid, then
6213 use a move instruction to restore the register, since it's
6214 less work than reloading sp and popping the register.
6215 
6216 The default code results in a stack adjustment using an add/lea instruction,
6217 while this code results in a LEAVE instruction (or discrete equivalent),
6218 so it is profitable in some other cases as well, especially when there
6219 are no registers to restore.  We also use this code when TARGET_USE_LEAVE
6220 is set and there is exactly one register to pop.  This heuristic may need
6221 some tuning in the future. */
6222 if ((!sp_valid && frame.nregs <= 1)
6223 || (TARGET_EPILOGUE_USING_MOVE
6224 && cfun->machine->use_fast_prologue_epilogue
6225 && (frame.nregs > 1 || frame.to_allocate))
6226 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
6227 || (frame_pointer_needed && TARGET_USE_LEAVE
6228 && cfun->machine->use_fast_prologue_epilogue
6229 && frame.nregs == 1)
6230 || current_function_calls_eh_return)
6231 {
6232 /* Restore registers.  We can use ebp or esp to address the memory
6233 locations.  If both are available, default to ebp, since offsets
6234 are known to be small.  The only exception is esp pointing directly to
6235 the end of the block of saved registers, where we may simplify the
6236 addressing mode. */
6237
6238 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
6239 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
6240 frame.to_allocate, style == 2);
6241 else
6242 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
6243 offset, style == 2);
6244
6245 /* eh_return epilogues need %ecx added to the stack pointer. */
6246 if (style == 2)
6247 {
6248 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
6249
6250 if (frame_pointer_needed)
6251 {
6252 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
6253 tmp = plus_constant (tmp, UNITS_PER_WORD);
6254 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
6255
6256 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
6257 emit_move_insn (hard_frame_pointer_rtx, tmp);
6258
6259 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
6260 const0_rtx, style);
6261 }
6262 else
6263 {
6264 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
6265 tmp = plus_constant (tmp, (frame.to_allocate
6266 + frame.nregs * UNITS_PER_WORD));
6267 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
6268 }
6269 }
6270 else if (!frame_pointer_needed)
6271 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6272 GEN_INT (frame.to_allocate
6273 + frame.nregs * UNITS_PER_WORD),
6274 style);
6275 /* If not an i386, mov & pop is faster than "leave". */
6276 else if (TARGET_USE_LEAVE || optimize_size
6277 || !cfun->machine->use_fast_prologue_epilogue)
6278 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
6279 else
6280 {
6281 pro_epilogue_adjust_stack (stack_pointer_rtx,
6282 hard_frame_pointer_rtx,
6283 const0_rtx, style);
6284 if (TARGET_64BIT)
6285 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
6286 else
6287 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
6288 }
6289 }
6290 else
6291 {
6292 /* First step is to deallocate the stack frame so that we can
6293 pop the registers. */
6294 if (!sp_valid)
6295 {
6296 gcc_assert (frame_pointer_needed);
6297 pro_epilogue_adjust_stack (stack_pointer_rtx,
6298 hard_frame_pointer_rtx,
6299 GEN_INT (offset), style);
6300 }
6301 else if (frame.to_allocate)
6302 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6303 GEN_INT (frame.to_allocate), style);
6304
6305 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6306 if (ix86_save_reg (regno, false))
6307 {
6308 if (TARGET_64BIT)
6309 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
6310 else
6311 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
6312 }
6313 if (frame_pointer_needed)
6314 {
6315 /* Leave results in shorter dependency chains on CPUs that are
6316 able to grok it fast. */
6317 if (TARGET_USE_LEAVE)
6318 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
6319 else if (TARGET_64BIT)
6320 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
6321 else
6322 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
6323 }
6324 }
6325
6326 if (cfun->machine->force_align_arg_pointer)
6327 {
6328 emit_insn (gen_addsi3 (stack_pointer_rtx,
6329 cfun->machine->force_align_arg_pointer,
6330 GEN_INT (-4)));
6331 }
6332
6333 /* Sibcall epilogues don't want a return instruction. */
6334 if (style == 0)
6335 return;
6336
6337 if (current_function_pops_args && current_function_args_size)
6338 {
6339 rtx popc = GEN_INT (current_function_pops_args);
6340
6341 /* i386 can only pop 64K bytes.  If asked to pop more, pop the
6342 return address, do an explicit add, and jump indirectly to the
6343 caller. */
6344
6345 if (current_function_pops_args >= 65536)
6346 {
6347 rtx ecx = gen_rtx_REG (SImode, 2);
6348
6349 /* There is no "pascal" calling convention in any 64bit ABI. */
6350 gcc_assert (!TARGET_64BIT);
6351
6352 emit_insn (gen_popsi1 (ecx));
6353 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
6354 emit_jump_insn (gen_return_indirect_internal (ecx));
6355 }
6356 else
6357 emit_jump_insn (gen_return_pop_internal (popc));
6358 }
6359 else
6360 emit_jump_insn (gen_return_internal ());
6361 }
6362
6363 /* Reset state that compiling the function may have modified. */
6364
6365 static void
6366 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
6367 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
6368 {
6369 if (pic_offset_table_rtx)
6370 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
6371 #if TARGET_MACHO
6372 /* Mach-O doesn't support labels at the end of objects, so if
6373 it looks like we might want one, insert a NOP. */
6374 {
6375 rtx insn = get_last_insn ();
6376 while (insn
6377 && NOTE_P (insn)
6378 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
6379 insn = PREV_INSN (insn);
6380 if (insn
6381 && (LABEL_P (insn)
6382 || (NOTE_P (insn)
6383 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
6384 fputs ("\tnop\n", file);
6385 }
6386 #endif
6387
6388 }
6389 \f
6390 /* Extract the parts of an RTL expression that is a valid memory address
6391 for an instruction. Return 0 if the structure of the address is
6392 grossly off. Return -1 if the address contains ASHIFT, so it is not
6393 strictly valid, but is still used for computing the length of a lea instruction. */
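/* Hypothetical example, not from the original sources: the address

	(plus:SI (plus:SI (mult:SI (reg:SI %esi) (const_int 4))
			  (reg:SI %ebx))
		 (const_int 12))

   i.e. the operand 12(%ebx,%esi,4), would decompose into base = %ebx,
   index = %esi, scale = 4, disp = (const_int 12), seg = SEG_DEFAULT.  */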
6394
6395 int
6396 ix86_decompose_address (rtx addr, struct ix86_address *out)
6397 {
6398 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
6399 rtx base_reg, index_reg;
6400 HOST_WIDE_INT scale = 1;
6401 rtx scale_rtx = NULL_RTX;
6402 int retval = 1;
6403 enum ix86_address_seg seg = SEG_DEFAULT;
6404
6405 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
6406 base = addr;
6407 else if (GET_CODE (addr) == PLUS)
6408 {
6409 rtx addends[4], op;
6410 int n = 0, i;
6411
6412 op = addr;
6413 do
6414 {
6415 if (n >= 4)
6416 return 0;
6417 addends[n++] = XEXP (op, 1);
6418 op = XEXP (op, 0);
6419 }
6420 while (GET_CODE (op) == PLUS);
6421 if (n >= 4)
6422 return 0;
6423 addends[n] = op;
6424
6425 for (i = n; i >= 0; --i)
6426 {
6427 op = addends[i];
6428 switch (GET_CODE (op))
6429 {
6430 case MULT:
6431 if (index)
6432 return 0;
6433 index = XEXP (op, 0);
6434 scale_rtx = XEXP (op, 1);
6435 break;
6436
6437 case UNSPEC:
6438 if (XINT (op, 1) == UNSPEC_TP
6439 && TARGET_TLS_DIRECT_SEG_REFS
6440 && seg == SEG_DEFAULT)
6441 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
6442 else
6443 return 0;
6444 break;
6445
6446 case REG:
6447 case SUBREG:
6448 if (!base)
6449 base = op;
6450 else if (!index)
6451 index = op;
6452 else
6453 return 0;
6454 break;
6455
6456 case CONST:
6457 case CONST_INT:
6458 case SYMBOL_REF:
6459 case LABEL_REF:
6460 if (disp)
6461 return 0;
6462 disp = op;
6463 break;
6464
6465 default:
6466 return 0;
6467 }
6468 }
6469 }
6470 else if (GET_CODE (addr) == MULT)
6471 {
6472 index = XEXP (addr, 0); /* index*scale */
6473 scale_rtx = XEXP (addr, 1);
6474 }
6475 else if (GET_CODE (addr) == ASHIFT)
6476 {
6477 rtx tmp;
6478
6479 /* We're called for lea too, which implements ashift on occasion. */
6480 index = XEXP (addr, 0);
6481 tmp = XEXP (addr, 1);
6482 if (!CONST_INT_P (tmp))
6483 return 0;
6484 scale = INTVAL (tmp);
6485 if ((unsigned HOST_WIDE_INT) scale > 3)
6486 return 0;
6487 scale = 1 << scale;
6488 retval = -1;
6489 }
6490 else
6491 disp = addr; /* displacement */
6492
6493 /* Extract the integral value of scale. */
6494 if (scale_rtx)
6495 {
6496 if (!CONST_INT_P (scale_rtx))
6497 return 0;
6498 scale = INTVAL (scale_rtx);
6499 }
6500
6501 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
6502 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
6503
6504 /* Allow the arg pointer and stack pointer as index if there is no scaling. */
6505 if (base_reg && index_reg && scale == 1
6506 && (index_reg == arg_pointer_rtx
6507 || index_reg == frame_pointer_rtx
6508 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
6509 {
6510 rtx tmp;
6511 tmp = base, base = index, index = tmp;
6512 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
6513 }
6514
6515 /* Special case: %ebp cannot be encoded as a base without a displacement. */
6516 if ((base_reg == hard_frame_pointer_rtx
6517 || base_reg == frame_pointer_rtx
6518 || base_reg == arg_pointer_rtx) && !disp)
6519 disp = const0_rtx;
6520
6521 /* Special case: on K6, [%esi] forces the instruction to be vector decoded.
6522 Avoid this by transforming it to [%esi+0]. */
6523 if (ix86_tune == PROCESSOR_K6 && !optimize_size
6524 && base_reg && !index_reg && !disp
6525 && REG_P (base_reg)
6526 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
6527 disp = const0_rtx;
6528
6529 /* Special case: encode reg+reg instead of reg*2. */
6530 if (!base && index && scale && scale == 2)
6531 base = index, base_reg = index_reg, scale = 1;
6532
6533 /* Special case: scaling cannot be encoded without base or displacement. */
6534 if (!base && !disp && index && scale != 1)
6535 disp = const0_rtx;
6536
6537 out->base = base;
6538 out->index = index;
6539 out->disp = disp;
6540 out->scale = scale;
6541 out->seg = seg;
6542
6543 return retval;
6544 }
6545 \f
6546 /* Return the cost of the memory address X.
6547 For i386, it is better to use a complex address than to let gcc copy
6548 the address into a reg and make a new pseudo.  But not if the address
6549 requires two regs - that would mean more pseudos with longer
6550 lifetimes. */
6551 static int
6552 ix86_address_cost (rtx x)
6553 {
6554 struct ix86_address parts;
6555 int cost = 1;
6556 int ok = ix86_decompose_address (x, &parts);
6557
6558 gcc_assert (ok);
6559
6560 if (parts.base && GET_CODE (parts.base) == SUBREG)
6561 parts.base = SUBREG_REG (parts.base);
6562 if (parts.index && GET_CODE (parts.index) == SUBREG)
6563 parts.index = SUBREG_REG (parts.index);
6564
6565 /* More complex memory references are better. */
6566 if (parts.disp && parts.disp != const0_rtx)
6567 cost--;
6568 if (parts.seg != SEG_DEFAULT)
6569 cost--;
6570
6571 /* Attempt to minimize number of registers in the address. */
6572 if ((parts.base
6573 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
6574 || (parts.index
6575 && (!REG_P (parts.index)
6576 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
6577 cost++;
6578
6579 if (parts.base
6580 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
6581 && parts.index
6582 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
6583 && parts.base != parts.index)
6584 cost++;
6585
6586 /* The AMD-K6 doesn't like addresses with the ModR/M byte set to 00_xxx_100b,
6587 since its predecode logic can't detect the length of such instructions
6588 and decoding degenerates to vector decoded.  Increase the cost of such
6589 addresses here.  The penalty is minimally 2 cycles.  It may be worthwhile
6590 to split such addresses or even refuse them altogether.
6591 
6592 The following addressing modes are affected:
6593 [base+scale*index]
6594 [scale*index+disp]
6595 [base+index]
6596 
6597 The first and last cases may be avoidable by explicitly coding a zero
6598 displacement into the memory address, but I don't have an AMD-K6 machine
6599 handy to check this theory. */
6600
6601 if (TARGET_K6
6602 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
6603 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
6604 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
6605 cost += 10;
6606
6607 return cost;
6608 }
6609 \f
6610 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
6611 this is used to form addresses to local data when -fPIC is in
6612 use. */
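/* Hypothetical example, not from the original sources: a displacement of
   the form

	(minus:SI (symbol_ref:SI ("_foo")) (symbol_ref:SI ("<pic base>")))

   is the Mach-O way of addressing local data relative to the pic base
   label, and is what the test below accepts.  */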
6613
6614 static bool
6615 darwin_local_data_pic (rtx disp)
6616 {
6617 if (GET_CODE (disp) == MINUS)
6618 {
6619 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
6620 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
6621 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
6622 {
6623 const char *sym_name = XSTR (XEXP (disp, 1), 0);
6624 if (! strcmp (sym_name, "<pic base>"))
6625 return true;
6626 }
6627 }
6628
6629 return false;
6630 }
6631
6632 /* Determine if a given RTX is a valid constant. We already know this
6633 satisfies CONSTANT_P. */
6634
6635 bool
6636 legitimate_constant_p (rtx x)
6637 {
6638 switch (GET_CODE (x))
6639 {
6640 case CONST:
6641 x = XEXP (x, 0);
6642
6643 if (GET_CODE (x) == PLUS)
6644 {
6645 if (!CONST_INT_P (XEXP (x, 1)))
6646 return false;
6647 x = XEXP (x, 0);
6648 }
6649
6650 if (TARGET_MACHO && darwin_local_data_pic (x))
6651 return true;
6652
6653 /* Only some unspecs are valid as "constants". */
6654 if (GET_CODE (x) == UNSPEC)
6655 switch (XINT (x, 1))
6656 {
6657 case UNSPEC_GOT:
6658 case UNSPEC_GOTOFF:
6659 case UNSPEC_PLTOFF:
6660 return TARGET_64BIT;
6661 case UNSPEC_TPOFF:
6662 case UNSPEC_NTPOFF:
6663 x = XVECEXP (x, 0, 0);
6664 return (GET_CODE (x) == SYMBOL_REF
6665 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6666 case UNSPEC_DTPOFF:
6667 x = XVECEXP (x, 0, 0);
6668 return (GET_CODE (x) == SYMBOL_REF
6669 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
6670 default:
6671 return false;
6672 }
6673
6674 /* We must have drilled down to a symbol. */
6675 if (GET_CODE (x) == LABEL_REF)
6676 return true;
6677 if (GET_CODE (x) != SYMBOL_REF)
6678 return false;
6679 /* FALLTHRU */
6680
6681 case SYMBOL_REF:
6682 /* TLS symbols are never valid. */
6683 if (SYMBOL_REF_TLS_MODEL (x))
6684 return false;
6685
6686 /* DLLIMPORT symbols are never valid. */
6687 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
6688 && SYMBOL_REF_DLLIMPORT_P (x))
6689 return false;
6690 break;
6691
6692 case CONST_DOUBLE:
6693 if (GET_MODE (x) == TImode
6694 && x != CONST0_RTX (TImode)
6695 && !TARGET_64BIT)
6696 return false;
6697 break;
6698
6699 case CONST_VECTOR:
6700 if (x == CONST0_RTX (GET_MODE (x)))
6701 return true;
6702 return false;
6703
6704 default:
6705 break;
6706 }
6707
6708 /* Otherwise we handle everything else in the move patterns. */
6709 return true;
6710 }
6711
6712 /* Determine if it's legal to put X into the constant pool. This
6713 is not possible for the address of thread-local symbols, which
6714 is checked above. */
6715
6716 static bool
6717 ix86_cannot_force_const_mem (rtx x)
6718 {
6719 /* We can always put integral constants and vectors in memory. */
6720 switch (GET_CODE (x))
6721 {
6722 case CONST_INT:
6723 case CONST_DOUBLE:
6724 case CONST_VECTOR:
6725 return false;
6726
6727 default:
6728 break;
6729 }
6730 return !legitimate_constant_p (x);
6731 }
6732
6733 /* Determine if a given RTX is a valid constant address. */
6734
6735 bool
6736 constant_address_p (rtx x)
6737 {
6738 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
6739 }
6740
6741 /* Nonzero if the constant value X is a legitimate general operand
6742 when generating PIC code. It is given that flag_pic is on and
6743 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
6744
6745 bool
6746 legitimate_pic_operand_p (rtx x)
6747 {
6748 rtx inner;
6749
6750 switch (GET_CODE (x))
6751 {
6752 case CONST:
6753 inner = XEXP (x, 0);
6754 if (GET_CODE (inner) == PLUS
6755 && CONST_INT_P (XEXP (inner, 1)))
6756 inner = XEXP (inner, 0);
6757
6758 /* Only some unspecs are valid as "constants". */
6759 if (GET_CODE (inner) == UNSPEC)
6760 switch (XINT (inner, 1))
6761 {
6762 case UNSPEC_GOT:
6763 case UNSPEC_GOTOFF:
6764 case UNSPEC_PLTOFF:
6765 return TARGET_64BIT;
6766 case UNSPEC_TPOFF:
6767 x = XVECEXP (inner, 0, 0);
6768 return (GET_CODE (x) == SYMBOL_REF
6769 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6770 default:
6771 return false;
6772 }
6773 /* FALLTHRU */
6774
6775 case SYMBOL_REF:
6776 case LABEL_REF:
6777 return legitimate_pic_address_disp_p (x);
6778
6779 default:
6780 return true;
6781 }
6782 }
6783
6784 /* Determine if a given CONST RTX is a valid memory displacement
6785 in PIC mode. */
6786
6787 int
6788 legitimate_pic_address_disp_p (rtx disp)
6789 {
6790 bool saw_plus;
6791
6792 /* In 64bit mode we can allow direct addresses of symbols and labels
6793 when they are not dynamic symbols. */
6794 if (TARGET_64BIT)
6795 {
6796 rtx op0 = disp, op1;
6797
6798 switch (GET_CODE (disp))
6799 {
6800 case LABEL_REF:
6801 return true;
6802
6803 case CONST:
6804 if (GET_CODE (XEXP (disp, 0)) != PLUS)
6805 break;
6806 op0 = XEXP (XEXP (disp, 0), 0);
6807 op1 = XEXP (XEXP (disp, 0), 1);
6808 if (!CONST_INT_P (op1)
6809 || INTVAL (op1) >= 16*1024*1024
6810 || INTVAL (op1) < -16*1024*1024)
6811 break;
6812 if (GET_CODE (op0) == LABEL_REF)
6813 return true;
6814 if (GET_CODE (op0) != SYMBOL_REF)
6815 break;
6816 /* FALLTHRU */
6817
6818 case SYMBOL_REF:
6819 /* TLS references should always be enclosed in UNSPEC. */
6820 if (SYMBOL_REF_TLS_MODEL (op0))
6821 return false;
6822 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
6823 && ix86_cmodel != CM_LARGE_PIC)
6824 return true;
6825 break;
6826
6827 default:
6828 break;
6829 }
6830 }
6831 if (GET_CODE (disp) != CONST)
6832 return 0;
6833 disp = XEXP (disp, 0);
6834
6835 if (TARGET_64BIT)
6836 {
6837 /* It is not safe to allow PLUS expressions here; the allowed distance
6838 within GOT tables is limited.  We should not need these anyway. */
6839 if (GET_CODE (disp) != UNSPEC
6840 || (XINT (disp, 1) != UNSPEC_GOTPCREL
6841 && XINT (disp, 1) != UNSPEC_GOTOFF
6842 && XINT (disp, 1) != UNSPEC_PLTOFF))
6843 return 0;
6844
6845 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
6846 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
6847 return 0;
6848 return 1;
6849 }
6850
6851 saw_plus = false;
6852 if (GET_CODE (disp) == PLUS)
6853 {
6854 if (!CONST_INT_P (XEXP (disp, 1)))
6855 return 0;
6856 disp = XEXP (disp, 0);
6857 saw_plus = true;
6858 }
6859
6860 if (TARGET_MACHO && darwin_local_data_pic (disp))
6861 return 1;
6862
6863 if (GET_CODE (disp) != UNSPEC)
6864 return 0;
6865
6866 switch (XINT (disp, 1))
6867 {
6868 case UNSPEC_GOT:
6869 if (saw_plus)
6870 return false;
6871 /* We need to check for both symbols and labels because VxWorks loads
6872 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
6873 details. */
6874 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
6875 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
6876 case UNSPEC_GOTOFF:
6877 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
6878 The ABI also specifies a 32bit relocation, but we never produce it
6879 in the small PIC model. */
6880 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
6881 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
6882 && !TARGET_64BIT)
6883 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
6884 return false;
6885 case UNSPEC_GOTTPOFF:
6886 case UNSPEC_GOTNTPOFF:
6887 case UNSPEC_INDNTPOFF:
6888 if (saw_plus)
6889 return false;
6890 disp = XVECEXP (disp, 0, 0);
6891 return (GET_CODE (disp) == SYMBOL_REF
6892 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
6893 case UNSPEC_NTPOFF:
6894 disp = XVECEXP (disp, 0, 0);
6895 return (GET_CODE (disp) == SYMBOL_REF
6896 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
6897 case UNSPEC_DTPOFF:
6898 disp = XVECEXP (disp, 0, 0);
6899 return (GET_CODE (disp) == SYMBOL_REF
6900 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
6901 }
6902
6903 return 0;
6904 }
6905
6906 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
6907 memory address for an instruction. The MODE argument is the machine mode
6908 for the MEM expression that wants to use this address.
6909
6910 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
6911 convert common non-canonical forms to canonical form so that they will
6912 be recognized. */
6913
6914 int
6915 legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
6916 rtx addr, int strict)
6917 {
6918 struct ix86_address parts;
6919 rtx base, index, disp;
6920 HOST_WIDE_INT scale;
6921 const char *reason = NULL;
6922 rtx reason_rtx = NULL_RTX;
6923
6924 if (ix86_decompose_address (addr, &parts) <= 0)
6925 {
6926 reason = "decomposition failed";
6927 goto report_error;
6928 }
6929
6930 base = parts.base;
6931 index = parts.index;
6932 disp = parts.disp;
6933 scale = parts.scale;
6934
6935 /* Validate base register.
6936
6937 Don't allow SUBREGs that span more than a word here. They can lead to spill
6938 failures when the base is one word out of a two-word structure, which is
6939 represented internally as a DImode int.
6940
6941 if (base)
6942 {
6943 rtx reg;
6944 reason_rtx = base;
6945
6946 if (REG_P (base))
6947 reg = base;
6948 else if (GET_CODE (base) == SUBREG
6949 && REG_P (SUBREG_REG (base))
6950 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
6951 <= UNITS_PER_WORD)
6952 reg = SUBREG_REG (base);
6953 else
6954 {
6955 reason = "base is not a register";
6956 goto report_error;
6957 }
6958
6959 if (GET_MODE (base) != Pmode)
6960 {
6961 reason = "base is not in Pmode";
6962 goto report_error;
6963 }
6964
6965 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
6966 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
6967 {
6968 reason = "base is not valid";
6969 goto report_error;
6970 }
6971 }
6972
6973 /* Validate index register.
6974
6975 Don't allow SUBREGs that span more than a word here -- same as above. */
6976
6977 if (index)
6978 {
6979 rtx reg;
6980 reason_rtx = index;
6981
6982 if (REG_P (index))
6983 reg = index;
6984 else if (GET_CODE (index) == SUBREG
6985 && REG_P (SUBREG_REG (index))
6986 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
6987 <= UNITS_PER_WORD)
6988 reg = SUBREG_REG (index);
6989 else
6990 {
6991 reason = "index is not a register";
6992 goto report_error;
6993 }
6994
6995 if (GET_MODE (index) != Pmode)
6996 {
6997 reason = "index is not in Pmode";
6998 goto report_error;
6999 }
7000
7001 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
7002 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
7003 {
7004 reason = "index is not valid";
7005 goto report_error;
7006 }
7007 }
7008
7009 /* Validate scale factor. */
7010 if (scale != 1)
7011 {
7012 reason_rtx = GEN_INT (scale);
7013 if (!index)
7014 {
7015 reason = "scale without index";
7016 goto report_error;
7017 }
7018
7019 if (scale != 2 && scale != 4 && scale != 8)
7020 {
7021 reason = "scale is not a valid multiplier";
7022 goto report_error;
7023 }
7024 }
7025
7026 /* Validate displacement. */
7027 if (disp)
7028 {
7029 reason_rtx = disp;
7030
7031 if (GET_CODE (disp) == CONST
7032 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
7033 switch (XINT (XEXP (disp, 0), 1))
7034 {
7035 /* Refuse GOTOFF and GOT in 64bit mode since they are always 64bit when
7036 used. The ABI also specifies 32bit relocations, but we don't produce
7037 them at all and use IP relative addressing instead. */
7038 case UNSPEC_GOT:
7039 case UNSPEC_GOTOFF:
7040 gcc_assert (flag_pic);
7041 if (!TARGET_64BIT)
7042 goto is_legitimate_pic;
7043 reason = "64bit address unspec";
7044 goto report_error;
7045
7046 case UNSPEC_GOTPCREL:
7047 gcc_assert (flag_pic);
7048 goto is_legitimate_pic;
7049
7050 case UNSPEC_GOTTPOFF:
7051 case UNSPEC_GOTNTPOFF:
7052 case UNSPEC_INDNTPOFF:
7053 case UNSPEC_NTPOFF:
7054 case UNSPEC_DTPOFF:
7055 break;
7056
7057 default:
7058 reason = "invalid address unspec";
7059 goto report_error;
7060 }
7061
7062 else if (SYMBOLIC_CONST (disp)
7063 && (flag_pic
7064 || (TARGET_MACHO
7065 #if TARGET_MACHO
7066 && MACHOPIC_INDIRECT
7067 && !machopic_operand_p (disp)
7068 #endif
7069 )))
7070 {
7071
7072 is_legitimate_pic:
7073 if (TARGET_64BIT && (index || base))
7074 {
7075 /* foo@dtpoff(%rX) is ok. */
7076 if (GET_CODE (disp) != CONST
7077 || GET_CODE (XEXP (disp, 0)) != PLUS
7078 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
7079 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
7080 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
7081 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
7082 {
7083 reason = "non-constant pic memory reference";
7084 goto report_error;
7085 }
7086 }
7087 else if (! legitimate_pic_address_disp_p (disp))
7088 {
7089 reason = "displacement is an invalid pic construct";
7090 goto report_error;
7091 }
7092
7093 /* This code used to verify that a symbolic pic displacement
7094 includes the pic_offset_table_rtx register.
7095
7096 While this is a good idea, unfortunately these constructs may
7097 be created by the "adds using lea" optimization for incorrect
7098 code like:
7099
7100 int a;
7101 int foo(int i)
7102 {
7103 return *(&a+i);
7104 }
7105
7106 This code is nonsensical, but it results in addressing the
7107 GOT table with pic_offset_table_rtx as the base. We can't
7108 easily refuse it, since it gets matched by the "addsi3"
7109 pattern, which is later split to a lea when the output
7110 register differs from the input. While this could be handled
7111 by a separate addsi pattern for this case that never results
7112 in a lea, disabling this test seems to be the easier and
7113 correct fix for the crash. */
7114 }
7115 else if (GET_CODE (disp) != LABEL_REF
7116 && !CONST_INT_P (disp)
7117 && (GET_CODE (disp) != CONST
7118 || !legitimate_constant_p (disp))
7119 && (GET_CODE (disp) != SYMBOL_REF
7120 || !legitimate_constant_p (disp)))
7121 {
7122 reason = "displacement is not constant";
7123 goto report_error;
7124 }
7125 else if (TARGET_64BIT
7126 && !x86_64_immediate_operand (disp, VOIDmode))
7127 {
7128 reason = "displacement is out of range";
7129 goto report_error;
7130 }
7131 }
7132
7133 /* Everything looks valid. */
7134 return TRUE;
7135
7136 report_error:
7137 return FALSE;
7138 }
7139 \f
7140 /* Return a unique alias set for the GOT. */
7141
7142 static HOST_WIDE_INT
7143 ix86_GOT_alias_set (void)
7144 {
7145 static HOST_WIDE_INT set = -1;
7146 if (set == -1)
7147 set = new_alias_set ();
7148 return set;
7149 }
7150
7151 /* Return a legitimate reference for ORIG (an address) using the
7152 register REG. If REG is 0, a new pseudo is generated.
7153
7154 There are two types of references that must be handled:
7155
7156 1. Global data references must load the address from the GOT, via
7157 the PIC reg. An insn is emitted to do this load, and the reg is
7158 returned.
7159
7160 2. Static data references, constant pool addresses, and code labels
7161 compute the address as an offset from the GOT, whose base is in
7162 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
7163 differentiate them from global data objects. The returned
7164 address is the PIC reg + an unspec constant.
7165
7166 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
7167 reg also appears in the address. */
7168
7169 static rtx
7170 legitimize_pic_address (rtx orig, rtx reg)
7171 {
7172 rtx addr = orig;
7173 rtx new_rtx = orig;
7174 rtx base;
7175
7176 #if TARGET_MACHO
7177 if (TARGET_MACHO && !TARGET_64BIT)
7178 {
7179 if (reg == 0)
7180 reg = gen_reg_rtx (Pmode);
7181 /* Use the generic Mach-O PIC machinery. */
7182 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
7183 }
7184 #endif
7185
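/* In 64-bit mode an address that is already a legitimate PIC
displacement needs no further transformation. */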
7186 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
7187 new_rtx = addr;
7188 else if (TARGET_64BIT
7189 && ix86_cmodel != CM_SMALL_PIC
7190 && gotoff_operand (addr, Pmode))
7191 {
7192 rtx tmpreg;
7193 /* This symbol may be referenced via a displacement from the PIC
7194 base address (@GOTOFF). */
7195
7196 if (reload_in_progress)
7197 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7198 if (GET_CODE (addr) == CONST)
7199 addr = XEXP (addr, 0);
7200 if (GET_CODE (addr) == PLUS)
7201 {
7202 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
7203 UNSPEC_GOTOFF);
7204 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
7205 }
7206 else
7207 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
7208 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7209 if (!reg)
7210 tmpreg = gen_reg_rtx (Pmode);
7211 else
7212 tmpreg = reg;
7213 emit_move_insn (tmpreg, new_rtx);
7214
7215 if (reg != 0)
7216 {
7217 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
7218 tmpreg, 1, OPTAB_DIRECT);
7219 new_rtx = reg;
7220 }
7221 else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
7222 }
7223 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
7224 {
7225 /* This symbol may be referenced via a displacement from the PIC
7226 base address (@GOTOFF). */
7227
7228 if (reload_in_progress)
7229 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7230 if (GET_CODE (addr) == CONST)
7231 addr = XEXP (addr, 0);
7232 if (GET_CODE (addr) == PLUS)
7233 {
7234 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
7235 UNSPEC_GOTOFF);
7236 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
7237 }
7238 else
7239 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
7240 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7241 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
7242
7243 if (reg != 0)
7244 {
7245 emit_move_insn (reg, new_rtx);
7246 new_rtx = reg;
7247 }
7248 }
7249 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
7250 /* We can't use @GOTOFF for text labels on VxWorks;
7251 see gotoff_operand. */
7252 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
7253 {
7254 /* Given that we've already handled dllimport variables separately
7255 in legitimize_address, and all other variables should satisfy
7256 legitimate_pic_address_disp_p, we should never arrive here. */
7257 gcc_assert (!TARGET_64BIT_MS_ABI);
7258
7259 if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
7260 {
7261 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
7262 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7263 new_rtx = gen_const_mem (Pmode, new_rtx);
7264 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
7265
7266 if (reg == 0)
7267 reg = gen_reg_rtx (Pmode);
7268 /* Use gen_movsi directly; otherwise the address is loaded
7269 into a register for CSE. We don't want to CSE these addresses;
7270 instead we CSE the addresses from the GOT table, so skip this. */
7271 emit_insn (gen_movsi (reg, new_rtx));
7272 new_rtx = reg;
7273 }
7274 else
7275 {
7276 /* This symbol must be referenced via a load from the
7277 Global Offset Table (@GOT). */
7278
7279 if (reload_in_progress)
7280 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7281 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
7282 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7283 if (TARGET_64BIT)
7284 new_rtx = force_reg (Pmode, new_rtx);
7285 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
7286 new_rtx = gen_const_mem (Pmode, new_rtx);
7287 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
7288
7289 if (reg == 0)
7290 reg = gen_reg_rtx (Pmode);
7291 emit_move_insn (reg, new_rtx);
7292 new_rtx = reg;
7293 }
7294 }
7295 else
7296 {
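/* The remaining cases are not simple symbolic references: large
integer constants are forced into a register, CONST wrappers around
our own unspecs are left alone, and PLUS expressions have each
operand legitimized individually. */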
7297 if (CONST_INT_P (addr)
7298 && !x86_64_immediate_operand (addr, VOIDmode))
7299 {
7300 if (reg)
7301 {
7302 emit_move_insn (reg, addr);
7303 new_rtx = reg;
7304 }
7305 else
7306 new_rtx = force_reg (Pmode, addr);
7307 }
7308 else if (GET_CODE (addr) == CONST)
7309 {
7310 addr = XEXP (addr, 0);
7311
7312 /* We must match what we generated before. Assume the only
7313 unspecs that can get here are ours. Not that we could do
7314 anything with them anyway.... */
7315 if (GET_CODE (addr) == UNSPEC
7316 || (GET_CODE (addr) == PLUS
7317 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
7318 return orig;
7319 gcc_assert (GET_CODE (addr) == PLUS);
7320 }
7321 if (GET_CODE (addr) == PLUS)
7322 {
7323 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
7324
7325 /* Check first to see if this is a constant offset from a @GOTOFF
7326 symbol reference. */
7327 if (gotoff_operand (op0, Pmode)
7328 && CONST_INT_P (op1))
7329 {
7330 if (!TARGET_64BIT)
7331 {
7332 if (reload_in_progress)
7333 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7334 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
7335 UNSPEC_GOTOFF);
7336 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
7337 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7338 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
7339
7340 if (reg != 0)
7341 {
7342 emit_move_insn (reg, new_rtx);
7343 new_rtx = reg;
7344 }
7345 }
7346 else
7347 {
7348 if (INTVAL (op1) < -16*1024*1024
7349 || INTVAL (op1) >= 16*1024*1024)
7350 {
7351 if (!x86_64_immediate_operand (op1, Pmode))
7352 op1 = force_reg (Pmode, op1);
7353 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
7354 }
7355 }
7356 }
7357 else
7358 {
7359 base = legitimize_pic_address (XEXP (addr, 0), reg);
7360 new_rtx = legitimize_pic_address (XEXP (addr, 1),
7361 base == reg ? NULL_RTX : reg);
7362
7363 if (CONST_INT_P (new_rtx))
7364 new_rtx = plus_constant (base, INTVAL (new_rtx));
7365 else
7366 {
7367 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
7368 {
7369 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
7370 new_rtx = XEXP (new_rtx, 1);
7371 }
7372 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
7373 }
7374 }
7375 }
7376 }
7377 return new_rtx;
7378 }
7379 \f
7380 /* Load the thread pointer. If TO_REG is true, force it into a register. */
7381
7382 static rtx
7383 get_thread_pointer (int to_reg)
7384 {
7385 rtx tp, reg, insn;
7386
7387 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
7388 if (!to_reg)
7389 return tp;
7390
7391 reg = gen_reg_rtx (Pmode);
7392 insn = gen_rtx_SET (VOIDmode, reg, tp);
7393 insn = emit_insn (insn);
7394
7395 return reg;
7396 }
7397
7398 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
7399 false if we expect this to be used for a memory address and true if
7400 we expect to load the address into a register. */
7401
7402 static rtx
7403 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
7404 {
7405 rtx dest, base, off, pic, tp;
7406 int type;
7407
7408 switch (model)
7409 {
7410 case TLS_MODEL_GLOBAL_DYNAMIC:
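/* Compute the variable's address at run time. On 64-bit targets
without GNU2 TLS this is a call whose result arrives in %rax, so it
is wrapped in a libcall block; with GNU2 TLS the result is an offset
that is added to the thread pointer below. */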
7411 dest = gen_reg_rtx (Pmode);
7412 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7413
7414 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7415 {
7416 rtx rax = gen_rtx_REG (Pmode, 0), insns;
7417
7418 start_sequence ();
7419 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
7420 insns = get_insns ();
7421 end_sequence ();
7422
7423 CONST_OR_PURE_CALL_P (insns) = 1;
7424 emit_libcall_block (insns, dest, rax, x);
7425 }
7426 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7427 emit_insn (gen_tls_global_dynamic_64 (dest, x));
7428 else
7429 emit_insn (gen_tls_global_dynamic_32 (dest, x));
7430
7431 if (TARGET_GNU2_TLS)
7432 {
7433 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
7434
7435 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7436 }
7437 break;
7438
7439 case TLS_MODEL_LOCAL_DYNAMIC:
7440 base = gen_reg_rtx (Pmode);
7441 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7442
7443 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7444 {
7445 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
7446
7447 start_sequence ();
7448 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
7449 insns = get_insns ();
7450 end_sequence ();
7451
7452 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
7453 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
7454 CONST_OR_PURE_CALL_P (insns) = 1;
7455 emit_libcall_block (insns, base, rax, note);
7456 }
7457 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7458 emit_insn (gen_tls_local_dynamic_base_64 (base));
7459 else
7460 emit_insn (gen_tls_local_dynamic_base_32 (base));
7461
7462 if (TARGET_GNU2_TLS)
7463 {
7464 rtx x = ix86_tls_module_base ();
7465
7466 set_unique_reg_note (get_last_insn (), REG_EQUIV,
7467 gen_rtx_MINUS (Pmode, x, tp));
7468 }
7469
7470 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
7471 off = gen_rtx_CONST (Pmode, off);
7472
7473 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
7474
7475 if (TARGET_GNU2_TLS)
7476 {
7477 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
7478
7479 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7480 }
7481
7482 break;
7483
7484 case TLS_MODEL_INITIAL_EXEC:
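/* The thread-pointer offset is loaded from the GOT. Pick the unspec
and whether a PIC register is needed: 64-bit code uses @GOTNTPOFF,
32-bit PIC code goes through the PIC register, and non-PIC code
either materializes a GOT pointer or uses an @INDNTPOFF reference. */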
7485 if (TARGET_64BIT)
7486 {
7487 pic = NULL;
7488 type = UNSPEC_GOTNTPOFF;
7489 }
7490 else if (flag_pic)
7491 {
7492 if (reload_in_progress)
7493 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7494 pic = pic_offset_table_rtx;
7495 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
7496 }
7497 else if (!TARGET_ANY_GNU_TLS)
7498 {
7499 pic = gen_reg_rtx (Pmode);
7500 emit_insn (gen_set_got (pic));
7501 type = UNSPEC_GOTTPOFF;
7502 }
7503 else
7504 {
7505 pic = NULL;
7506 type = UNSPEC_INDNTPOFF;
7507 }
7508
7509 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
7510 off = gen_rtx_CONST (Pmode, off);
7511 if (pic)
7512 off = gen_rtx_PLUS (Pmode, pic, off);
7513 off = gen_const_mem (Pmode, off);
7514 set_mem_alias_set (off, ix86_GOT_alias_set ());
7515
7516 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7517 {
7518 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7519 off = force_reg (Pmode, off);
7520 return gen_rtx_PLUS (Pmode, base, off);
7521 }
7522 else
7523 {
7524 base = get_thread_pointer (true);
7525 dest = gen_reg_rtx (Pmode);
7526 emit_insn (gen_subsi3 (dest, base, off));
7527 }
7528 break;
7529
7530 case TLS_MODEL_LOCAL_EXEC:
7531 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
7532 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7533 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
7534 off = gen_rtx_CONST (Pmode, off);
7535
7536 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7537 {
7538 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7539 return gen_rtx_PLUS (Pmode, base, off);
7540 }
7541 else
7542 {
7543 base = get_thread_pointer (true);
7544 dest = gen_reg_rtx (Pmode);
7545 emit_insn (gen_subsi3 (dest, base, off));
7546 }
7547 break;
7548
7549 default:
7550 gcc_unreachable ();
7551 }
7552
7553 return dest;
7554 }
7555
7556 /* Create or return the unique __imp_DECL dllimport symbol corresponding
7557 to symbol DECL. */
7558
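/* Cache of dllimport decls created so far, keyed by the original decl. */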
7559 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
7560 htab_t dllimport_map;
7561
7562 static tree
7563 get_dllimport_decl (tree decl)
7564 {
7565 struct tree_map *h, in;
7566 void **loc;
7567 const char *name;
7568 const char *prefix;
7569 size_t namelen, prefixlen;
7570 char *imp_name;
7571 tree to;
7572 rtx rtl;
7573
7574 if (!dllimport_map)
7575 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
7576
7577 in.hash = htab_hash_pointer (decl);
7578 in.base.from = decl;
7579 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
7580 h = (struct tree_map *) *loc;
7581 if (h)
7582 return h->to;
7583
7584 *loc = h = GGC_NEW (struct tree_map);
7585 h->hash = in.hash;
7586 h->base.from = decl;
7587 h->to = to = build_decl (VAR_DECL, NULL, ptr_type_node);
7588 DECL_ARTIFICIAL (to) = 1;
7589 DECL_IGNORED_P (to) = 1;
7590 DECL_EXTERNAL (to) = 1;
7591 TREE_READONLY (to) = 1;
7592
7593 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
7594 name = targetm.strip_name_encoding (name);
7595 if (name[0] == FASTCALL_PREFIX)
7596 {
7597 name++;
7598 prefix = "*__imp_";
7599 }
7600 else
7601 prefix = "*__imp__";
7602
7603 namelen = strlen (name);
7604 prefixlen = strlen (prefix);
7605 imp_name = (char *) alloca (namelen + prefixlen + 1);
7606 memcpy (imp_name, prefix, prefixlen);
7607 memcpy (imp_name + prefixlen, name, namelen + 1);
7608
7609 name = ggc_alloc_string (imp_name, namelen + prefixlen);
7610 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
7611 SET_SYMBOL_REF_DECL (rtl, to);
7612 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
7613
7614 rtl = gen_const_mem (Pmode, rtl);
7615 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
7616
7617 SET_DECL_RTL (to, rtl);
7618
7619 return to;
7620 }
7621
7622 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
7623 true if we require the result to be in a register. */
7624
7625 static rtx
7626 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
7627 {
7628 tree imp_decl;
7629 rtx x;
7630
7631 gcc_assert (SYMBOL_REF_DECL (symbol));
7632 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
7633
7634 x = DECL_RTL (imp_decl);
7635 if (want_reg)
7636 x = force_reg (Pmode, x);
7637 return x;
7638 }
7639
7640 /* Try machine-dependent ways of modifying an illegitimate address
7641 to be legitimate. If we find one, return the new, valid address.
7642 This macro is used in only one place: `memory_address' in explow.c.
7643
7644 OLDX is the address as it was before break_out_memory_refs was called.
7645 In some cases it is useful to look at this to decide what needs to be done.
7646
7647 MODE and WIN are passed so that this macro can use
7648 GO_IF_LEGITIMATE_ADDRESS.
7649
7650 It is always safe for this macro to do nothing. It exists to recognize
7651 opportunities to optimize the output.
7652
7653 For the 80386, we handle X+REG by loading X into a register R and
7654 using R+REG. R will go in a general reg and indexing will be used.
7655 However, if REG is a broken-out memory address or multiplication,
7656 nothing needs to be done because REG can certainly go in a general reg.
7657
7658 When -fpic is used, special handling is needed for symbolic references.
7659 See comments by legitimize_pic_address in i386.c for details. */
7660
7661 rtx
7662 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
7663 {
7664 int changed = 0;
7665 unsigned log;
7666
7667 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
7668 if (log)
7669 return legitimize_tls_address (x, (enum tls_model) log, false);
7670 if (GET_CODE (x) == CONST
7671 && GET_CODE (XEXP (x, 0)) == PLUS
7672 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
7673 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
7674 {
7675 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
7676 (enum tls_model) log, false);
7677 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
7678 }
7679
7680 if (flag_pic && SYMBOLIC_CONST (x))
7681 return legitimize_pic_address (x, 0);
7682
7683 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
7684 {
7685 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
7686 return legitimize_dllimport_symbol (x, true);
7687 if (GET_CODE (x) == CONST
7688 && GET_CODE (XEXP (x, 0)) == PLUS
7689 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
7690 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
7691 {
7692 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
7693 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
7694 }
7695 }
7696
7697 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
7698 if (GET_CODE (x) == ASHIFT
7699 && CONST_INT_P (XEXP (x, 1))
7700 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
7701 {
7702 changed = 1;
7703 log = INTVAL (XEXP (x, 1));
7704 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
7705 GEN_INT (1 << log));
7706 }
7707
7708 if (GET_CODE (x) == PLUS)
7709 {
7710 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
7711
7712 if (GET_CODE (XEXP (x, 0)) == ASHIFT
7713 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
7714 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
7715 {
7716 changed = 1;
7717 log = INTVAL (XEXP (XEXP (x, 0), 1));
7718 XEXP (x, 0) = gen_rtx_MULT (Pmode,
7719 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
7720 GEN_INT (1 << log));
7721 }
7722
7723 if (GET_CODE (XEXP (x, 1)) == ASHIFT
7724 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
7725 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
7726 {
7727 changed = 1;
7728 log = INTVAL (XEXP (XEXP (x, 1), 1));
7729 XEXP (x, 1) = gen_rtx_MULT (Pmode,
7730 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
7731 GEN_INT (1 << log));
7732 }
7733
7734 /* Put multiply first if it isn't already. */
7735 if (GET_CODE (XEXP (x, 1)) == MULT)
7736 {
7737 rtx tmp = XEXP (x, 0);
7738 XEXP (x, 0) = XEXP (x, 1);
7739 XEXP (x, 1) = tmp;
7740 changed = 1;
7741 }
7742
7743 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
7744 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
7745 created by virtual register instantiation, register elimination, and
7746 similar optimizations. */
7747 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
7748 {
7749 changed = 1;
7750 x = gen_rtx_PLUS (Pmode,
7751 gen_rtx_PLUS (Pmode, XEXP (x, 0),
7752 XEXP (XEXP (x, 1), 0)),
7753 XEXP (XEXP (x, 1), 1));
7754 }
7755
7756 /* Canonicalize
7757 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
7758 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
7759 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
7760 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
7761 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
7762 && CONSTANT_P (XEXP (x, 1)))
7763 {
7764 rtx constant;
7765 rtx other = NULL_RTX;
7766
7767 if (CONST_INT_P (XEXP (x, 1)))
7768 {
7769 constant = XEXP (x, 1);
7770 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
7771 }
7772 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
7773 {
7774 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
7775 other = XEXP (x, 1);
7776 }
7777 else
7778 constant = 0;
7779
7780 if (constant)
7781 {
7782 changed = 1;
7783 x = gen_rtx_PLUS (Pmode,
7784 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
7785 XEXP (XEXP (XEXP (x, 0), 1), 0)),
7786 plus_constant (other, INTVAL (constant)));
7787 }
7788 }
7789
7790 if (changed && legitimate_address_p (mode, x, FALSE))
7791 return x;
7792
7793 if (GET_CODE (XEXP (x, 0)) == MULT)
7794 {
7795 changed = 1;
7796 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
7797 }
7798
7799 if (GET_CODE (XEXP (x, 1)) == MULT)
7800 {
7801 changed = 1;
7802 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
7803 }
7804
7805 if (changed
7806 && REG_P (XEXP (x, 1))
7807 && REG_P (XEXP (x, 0)))
7808 return x;
7809
7810 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
7811 {
7812 changed = 1;
7813 x = legitimize_pic_address (x, 0);
7814 }
7815
7816 if (changed && legitimate_address_p (mode, x, FALSE))
7817 return x;
7818
7819 if (REG_P (XEXP (x, 0)))
7820 {
7821 rtx temp = gen_reg_rtx (Pmode);
7822 rtx val = force_operand (XEXP (x, 1), temp);
7823 if (val != temp)
7824 emit_move_insn (temp, val);
7825
7826 XEXP (x, 1) = temp;
7827 return x;
7828 }
7829
7830 else if (REG_P (XEXP (x, 1)))
7831 {
7832 rtx temp = gen_reg_rtx (Pmode);
7833 rtx val = force_operand (XEXP (x, 0), temp);
7834 if (val != temp)
7835 emit_move_insn (temp, val);
7836
7837 XEXP (x, 0) = temp;
7838 return x;
7839 }
7840 }
7841
7842 return x;
7843 }
7844 \f
7845 /* Print an integer constant expression in assembler syntax. Addition
7846 and subtraction are the only arithmetic that may appear in these
7847 expressions. FILE is the stdio stream to write to, X is the rtx, and
7848 CODE is the operand print code from the output string. */
7849
7850 static void
7851 output_pic_addr_const (FILE *file, rtx x, int code)
7852 {
7853 char buf[256];
7854
7855 switch (GET_CODE (x))
7856 {
7857 case PC:
7858 gcc_assert (flag_pic);
7859 putc ('.', file);
7860 break;
7861
7862 case SYMBOL_REF:
7863 if (! TARGET_MACHO || TARGET_64BIT)
7864 output_addr_const (file, x);
7865 else
7866 {
7867 const char *name = XSTR (x, 0);
7868
7869 /* Mark the decl as referenced so that cgraph will
7870 output the function. */
7871 if (SYMBOL_REF_DECL (x))
7872 mark_decl_referenced (SYMBOL_REF_DECL (x));
7873
7874 #if TARGET_MACHO
7875 if (MACHOPIC_INDIRECT
7876 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
7877 name = machopic_indirection_name (x, /*stub_p=*/true);
7878 #endif
7879 assemble_name (file, name);
7880 }
7881 if (!TARGET_MACHO && !TARGET_64BIT_MS_ABI
7882 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
7883 fputs ("@PLT", file);
7884 break;
7885
7886 case LABEL_REF:
7887 x = XEXP (x, 0);
7888 /* FALLTHRU */
7889 case CODE_LABEL:
7890 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
7891 assemble_name (asm_out_file, buf);
7892 break;
7893
7894 case CONST_INT:
7895 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7896 break;
7897
7898 case CONST:
7899 /* This used to output parentheses around the expression,
7900 but that does not work on the 386 (either ATT or BSD assembler). */
7901 output_pic_addr_const (file, XEXP (x, 0), code);
7902 break;
7903
7904 case CONST_DOUBLE:
7905 if (GET_MODE (x) == VOIDmode)
7906 {
7907 /* We can use %d if the number is <32 bits and positive. */
7908 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
7909 fprintf (file, "0x%lx%08lx",
7910 (unsigned long) CONST_DOUBLE_HIGH (x),
7911 (unsigned long) CONST_DOUBLE_LOW (x));
7912 else
7913 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
7914 }
7915 else
7916 /* We can't handle floating point constants;
7917 PRINT_OPERAND must handle them. */
7918 output_operand_lossage ("floating constant misused");
7919 break;
7920
7921 case PLUS:
7922 /* Some assemblers need integer constants to appear first. */
7923 if (CONST_INT_P (XEXP (x, 0)))
7924 {
7925 output_pic_addr_const (file, XEXP (x, 0), code);
7926 putc ('+', file);
7927 output_pic_addr_const (file, XEXP (x, 1), code);
7928 }
7929 else
7930 {
7931 gcc_assert (CONST_INT_P (XEXP (x, 1)));
7932 output_pic_addr_const (file, XEXP (x, 1), code);
7933 putc ('+', file);
7934 output_pic_addr_const (file, XEXP (x, 0), code);
7935 }
7936 break;
7937
7938 case MINUS:
7939 if (!TARGET_MACHO)
7940 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
7941 output_pic_addr_const (file, XEXP (x, 0), code);
7942 putc ('-', file);
7943 output_pic_addr_const (file, XEXP (x, 1), code);
7944 if (!TARGET_MACHO)
7945 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
7946 break;
7947
7948 case UNSPEC:
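/* An UNSPEC pairs a symbol with the relocation to apply to it; print
the symbol followed by the matching assembler suffix. */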
7949 gcc_assert (XVECLEN (x, 0) == 1);
7950 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
7951 switch (XINT (x, 1))
7952 {
7953 case UNSPEC_GOT:
7954 fputs ("@GOT", file);
7955 break;
7956 case UNSPEC_GOTOFF:
7957 fputs ("@GOTOFF", file);
7958 break;
7959 case UNSPEC_PLTOFF:
7960 fputs ("@PLTOFF", file);
7961 break;
7962 case UNSPEC_GOTPCREL:
7963 fputs ("@GOTPCREL(%rip)", file);
7964 break;
7965 case UNSPEC_GOTTPOFF:
7966 /* FIXME: This might be @TPOFF in Sun ld too. */
7967 fputs ("@GOTTPOFF", file);
7968 break;
7969 case UNSPEC_TPOFF:
7970 fputs ("@TPOFF", file);
7971 break;
7972 case UNSPEC_NTPOFF:
7973 if (TARGET_64BIT)
7974 fputs ("@TPOFF", file);
7975 else
7976 fputs ("@NTPOFF", file);
7977 break;
7978 case UNSPEC_DTPOFF:
7979 fputs ("@DTPOFF", file);
7980 break;
7981 case UNSPEC_GOTNTPOFF:
7982 if (TARGET_64BIT)
7983 fputs ("@GOTTPOFF(%rip)", file);
7984 else
7985 fputs ("@GOTNTPOFF", file);
7986 break;
7987 case UNSPEC_INDNTPOFF:
7988 fputs ("@INDNTPOFF", file);
7989 break;
7990 default:
7991 output_operand_lossage ("invalid UNSPEC as operand");
7992 break;
7993 }
7994 break;
7995
7996 default:
7997 output_operand_lossage ("invalid expression as operand");
7998 }
7999 }
8000
8001 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
8002 We need to emit DTP-relative relocations. */
8003
8004 static void ATTRIBUTE_UNUSED
8005 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
8006 {
8007 fputs (ASM_LONG, file);
8008 output_addr_const (file, x);
8009 fputs ("@DTPOFF", file);
8010 switch (size)
8011 {
8012 case 4:
8013 break;
8014 case 8:
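/* For an 8-byte value, emit the 32-bit DTP offset followed by a zero
upper half. */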
8015 fputs (", 0", file);
8016 break;
8017 default:
8018 gcc_unreachable ();
8019 }
8020 }
8021
8022 /* In the name of slightly smaller debug output, and to cater to
8023 general assembler lossage, recognize PIC+GOTOFF and turn it back
8024 into a direct symbol reference.
8025
8026 On Darwin, this is necessary to avoid a crash, because Darwin
8027 has a different PIC label for each routine but the DWARF debugging
8028 information is not associated with any particular routine, so it's
8029 necessary to remove references to the PIC label from RTL stored by
8030 the DWARF output code. */
8031
8032 static rtx
8033 ix86_delegitimize_address (rtx orig_x)
8034 {
8035 rtx x = orig_x;
8036 /* reg_addend is NULL or a multiple of some register. */
8037 rtx reg_addend = NULL_RTX;
8038 /* const_addend is NULL or a const_int. */
8039 rtx const_addend = NULL_RTX;
8040 /* This is the result, or NULL. */
8041 rtx result = NULL_RTX;
8042
8043 if (MEM_P (x))
8044 x = XEXP (x, 0);
8045
8046 if (TARGET_64BIT)
8047 {
8048 if (GET_CODE (x) != CONST
8049 || GET_CODE (XEXP (x, 0)) != UNSPEC
8050 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
8051 || !MEM_P (orig_x))
8052 return orig_x;
8053 return XVECEXP (XEXP (x, 0), 0, 0);
8054 }
8055
8056 if (GET_CODE (x) != PLUS
8057 || GET_CODE (XEXP (x, 1)) != CONST)
8058 return orig_x;
8059
8060 if (REG_P (XEXP (x, 0))
8061 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
8062 /* %ebx + GOT/GOTOFF */
8063 ;
8064 else if (GET_CODE (XEXP (x, 0)) == PLUS)
8065 {
8066 /* %ebx + %reg * scale + GOT/GOTOFF */
8067 reg_addend = XEXP (x, 0);
8068 if (REG_P (XEXP (reg_addend, 0))
8069 && REGNO (XEXP (reg_addend, 0)) == PIC_OFFSET_TABLE_REGNUM)
8070 reg_addend = XEXP (reg_addend, 1);
8071 else if (REG_P (XEXP (reg_addend, 1))
8072 && REGNO (XEXP (reg_addend, 1)) == PIC_OFFSET_TABLE_REGNUM)
8073 reg_addend = XEXP (reg_addend, 0);
8074 else
8075 return orig_x;
8076 if (!REG_P (reg_addend)
8077 && GET_CODE (reg_addend) != MULT
8078 && GET_CODE (reg_addend) != ASHIFT)
8079 return orig_x;
8080 }
8081 else
8082 return orig_x;
8083
8084 x = XEXP (XEXP (x, 1), 0);
8085 if (GET_CODE (x) == PLUS
8086 && CONST_INT_P (XEXP (x, 1)))
8087 {
8088 const_addend = XEXP (x, 1);
8089 x = XEXP (x, 0);
8090 }
8091
8092 if (GET_CODE (x) == UNSPEC
8093 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x))
8094 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
8095 result = XVECEXP (x, 0, 0);
8096
8097 if (TARGET_MACHO && darwin_local_data_pic (x)
8098 && !MEM_P (orig_x))
8099 result = XEXP (x, 0);
8100
8101 if (! result)
8102 return orig_x;
8103
8104 if (const_addend)
8105 result = gen_rtx_PLUS (Pmode, result, const_addend);
8106 if (reg_addend)
8107 result = gen_rtx_PLUS (Pmode, reg_addend, result);
8108 return result;
8109 }
8110
8111 /* If X is a machine specific address (i.e. a symbol or label being
8112 referenced as a displacement from the GOT implemented using an
8113 UNSPEC), then return the base term. Otherwise return X. */
8114
8115 rtx
8116 ix86_find_base_term (rtx x)
8117 {
8118 rtx term;
8119
8120 if (TARGET_64BIT)
8121 {
8122 if (GET_CODE (x) != CONST)
8123 return x;
8124 term = XEXP (x, 0);
8125 if (GET_CODE (term) == PLUS
8126 && (CONST_INT_P (XEXP (term, 1))
8127 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
8128 term = XEXP (term, 0);
8129 if (GET_CODE (term) != UNSPEC
8130 || XINT (term, 1) != UNSPEC_GOTPCREL)
8131 return x;
8132
8133 term = XVECEXP (term, 0, 0);
8134
8135 if (GET_CODE (term) != SYMBOL_REF
8136 && GET_CODE (term) != LABEL_REF)
8137 return x;
8138
8139 return term;
8140 }
8141
8142 term = ix86_delegitimize_address (x);
8143
8144 if (GET_CODE (term) != SYMBOL_REF
8145 && GET_CODE (term) != LABEL_REF)
8146 return x;
8147
8148 return term;
8149 }
8150 \f
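/* Print to FILE the condition-code suffix (e, ne, g, b, ...) for
comparison CODE in mode MODE. REVERSE reverses the condition; FP
selects the alternate spellings that some assemblers need for
floating-point fcmov/set sequences. */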
8151 static void
8152 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
8153 int fp, FILE *file)
8154 {
8155 const char *suffix;
8156
8157 if (mode == CCFPmode || mode == CCFPUmode)
8158 {
8159 enum rtx_code second_code, bypass_code;
8160 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
8161 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
8162 code = ix86_fp_compare_code_to_integer (code);
8163 mode = CCmode;
8164 }
8165 if (reverse)
8166 code = reverse_condition (code);
8167
8168 switch (code)
8169 {
8170 case EQ:
8171 switch (mode)
8172 {
8173 case CCAmode:
8174 suffix = "a";
8175 break;
8176
8177 case CCCmode:
8178 suffix = "c";
8179 break;
8180
8181 case CCOmode:
8182 suffix = "o";
8183 break;
8184
8185 case CCSmode:
8186 suffix = "s";
8187 break;
8188
8189 default:
8190 suffix = "e";
8191 }
8192 break;
8193 case NE:
8194 switch (mode)
8195 {
8196 case CCAmode:
8197 suffix = "na";
8198 break;
8199
8200 case CCCmode:
8201 suffix = "nc";
8202 break;
8203
8204 case CCOmode:
8205 suffix = "no";
8206 break;
8207
8208 case CCSmode:
8209 suffix = "ns";
8210 break;
8211
8212 default:
8213 suffix = "ne";
8214 }
8215 break;
8216 case GT:
8217 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
8218 suffix = "g";
8219 break;
8220 case GTU:
8221 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
8222 Those same assemblers have the same but opposite lossage on cmov. */
8223 gcc_assert (mode == CCmode);
8224 suffix = fp ? "nbe" : "a";
8225 break;
8226 case LT:
8227 switch (mode)
8228 {
8229 case CCNOmode:
8230 case CCGOCmode:
8231 suffix = "s";
8232 break;
8233
8234 case CCmode:
8235 case CCGCmode:
8236 suffix = "l";
8237 break;
8238
8239 default:
8240 gcc_unreachable ();
8241 }
8242 break;
8243 case LTU:
8244 gcc_assert (mode == CCmode);
8245 suffix = "b";
8246 break;
8247 case GE:
8248 switch (mode)
8249 {
8250 case CCNOmode:
8251 case CCGOCmode:
8252 suffix = "ns";
8253 break;
8254
8255 case CCmode:
8256 case CCGCmode:
8257 suffix = "ge";
8258 break;
8259
8260 default:
8261 gcc_unreachable ();
8262 }
8263 break;
8264 case GEU:
8265 /* ??? As above. */
8266 gcc_assert (mode == CCmode);
8267 suffix = fp ? "nb" : "ae";
8268 break;
8269 case LE:
8270 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
8271 suffix = "le";
8272 break;
8273 case LEU:
8274 gcc_assert (mode == CCmode);
8275 suffix = "be";
8276 break;
8277 case UNORDERED:
8278 suffix = fp ? "u" : "p";
8279 break;
8280 case ORDERED:
8281 suffix = fp ? "nu" : "np";
8282 break;
8283 default:
8284 gcc_unreachable ();
8285 }
8286 fputs (suffix, file);
8287 }
8288
8289 /* Print the name of register X to FILE based on its machine mode and number.
8290 If CODE is 'w', pretend the mode is HImode.
8291 If CODE is 'b', pretend the mode is QImode.
8292 If CODE is 'k', pretend the mode is SImode.
8293 If CODE is 'q', pretend the mode is DImode.
8294 If CODE is 'h', pretend the reg is the 'high' byte register.
8295 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
8296
8297 void
8298 print_reg (rtx x, int code, FILE *file)
8299 {
8300 gcc_assert (REGNO (x) != ARG_POINTER_REGNUM
8301 && REGNO (x) != FRAME_POINTER_REGNUM
8302 && REGNO (x) != FLAGS_REG
8303 && REGNO (x) != FPSR_REG
8304 && REGNO (x) != FPCR_REG);
8305
8306 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
8307 putc ('%', file);
8308
8309 if (code == 'w' || MMX_REG_P (x))
8310 code = 2;
8311 else if (code == 'b')
8312 code = 1;
8313 else if (code == 'k')
8314 code = 4;
8315 else if (code == 'q')
8316 code = 8;
8317 else if (code == 'y')
8318 code = 3;
8319 else if (code == 'h')
8320 code = 0;
8321 else
8322 code = GET_MODE_SIZE (GET_MODE (x));
8323
8324 /* Irritatingly, the AMD extended registers use a different naming
8325 convention from the normal registers. */
8326 if (REX_INT_REG_P (x))
8327 {
8328 gcc_assert (TARGET_64BIT);
8329 switch (code)
8330 {
8331 case 0:
8332 error ("extended registers have no high halves");
8333 break;
8334 case 1:
8335 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
8336 break;
8337 case 2:
8338 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
8339 break;
8340 case 4:
8341 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
8342 break;
8343 case 8:
8344 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
8345 break;
8346 default:
8347 error ("unsupported operand size for extended register");
8348 break;
8349 }
8350 return;
8351 }
8352 switch (code)
8353 {
8354 case 3:
8355 if (STACK_TOP_P (x))
8356 {
8357 fputs ("st(0)", file);
8358 break;
8359 }
8360 /* FALLTHRU */
8361 case 8:
8362 case 4:
8363 case 12:
8364 if (! ANY_FP_REG_P (x))
8365 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
8366 /* FALLTHRU */
8367 case 16:
8368 case 2:
8369 normal:
8370 fputs (hi_reg_name[REGNO (x)], file);
8371 break;
8372 case 1:
8373 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
8374 goto normal;
8375 fputs (qi_reg_name[REGNO (x)], file);
8376 break;
8377 case 0:
8378 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
8379 goto normal;
8380 fputs (qi_high_reg_name[REGNO (x)], file);
8381 break;
8382 default:
8383 gcc_unreachable ();
8384 }
8385 }
8386
8387 /* Locate some local-dynamic symbol still in use by this function
8388 so that we can print its name in some tls_local_dynamic_base
8389 pattern. */
8390
8391 static int
8392 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
8393 {
8394 rtx x = *px;
8395
8396 if (GET_CODE (x) == SYMBOL_REF
8397 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
8398 {
8399 cfun->machine->some_ld_name = XSTR (x, 0);
8400 return 1;
8401 }
8402
8403 return 0;
8404 }
8405
8406 static const char *
8407 get_some_local_dynamic_name (void)
8408 {
8409 rtx insn;
8410
8411 if (cfun->machine->some_ld_name)
8412 return cfun->machine->some_ld_name;
8413
8414 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
8415 if (INSN_P (insn)
8416 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
8417 return cfun->machine->some_ld_name;
8418
8419 gcc_unreachable ();
8420 }
8421
8422 /* Meaning of CODE:
8423 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
8424 C -- print opcode suffix for set/cmov insn.
8425 c -- like C, but print reversed condition
8426 F,f -- likewise, but for floating-point.
8427 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
8428 otherwise nothing
8429 R -- print the prefix for register names.
8430 z -- print the opcode suffix for the size of the current operand.
8431 * -- print a star (in certain assembler syntax)
8432 A -- print an absolute memory reference.
8433 w -- print the operand as if it's a "word" (HImode) even if it isn't.
8434 s -- print a shift double count, followed by the assembler's argument
8435 delimiter.
8436 b -- print the QImode name of the register for the indicated operand.
8437 %b0 would print %al if operands[0] is reg 0.
8438 w -- likewise, print the HImode name of the register.
8439 k -- likewise, print the SImode name of the register.
8440 q -- likewise, print the DImode name of the register.
8441 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
8442 y -- print "st(0)" instead of "st" as a register.
8443 D -- print condition for SSE cmp instruction.
8444 P -- if PIC, print an @PLT suffix.
8445 X -- don't print any sort of PIC '@' suffix for a symbol.
8446 & -- print some in-use local-dynamic symbol name.
8447 H -- print a memory address offset by 8; used for sse high-parts
8448 */
8449
8450 void
8451 print_operand (FILE *file, rtx x, int code)
8452 {
8453 if (code)
8454 {
8455 switch (code)
8456 {
8457 case '*':
8458 if (ASSEMBLER_DIALECT == ASM_ATT)
8459 putc ('*', file);
8460 return;
8461
8462 case '&':
8463 assemble_name (file, get_some_local_dynamic_name ());
8464 return;
8465
8466 case 'A':
8467 switch (ASSEMBLER_DIALECT)
8468 {
8469 case ASM_ATT:
8470 putc ('*', file);
8471 break;
8472
8473 case ASM_INTEL:
8474 /* Intel syntax. For absolute addresses, registers should not
8475 be surrounded by brackets. */
8476 if (!REG_P (x))
8477 {
8478 putc ('[', file);
8479 PRINT_OPERAND (file, x, 0);
8480 putc (']', file);
8481 return;
8482 }
8483 break;
8484
8485 default:
8486 gcc_unreachable ();
8487 }
8488
8489 PRINT_OPERAND (file, x, 0);
8490 return;
8491
8492
8493 case 'L':
8494 if (ASSEMBLER_DIALECT == ASM_ATT)
8495 putc ('l', file);
8496 return;
8497
8498 case 'W':
8499 if (ASSEMBLER_DIALECT == ASM_ATT)
8500 putc ('w', file);
8501 return;
8502
8503 case 'B':
8504 if (ASSEMBLER_DIALECT == ASM_ATT)
8505 putc ('b', file);
8506 return;
8507
8508 case 'Q':
8509 if (ASSEMBLER_DIALECT == ASM_ATT)
8510 putc ('l', file);
8511 return;
8512
8513 case 'S':
8514 if (ASSEMBLER_DIALECT == ASM_ATT)
8515 putc ('s', file);
8516 return;
8517
8518 case 'T':
8519 if (ASSEMBLER_DIALECT == ASM_ATT)
8520 putc ('t', file);
8521 return;
8522
8523 case 'z':
8524 /* 387 opcodes don't get size suffixes if the operands are
8525 registers. */
8526 if (STACK_REG_P (x))
8527 return;
8528
8529 /* Likewise if using Intel opcodes. */
8530 if (ASSEMBLER_DIALECT == ASM_INTEL)
8531 return;
8532
8533 /* This is the size of op from size of operand. */
8534 switch (GET_MODE_SIZE (GET_MODE (x)))
8535 {
8536 case 1:
8537 putc ('b', file);
8538 return;
8539
8540 case 2:
8541 if (MEM_P (x))
8542 {
8543 #ifdef HAVE_GAS_FILDS_FISTS
8544 putc ('s', file);
8545 #endif
8546 return;
8547 }
8548 else
8549 putc ('w', file);
8550 return;
8551
8552 case 4:
8553 if (GET_MODE (x) == SFmode)
8554 {
8555 putc ('s', file);
8556 return;
8557 }
8558 else
8559 putc ('l', file);
8560 return;
8561
8562 case 12:
8563 case 16:
8564 putc ('t', file);
8565 return;
8566
8567 case 8:
8568 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
8569 {
8570 #ifdef GAS_MNEMONICS
8571 putc ('q', file);
8572 #else
8573 putc ('l', file);
8574 putc ('l', file);
8575 #endif
8576 }
8577 else
8578 putc ('l', file);
8579 return;
8580
8581 default:
8582 gcc_unreachable ();
8583 }
8584
8585 case 'b':
8586 case 'w':
8587 case 'k':
8588 case 'q':
8589 case 'h':
8590 case 'y':
8591 case 'X':
8592 case 'P':
8593 break;
8594
8595 case 's':
8596 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
8597 {
8598 PRINT_OPERAND (file, x, 0);
8599 putc (',', file);
8600 }
8601 return;
8602
8603 case 'D':
8604 /* A little bit of braindamage here. The SSE compare instructions
8605 use completely different names for the comparisons than the
8606 fp conditional moves do. */
8607 switch (GET_CODE (x))
8608 {
8609 case EQ:
8610 case UNEQ:
8611 fputs ("eq", file);
8612 break;
8613 case LT:
8614 case UNLT:
8615 fputs ("lt", file);
8616 break;
8617 case LE:
8618 case UNLE:
8619 fputs ("le", file);
8620 break;
8621 case UNORDERED:
8622 fputs ("unord", file);
8623 break;
8624 case NE:
8625 case LTGT:
8626 fputs ("neq", file);
8627 break;
8628 case UNGE:
8629 case GE:
8630 fputs ("nlt", file);
8631 break;
8632 case UNGT:
8633 case GT:
8634 fputs ("nle", file);
8635 break;
8636 case ORDERED:
8637 fputs ("ord", file);
8638 break;
8639 default:
8640 gcc_unreachable ();
8641 }
8642 return;
8643 case 'O':
8644 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8645 if (ASSEMBLER_DIALECT == ASM_ATT)
8646 {
8647 switch (GET_MODE (x))
8648 {
8649 case HImode: putc ('w', file); break;
8650 case SImode:
8651 case SFmode: putc ('l', file); break;
8652 case DImode:
8653 case DFmode: putc ('q', file); break;
8654 default: gcc_unreachable ();
8655 }
8656 putc ('.', file);
8657 }
8658 #endif
8659 return;
8660 case 'C':
8661 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
8662 return;
8663 case 'F':
8664 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8665 if (ASSEMBLER_DIALECT == ASM_ATT)
8666 putc ('.', file);
8667 #endif
8668 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
8669 return;
8670
8671 /* Like above, but reverse condition */
8672 case 'c':
8673 /* Check to see if argument to %c is really a constant
8674 and not a condition code which needs to be reversed. */
8675 if (!COMPARISON_P (x))
8676 {
8677 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
8678 return;
8679 }
8680 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
8681 return;
8682 case 'f':
8683 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8684 if (ASSEMBLER_DIALECT == ASM_ATT)
8685 putc ('.', file);
8686 #endif
8687 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
8688 return;
8689
8690 case 'H':
8691 /* It doesn't actually matter what mode we use here, as we're
8692 only going to use this for printing. */
8693 x = adjust_address_nv (x, DImode, 8);
8694 break;
8695
8696 case '+':
8697 {
8698 rtx x;
8699
8700 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
8701 return;
8702
8703 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
8704 if (x)
8705 {
8706 int pred_val = INTVAL (XEXP (x, 0));
8707
8708 if (pred_val < REG_BR_PROB_BASE * 45 / 100
8709 || pred_val > REG_BR_PROB_BASE * 55 / 100)
8710 {
8711 int taken = pred_val > REG_BR_PROB_BASE / 2;
8712 int cputaken = final_forward_branch_p (current_output_insn) == 0;
8713
8714 /* Emit hints only where the default branch prediction
8715 heuristics would fail. */
8716 if (taken != cputaken)
8717 {
8718 /* We use 3e (DS) prefix for taken branches and
8719 2e (CS) prefix for not taken branches. */
8720 if (taken)
8721 fputs ("ds ; ", file);
8722 else
8723 fputs ("cs ; ", file);
8724 }
8725 }
8726 }
8727 return;
8728 }
8729 default:
8730 output_operand_lossage ("invalid operand code '%c'", code);
8731 }
8732 }
8733
8734 if (REG_P (x))
8735 print_reg (x, code, file);
8736
8737 else if (MEM_P (x))
8738 {
8739 /* No `byte ptr' prefix for call instructions. */
8740 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
8741 {
8742 const char * size;
8743 switch (GET_MODE_SIZE (GET_MODE (x)))
8744 {
8745 case 1: size = "BYTE"; break;
8746 case 2: size = "WORD"; break;
8747 case 4: size = "DWORD"; break;
8748 case 8: size = "QWORD"; break;
8749 case 12: size = "XWORD"; break;
8750 case 16: size = "XMMWORD"; break;
8751 default:
8752 gcc_unreachable ();
8753 }
8754
8755 /* Check for explicit size override (codes 'b', 'w' and 'k') */
8756 if (code == 'b')
8757 size = "BYTE";
8758 else if (code == 'w')
8759 size = "WORD";
8760 else if (code == 'k')
8761 size = "DWORD";
8762
8763 fputs (size, file);
8764 fputs (" PTR ", file);
8765 }
8766
8767 x = XEXP (x, 0);
8768 /* Avoid (%rip) for call operands. */
8769 if (CONSTANT_ADDRESS_P (x) && code == 'P'
8770 && !CONST_INT_P (x))
8771 output_addr_const (file, x);
8772 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
8773 output_operand_lossage ("invalid constraints for operand");
8774 else
8775 output_address (x);
8776 }
8777
8778 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
8779 {
8780 REAL_VALUE_TYPE r;
8781 long l;
8782
8783 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8784 REAL_VALUE_TO_TARGET_SINGLE (r, l);
8785
8786 if (ASSEMBLER_DIALECT == ASM_ATT)
8787 putc ('$', file);
8788 fprintf (file, "0x%08lx", l);
8789 }
8790
8791 /* These float cases don't actually occur as immediate operands. */
8792 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
8793 {
8794 char dstr[30];
8795
8796 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8797 fprintf (file, "%s", dstr);
8798 }
8799
8800 else if (GET_CODE (x) == CONST_DOUBLE
8801 && GET_MODE (x) == XFmode)
8802 {
8803 char dstr[30];
8804
8805 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8806 fprintf (file, "%s", dstr);
8807 }
8808
8809 else
8810 {
8811 /* We have patterns that allow zero sets of memory, for instance.
8812 In 64-bit mode, we should probably support all 8-byte vectors,
8813 since we can in fact encode that into an immediate. */
8814 if (GET_CODE (x) == CONST_VECTOR)
8815 {
8816 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
8817 x = const0_rtx;
8818 }
8819
8820 if (code != 'P')
8821 {
8822 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
8823 {
8824 if (ASSEMBLER_DIALECT == ASM_ATT)
8825 putc ('$', file);
8826 }
8827 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
8828 || GET_CODE (x) == LABEL_REF)
8829 {
8830 if (ASSEMBLER_DIALECT == ASM_ATT)
8831 putc ('$', file);
8832 else
8833 fputs ("OFFSET FLAT:", file);
8834 }
8835 }
8836 if (CONST_INT_P (x))
8837 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
8838 else if (flag_pic)
8839 output_pic_addr_const (file, x, code);
8840 else
8841 output_addr_const (file, x);
8842 }
8843 }
8844 \f
8845 /* Print a memory operand whose address is ADDR. */
8846
8847 void
8848 print_operand_address (FILE *file, rtx addr)
8849 {
8850 struct ix86_address parts;
8851 rtx base, index, disp;
8852 int scale;
8853 int ok = ix86_decompose_address (addr, &parts);
8854
8855 gcc_assert (ok);
8856
8857 base = parts.base;
8858 index = parts.index;
8859 disp = parts.disp;
8860 scale = parts.scale;
8861
8862 switch (parts.seg)
8863 {
8864 case SEG_DEFAULT:
8865 break;
8866 case SEG_FS:
8867 case SEG_GS:
8868 if (USER_LABEL_PREFIX[0] == 0)
8869 putc ('%', file);
8870 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
8871 break;
8872 default:
8873 gcc_unreachable ();
8874 }
8875
8876 if (!base && !index)
8877 {
8878 /* A displacement-only address requires special attention. */
8879
8880 if (CONST_INT_P (disp))
8881 {
8882 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
8883 {
8884 if (USER_LABEL_PREFIX[0] == 0)
8885 putc ('%', file);
8886 fputs ("ds:", file);
8887 }
8888 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
8889 }
8890 else if (flag_pic)
8891 output_pic_addr_const (file, disp, 0);
8892 else
8893 output_addr_const (file, disp);
8894
8895 /* Use the one byte shorter RIP relative addressing for 64bit mode. */
8896 if (TARGET_64BIT)
8897 {
8898 if (GET_CODE (disp) == CONST
8899 && GET_CODE (XEXP (disp, 0)) == PLUS
8900 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
8901 disp = XEXP (XEXP (disp, 0), 0);
8902 if (GET_CODE (disp) == LABEL_REF
8903 || (GET_CODE (disp) == SYMBOL_REF
8904 && SYMBOL_REF_TLS_MODEL (disp) == 0))
8905 fputs ("(%rip)", file);
8906 }
8907 }
8908 else
8909 {
8910 if (ASSEMBLER_DIALECT == ASM_ATT)
8911 {
8912 if (disp)
8913 {
8914 if (flag_pic)
8915 output_pic_addr_const (file, disp, 0);
8916 else if (GET_CODE (disp) == LABEL_REF)
8917 output_asm_label (disp);
8918 else
8919 output_addr_const (file, disp);
8920 }
8921
8922 putc ('(', file);
8923 if (base)
8924 print_reg (base, 0, file);
8925 if (index)
8926 {
8927 putc (',', file);
8928 print_reg (index, 0, file);
8929 if (scale != 1)
8930 fprintf (file, ",%d", scale);
8931 }
8932 putc (')', file);
8933 }
8934 else
8935 {
8936 rtx offset = NULL_RTX;
8937
8938 if (disp)
8939 {
8940 /* Pull out the offset of a symbol; print any symbol itself. */
8941 if (GET_CODE (disp) == CONST
8942 && GET_CODE (XEXP (disp, 0)) == PLUS
8943 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
8944 {
8945 offset = XEXP (XEXP (disp, 0), 1);
8946 disp = gen_rtx_CONST (VOIDmode,
8947 XEXP (XEXP (disp, 0), 0));
8948 }
8949
8950 if (flag_pic)
8951 output_pic_addr_const (file, disp, 0);
8952 else if (GET_CODE (disp) == LABEL_REF)
8953 output_asm_label (disp);
8954 else if (CONST_INT_P (disp))
8955 offset = disp;
8956 else
8957 output_addr_const (file, disp);
8958 }
8959
8960 putc ('[', file);
8961 if (base)
8962 {
8963 print_reg (base, 0, file);
8964 if (offset)
8965 {
8966 if (INTVAL (offset) >= 0)
8967 putc ('+', file);
8968 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8969 }
8970 }
8971 else if (offset)
8972 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8973 else
8974 putc ('0', file);
8975
8976 if (index)
8977 {
8978 putc ('+', file);
8979 print_reg (index, 0, file);
8980 if (scale != 1)
8981 fprintf (file, "*%d", scale);
8982 }
8983 putc (']', file);
8984 }
8985 }
8986 }
8987
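/* Handle an UNSPEC that output_addr_const cannot print by itself: emit
the operand followed by the appropriate TLS relocation suffix.
Return true if X was handled. */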
8988 bool
8989 output_addr_const_extra (FILE *file, rtx x)
8990 {
8991 rtx op;
8992
8993 if (GET_CODE (x) != UNSPEC)
8994 return false;
8995
8996 op = XVECEXP (x, 0, 0);
8997 switch (XINT (x, 1))
8998 {
8999 case UNSPEC_GOTTPOFF:
9000 output_addr_const (file, op);
9001 /* FIXME: This might be @TPOFF in Sun ld. */
9002 fputs ("@GOTTPOFF", file);
9003 break;
9004 case UNSPEC_TPOFF:
9005 output_addr_const (file, op);
9006 fputs ("@TPOFF", file);
9007 break;
9008 case UNSPEC_NTPOFF:
9009 output_addr_const (file, op);
9010 if (TARGET_64BIT)
9011 fputs ("@TPOFF", file);
9012 else
9013 fputs ("@NTPOFF", file);
9014 break;
9015 case UNSPEC_DTPOFF:
9016 output_addr_const (file, op);
9017 fputs ("@DTPOFF", file);
9018 break;
9019 case UNSPEC_GOTNTPOFF:
9020 output_addr_const (file, op);
9021 if (TARGET_64BIT)
9022 fputs ("@GOTTPOFF(%rip)", file);
9023 else
9024 fputs ("@GOTNTPOFF", file);
9025 break;
9026 case UNSPEC_INDNTPOFF:
9027 output_addr_const (file, op);
9028 fputs ("@INDNTPOFF", file);
9029 break;
9030
9031 default:
9032 return false;
9033 }
9034
9035 return true;
9036 }
9037 \f
9038 /* Split one or more DImode RTL references into pairs of SImode
9039 references. The RTL can be REG, offsettable MEM, integer constant, or
9040 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
9041 split and "num" is its length. lo_half and hi_half are output arrays
9042 that parallel "operands". */
9043
9044 void
9045 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
9046 {
9047 while (num--)
9048 {
9049 rtx op = operands[num];
9050
9051 /* simplify_subreg refuses to split volatile memory addresses,
9052 but we still have to handle them. */
9053 if (MEM_P (op))
9054 {
9055 lo_half[num] = adjust_address (op, SImode, 0);
9056 hi_half[num] = adjust_address (op, SImode, 4);
9057 }
9058 else
9059 {
9060 lo_half[num] = simplify_gen_subreg (SImode, op,
9061 GET_MODE (op) == VOIDmode
9062 ? DImode : GET_MODE (op), 0);
9063 hi_half[num] = simplify_gen_subreg (SImode, op,
9064 GET_MODE (op) == VOIDmode
9065 ? DImode : GET_MODE (op), 4);
9066 }
9067 }
9068 }
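
/* Added illustration of the MEM case above: a (mem:DI addr) operand is
   split into the little-endian halves (mem:SI addr) for lo_half and
   (mem:SI addr+4) for hi_half; non-memory operands are split with
   simplify_gen_subreg instead. */
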
9069 /* Split one or more TImode RTL references into pairs of DImode
9070 references. The RTL can be REG, offsettable MEM, integer constant, or
9071 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
9072 split and "num" is its length. lo_half and hi_half are output arrays
9073 that parallel "operands". */
9074
9075 void
9076 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
9077 {
9078 while (num--)
9079 {
9080 rtx op = operands[num];
9081
9082 /* simplify_subreg refuses to split volatile memory addresses, but we
9083 still have to handle them. */
9084 if (MEM_P (op))
9085 {
9086 lo_half[num] = adjust_address (op, DImode, 0);
9087 hi_half[num] = adjust_address (op, DImode, 8);
9088 }
9089 else
9090 {
9091 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
9092 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
9093 }
9094 }
9095 }
9096 \f
9097 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
9098 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
9099 is the expression of the binary operation. The output may either be
9100 emitted here, or returned to the caller, like all output_* functions.
9101
9102 There is no guarantee that the operands are the same mode, as they
9103 might be within FLOAT or FLOAT_EXTEND expressions. */
9104
9105 #ifndef SYSV386_COMPAT
9106 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
9107 wants to fix the assemblers because that causes incompatibility
9108 with gcc. No-one wants to fix gcc because that causes
9109 incompatibility with assemblers... You can use the option of
9110 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
9111 #define SYSV386_COMPAT 1
9112 #endif
9113
9114 const char *
9115 output_387_binary_op (rtx insn, rtx *operands)
9116 {
9117 static char buf[30];
9118 const char *p;
9119 const char *ssep;
9120 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
9121
9122 #ifdef ENABLE_CHECKING
9123 /* Even if we do not want to check the inputs, this documents the input
9124 constraints, which helps in understanding the following code. */
9125 if (STACK_REG_P (operands[0])
9126 && ((REG_P (operands[1])
9127 && REGNO (operands[0]) == REGNO (operands[1])
9128 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
9129 || (REG_P (operands[2])
9130 && REGNO (operands[0]) == REGNO (operands[2])
9131 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
9132 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
9133 ; /* ok */
9134 else
9135 gcc_assert (is_sse);
9136 #endif
9137
9138 switch (GET_CODE (operands[3]))
9139 {
9140 case PLUS:
9141 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9142 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9143 p = "fiadd";
9144 else
9145 p = "fadd";
9146 ssep = "add";
9147 break;
9148
9149 case MINUS:
9150 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9151 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9152 p = "fisub";
9153 else
9154 p = "fsub";
9155 ssep = "sub";
9156 break;
9157
9158 case MULT:
9159 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9160 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9161 p = "fimul";
9162 else
9163 p = "fmul";
9164 ssep = "mul";
9165 break;
9166
9167 case DIV:
9168 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9169 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9170 p = "fidiv";
9171 else
9172 p = "fdiv";
9173 ssep = "div";
9174 break;
9175
9176 default:
9177 gcc_unreachable ();
9178 }
9179
9180 if (is_sse)
9181 {
9182 strcpy (buf, ssep);
9183 if (GET_MODE (operands[0]) == SFmode)
9184 strcat (buf, "ss\t{%2, %0|%0, %2}");
9185 else
9186 strcat (buf, "sd\t{%2, %0|%0, %2}");
9187 return buf;
9188 }
9189 strcpy (buf, p);
9190
9191 switch (GET_CODE (operands[3]))
9192 {
9193 case MULT:
9194 case PLUS:
9195 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
9196 {
9197 rtx temp = operands[2];
9198 operands[2] = operands[1];
9199 operands[1] = temp;
9200 }
9201
9202 /* We now know operands[0] == operands[1]. */
9203
9204 if (MEM_P (operands[2]))
9205 {
9206 p = "%z2\t%2";
9207 break;
9208 }
9209
9210 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
9211 {
9212 if (STACK_TOP_P (operands[0]))
9213 /* How is it that we are storing to a dead operand[2]?
9214 Well, presumably operands[1] is dead too. We can't
9215 store the result to st(0) as st(0) gets popped on this
9216 instruction. Instead store to operands[2] (which I
9217 think has to be st(1)). st(1) will be popped later.
9218 gcc <= 2.8.1 didn't have this check and generated
9219 assembly code that the Unixware assembler rejected. */
9220 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
9221 else
9222 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
9223 break;
9224 }
9225
9226 if (STACK_TOP_P (operands[0]))
9227 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
9228 else
9229 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
9230 break;
9231
9232 case MINUS:
9233 case DIV:
9234 if (MEM_P (operands[1]))
9235 {
9236 p = "r%z1\t%1";
9237 break;
9238 }
9239
9240 if (MEM_P (operands[2]))
9241 {
9242 p = "%z2\t%2";
9243 break;
9244 }
9245
9246 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
9247 {
9248 #if SYSV386_COMPAT
9249 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
9250 derived assemblers, confusingly reverse the direction of
9251 the operation for fsub{r} and fdiv{r} when the
9252 destination register is not st(0). The Intel assembler
9253 doesn't have this brain damage. Read !SYSV386_COMPAT to
9254 figure out what the hardware really does. */
9255 if (STACK_TOP_P (operands[0]))
9256 p = "{p\t%0, %2|rp\t%2, %0}";
9257 else
9258 p = "{rp\t%2, %0|p\t%0, %2}";
9259 #else
9260 if (STACK_TOP_P (operands[0]))
9261 /* As above for fmul/fadd, we can't store to st(0). */
9262 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
9263 else
9264 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
9265 #endif
9266 break;
9267 }
9268
9269 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
9270 {
9271 #if SYSV386_COMPAT
9272 if (STACK_TOP_P (operands[0]))
9273 p = "{rp\t%0, %1|p\t%1, %0}";
9274 else
9275 p = "{p\t%1, %0|rp\t%0, %1}";
9276 #else
9277 if (STACK_TOP_P (operands[0]))
9278 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
9279 else
9280 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
9281 #endif
9282 break;
9283 }
9284
9285 if (STACK_TOP_P (operands[0]))
9286 {
9287 if (STACK_TOP_P (operands[1]))
9288 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
9289 else
9290 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
9291 break;
9292 }
9293 else if (STACK_TOP_P (operands[1]))
9294 {
9295 #if SYSV386_COMPAT
9296 p = "{\t%1, %0|r\t%0, %1}";
9297 #else
9298 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
9299 #endif
9300 }
9301 else
9302 {
9303 #if SYSV386_COMPAT
9304 p = "{r\t%2, %0|\t%0, %2}";
9305 #else
9306 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
9307 #endif
9308 }
9309 break;
9310
9311 default:
9312 gcc_unreachable ();
9313 }
9314
9315 strcat (buf, p);
9316 return buf;
9317 }
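
/* Worked example (added note): for an SSE SFmode addition the code above
   concatenates "add" with "ss\t{%2, %0|%0, %2}", giving the template
   "addss\t{%2, %0|%0, %2}"; with operand 0 in %xmm0 and operand 2 in
   %xmm1 this prints as "addss %xmm1, %xmm0" in AT&T syntax. */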
9318
9319 /* Return needed mode for entity in optimize_mode_switching pass. */
9320
9321 int
9322 ix86_mode_needed (int entity, rtx insn)
9323 {
9324 enum attr_i387_cw mode;
9325
9326 /* The mode UNINITIALIZED is used to store the control word after a
9327 function call or ASM pattern. The mode ANY specifies that the function
9328 has no requirements on the control word and makes no changes to the
9329 bits we are interested in. */
9330
9331 if (CALL_P (insn)
9332 || (NONJUMP_INSN_P (insn)
9333 && (asm_noperands (PATTERN (insn)) >= 0
9334 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
9335 return I387_CW_UNINITIALIZED;
9336
9337 if (recog_memoized (insn) < 0)
9338 return I387_CW_ANY;
9339
9340 mode = get_attr_i387_cw (insn);
9341
9342 switch (entity)
9343 {
9344 case I387_TRUNC:
9345 if (mode == I387_CW_TRUNC)
9346 return mode;
9347 break;
9348
9349 case I387_FLOOR:
9350 if (mode == I387_CW_FLOOR)
9351 return mode;
9352 break;
9353
9354 case I387_CEIL:
9355 if (mode == I387_CW_CEIL)
9356 return mode;
9357 break;
9358
9359 case I387_MASK_PM:
9360 if (mode == I387_CW_MASK_PM)
9361 return mode;
9362 break;
9363
9364 default:
9365 gcc_unreachable ();
9366 }
9367
9368 return I387_CW_ANY;
9369 }
9370
9371 /* Output code to initialize the control word copies used by trunc?f?i and
9372 rounding patterns. MODE is the I387_CW_* setting for which a modified
9373 control word is computed and stored in its stack slot. */
9374
9375 void
9376 emit_i387_cw_initialization (int mode)
9377 {
9378 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
9379 rtx new_mode;
9380
9381 enum ix86_stack_slot slot;
9382
9383 rtx reg = gen_reg_rtx (HImode);
9384
9385 emit_insn (gen_x86_fnstcw_1 (stored_mode));
9386 emit_move_insn (reg, copy_rtx (stored_mode));
9387
9388 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
9389 {
9390 switch (mode)
9391 {
9392 case I387_CW_TRUNC:
9393 /* round toward zero (truncate) */
9394 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
9395 slot = SLOT_CW_TRUNC;
9396 break;
9397
9398 case I387_CW_FLOOR:
9399 /* round down toward -oo */
9400 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
9401 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
9402 slot = SLOT_CW_FLOOR;
9403 break;
9404
9405 case I387_CW_CEIL:
9406 /* round up toward +oo */
9407 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
9408 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
9409 slot = SLOT_CW_CEIL;
9410 break;
9411
9412 case I387_CW_MASK_PM:
9413 /* mask precision exception for nearbyint() */
9414 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
9415 slot = SLOT_CW_MASK_PM;
9416 break;
9417
9418 default:
9419 gcc_unreachable ();
9420 }
9421 }
9422 else
9423 {
9424 switch (mode)
9425 {
9426 case I387_CW_TRUNC:
9427 /* round toward zero (truncate) */
9428 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
9429 slot = SLOT_CW_TRUNC;
9430 break;
9431
9432 case I387_CW_FLOOR:
9433 /* round down toward -oo */
9434 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
9435 slot = SLOT_CW_FLOOR;
9436 break;
9437
9438 case I387_CW_CEIL:
9439 /* round up toward +oo */
9440 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
9441 slot = SLOT_CW_CEIL;
9442 break;
9443
9444 case I387_CW_MASK_PM:
9445 /* mask precision exception for nearbyint() */
9446 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
9447 slot = SLOT_CW_MASK_PM;
9448 break;
9449
9450 default:
9451 gcc_unreachable ();
9452 }
9453 }
9454
9455 gcc_assert (slot < MAX_386_STACK_LOCALS);
9456
9457 new_mode = assign_386_stack_local (HImode, slot);
9458 emit_move_insn (new_mode, reg);
9459 }
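
/* Background note on the constants used above (standard x87 control word
   layout): bits 10-11 form the rounding-control field (00 = to nearest,
   01 = down, 10 = up, 11 = toward zero/truncate) and bit 5 is the
   precision exception mask, hence the 0x0400, 0x0800, 0x0c00 and 0x0020
   values ORed in above. */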
9460
9461 /* Output code for INSN to convert a float to a signed int. OPERANDS
9462 are the insn operands. The output may be [HSD]Imode and the input
9463 operand may be [SDX]Fmode. */
9464
9465 const char *
9466 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
9467 {
9468 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
9469 int dimode_p = GET_MODE (operands[0]) == DImode;
9470 int round_mode = get_attr_i387_cw (insn);
9471
9472 /* Jump through a hoop or two for DImode, since the hardware has no
9473 non-popping instruction. We used to do this a different way, but
9474 that was somewhat fragile and broke with post-reload splitters. */
9475 if ((dimode_p || fisttp) && !stack_top_dies)
9476 output_asm_insn ("fld\t%y1", operands);
9477
9478 gcc_assert (STACK_TOP_P (operands[1]));
9479 gcc_assert (MEM_P (operands[0]));
9480 gcc_assert (GET_MODE (operands[1]) != TFmode);
9481
9482 if (fisttp)
9483 output_asm_insn ("fisttp%z0\t%0", operands);
9484 else
9485 {
9486 if (round_mode != I387_CW_ANY)
9487 output_asm_insn ("fldcw\t%3", operands);
9488 if (stack_top_dies || dimode_p)
9489 output_asm_insn ("fistp%z0\t%0", operands);
9490 else
9491 output_asm_insn ("fist%z0\t%0", operands);
9492 if (round_mode != I387_CW_ANY)
9493 output_asm_insn ("fldcw\t%2", operands);
9494 }
9495
9496 return "";
9497 }
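
/* Example of the emitted sequence (added note, non-fisttp case with a
   rounding-mode change): converting an SFmode value on the x87 stack to
   an SImode memory destination produces roughly

       fldcw %3           (load the truncating control word)
       fistpl %0          (or "fistl %0" if the stack top stays live)
       fldcw %2           (restore the original control word)  */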
9498
9499 /* Output code for x87 ffreep insn. The OPNO argument, which may only
9500 have the values zero or one, indicates the ffreep insn's operand
9501 from the OPERANDS array. */
9502
9503 static const char *
9504 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
9505 {
9506 if (TARGET_USE_FFREEP)
9507 #if HAVE_AS_IX86_FFREEP
9508 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
9509 #else
9510 {
9511 static char retval[] = ".word\t0xc_df";
9512 int regno = REGNO (operands[opno]);
9513
9514 gcc_assert (FP_REGNO_P (regno));
9515
9516 retval[9] = '0' + (regno - FIRST_STACK_REG);
9517 return retval;
9518 }
9519 #endif
9520
9521 return opno ? "fstp\t%y1" : "fstp\t%y0";
9522 }
9523
9524
9525 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
9526 should be used. UNORDERED_P is true when fucom should be used. */
9527
9528 const char *
9529 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
9530 {
9531 int stack_top_dies;
9532 rtx cmp_op0, cmp_op1;
9533 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
9534
9535 if (eflags_p)
9536 {
9537 cmp_op0 = operands[0];
9538 cmp_op1 = operands[1];
9539 }
9540 else
9541 {
9542 cmp_op0 = operands[1];
9543 cmp_op1 = operands[2];
9544 }
9545
9546 if (is_sse)
9547 {
9548 if (GET_MODE (operands[0]) == SFmode)
9549 if (unordered_p)
9550 return "ucomiss\t{%1, %0|%0, %1}";
9551 else
9552 return "comiss\t{%1, %0|%0, %1}";
9553 else
9554 if (unordered_p)
9555 return "ucomisd\t{%1, %0|%0, %1}";
9556 else
9557 return "comisd\t{%1, %0|%0, %1}";
9558 }
9559
9560 gcc_assert (STACK_TOP_P (cmp_op0));
9561
9562 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
9563
9564 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
9565 {
9566 if (stack_top_dies)
9567 {
9568 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
9569 return output_387_ffreep (operands, 1);
9570 }
9571 else
9572 return "ftst\n\tfnstsw\t%0";
9573 }
9574
9575 if (STACK_REG_P (cmp_op1)
9576 && stack_top_dies
9577 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
9578 && REGNO (cmp_op1) != FIRST_STACK_REG)
9579 {
9580 /* If the top of the 387 stack dies, and the other operand
9581 is also a stack register that dies, then this must be a
9582 `fcompp' float compare. */
9583
9584 if (eflags_p)
9585 {
9586 /* There is no double popping fcomi variant. Fortunately,
9587 eflags is immune from the fstp's cc clobbering. */
9588 if (unordered_p)
9589 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
9590 else
9591 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
9592 return output_387_ffreep (operands, 0);
9593 }
9594 else
9595 {
9596 if (unordered_p)
9597 return "fucompp\n\tfnstsw\t%0";
9598 else
9599 return "fcompp\n\tfnstsw\t%0";
9600 }
9601 }
9602 else
9603 {
9604 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
9605
9606 static const char * const alt[16] =
9607 {
9608 "fcom%z2\t%y2\n\tfnstsw\t%0",
9609 "fcomp%z2\t%y2\n\tfnstsw\t%0",
9610 "fucom%z2\t%y2\n\tfnstsw\t%0",
9611 "fucomp%z2\t%y2\n\tfnstsw\t%0",
9612
9613 "ficom%z2\t%y2\n\tfnstsw\t%0",
9614 "ficomp%z2\t%y2\n\tfnstsw\t%0",
9615 NULL,
9616 NULL,
9617
9618 "fcomi\t{%y1, %0|%0, %y1}",
9619 "fcomip\t{%y1, %0|%0, %y1}",
9620 "fucomi\t{%y1, %0|%0, %y1}",
9621 "fucomip\t{%y1, %0|%0, %y1}",
9622
9623 NULL,
9624 NULL,
9625 NULL,
9626 NULL
9627 };
9628
9629 int mask;
9630 const char *ret;
9631
9632 mask = eflags_p << 3;
9633 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
9634 mask |= unordered_p << 1;
9635 mask |= stack_top_dies;
9636
9637 gcc_assert (mask < 16);
9638 ret = alt[mask];
9639 gcc_assert (ret);
9640
9641 return ret;
9642 }
9643 }
9644
9645 void
9646 ix86_output_addr_vec_elt (FILE *file, int value)
9647 {
9648 const char *directive = ASM_LONG;
9649
9650 #ifdef ASM_QUAD
9651 if (TARGET_64BIT)
9652 directive = ASM_QUAD;
9653 #else
9654 gcc_assert (!TARGET_64BIT);
9655 #endif
9656
9657 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
9658 }
9659
9660 void
9661 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
9662 {
9663 const char *directive = ASM_LONG;
9664
9665 #ifdef ASM_QUAD
9666 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
9667 directive = ASM_QUAD;
9668 #else
9669 gcc_assert (!TARGET_64BIT);
9670 #endif
9671 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
9672 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
9673 fprintf (file, "%s%s%d-%s%d\n",
9674 directive, LPREFIX, value, LPREFIX, rel);
9675 else if (HAVE_AS_GOTOFF_IN_DATA)
9676 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
9677 #if TARGET_MACHO
9678 else if (TARGET_MACHO)
9679 {
9680 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
9681 machopic_output_function_base_name (file);
9682 fprintf(file, "\n");
9683 }
9684 #endif
9685 else
9686 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
9687 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
9688 }
9689 \f
9690 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
9691 for the target. */
9692
9693 void
9694 ix86_expand_clear (rtx dest)
9695 {
9696 rtx tmp;
9697
9698 /* We play register width games, which are only valid after reload. */
9699 gcc_assert (reload_completed);
9700
9701 /* Avoid HImode and its attendant prefix byte. */
9702 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
9703 dest = gen_rtx_REG (SImode, REGNO (dest));
9704 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
9705
9706 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
9707 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
9708 {
9709 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
9710 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
9711 }
9712
9713 emit_insn (tmp);
9714 }
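
/* Usage note (added): clearing %eax through this routine normally emits
   "xorl %eax, %eax" inside a parallel with a flags clobber; the plain
   "movl $0, %eax" form is only kept when TARGET_USE_MOV0 is set and we
   are not optimizing for size. */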
9715
9716 /* X is an unchanging MEM. If it is a constant pool reference, return
9717 the constant pool rtx, else NULL. */
9718
9719 rtx
9720 maybe_get_pool_constant (rtx x)
9721 {
9722 x = ix86_delegitimize_address (XEXP (x, 0));
9723
9724 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9725 return get_pool_constant (x);
9726
9727 return NULL_RTX;
9728 }
9729
9730 void
9731 ix86_expand_move (enum machine_mode mode, rtx operands[])
9732 {
9733 int strict = (reload_in_progress || reload_completed);
9734 rtx op0, op1;
9735 enum tls_model model;
9736
9737 op0 = operands[0];
9738 op1 = operands[1];
9739
9740 if (GET_CODE (op1) == SYMBOL_REF)
9741 {
9742 model = SYMBOL_REF_TLS_MODEL (op1);
9743 if (model)
9744 {
9745 op1 = legitimize_tls_address (op1, model, true);
9746 op1 = force_operand (op1, op0);
9747 if (op1 == op0)
9748 return;
9749 }
9750 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
9751 && SYMBOL_REF_DLLIMPORT_P (op1))
9752 op1 = legitimize_dllimport_symbol (op1, false);
9753 }
9754 else if (GET_CODE (op1) == CONST
9755 && GET_CODE (XEXP (op1, 0)) == PLUS
9756 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
9757 {
9758 rtx addend = XEXP (XEXP (op1, 0), 1);
9759 rtx symbol = XEXP (XEXP (op1, 0), 0);
9760 rtx tmp = NULL;
9761
9762 model = SYMBOL_REF_TLS_MODEL (symbol);
9763 if (model)
9764 tmp = legitimize_tls_address (symbol, model, true);
9765 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
9766 && SYMBOL_REF_DLLIMPORT_P (symbol))
9767 tmp = legitimize_dllimport_symbol (symbol, true);
9768
9769 if (tmp)
9770 {
9771 tmp = force_operand (tmp, NULL);
9772 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
9773 op0, 1, OPTAB_DIRECT);
9774 if (tmp == op0)
9775 return;
9776 }
9777 }
9778
9779 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
9780 {
9781 if (TARGET_MACHO && !TARGET_64BIT)
9782 {
9783 #if TARGET_MACHO
9784 if (MACHOPIC_PURE)
9785 {
9786 rtx temp = ((reload_in_progress
9787 || ((op0 && REG_P (op0))
9788 && mode == Pmode))
9789 ? op0 : gen_reg_rtx (Pmode));
9790 op1 = machopic_indirect_data_reference (op1, temp);
9791 op1 = machopic_legitimize_pic_address (op1, mode,
9792 temp == op1 ? 0 : temp);
9793 }
9794 else if (MACHOPIC_INDIRECT)
9795 op1 = machopic_indirect_data_reference (op1, 0);
9796 if (op0 == op1)
9797 return;
9798 #endif
9799 }
9800 else
9801 {
9802 if (MEM_P (op0))
9803 op1 = force_reg (Pmode, op1);
9804 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
9805 {
9806 rtx reg = no_new_pseudos ? op0 : NULL_RTX;
9807 op1 = legitimize_pic_address (op1, reg);
9808 if (op0 == op1)
9809 return;
9810 }
9811 }
9812 }
9813 else
9814 {
9815 if (MEM_P (op0)
9816 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
9817 || !push_operand (op0, mode))
9818 && MEM_P (op1))
9819 op1 = force_reg (mode, op1);
9820
9821 if (push_operand (op0, mode)
9822 && ! general_no_elim_operand (op1, mode))
9823 op1 = copy_to_mode_reg (mode, op1);
9824
9825 /* Force large constants in 64bit compilation into register
9826 to get them CSEed. */
9827 if (TARGET_64BIT && mode == DImode
9828 && immediate_operand (op1, mode)
9829 && !x86_64_zext_immediate_operand (op1, VOIDmode)
9830 && !register_operand (op0, mode)
9831 && optimize && !reload_completed && !reload_in_progress)
9832 op1 = copy_to_mode_reg (mode, op1);
9833
9834 if (FLOAT_MODE_P (mode))
9835 {
9836 /* If we are loading a floating point constant to a register,
9837 force the value to memory now, since we'll get better code
9838 out the back end. */
9839
9840 if (strict)
9841 ;
9842 else if (GET_CODE (op1) == CONST_DOUBLE)
9843 {
9844 op1 = validize_mem (force_const_mem (mode, op1));
9845 if (!register_operand (op0, mode))
9846 {
9847 rtx temp = gen_reg_rtx (mode);
9848 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
9849 emit_move_insn (op0, temp);
9850 return;
9851 }
9852 }
9853 }
9854 }
9855
9856 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9857 }
9858
9859 void
9860 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
9861 {
9862 rtx op0 = operands[0], op1 = operands[1];
9863 unsigned int align = GET_MODE_ALIGNMENT (mode);
9864
9865 /* Force constants other than zero into memory. We do not know how
9866 the instructions used to build constants modify the upper 64 bits
9867 of the register; once we have that information we may be able
9868 to handle some of them more efficiently. */
9869 if ((reload_in_progress | reload_completed) == 0
9870 && register_operand (op0, mode)
9871 && (CONSTANT_P (op1)
9872 || (GET_CODE (op1) == SUBREG
9873 && CONSTANT_P (SUBREG_REG (op1))))
9874 && standard_sse_constant_p (op1) <= 0)
9875 op1 = validize_mem (force_const_mem (mode, op1));
9876
9877 /* TDmode values are passed as TImode on the stack. TImode values
9878 are moved via xmm registers, and moving them to the stack can result in
9879 unaligned memory access. Use ix86_expand_vector_move_misalign()
9880 if the memory operand is not aligned correctly. */
9881 if (!no_new_pseudos
9882 && (mode == TImode) && !TARGET_64BIT
9883 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
9884 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
9885 {
9886 rtx tmp[2];
9887
9888 /* ix86_expand_vector_move_misalign() does not like constants ... */
9889 if (CONSTANT_P (op1)
9890 || (GET_CODE (op1) == SUBREG
9891 && CONSTANT_P (SUBREG_REG (op1))))
9892 op1 = validize_mem (force_const_mem (mode, op1));
9893
9894 /* ... nor both arguments in memory. */
9895 if (!register_operand (op0, mode)
9896 && !register_operand (op1, mode))
9897 op1 = force_reg (mode, op1);
9898
9899 tmp[0] = op0; tmp[1] = op1;
9900 ix86_expand_vector_move_misalign (mode, tmp);
9901 return;
9902 }
9903
9904 /* Make operand1 a register if it isn't already. */
9905 if (!no_new_pseudos
9906 && !register_operand (op0, mode)
9907 && !register_operand (op1, mode))
9908 {
9909 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
9910 return;
9911 }
9912
9913 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9914 }
9915
9916 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
9917 straight to ix86_expand_vector_move. */
9918 /* Code generation for scalar reg-reg moves of single and double precision data:
9919 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
9920 movaps reg, reg
9921 else
9922 movss reg, reg
9923 if (x86_sse_partial_reg_dependency == true)
9924 movapd reg, reg
9925 else
9926 movsd reg, reg
9927
9928 Code generation for scalar loads of double precision data:
9929 if (x86_sse_split_regs == true)
9930 movlpd mem, reg (gas syntax)
9931 else
9932 movsd mem, reg
9933
9934 Code generation for unaligned packed loads of single precision data
9935 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
9936 if (x86_sse_unaligned_move_optimal)
9937 movups mem, reg
9938
9939 if (x86_sse_partial_reg_dependency == true)
9940 {
9941 xorps reg, reg
9942 movlps mem, reg
9943 movhps mem+8, reg
9944 }
9945 else
9946 {
9947 movlps mem, reg
9948 movhps mem+8, reg
9949 }
9950
9951 Code generation for unaligned packed loads of double precision data
9952 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
9953 if (x86_sse_unaligned_move_optimal)
9954 movupd mem, reg
9955
9956 if (x86_sse_split_regs == true)
9957 {
9958 movlpd mem, reg
9959 movhpd mem+8, reg
9960 }
9961 else
9962 {
9963 movsd mem, reg
9964 movhpd mem+8, reg
9965 }
9966 */
9967
9968 void
9969 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
9970 {
9971 rtx op0, op1, m;
9972
9973 op0 = operands[0];
9974 op1 = operands[1];
9975
9976 if (MEM_P (op1))
9977 {
9978 /* If we're optimizing for size, movups is the smallest. */
9979 if (optimize_size)
9980 {
9981 op0 = gen_lowpart (V4SFmode, op0);
9982 op1 = gen_lowpart (V4SFmode, op1);
9983 emit_insn (gen_sse_movups (op0, op1));
9984 return;
9985 }
9986
9987 /* ??? If we have typed data, then it would appear that using
9988 movdqu is the only way to get unaligned data loaded with
9989 integer type. */
9990 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9991 {
9992 op0 = gen_lowpart (V16QImode, op0);
9993 op1 = gen_lowpart (V16QImode, op1);
9994 emit_insn (gen_sse2_movdqu (op0, op1));
9995 return;
9996 }
9997
9998 if (TARGET_SSE2 && mode == V2DFmode)
9999 {
10000 rtx zero;
10001
10002 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
10003 {
10004 op0 = gen_lowpart (V2DFmode, op0);
10005 op1 = gen_lowpart (V2DFmode, op1);
10006 emit_insn (gen_sse2_movupd (op0, op1));
10007 return;
10008 }
10009
10010 /* When SSE registers are split into halves, we can avoid
10011 writing to the top half twice. */
10012 if (TARGET_SSE_SPLIT_REGS)
10013 {
10014 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
10015 zero = op0;
10016 }
10017 else
10018 {
10019 /* ??? Not sure about the best option for the Intel chips.
10020 The following would seem to satisfy; the register is
10021 entirely cleared, breaking the dependency chain. We
10022 then store to the upper half, with a dependency depth
10023 of one. A rumor has it that Intel recommends two movsd
10024 followed by an unpacklpd, but this is unconfirmed. And
10025 given that the dependency depth of the unpacklpd would
10026 still be one, I'm not sure why this would be better. */
10027 zero = CONST0_RTX (V2DFmode);
10028 }
10029
10030 m = adjust_address (op1, DFmode, 0);
10031 emit_insn (gen_sse2_loadlpd (op0, zero, m));
10032 m = adjust_address (op1, DFmode, 8);
10033 emit_insn (gen_sse2_loadhpd (op0, op0, m));
10034 }
10035 else
10036 {
10037 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
10038 {
10039 op0 = gen_lowpart (V4SFmode, op0);
10040 op1 = gen_lowpart (V4SFmode, op1);
10041 emit_insn (gen_sse_movups (op0, op1));
10042 return;
10043 }
10044
10045 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
10046 emit_move_insn (op0, CONST0_RTX (mode));
10047 else
10048 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
10049
10050 if (mode != V4SFmode)
10051 op0 = gen_lowpart (V4SFmode, op0);
10052 m = adjust_address (op1, V2SFmode, 0);
10053 emit_insn (gen_sse_loadlps (op0, op0, m));
10054 m = adjust_address (op1, V2SFmode, 8);
10055 emit_insn (gen_sse_loadhps (op0, op0, m));
10056 }
10057 }
10058 else if (MEM_P (op0))
10059 {
10060 /* If we're optimizing for size, movups is the smallest. */
10061 if (optimize_size)
10062 {
10063 op0 = gen_lowpart (V4SFmode, op0);
10064 op1 = gen_lowpart (V4SFmode, op1);
10065 emit_insn (gen_sse_movups (op0, op1));
10066 return;
10067 }
10068
10069 /* ??? Similar to the above, only less clear because of
10070 "typeless stores". */
10071 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
10072 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
10073 {
10074 op0 = gen_lowpart (V16QImode, op0);
10075 op1 = gen_lowpart (V16QImode, op1);
10076 emit_insn (gen_sse2_movdqu (op0, op1));
10077 return;
10078 }
10079
10080 if (TARGET_SSE2 && mode == V2DFmode)
10081 {
10082 m = adjust_address (op0, DFmode, 0);
10083 emit_insn (gen_sse2_storelpd (m, op1));
10084 m = adjust_address (op0, DFmode, 8);
10085 emit_insn (gen_sse2_storehpd (m, op1));
10086 }
10087 else
10088 {
10089 if (mode != V4SFmode)
10090 op1 = gen_lowpart (V4SFmode, op1);
10091 m = adjust_address (op0, V2SFmode, 0);
10092 emit_insn (gen_sse_storelps (m, op1));
10093 m = adjust_address (op0, V2SFmode, 8);
10094 emit_insn (gen_sse_storehps (m, op1));
10095 }
10096 }
10097 else
10098 gcc_unreachable ();
10099 }
10100
10101 /* Expand a push in MODE. This is some mode for which we do not support
10102 proper push instructions, at least from the registers that we expect
10103 the value to live in. */
10104
10105 void
10106 ix86_expand_push (enum machine_mode mode, rtx x)
10107 {
10108 rtx tmp;
10109
10110 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
10111 GEN_INT (-GET_MODE_SIZE (mode)),
10112 stack_pointer_rtx, 1, OPTAB_DIRECT);
10113 if (tmp != stack_pointer_rtx)
10114 emit_move_insn (stack_pointer_rtx, tmp);
10115
10116 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
10117 emit_move_insn (tmp, x);
10118 }
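
/* Illustrative expansion (added note): a push in a mode with no real push
   instruction becomes, in effect,

       sub $GET_MODE_SIZE (mode), %esp
       mov <value>, (%esp)

   i.e. a manual stack-pointer adjustment followed by an ordinary move. */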
10119
10120 /* Helper function of ix86_fixup_binary_operands to canonicalize
10121 operand order. Returns true if the operands should be swapped. */
10122
10123 static bool
10124 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
10125 rtx operands[])
10126 {
10127 rtx dst = operands[0];
10128 rtx src1 = operands[1];
10129 rtx src2 = operands[2];
10130
10131 /* If the operation is not commutative, we can't do anything. */
10132 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
10133 return false;
10134
10135 /* Highest priority is that src1 should match dst. */
10136 if (rtx_equal_p (dst, src1))
10137 return false;
10138 if (rtx_equal_p (dst, src2))
10139 return true;
10140
10141 /* Next highest priority is that immediate constants come second. */
10142 if (immediate_operand (src2, mode))
10143 return false;
10144 if (immediate_operand (src1, mode))
10145 return true;
10146
10147 /* Lowest priority is that memory references should come second. */
10148 if (MEM_P (src2))
10149 return false;
10150 if (MEM_P (src1))
10151 return true;
10152
10153 return false;
10154 }
10155
10156
10157 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
10158 destination to use for the operation. If different from the true
10159 destination in operands[0], a copy operation will be required. */
10160
10161 rtx
10162 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
10163 rtx operands[])
10164 {
10165 rtx dst = operands[0];
10166 rtx src1 = operands[1];
10167 rtx src2 = operands[2];
10168
10169 /* Canonicalize operand order. */
10170 if (ix86_swap_binary_operands_p (code, mode, operands))
10171 {
10172 rtx temp = src1;
10173 src1 = src2;
10174 src2 = temp;
10175 }
10176
10177 /* Both source operands cannot be in memory. */
10178 if (MEM_P (src1) && MEM_P (src2))
10179 {
10180 /* Optimization: Only read from memory once. */
10181 if (rtx_equal_p (src1, src2))
10182 {
10183 src2 = force_reg (mode, src2);
10184 src1 = src2;
10185 }
10186 else
10187 src2 = force_reg (mode, src2);
10188 }
10189
10190 /* If the destination is memory, and we do not have matching source
10191 operands, do things in registers. */
10192 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
10193 dst = gen_reg_rtx (mode);
10194
10195 /* Source 1 cannot be a constant. */
10196 if (CONSTANT_P (src1))
10197 src1 = force_reg (mode, src1);
10198
10199 /* Source 1 cannot be a non-matching memory. */
10200 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
10201 src1 = force_reg (mode, src1);
10202
10203 operands[1] = src1;
10204 operands[2] = src2;
10205 return dst;
10206 }
10207
10208 /* Similarly, but assume that the destination has already been
10209 set up properly. */
10210
10211 void
10212 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
10213 enum machine_mode mode, rtx operands[])
10214 {
10215 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
10216 gcc_assert (dst == operands[0]);
10217 }
10218
10219 /* Attempt to expand a binary operator. Make the expansion closer to the
10220 actual machine than just general_operand, which would allow 3 separate
10221 memory references (one output, two input) in a single insn. */
10222
10223 void
10224 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
10225 rtx operands[])
10226 {
10227 rtx src1, src2, dst, op, clob;
10228
10229 dst = ix86_fixup_binary_operands (code, mode, operands);
10230 src1 = operands[1];
10231 src2 = operands[2];
10232
10233 /* Emit the instruction. */
10234
10235 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
10236 if (reload_in_progress)
10237 {
10238 /* Reload doesn't know about the flags register, and doesn't know that
10239 it doesn't want to clobber it. We can only do this with PLUS. */
10240 gcc_assert (code == PLUS);
10241 emit_insn (op);
10242 }
10243 else
10244 {
10245 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10246 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
10247 }
10248
10249 /* Fix up the destination if needed. */
10250 if (dst != operands[0])
10251 emit_move_insn (operands[0], dst);
10252 }
10253
10254 /* Return TRUE or FALSE depending on whether the binary operator meets the
10255 appropriate constraints. */
10256
10257 int
10258 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
10259 rtx operands[3])
10260 {
10261 rtx dst = operands[0];
10262 rtx src1 = operands[1];
10263 rtx src2 = operands[2];
10264
10265 /* Both source operands cannot be in memory. */
10266 if (MEM_P (src1) && MEM_P (src2))
10267 return 0;
10268
10269 /* Canonicalize operand order for commutative operators. */
10270 if (ix86_swap_binary_operands_p (code, mode, operands))
10271 {
10272 rtx temp = src1;
10273 src1 = src2;
10274 src2 = temp;
10275 }
10276
10277 /* If the destination is memory, we must have a matching source operand. */
10278 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
10279 return 0;
10280
10281 /* Source 1 cannot be a constant. */
10282 if (CONSTANT_P (src1))
10283 return 0;
10284
10285 /* Source 1 cannot be a non-matching memory. */
10286 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
10287 return 0;
10288
10289 return 1;
10290 }
10291
10292 /* Attempt to expand a unary operator. Make the expansion closer to the
10293 actual machine than just general_operand, which would allow 2 separate
10294 memory references (one output, one input) in a single insn. */
10295
10296 void
10297 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
10298 rtx operands[])
10299 {
10300 int matching_memory;
10301 rtx src, dst, op, clob;
10302
10303 dst = operands[0];
10304 src = operands[1];
10305
10306 /* If the destination is memory, and we do not have matching source
10307 operands, do things in registers. */
10308 matching_memory = 0;
10309 if (MEM_P (dst))
10310 {
10311 if (rtx_equal_p (dst, src))
10312 matching_memory = 1;
10313 else
10314 dst = gen_reg_rtx (mode);
10315 }
10316
10317 /* When the source operand is memory, the destination must match. */
10318 if (MEM_P (src) && !matching_memory)
10319 src = force_reg (mode, src);
10320
10321 /* Emit the instruction. */
10322
10323 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
10324 if (reload_in_progress || code == NOT)
10325 {
10326 /* Reload doesn't know about the flags register, and doesn't know that
10327 it doesn't want to clobber it. */
10328 gcc_assert (code == NOT);
10329 emit_insn (op);
10330 }
10331 else
10332 {
10333 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10334 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
10335 }
10336
10337 /* Fix up the destination if needed. */
10338 if (dst != operands[0])
10339 emit_move_insn (operands[0], dst);
10340 }
10341
10342 /* Return TRUE or FALSE depending on whether the unary operator meets the
10343 appropriate constraints. */
10344
10345 int
10346 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
10347 enum machine_mode mode ATTRIBUTE_UNUSED,
10348 rtx operands[2] ATTRIBUTE_UNUSED)
10349 {
10350 /* If one of the operands is memory, source and destination must match. */
10351 if ((MEM_P (operands[0])
10352 || MEM_P (operands[1]))
10353 && ! rtx_equal_p (operands[0], operands[1]))
10354 return FALSE;
10355 return TRUE;
10356 }
10357
10358 /* Post-reload splitter for converting an SF or DFmode value in an
10359 SSE register into an unsigned SImode. */
10360
10361 void
10362 ix86_split_convert_uns_si_sse (rtx operands[])
10363 {
10364 enum machine_mode vecmode;
10365 rtx value, large, zero_or_two31, input, two31, x;
10366
10367 large = operands[1];
10368 zero_or_two31 = operands[2];
10369 input = operands[3];
10370 two31 = operands[4];
10371 vecmode = GET_MODE (large);
10372 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
10373
10374 /* Load up the value into the low element. We must ensure that the other
10375 elements are valid floats -- zero is the easiest such value. */
10376 if (MEM_P (input))
10377 {
10378 if (vecmode == V4SFmode)
10379 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
10380 else
10381 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
10382 }
10383 else
10384 {
10385 input = gen_rtx_REG (vecmode, REGNO (input));
10386 emit_move_insn (value, CONST0_RTX (vecmode));
10387 if (vecmode == V4SFmode)
10388 emit_insn (gen_sse_movss (value, value, input));
10389 else
10390 emit_insn (gen_sse2_movsd (value, value, input));
10391 }
10392
10393 emit_move_insn (large, two31);
10394 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
10395
10396 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
10397 emit_insn (gen_rtx_SET (VOIDmode, large, x));
10398
10399 x = gen_rtx_AND (vecmode, zero_or_two31, large);
10400 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
10401
10402 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
10403 emit_insn (gen_rtx_SET (VOIDmode, value, x));
10404
10405 large = gen_rtx_REG (V4SImode, REGNO (large));
10406 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
10407
10408 x = gen_rtx_REG (V4SImode, REGNO (value));
10409 if (vecmode == V4SFmode)
10410 emit_insn (gen_sse2_cvttps2dq (x, value));
10411 else
10412 emit_insn (gen_sse2_cvttpd2dq (x, value));
10413 value = x;
10414
10415 emit_insn (gen_xorv4si3 (value, value, large));
10416 }
10417
10418 /* Convert an unsigned DImode value into a DFmode, using only SSE.
10419 Expects the 64-bit DImode to be supplied in a pair of integral
10420 registers. Requires SSE2; will use SSE3 if available. For x86_32,
10421 -mfpmath=sse, !optimize_size only. */
10422
10423 void
10424 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
10425 {
10426 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
10427 rtx int_xmm, fp_xmm;
10428 rtx biases, exponents;
10429 rtx x;
10430
10431 int_xmm = gen_reg_rtx (V4SImode);
10432 if (TARGET_INTER_UNIT_MOVES)
10433 emit_insn (gen_movdi_to_sse (int_xmm, input));
10434 else if (TARGET_SSE_SPLIT_REGS)
10435 {
10436 emit_insn (gen_rtx_CLOBBER (VOIDmode, int_xmm));
10437 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
10438 }
10439 else
10440 {
10441 x = gen_reg_rtx (V2DImode);
10442 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
10443 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
10444 }
10445
10446 x = gen_rtx_CONST_VECTOR (V4SImode,
10447 gen_rtvec (4, GEN_INT (0x43300000UL),
10448 GEN_INT (0x45300000UL),
10449 const0_rtx, const0_rtx));
10450 exponents = validize_mem (force_const_mem (V4SImode, x));
10451
10452 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
10453 emit_insn (gen_sse2_punpckldq (int_xmm, int_xmm, exponents));
10454
10455 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_lo_xmm)
10456 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
10457 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
10458 (0x1.0p84 + double(fp_value_hi_xmm)).
10459 Note these exponents differ by 32. */
10460
10461 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
10462
10463 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
10464 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
10465 real_ldexp (&bias_lo_rvt, &dconst1, 52);
10466 real_ldexp (&bias_hi_rvt, &dconst1, 84);
10467 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
10468 x = const_double_from_real_value (bias_hi_rvt, DFmode);
10469 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
10470 biases = validize_mem (force_const_mem (V2DFmode, biases));
10471 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
10472
10473 /* Add the upper and lower DFmode values together. */
10474 if (TARGET_SSE3)
10475 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
10476 else
10477 {
10478 x = copy_to_mode_reg (V2DFmode, fp_xmm);
10479 emit_insn (gen_sse2_unpckhpd (fp_xmm, fp_xmm, fp_xmm));
10480 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
10481 }
10482
10483 ix86_expand_vector_extract (false, target, fp_xmm, 0);
10484 }
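
/* Worked example (added for clarity): for the input 0x100000005
   (hi = 1, lo = 5) the punpckldq above builds the two doubles 2^52 + 5
   and 2^84 + 2^32; subtracting the 2^52/2^84 biases and adding the two
   halves yields 5 + 2^32 = 4294967301.0, the exact DFmode value of the
   unsigned DImode input. */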
10485
10486 /* Convert an unsigned SImode value into a DFmode. Only currently used
10487 for SSE, but applicable anywhere. */
10488
10489 void
10490 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
10491 {
10492 REAL_VALUE_TYPE TWO31r;
10493 rtx x, fp;
10494
10495 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
10496 NULL, 1, OPTAB_DIRECT);
10497
10498 fp = gen_reg_rtx (DFmode);
10499 emit_insn (gen_floatsidf2 (fp, x));
10500
10501 real_ldexp (&TWO31r, &dconst1, 31);
10502 x = const_double_from_real_value (TWO31r, DFmode);
10503
10504 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
10505 if (x != target)
10506 emit_move_insn (target, x);
10507 }
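
/* Worked example (added for clarity): for input 0x80000003 the PLUS of
   -2^31 flips the sign bit, giving the signed value 3; floatsidf turns
   that into 3.0, and adding 2^31 back yields 2147483651.0, the value of
   the original unsigned operand. */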
10508
10509 /* Convert a signed DImode value into a DFmode. Only used for SSE in
10510 32-bit mode; otherwise we have a direct convert instruction. */
10511
10512 void
10513 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
10514 {
10515 REAL_VALUE_TYPE TWO32r;
10516 rtx fp_lo, fp_hi, x;
10517
10518 fp_lo = gen_reg_rtx (DFmode);
10519 fp_hi = gen_reg_rtx (DFmode);
10520
10521 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
10522
10523 real_ldexp (&TWO32r, &dconst1, 32);
10524 x = const_double_from_real_value (TWO32r, DFmode);
10525 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
10526
10527 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
10528
10529 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
10530 0, OPTAB_DIRECT);
10531 if (x != target)
10532 emit_move_insn (target, x);
10533 }
10534
10535 /* Convert an unsigned SImode value into a SFmode, using only SSE.
10536 For x86_32, -mfpmath=sse, !optimize_size only. */
10537 void
10538 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
10539 {
10540 REAL_VALUE_TYPE ONE16r;
10541 rtx fp_hi, fp_lo, int_hi, int_lo, x;
10542
10543 real_ldexp (&ONE16r, &dconst1, 16);
10544 x = const_double_from_real_value (ONE16r, SFmode);
10545 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
10546 NULL, 0, OPTAB_DIRECT);
10547 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
10548 NULL, 0, OPTAB_DIRECT);
10549 fp_hi = gen_reg_rtx (SFmode);
10550 fp_lo = gen_reg_rtx (SFmode);
10551 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
10552 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
10553 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
10554 0, OPTAB_DIRECT);
10555 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
10556 0, OPTAB_DIRECT);
10557 if (!rtx_equal_p (target, fp_hi))
10558 emit_move_insn (target, fp_hi);
10559 }
10560
10561 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
10562 then replicate the value for all elements of the vector
10563 register. */
10564
10565 rtx
10566 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
10567 {
10568 rtvec v;
10569 switch (mode)
10570 {
10571 case SImode:
10572 gcc_assert (vect);
10573 v = gen_rtvec (4, value, value, value, value);
10574 return gen_rtx_CONST_VECTOR (V4SImode, v);
10575
10576 case DImode:
10577 gcc_assert (vect);
10578 v = gen_rtvec (2, value, value);
10579 return gen_rtx_CONST_VECTOR (V2DImode, v);
10580
10581 case SFmode:
10582 if (vect)
10583 v = gen_rtvec (4, value, value, value, value);
10584 else
10585 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
10586 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
10587 return gen_rtx_CONST_VECTOR (V4SFmode, v);
10588
10589 case DFmode:
10590 if (vect)
10591 v = gen_rtvec (2, value, value);
10592 else
10593 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
10594 return gen_rtx_CONST_VECTOR (V2DFmode, v);
10595
10596 default:
10597 gcc_unreachable ();
10598 }
10599 }
10600
10601 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
10602 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
10603 for an SSE register. If VECT is true, then replicate the mask for
10604 all elements of the vector register. If INVERT is true, then create
10605 a mask excluding the sign bit. */
10606
10607 rtx
10608 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
10609 {
10610 enum machine_mode vec_mode, imode;
10611 HOST_WIDE_INT hi, lo;
10612 int shift = 63;
10613 rtx v;
10614 rtx mask;
10615
10616 /* Find the sign bit, sign extended to 2*HWI. */
10617 switch (mode)
10618 {
10619 case SImode:
10620 case SFmode:
10621 imode = SImode;
10622 vec_mode = (mode == SImode) ? V4SImode : V4SFmode;
10623 lo = 0x80000000, hi = lo < 0;
10624 break;
10625
10626 case DImode:
10627 case DFmode:
10628 imode = DImode;
10629 vec_mode = (mode == DImode) ? V2DImode : V2DFmode;
10630 if (HOST_BITS_PER_WIDE_INT >= 64)
10631 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
10632 else
10633 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
10634 break;
10635
10636 case TImode:
10637 case TFmode:
10638 imode = TImode;
10639 vec_mode = VOIDmode;
10640 gcc_assert (HOST_BITS_PER_WIDE_INT >= 64);
10641 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
10642 break;
10643
10644 default:
10645 gcc_unreachable ();
10646 }
10647
10648 if (invert)
10649 lo = ~lo, hi = ~hi;
10650
10651 /* Force this value into the low part of a fp vector constant. */
10652 mask = immed_double_const (lo, hi, imode);
10653 mask = gen_lowpart (mode, mask);
10654
10655 if (vec_mode == VOIDmode)
10656 return force_reg (mode, mask);
10657
10658 v = ix86_build_const_vector (mode, vect, mask);
10659 return force_reg (vec_mode, v);
10660 }
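
/* Usage sketch (added note): for DFmode with VECT and INVERT both false
   the result is a V2DF constant whose low element has only bit 63 set
   (i.e. -0.0), suitable as an xorpd operand for negation; with INVERT
   true the low element instead has every bit except the sign bit set,
   which is what an ABS expansion needs for its andpd. */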
10661
10662 /* Generate code for floating point ABS or NEG. */
10663
10664 void
10665 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
10666 rtx operands[])
10667 {
10668 rtx mask, set, use, clob, dst, src;
10669 bool matching_memory;
10670 bool use_sse = false;
10671 bool vector_mode = VECTOR_MODE_P (mode);
10672 enum machine_mode elt_mode = mode;
10673
10674 if (vector_mode)
10675 {
10676 elt_mode = GET_MODE_INNER (mode);
10677 use_sse = true;
10678 }
10679 else if (mode == TFmode)
10680 use_sse = true;
10681 else if (TARGET_SSE_MATH)
10682 use_sse = SSE_FLOAT_MODE_P (mode);
10683
10684 /* NEG and ABS performed with SSE use bitwise mask operations.
10685 Create the appropriate mask now. */
10686 if (use_sse)
10687 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
10688 else
10689 mask = NULL_RTX;
10690
10691 dst = operands[0];
10692 src = operands[1];
10693
10694 /* If the destination is memory, and we don't have matching source
10695 operands or we're using the x87, do things in registers. */
10696 matching_memory = false;
10697 if (MEM_P (dst))
10698 {
10699 if (use_sse && rtx_equal_p (dst, src))
10700 matching_memory = true;
10701 else
10702 dst = gen_reg_rtx (mode);
10703 }
10704 if (MEM_P (src) && !matching_memory)
10705 src = force_reg (mode, src);
10706
10707 if (vector_mode)
10708 {
10709 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
10710 set = gen_rtx_SET (VOIDmode, dst, set);
10711 emit_insn (set);
10712 }
10713 else
10714 {
10715 set = gen_rtx_fmt_e (code, mode, src);
10716 set = gen_rtx_SET (VOIDmode, dst, set);
10717 if (mask)
10718 {
10719 use = gen_rtx_USE (VOIDmode, mask);
10720 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10721 emit_insn (gen_rtx_PARALLEL (VOIDmode,
10722 gen_rtvec (3, set, use, clob)));
10723 }
10724 else
10725 emit_insn (set);
10726 }
10727
10728 if (dst != operands[0])
10729 emit_move_insn (operands[0], dst);
10730 }
10731
10732 /* Expand a copysign operation. Special case operand 0 being a constant. */
10733
10734 void
10735 ix86_expand_copysign (rtx operands[])
10736 {
10737 enum machine_mode mode, vmode;
10738 rtx dest, op0, op1, mask, nmask;
10739
10740 dest = operands[0];
10741 op0 = operands[1];
10742 op1 = operands[2];
10743
10744 mode = GET_MODE (dest);
10745 vmode = mode == SFmode ? V4SFmode : V2DFmode;
10746
10747 if (GET_CODE (op0) == CONST_DOUBLE)
10748 {
10749 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
10750
10751 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
10752 op0 = simplify_unary_operation (ABS, mode, op0, mode);
10753
10754 if (mode == SFmode || mode == DFmode)
10755 {
10756 if (op0 == CONST0_RTX (mode))
10757 op0 = CONST0_RTX (vmode);
10758 else
10759 {
10760 rtvec v;
10761
10762 if (mode == SFmode)
10763 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
10764 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
10765 else
10766 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
10767 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
10768 }
10769 }
10770
10771 mask = ix86_build_signbit_mask (mode, 0, 0);
10772
10773 if (mode == SFmode)
10774 copysign_insn = gen_copysignsf3_const;
10775 else if (mode == DFmode)
10776 copysign_insn = gen_copysigndf3_const;
10777 else
10778 copysign_insn = gen_copysigntf3_const;
10779
10780 emit_insn (copysign_insn (dest, op0, op1, mask));
10781 }
10782 else
10783 {
10784 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
10785
10786 nmask = ix86_build_signbit_mask (mode, 0, 1);
10787 mask = ix86_build_signbit_mask (mode, 0, 0);
10788
10789 if (mode == SFmode)
10790 copysign_insn = gen_copysignsf3_var;
10791 else if (mode == DFmode)
10792 copysign_insn = gen_copysigndf3_var;
10793 else
10794 copysign_insn = gen_copysigntf3_var;
10795
10796 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
10797 }
10798 }
10799
10800 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
10801 be a constant, and so has already been expanded into a vector constant. */
10802
10803 void
10804 ix86_split_copysign_const (rtx operands[])
10805 {
10806 enum machine_mode mode, vmode;
10807 rtx dest, op0, op1, mask, x;
10808
10809 dest = operands[0];
10810 op0 = operands[1];
10811 op1 = operands[2];
10812 mask = operands[3];
10813
10814 mode = GET_MODE (dest);
10815 vmode = GET_MODE (mask);
10816
10817 dest = simplify_gen_subreg (vmode, dest, mode, 0);
10818 x = gen_rtx_AND (vmode, dest, mask);
10819 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10820
10821 if (op0 != CONST0_RTX (vmode))
10822 {
10823 x = gen_rtx_IOR (vmode, dest, op0);
10824 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10825 }
10826 }
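
/* Identity note (added; assumes, as the insn pattern arranges by tying
   the operands, that the destination already holds the variable operand Y
   when this splitter runs): with MASK covering only the sign bit, the
   result is (Y & MASK) | |C|, i.e. the AND above followed by the IOR,
   and the IOR is skipped when the constant magnitude is zero. */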
10827
10828 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
10829 so we have to do two masks. */
10830
10831 void
10832 ix86_split_copysign_var (rtx operands[])
10833 {
10834 enum machine_mode mode, vmode;
10835 rtx dest, scratch, op0, op1, mask, nmask, x;
10836
10837 dest = operands[0];
10838 scratch = operands[1];
10839 op0 = operands[2];
10840 op1 = operands[3];
10841 nmask = operands[4];
10842 mask = operands[5];
10843
10844 mode = GET_MODE (dest);
10845 vmode = GET_MODE (mask);
10846
10847 if (rtx_equal_p (op0, op1))
10848 {
10849 /* Shouldn't happen often (it's useless, obviously), but when it does
10850 we'd generate incorrect code if we continue below. */
10851 emit_move_insn (dest, op0);
10852 return;
10853 }
10854
10855 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
10856 {
10857 gcc_assert (REGNO (op1) == REGNO (scratch));
10858
10859 x = gen_rtx_AND (vmode, scratch, mask);
10860 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
10861
10862 dest = mask;
10863 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
10864 x = gen_rtx_NOT (vmode, dest);
10865 x = gen_rtx_AND (vmode, x, op0);
10866 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10867 }
10868 else
10869 {
10870 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
10871 {
10872 x = gen_rtx_AND (vmode, scratch, mask);
10873 }
10874 else /* alternative 2,4 */
10875 {
10876 gcc_assert (REGNO (mask) == REGNO (scratch));
10877 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
10878 x = gen_rtx_AND (vmode, scratch, op1);
10879 }
10880 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
10881
10882 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
10883 {
10884 dest = simplify_gen_subreg (vmode, op0, mode, 0);
10885 x = gen_rtx_AND (vmode, dest, nmask);
10886 }
10887 else /* alternative 3,4 */
10888 {
10889 gcc_assert (REGNO (nmask) == REGNO (dest));
10890 dest = nmask;
10891 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
10892 x = gen_rtx_AND (vmode, dest, op0);
10893 }
10894 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10895 }
10896
10897 x = gen_rtx_IOR (vmode, dest, scratch);
10898 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10899 }
10900
10901 /* Return TRUE or FALSE depending on whether the first SET in INSN
10902 has source and destination with matching CC modes, and that the
10903 CC mode is at least as constrained as REQ_MODE. */
10904
10905 int
10906 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
10907 {
10908 rtx set;
10909 enum machine_mode set_mode;
10910
10911 set = PATTERN (insn);
10912 if (GET_CODE (set) == PARALLEL)
10913 set = XVECEXP (set, 0, 0);
10914 gcc_assert (GET_CODE (set) == SET);
10915 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
10916
10917 set_mode = GET_MODE (SET_DEST (set));
10918 switch (set_mode)
10919 {
10920 case CCNOmode:
10921 if (req_mode != CCNOmode
10922 && (req_mode != CCmode
10923 || XEXP (SET_SRC (set), 1) != const0_rtx))
10924 return 0;
10925 break;
10926 case CCmode:
10927 if (req_mode == CCGCmode)
10928 return 0;
10929 /* FALLTHRU */
10930 case CCGCmode:
10931 if (req_mode == CCGOCmode || req_mode == CCNOmode)
10932 return 0;
10933 /* FALLTHRU */
10934 case CCGOCmode:
10935 if (req_mode == CCZmode)
10936 return 0;
10937 /* FALLTHRU */
10938 case CCZmode:
10939 break;
10940
10941 default:
10942 gcc_unreachable ();
10943 }
10944
10945 return (GET_MODE (SET_SRC (set)) == set_mode);
10946 }
10947
10948 /* Generate insn patterns to do an integer compare of OPERANDS. */
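/* For instance (illustrative RTL only), expanding a GTU comparison of OP0
   and OP1 emits
   (set (reg:CC FLAGS_REG) (compare:CC op0 op1))
   and returns (gtu (reg:CC FLAGS_REG) (const_int 0)) for the caller to put
   inside a conditional branch, setcc or cmov pattern.  */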
10949
10950 static rtx
10951 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
10952 {
10953 enum machine_mode cmpmode;
10954 rtx tmp, flags;
10955
10956 cmpmode = SELECT_CC_MODE (code, op0, op1);
10957 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
10958
10959 /* This is very simple, but making the interface the same as in the
10960 FP case makes the rest of the code easier. */
10961 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
10962 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
10963
10964 /* Return the test that should be put into the flags user, i.e.
10965 the bcc, scc, or cmov instruction. */
10966 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
10967 }
10968
10969 /* Figure out whether to use ordered or unordered fp comparisons.
10970 Return the appropriate mode to use. */
10971
10972 enum machine_mode
10973 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
10974 {
10975 /* ??? In order to make all comparisons reversible, we do all comparisons
10976 non-trapping when compiling for IEEE. Once gcc is able to distinguish
10977 all forms of trapping and nontrapping comparisons, we can make inequality
10978 comparisons trapping again, since that results in better code when using
10979 FCOM based compares. */
10980 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
10981 }
10982
10983 enum machine_mode
10984 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
10985 {
10986 enum machine_mode mode = GET_MODE (op0);
10987
10988 if (SCALAR_FLOAT_MODE_P (mode))
10989 {
10990 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
10991 return ix86_fp_compare_mode (code);
10992 }
10993
10994 switch (code)
10995 {
10996 /* Only zero flag is needed. */
10997 case EQ: /* ZF=0 */
10998 case NE: /* ZF!=0 */
10999 return CCZmode;
11000 /* Codes needing carry flag. */
11001 case GEU: /* CF=0 */
11002 case GTU: /* CF=0 & ZF=0 */
11003 case LTU: /* CF=1 */
11004 case LEU: /* CF=1 | ZF=1 */
11005 return CCmode;
11006 /* Codes possibly doable only with sign flag when
11007 comparing against zero. */
11008 case GE: /* SF=OF or SF=0 */
11009 case LT: /* SF<>OF or SF=1 */
11010 if (op1 == const0_rtx)
11011 return CCGOCmode;
11012 else
11013 /* For other cases Carry flag is not required. */
11014 return CCGCmode;
11015 /* Codes doable only with the sign flag when comparing
11016 against zero, but we lack a jump instruction for it,
11017 so we need to use relational tests against overflow,
11018 which thus needs to be zero. */
11019 case GT: /* ZF=0 & SF=OF */
11020 case LE: /* ZF=1 | SF<>OF */
11021 if (op1 == const0_rtx)
11022 return CCNOmode;
11023 else
11024 return CCGCmode;
11025 /* The strcmp pattern does (use flags) and combine may ask us for the proper
11026 mode. */
11027 case USE:
11028 return CCmode;
11029 default:
11030 gcc_unreachable ();
11031 }
11032 }
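/* Thus, for example, a signed "a < b" on integer operands yields CCGCmode,
   "a < 0" yields CCGOCmode (only the sign flag is needed), and any unsigned
   comparison yields plain CCmode because the carry flag is required.  */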
11033
11034 /* Return the fixed registers used for condition codes. */
11035
11036 static bool
11037 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
11038 {
11039 *p1 = FLAGS_REG;
11040 *p2 = FPSR_REG;
11041 return true;
11042 }
11043
11044 /* If two condition code modes are compatible, return a condition code
11045 mode which is compatible with both. Otherwise, return
11046 VOIDmode. */
11047
11048 static enum machine_mode
11049 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
11050 {
11051 if (m1 == m2)
11052 return m1;
11053
11054 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
11055 return VOIDmode;
11056
11057 if ((m1 == CCGCmode && m2 == CCGOCmode)
11058 || (m1 == CCGOCmode && m2 == CCGCmode))
11059 return CCGCmode;
11060
11061 switch (m1)
11062 {
11063 default:
11064 gcc_unreachable ();
11065
11066 case CCmode:
11067 case CCGCmode:
11068 case CCGOCmode:
11069 case CCNOmode:
11070 case CCAmode:
11071 case CCCmode:
11072 case CCOmode:
11073 case CCSmode:
11074 case CCZmode:
11075 switch (m2)
11076 {
11077 default:
11078 return VOIDmode;
11079
11080 case CCmode:
11081 case CCGCmode:
11082 case CCGOCmode:
11083 case CCNOmode:
11084 case CCAmode:
11085 case CCCmode:
11086 case CCOmode:
11087 case CCSmode:
11088 case CCZmode:
11089 return CCmode;
11090 }
11091
11092 case CCFPmode:
11093 case CCFPUmode:
11094 /* These are only compatible with themselves, which we already
11095 checked above. */
11096 return VOIDmode;
11097 }
11098 }
11099
11100 /* Split comparison code CODE into comparisons we can do using branch
11101 instructions. BYPASS_CODE is the comparison code for a branch that will
11102 branch around FIRST_CODE and SECOND_CODE. If one of the branches
11103 is not required, its value is set to UNKNOWN.
11104 We never require more than two branches. */
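/* For example, under TARGET_IEEE_FP an LT test becomes first_code = UNLT
   with bypass_code = UNORDERED (branch around the LT branch when the
   operands compare unordered), while NE becomes first_code = LTGT with
   second_code = UNORDERED (also branch when unordered).  */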
11105
11106 void
11107 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
11108 enum rtx_code *first_code,
11109 enum rtx_code *second_code)
11110 {
11111 *first_code = code;
11112 *bypass_code = UNKNOWN;
11113 *second_code = UNKNOWN;
11114
11115 /* The fcomi comparison sets flags as follows:
11116
11117 cmp ZF PF CF
11118 > 0 0 0
11119 < 0 0 1
11120 = 1 0 0
11121 un 1 1 1 */
11122
11123 switch (code)
11124 {
11125 case GT: /* GTU - CF=0 & ZF=0 */
11126 case GE: /* GEU - CF=0 */
11127 case ORDERED: /* PF=0 */
11128 case UNORDERED: /* PF=1 */
11129 case UNEQ: /* EQ - ZF=1 */
11130 case UNLT: /* LTU - CF=1 */
11131 case UNLE: /* LEU - CF=1 | ZF=1 */
11132 case LTGT: /* EQ - ZF=0 */
11133 break;
11134 case LT: /* LTU - CF=1 - fails on unordered */
11135 *first_code = UNLT;
11136 *bypass_code = UNORDERED;
11137 break;
11138 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
11139 *first_code = UNLE;
11140 *bypass_code = UNORDERED;
11141 break;
11142 case EQ: /* EQ - ZF=1 - fails on unordered */
11143 *first_code = UNEQ;
11144 *bypass_code = UNORDERED;
11145 break;
11146 case NE: /* NE - ZF=0 - fails on unordered */
11147 *first_code = LTGT;
11148 *second_code = UNORDERED;
11149 break;
11150 case UNGE: /* GEU - CF=0 - fails on unordered */
11151 *first_code = GE;
11152 *second_code = UNORDERED;
11153 break;
11154 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
11155 *first_code = GT;
11156 *second_code = UNORDERED;
11157 break;
11158 default:
11159 gcc_unreachable ();
11160 }
11161 if (!TARGET_IEEE_FP)
11162 {
11163 *second_code = UNKNOWN;
11164 *bypass_code = UNKNOWN;
11165 }
11166 }
11167
11168 /* Return the cost of a comparison done using fcom + arithmetic operations on AX.
11169 All following functions use the number of instructions as the cost metric.
11170 In the future this should be tweaked to compute bytes for optimize_size and
11171 take into account the performance of various instructions on various CPUs. */
11172 static int
11173 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
11174 {
11175 if (!TARGET_IEEE_FP)
11176 return 4;
11177 /* The cost of code output by ix86_expand_fp_compare. */
11178 switch (code)
11179 {
11180 case UNLE:
11181 case UNLT:
11182 case LTGT:
11183 case GT:
11184 case GE:
11185 case UNORDERED:
11186 case ORDERED:
11187 case UNEQ:
11188 return 4;
11189 break;
11190 case LT:
11191 case NE:
11192 case EQ:
11193 case UNGE:
11194 return 5;
11195 break;
11196 case LE:
11197 case UNGT:
11198 return 6;
11199 break;
11200 default:
11201 gcc_unreachable ();
11202 }
11203 }
11204
11205 /* Return cost of comparison done using fcomi operation.
11206 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11207 static int
11208 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
11209 {
11210 enum rtx_code bypass_code, first_code, second_code;
11211 /* Return an arbitrarily high cost when the instruction is not supported - this
11212 prevents gcc from using it. */
11213 if (!TARGET_CMOVE)
11214 return 1024;
11215 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11216 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
11217 }
11218
11219 /* Return cost of comparison done using sahf operation.
11220 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11221 static int
11222 ix86_fp_comparison_sahf_cost (enum rtx_code code)
11223 {
11224 enum rtx_code bypass_code, first_code, second_code;
11225 /* Return an arbitrarily high cost when the instruction is not preferred - this
11226 keeps gcc from using it. */
11227 if (!(TARGET_SAHF && (TARGET_USE_SAHF || optimize_size)))
11228 return 1024;
11229 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11230 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
11231 }
11232
11233 /* Compute cost of the comparison done using any method.
11234 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11235 static int
11236 ix86_fp_comparison_cost (enum rtx_code code)
11237 {
11238 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
11239 int min;
11240
11241 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
11242 sahf_cost = ix86_fp_comparison_sahf_cost (code);
11243
11244 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
11245 if (min > sahf_cost)
11246 min = sahf_cost;
11247 if (min > fcomi_cost)
11248 min = fcomi_cost;
11249 return min;
11250 }
11251
11252 /* Return true if we should use an FCOMI instruction for this
11253 fp comparison. */
11254
11255 int
11256 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
11257 {
11258 enum rtx_code swapped_code = swap_condition (code);
11259
11260 return ((ix86_fp_comparison_cost (code)
11261 == ix86_fp_comparison_fcomi_cost (code))
11262 || (ix86_fp_comparison_cost (swapped_code)
11263 == ix86_fp_comparison_fcomi_cost (swapped_code)));
11264 }
11265
11266 /* Swap, force into registers, or otherwise massage the two operands
11267 to a fp comparison. The operands are updated in place; the new
11268 comparison code is returned. */
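/* For example, when op0 sits in memory and op1 is already a register it can
   be cheaper to exchange the two and use the swapped condition code; the
   cost check near the end of this function makes that decision.  */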
11269
11270 static enum rtx_code
11271 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
11272 {
11273 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
11274 rtx op0 = *pop0, op1 = *pop1;
11275 enum machine_mode op_mode = GET_MODE (op0);
11276 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
11277
11278 /* All of the unordered compare instructions only work on registers.
11279 The same is true of the fcomi compare instructions. The XFmode
11280 compare instructions require registers except when comparing
11281 against zero or when converting operand 1 from fixed point to
11282 floating point. */
11283
11284 if (!is_sse
11285 && (fpcmp_mode == CCFPUmode
11286 || (op_mode == XFmode
11287 && ! (standard_80387_constant_p (op0) == 1
11288 || standard_80387_constant_p (op1) == 1)
11289 && GET_CODE (op1) != FLOAT)
11290 || ix86_use_fcomi_compare (code)))
11291 {
11292 op0 = force_reg (op_mode, op0);
11293 op1 = force_reg (op_mode, op1);
11294 }
11295 else
11296 {
11297 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
11298 things around if they appear profitable, otherwise force op0
11299 into a register. */
11300
11301 if (standard_80387_constant_p (op0) == 0
11302 || (MEM_P (op0)
11303 && ! (standard_80387_constant_p (op1) == 0
11304 || MEM_P (op1))))
11305 {
11306 rtx tmp;
11307 tmp = op0, op0 = op1, op1 = tmp;
11308 code = swap_condition (code);
11309 }
11310
11311 if (!REG_P (op0))
11312 op0 = force_reg (op_mode, op0);
11313
11314 if (CONSTANT_P (op1))
11315 {
11316 int tmp = standard_80387_constant_p (op1);
11317 if (tmp == 0)
11318 op1 = validize_mem (force_const_mem (op_mode, op1));
11319 else if (tmp == 1)
11320 {
11321 if (TARGET_CMOVE)
11322 op1 = force_reg (op_mode, op1);
11323 }
11324 else
11325 op1 = force_reg (op_mode, op1);
11326 }
11327 }
11328
11329 /* Try to rearrange the comparison to make it cheaper. */
11330 if (ix86_fp_comparison_cost (code)
11331 > ix86_fp_comparison_cost (swap_condition (code))
11332 && (REG_P (op1) || !no_new_pseudos))
11333 {
11334 rtx tmp;
11335 tmp = op0, op0 = op1, op1 = tmp;
11336 code = swap_condition (code);
11337 if (!REG_P (op0))
11338 op0 = force_reg (op_mode, op0);
11339 }
11340
11341 *pop0 = op0;
11342 *pop1 = op1;
11343 return code;
11344 }
11345
11346 /* Convert comparison codes we use to represent FP comparison to integer
11347 code that will result in proper branch. Return UNKNOWN if no such code
11348 is available. */
11349
11350 enum rtx_code
11351 ix86_fp_compare_code_to_integer (enum rtx_code code)
11352 {
11353 switch (code)
11354 {
11355 case GT:
11356 return GTU;
11357 case GE:
11358 return GEU;
11359 case ORDERED:
11360 case UNORDERED:
11361 return code;
11362 break;
11363 case UNEQ:
11364 return EQ;
11365 break;
11366 case UNLT:
11367 return LTU;
11368 break;
11369 case UNLE:
11370 return LEU;
11371 break;
11372 case LTGT:
11373 return NE;
11374 break;
11375 default:
11376 return UNKNOWN;
11377 }
11378 }
11379
11380 /* Generate insn patterns to do a floating point compare of OPERANDS. */
11381
11382 static rtx
11383 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
11384 rtx *second_test, rtx *bypass_test)
11385 {
11386 enum machine_mode fpcmp_mode, intcmp_mode;
11387 rtx tmp, tmp2;
11388 int cost = ix86_fp_comparison_cost (code);
11389 enum rtx_code bypass_code, first_code, second_code;
11390
11391 fpcmp_mode = ix86_fp_compare_mode (code);
11392 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
11393
11394 if (second_test)
11395 *second_test = NULL_RTX;
11396 if (bypass_test)
11397 *bypass_test = NULL_RTX;
11398
11399 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11400
11401 /* Do fcomi/sahf based test when profitable. */
11402 if ((TARGET_CMOVE || TARGET_SAHF)
11403 && (bypass_code == UNKNOWN || bypass_test)
11404 && (second_code == UNKNOWN || second_test)
11405 && ix86_fp_comparison_arithmetics_cost (code) > cost)
11406 {
11407 if (TARGET_CMOVE)
11408 {
11409 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
11410 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
11411 tmp);
11412 emit_insn (tmp);
11413 }
11414 else
11415 {
11416 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
11417 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
11418 if (!scratch)
11419 scratch = gen_reg_rtx (HImode);
11420 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
11421 emit_insn (gen_x86_sahf_1 (scratch));
11422 }
11423
11424 /* The FP codes work out to act like unsigned. */
11425 intcmp_mode = fpcmp_mode;
11426 code = first_code;
11427 if (bypass_code != UNKNOWN)
11428 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
11429 gen_rtx_REG (intcmp_mode, FLAGS_REG),
11430 const0_rtx);
11431 if (second_code != UNKNOWN)
11432 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
11433 gen_rtx_REG (intcmp_mode, FLAGS_REG),
11434 const0_rtx);
11435 }
11436 else
11437 {
11438 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
11439 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
11440 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
11441 if (!scratch)
11442 scratch = gen_reg_rtx (HImode);
11443 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
11444
11445 /* In the unordered case, we have to check C2 for NaN's, which
11446 doesn't happen to work out to anything nice combination-wise.
11447 So do some bit twiddling on the value we've got in AH to come
11448 up with an appropriate set of condition codes. */
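/* In the high byte of the status word stored by fnstsw (the AH value
   referred to above), bit 0x01 is C0 (the "below" analogue), 0x04 is C2
   (set for unordered results) and 0x40 is C3 (the "equal" analogue), so a
   mask such as 0x45 tests C0|C2|C3 at once.  */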
11449
11450 intcmp_mode = CCNOmode;
11451 switch (code)
11452 {
11453 case GT:
11454 case UNGT:
11455 if (code == GT || !TARGET_IEEE_FP)
11456 {
11457 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
11458 code = EQ;
11459 }
11460 else
11461 {
11462 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11463 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
11464 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
11465 intcmp_mode = CCmode;
11466 code = GEU;
11467 }
11468 break;
11469 case LT:
11470 case UNLT:
11471 if (code == LT && TARGET_IEEE_FP)
11472 {
11473 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11474 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
11475 intcmp_mode = CCmode;
11476 code = EQ;
11477 }
11478 else
11479 {
11480 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
11481 code = NE;
11482 }
11483 break;
11484 case GE:
11485 case UNGE:
11486 if (code == GE || !TARGET_IEEE_FP)
11487 {
11488 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
11489 code = EQ;
11490 }
11491 else
11492 {
11493 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11494 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
11495 GEN_INT (0x01)));
11496 code = NE;
11497 }
11498 break;
11499 case LE:
11500 case UNLE:
11501 if (code == LE && TARGET_IEEE_FP)
11502 {
11503 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11504 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
11505 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
11506 intcmp_mode = CCmode;
11507 code = LTU;
11508 }
11509 else
11510 {
11511 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
11512 code = NE;
11513 }
11514 break;
11515 case EQ:
11516 case UNEQ:
11517 if (code == EQ && TARGET_IEEE_FP)
11518 {
11519 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11520 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
11521 intcmp_mode = CCmode;
11522 code = EQ;
11523 }
11524 else
11525 {
11526 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
11527 code = NE;
11528 break;
11529 }
11530 break;
11531 case NE:
11532 case LTGT:
11533 if (code == NE && TARGET_IEEE_FP)
11534 {
11535 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11536 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
11537 GEN_INT (0x40)));
11538 code = NE;
11539 }
11540 else
11541 {
11542 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
11543 code = EQ;
11544 }
11545 break;
11546
11547 case UNORDERED:
11548 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
11549 code = NE;
11550 break;
11551 case ORDERED:
11552 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
11553 code = EQ;
11554 break;
11555
11556 default:
11557 gcc_unreachable ();
11558 }
11559 }
11560
11561 /* Return the test that should be put into the flags user, i.e.
11562 the bcc, scc, or cmov instruction. */
11563 return gen_rtx_fmt_ee (code, VOIDmode,
11564 gen_rtx_REG (intcmp_mode, FLAGS_REG),
11565 const0_rtx);
11566 }
11567
11568 rtx
11569 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
11570 {
11571 rtx op0, op1, ret;
11572 op0 = ix86_compare_op0;
11573 op1 = ix86_compare_op1;
11574
11575 if (second_test)
11576 *second_test = NULL_RTX;
11577 if (bypass_test)
11578 *bypass_test = NULL_RTX;
11579
11580 if (ix86_compare_emitted)
11581 {
11582 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
11583 ix86_compare_emitted = NULL_RTX;
11584 }
11585 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
11586 {
11587 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
11588 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
11589 second_test, bypass_test);
11590 }
11591 else
11592 ret = ix86_expand_int_compare (code, op0, op1);
11593
11594 return ret;
11595 }
11596
11597 /* Return true if the CODE will result in a nontrivial jump sequence. */
11598 bool
11599 ix86_fp_jump_nontrivial_p (enum rtx_code code)
11600 {
11601 enum rtx_code bypass_code, first_code, second_code;
11602 if (!TARGET_CMOVE)
11603 return true;
11604 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11605 return bypass_code != UNKNOWN || second_code != UNKNOWN;
11606 }
11607
11608 void
11609 ix86_expand_branch (enum rtx_code code, rtx label)
11610 {
11611 rtx tmp;
11612
11613 /* If we have emitted a compare insn, go straight to simple.
11614 ix86_expand_compare won't emit anything if ix86_compare_emitted
11615 is non-NULL. */
11616 if (ix86_compare_emitted)
11617 goto simple;
11618
11619 switch (GET_MODE (ix86_compare_op0))
11620 {
11621 case QImode:
11622 case HImode:
11623 case SImode:
11624 simple:
11625 tmp = ix86_expand_compare (code, NULL, NULL);
11626 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11627 gen_rtx_LABEL_REF (VOIDmode, label),
11628 pc_rtx);
11629 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11630 return;
11631
11632 case SFmode:
11633 case DFmode:
11634 case XFmode:
11635 {
11636 rtvec vec;
11637 int use_fcomi;
11638 enum rtx_code bypass_code, first_code, second_code;
11639
11640 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
11641 &ix86_compare_op1);
11642
11643 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11644
11645 /* Check whether we will use the natural sequence with one jump. If
11646 so, we can expand the jump early. Otherwise delay expansion by
11647 creating a compound insn so as not to confuse the optimizers. */
11648 if (bypass_code == UNKNOWN && second_code == UNKNOWN
11649 && TARGET_CMOVE)
11650 {
11651 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
11652 gen_rtx_LABEL_REF (VOIDmode, label),
11653 pc_rtx, NULL_RTX, NULL_RTX);
11654 }
11655 else
11656 {
11657 tmp = gen_rtx_fmt_ee (code, VOIDmode,
11658 ix86_compare_op0, ix86_compare_op1);
11659 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11660 gen_rtx_LABEL_REF (VOIDmode, label),
11661 pc_rtx);
11662 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
11663
11664 use_fcomi = ix86_use_fcomi_compare (code);
11665 vec = rtvec_alloc (3 + !use_fcomi);
11666 RTVEC_ELT (vec, 0) = tmp;
11667 RTVEC_ELT (vec, 1)
11668 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
11669 RTVEC_ELT (vec, 2)
11670 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
11671 if (! use_fcomi)
11672 RTVEC_ELT (vec, 3)
11673 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
11674
11675 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
11676 }
11677 return;
11678 }
11679
11680 case DImode:
11681 if (TARGET_64BIT)
11682 goto simple;
11683 case TImode:
11684 /* Expand DImode branch into multiple compare+branch. */
11685 {
11686 rtx lo[2], hi[2], label2;
11687 enum rtx_code code1, code2, code3;
11688 enum machine_mode submode;
11689
11690 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
11691 {
11692 tmp = ix86_compare_op0;
11693 ix86_compare_op0 = ix86_compare_op1;
11694 ix86_compare_op1 = tmp;
11695 code = swap_condition (code);
11696 }
11697 if (GET_MODE (ix86_compare_op0) == DImode)
11698 {
11699 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
11700 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
11701 submode = SImode;
11702 }
11703 else
11704 {
11705 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
11706 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
11707 submode = DImode;
11708 }
11709
11710 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
11711 avoid two branches. This costs one extra insn, so disable when
11712 optimizing for size. */
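/* e.g. a 64-bit "x == y" on ia32 becomes, roughly (register choice is up
   to the expander):
   xorl hi1, hi0
   xorl lo1, lo0
   orl lo0, hi0
   je/jne label
   i.e. a single branch on the OR of the two XORed halves.  */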
11713
11714 if ((code == EQ || code == NE)
11715 && (!optimize_size
11716 || hi[1] == const0_rtx || lo[1] == const0_rtx))
11717 {
11718 rtx xor0, xor1;
11719
11720 xor1 = hi[0];
11721 if (hi[1] != const0_rtx)
11722 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
11723 NULL_RTX, 0, OPTAB_WIDEN);
11724
11725 xor0 = lo[0];
11726 if (lo[1] != const0_rtx)
11727 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
11728 NULL_RTX, 0, OPTAB_WIDEN);
11729
11730 tmp = expand_binop (submode, ior_optab, xor1, xor0,
11731 NULL_RTX, 0, OPTAB_WIDEN);
11732
11733 ix86_compare_op0 = tmp;
11734 ix86_compare_op1 = const0_rtx;
11735 ix86_expand_branch (code, label);
11736 return;
11737 }
11738
11739 /* Otherwise, if we are doing a less-than or greater-than-or-equal
11740 comparison, op1 is a constant, and the low word is zero, then we can
11741 just examine the high word. */
11742
11743 if (CONST_INT_P (hi[1]) && lo[1] == const0_rtx)
11744 switch (code)
11745 {
11746 case LT: case LTU: case GE: case GEU:
11747 ix86_compare_op0 = hi[0];
11748 ix86_compare_op1 = hi[1];
11749 ix86_expand_branch (code, label);
11750 return;
11751 default:
11752 break;
11753 }
11754
11755 /* Otherwise, we need two or three jumps. */
11756
11757 label2 = gen_label_rtx ();
11758
11759 code1 = code;
11760 code2 = swap_condition (code);
11761 code3 = unsigned_condition (code);
11762
11763 switch (code)
11764 {
11765 case LT: case GT: case LTU: case GTU:
11766 break;
11767
11768 case LE: code1 = LT; code2 = GT; break;
11769 case GE: code1 = GT; code2 = LT; break;
11770 case LEU: code1 = LTU; code2 = GTU; break;
11771 case GEU: code1 = GTU; code2 = LTU; break;
11772
11773 case EQ: code1 = UNKNOWN; code2 = NE; break;
11774 case NE: code2 = UNKNOWN; break;
11775
11776 default:
11777 gcc_unreachable ();
11778 }
11779
11780 /*
11781 * a < b =>
11782 * if (hi(a) < hi(b)) goto true;
11783 * if (hi(a) > hi(b)) goto false;
11784 * if (lo(a) < lo(b)) goto true;
11785 * false:
11786 */
11787
11788 ix86_compare_op0 = hi[0];
11789 ix86_compare_op1 = hi[1];
11790
11791 if (code1 != UNKNOWN)
11792 ix86_expand_branch (code1, label);
11793 if (code2 != UNKNOWN)
11794 ix86_expand_branch (code2, label2);
11795
11796 ix86_compare_op0 = lo[0];
11797 ix86_compare_op1 = lo[1];
11798 ix86_expand_branch (code3, label);
11799
11800 if (code2 != UNKNOWN)
11801 emit_label (label2);
11802 return;
11803 }
11804
11805 default:
11806 gcc_unreachable ();
11807 }
11808 }
11809
11810 /* Split branch based on floating point condition. */
11811 void
11812 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
11813 rtx target1, rtx target2, rtx tmp, rtx pushed)
11814 {
11815 rtx second, bypass;
11816 rtx label = NULL_RTX;
11817 rtx condition;
11818 int bypass_probability = -1, second_probability = -1, probability = -1;
11819 rtx i;
11820
11821 if (target2 != pc_rtx)
11822 {
11823 rtx tmp = target2;
11824 code = reverse_condition_maybe_unordered (code);
11825 target2 = target1;
11826 target1 = tmp;
11827 }
11828
11829 condition = ix86_expand_fp_compare (code, op1, op2,
11830 tmp, &second, &bypass);
11831
11832 /* Remove pushed operand from stack. */
11833 if (pushed)
11834 ix86_free_from_memory (GET_MODE (pushed));
11835
11836 if (split_branch_probability >= 0)
11837 {
11838 /* Distribute the probabilities across the jumps.
11839 Assume the BYPASS and SECOND to be always test
11840 for UNORDERED. */
11841 probability = split_branch_probability;
11842
11843 /* A value of 1 is low enough that the probability does not need
11844 to be updated. Later we may run some experiments and see
11845 whether unordered values are more frequent in practice. */
11846 if (bypass)
11847 bypass_probability = 1;
11848 if (second)
11849 second_probability = 1;
11850 }
11851 if (bypass != NULL_RTX)
11852 {
11853 label = gen_label_rtx ();
11854 i = emit_jump_insn (gen_rtx_SET
11855 (VOIDmode, pc_rtx,
11856 gen_rtx_IF_THEN_ELSE (VOIDmode,
11857 bypass,
11858 gen_rtx_LABEL_REF (VOIDmode,
11859 label),
11860 pc_rtx)));
11861 if (bypass_probability >= 0)
11862 REG_NOTES (i)
11863 = gen_rtx_EXPR_LIST (REG_BR_PROB,
11864 GEN_INT (bypass_probability),
11865 REG_NOTES (i));
11866 }
11867 i = emit_jump_insn (gen_rtx_SET
11868 (VOIDmode, pc_rtx,
11869 gen_rtx_IF_THEN_ELSE (VOIDmode,
11870 condition, target1, target2)));
11871 if (probability >= 0)
11872 REG_NOTES (i)
11873 = gen_rtx_EXPR_LIST (REG_BR_PROB,
11874 GEN_INT (probability),
11875 REG_NOTES (i));
11876 if (second != NULL_RTX)
11877 {
11878 i = emit_jump_insn (gen_rtx_SET
11879 (VOIDmode, pc_rtx,
11880 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
11881 target2)));
11882 if (second_probability >= 0)
11883 REG_NOTES (i)
11884 = gen_rtx_EXPR_LIST (REG_BR_PROB,
11885 GEN_INT (second_probability),
11886 REG_NOTES (i));
11887 }
11888 if (label != NULL_RTX)
11889 emit_label (label);
11890 }
11891
11892 int
11893 ix86_expand_setcc (enum rtx_code code, rtx dest)
11894 {
11895 rtx ret, tmp, tmpreg, equiv;
11896 rtx second_test, bypass_test;
11897
11898 if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
11899 return 0; /* FAIL */
11900
11901 gcc_assert (GET_MODE (dest) == QImode);
11902
11903 ret = ix86_expand_compare (code, &second_test, &bypass_test);
11904 PUT_MODE (ret, QImode);
11905
11906 tmp = dest;
11907 tmpreg = dest;
11908
11909 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
11910 if (bypass_test || second_test)
11911 {
11912 rtx test = second_test;
11913 int bypass = 0;
11914 rtx tmp2 = gen_reg_rtx (QImode);
11915 if (bypass_test)
11916 {
11917 gcc_assert (!second_test);
11918 test = bypass_test;
11919 bypass = 1;
11920 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
11921 }
11922 PUT_MODE (test, QImode);
11923 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
11924
11925 if (bypass)
11926 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
11927 else
11928 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
11929 }
11930
11931 /* Attach a REG_EQUAL note describing the comparison result. */
11932 if (ix86_compare_op0 && ix86_compare_op1)
11933 {
11934 equiv = simplify_gen_relational (code, QImode,
11935 GET_MODE (ix86_compare_op0),
11936 ix86_compare_op0, ix86_compare_op1);
11937 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
11938 }
11939
11940 return 1; /* DONE */
11941 }
11942
11943 /* Expand a comparison setting or clearing the carry flag. Return true when
11944 successful and set *POP to the resulting comparison operation. */
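/* For example, "a == 0" is rewritten below as the unsigned test "a < 1",
   whose outcome lives entirely in the carry flag, so callers such as
   ix86_expand_int_movcc can consume it with an sbb-based sequence.  */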
11945 static bool
11946 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
11947 {
11948 enum machine_mode mode =
11949 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
11950
11951 /* Do not handle DImode compares that go through special path.
11952 Also we can't deal with FP compares yet. This is possible to add. */
11953 if (mode == (TARGET_64BIT ? TImode : DImode))
11954 return false;
11955
11956 if (SCALAR_FLOAT_MODE_P (mode))
11957 {
11958 rtx second_test = NULL, bypass_test = NULL;
11959 rtx compare_op, compare_seq;
11960
11961 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
11962
11963 /* Shortcut: the following common codes never translate
11964 into carry flag compares. */
11965 if (code == EQ || code == NE || code == UNEQ || code == LTGT
11966 || code == ORDERED || code == UNORDERED)
11967 return false;
11968
11969 /* These comparisons require zero flag; swap operands so they won't. */
11970 if ((code == GT || code == UNLE || code == LE || code == UNGT)
11971 && !TARGET_IEEE_FP)
11972 {
11973 rtx tmp = op0;
11974 op0 = op1;
11975 op1 = tmp;
11976 code = swap_condition (code);
11977 }
11978
11979 /* Try to expand the comparison and verify that we end up with a carry-flag
11980 based comparison. This fails to be true only when we decide to expand the
11981 comparison using arithmetic, which is not a common scenario. */
11982 start_sequence ();
11983 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
11984 &second_test, &bypass_test);
11985 compare_seq = get_insns ();
11986 end_sequence ();
11987
11988 if (second_test || bypass_test)
11989 return false;
11990 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
11991 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
11992 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
11993 else
11994 code = GET_CODE (compare_op);
11995 if (code != LTU && code != GEU)
11996 return false;
11997 emit_insn (compare_seq);
11998 *pop = compare_op;
11999 return true;
12000 }
12001 if (!INTEGRAL_MODE_P (mode))
12002 return false;
12003 switch (code)
12004 {
12005 case LTU:
12006 case GEU:
12007 break;
12008
12009 /* Convert a==0 into (unsigned)a<1. */
12010 case EQ:
12011 case NE:
12012 if (op1 != const0_rtx)
12013 return false;
12014 op1 = const1_rtx;
12015 code = (code == EQ ? LTU : GEU);
12016 break;
12017
12018 /* Convert a>b into b<a or a>=b+1. */
12019 case GTU:
12020 case LEU:
12021 if (CONST_INT_P (op1))
12022 {
12023 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
12024 /* Bail out on overflow. We could still swap the operands, but that
12025 would force loading of the constant into a register. */
12026 if (op1 == const0_rtx
12027 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
12028 return false;
12029 code = (code == GTU ? GEU : LTU);
12030 }
12031 else
12032 {
12033 rtx tmp = op1;
12034 op1 = op0;
12035 op0 = tmp;
12036 code = (code == GTU ? LTU : GEU);
12037 }
12038 break;
12039
12040 /* Convert a>=0 into (unsigned)a<0x80000000. */
12041 case LT:
12042 case GE:
12043 if (mode == DImode || op1 != const0_rtx)
12044 return false;
12045 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
12046 code = (code == LT ? GEU : LTU);
12047 break;
12048 case LE:
12049 case GT:
12050 if (mode == DImode || op1 != constm1_rtx)
12051 return false;
12052 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
12053 code = (code == LE ? GEU : LTU);
12054 break;
12055
12056 default:
12057 return false;
12058 }
12059 /* Swapping operands may cause the constant to appear as the first operand. */
12060 if (!nonimmediate_operand (op0, VOIDmode))
12061 {
12062 if (no_new_pseudos)
12063 return false;
12064 op0 = force_reg (mode, op0);
12065 }
12066 ix86_compare_op0 = op0;
12067 ix86_compare_op1 = op1;
12068 *pop = ix86_expand_compare (code, NULL, NULL);
12069 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
12070 return true;
12071 }
12072
12073 int
12074 ix86_expand_int_movcc (rtx operands[])
12075 {
12076 enum rtx_code code = GET_CODE (operands[1]), compare_code;
12077 rtx compare_seq, compare_op;
12078 rtx second_test, bypass_test;
12079 enum machine_mode mode = GET_MODE (operands[0]);
12080 bool sign_bit_compare_p = false;
12081
12082 start_sequence ();
12083 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
12084 compare_seq = get_insns ();
12085 end_sequence ();
12086
12087 compare_code = GET_CODE (compare_op);
12088
12089 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
12090 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
12091 sign_bit_compare_p = true;
12092
12093 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
12094 HImode insns, we'd be swallowed in word prefix ops. */
12095
12096 if ((mode != HImode || TARGET_FAST_PREFIX)
12097 && (mode != (TARGET_64BIT ? TImode : DImode))
12098 && CONST_INT_P (operands[2])
12099 && CONST_INT_P (operands[3]))
12100 {
12101 rtx out = operands[0];
12102 HOST_WIDE_INT ct = INTVAL (operands[2]);
12103 HOST_WIDE_INT cf = INTVAL (operands[3]);
12104 HOST_WIDE_INT diff;
12105
12106 diff = ct - cf;
12107 /* Sign bit compares are better done using shifts than by using
12108 sbb. */
12109 if (sign_bit_compare_p
12110 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
12111 ix86_compare_op1, &compare_op))
12112 {
12113 /* Detect overlap between destination and compare sources. */
12114 rtx tmp = out;
12115
12116 if (!sign_bit_compare_p)
12117 {
12118 bool fpcmp = false;
12119
12120 compare_code = GET_CODE (compare_op);
12121
12122 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
12123 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
12124 {
12125 fpcmp = true;
12126 compare_code = ix86_fp_compare_code_to_integer (compare_code);
12127 }
12128
12129 /* To simplify rest of code, restrict to the GEU case. */
12130 if (compare_code == LTU)
12131 {
12132 HOST_WIDE_INT tmp = ct;
12133 ct = cf;
12134 cf = tmp;
12135 compare_code = reverse_condition (compare_code);
12136 code = reverse_condition (code);
12137 }
12138 else
12139 {
12140 if (fpcmp)
12141 PUT_CODE (compare_op,
12142 reverse_condition_maybe_unordered
12143 (GET_CODE (compare_op)));
12144 else
12145 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
12146 }
12147 diff = ct - cf;
12148
12149 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
12150 || reg_overlap_mentioned_p (out, ix86_compare_op1))
12151 tmp = gen_reg_rtx (mode);
12152
12153 if (mode == DImode)
12154 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
12155 else
12156 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
12157 }
12158 else
12159 {
12160 if (code == GT || code == GE)
12161 code = reverse_condition (code);
12162 else
12163 {
12164 HOST_WIDE_INT tmp = ct;
12165 ct = cf;
12166 cf = tmp;
12167 diff = ct - cf;
12168 }
12169 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
12170 ix86_compare_op1, VOIDmode, 0, -1);
12171 }
12172
12173 if (diff == 1)
12174 {
12175 /*
12176 * cmpl op0,op1
12177 * sbbl dest,dest
12178 * [addl dest, ct]
12179 *
12180 * Size 5 - 8.
12181 */
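/*
 * e.g. the unsigned "x >= y ? ct : ct - 1" fits this case: after the
 * cmpl, the sbbl leaves 0 (carry clear) or -1 (carry set) in dest,
 * and adding ct then yields ct or ct - 1 respectively.
 */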
12182 if (ct)
12183 tmp = expand_simple_binop (mode, PLUS,
12184 tmp, GEN_INT (ct),
12185 copy_rtx (tmp), 1, OPTAB_DIRECT);
12186 }
12187 else if (cf == -1)
12188 {
12189 /*
12190 * cmpl op0,op1
12191 * sbbl dest,dest
12192 * orl $ct, dest
12193 *
12194 * Size 8.
12195 */
12196 tmp = expand_simple_binop (mode, IOR,
12197 tmp, GEN_INT (ct),
12198 copy_rtx (tmp), 1, OPTAB_DIRECT);
12199 }
12200 else if (diff == -1 && ct)
12201 {
12202 /*
12203 * cmpl op0,op1
12204 * sbbl dest,dest
12205 * notl dest
12206 * [addl dest, cf]
12207 *
12208 * Size 8 - 11.
12209 */
12210 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
12211 if (cf)
12212 tmp = expand_simple_binop (mode, PLUS,
12213 copy_rtx (tmp), GEN_INT (cf),
12214 copy_rtx (tmp), 1, OPTAB_DIRECT);
12215 }
12216 else
12217 {
12218 /*
12219 * cmpl op0,op1
12220 * sbbl dest,dest
12221 * [notl dest]
12222 * andl cf - ct, dest
12223 * [addl dest, ct]
12224 *
12225 * Size 8 - 11.
12226 */
12227
12228 if (cf == 0)
12229 {
12230 cf = ct;
12231 ct = 0;
12232 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
12233 }
12234
12235 tmp = expand_simple_binop (mode, AND,
12236 copy_rtx (tmp),
12237 gen_int_mode (cf - ct, mode),
12238 copy_rtx (tmp), 1, OPTAB_DIRECT);
12239 if (ct)
12240 tmp = expand_simple_binop (mode, PLUS,
12241 copy_rtx (tmp), GEN_INT (ct),
12242 copy_rtx (tmp), 1, OPTAB_DIRECT);
12243 }
12244
12245 if (!rtx_equal_p (tmp, out))
12246 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
12247
12248 return 1; /* DONE */
12249 }
12250
12251 if (diff < 0)
12252 {
12253 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
12254
12255 HOST_WIDE_INT tmp;
12256 tmp = ct, ct = cf, cf = tmp;
12257 diff = -diff;
12258
12259 if (SCALAR_FLOAT_MODE_P (cmp_mode))
12260 {
12261 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
12262
12263 /* We may be reversing an unordered compare to a normal compare, which
12264 is not valid in general (we may convert a non-trapping condition
12265 into a trapping one); however, on i386 we currently emit all
12266 comparisons unordered. */
12267 compare_code = reverse_condition_maybe_unordered (compare_code);
12268 code = reverse_condition_maybe_unordered (code);
12269 }
12270 else
12271 {
12272 compare_code = reverse_condition (compare_code);
12273 code = reverse_condition (code);
12274 }
12275 }
12276
12277 compare_code = UNKNOWN;
12278 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
12279 && CONST_INT_P (ix86_compare_op1))
12280 {
12281 if (ix86_compare_op1 == const0_rtx
12282 && (code == LT || code == GE))
12283 compare_code = code;
12284 else if (ix86_compare_op1 == constm1_rtx)
12285 {
12286 if (code == LE)
12287 compare_code = LT;
12288 else if (code == GT)
12289 compare_code = GE;
12290 }
12291 }
12292
12293 /* Optimize dest = (op0 < 0) ? -1 : cf. */
12294 if (compare_code != UNKNOWN
12295 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
12296 && (cf == -1 || ct == -1))
12297 {
12298 /* If the lea code below could be used, only optimize
12299 if it results in a 2-insn sequence. */
12300
12301 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
12302 || diff == 3 || diff == 5 || diff == 9)
12303 || (compare_code == LT && ct == -1)
12304 || (compare_code == GE && cf == -1))
12305 {
12306 /*
12307 * notl op1 (if necessary)
12308 * sarl $31, op1
12309 * orl cf, op1
12310 */
12311 if (ct != -1)
12312 {
12313 cf = ct;
12314 ct = -1;
12315 code = reverse_condition (code);
12316 }
12317
12318 out = emit_store_flag (out, code, ix86_compare_op0,
12319 ix86_compare_op1, VOIDmode, 0, -1);
12320
12321 out = expand_simple_binop (mode, IOR,
12322 out, GEN_INT (cf),
12323 out, 1, OPTAB_DIRECT);
12324 if (out != operands[0])
12325 emit_move_insn (operands[0], out);
12326
12327 return 1; /* DONE */
12328 }
12329 }
12330
12331
12332 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
12333 || diff == 3 || diff == 5 || diff == 9)
12334 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
12335 && (mode != DImode
12336 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
12337 {
12338 /*
12339 * xorl dest,dest
12340 * cmpl op1,op2
12341 * setcc dest
12342 * lea cf(dest*(ct-cf)),dest
12343 *
12344 * Size 14.
12345 *
12346 * This also catches the degenerate setcc-only case.
12347 */
12348
12349 rtx tmp;
12350 int nops;
12351
12352 out = emit_store_flag (out, code, ix86_compare_op0,
12353 ix86_compare_op1, VOIDmode, 0, 1);
12354
12355 nops = 0;
12356 /* On x86_64 the lea instruction operates on Pmode, so we need
12357 to get the arithmetic done in the proper mode to match. */
12358 if (diff == 1)
12359 tmp = copy_rtx (out);
12360 else
12361 {
12362 rtx out1;
12363 out1 = copy_rtx (out);
12364 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
12365 nops++;
12366 if (diff & 1)
12367 {
12368 tmp = gen_rtx_PLUS (mode, tmp, out1);
12369 nops++;
12370 }
12371 }
12372 if (cf != 0)
12373 {
12374 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
12375 nops++;
12376 }
12377 if (!rtx_equal_p (tmp, out))
12378 {
12379 if (nops == 1)
12380 out = force_operand (tmp, copy_rtx (out));
12381 else
12382 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
12383 }
12384 if (!rtx_equal_p (out, operands[0]))
12385 emit_move_insn (operands[0], copy_rtx (out));
12386
12387 return 1; /* DONE */
12388 }
12389
12390 /*
12391 * General case: Jumpful:
12392 * xorl dest,dest cmpl op1, op2
12393 * cmpl op1, op2 movl ct, dest
12394 * setcc dest jcc 1f
12395 * decl dest movl cf, dest
12396 * andl (cf-ct),dest 1:
12397 * addl ct,dest
12398 *
12399 * Size 20. Size 14.
12400 *
12401 * This is reasonably steep, but branch mispredict costs are
12402 * high on modern cpus, so consider failing only if optimizing
12403 * for space.
12404 */
12405
12406 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
12407 && BRANCH_COST >= 2)
12408 {
12409 if (cf == 0)
12410 {
12411 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
12412
12413 cf = ct;
12414 ct = 0;
12415
12416 if (SCALAR_FLOAT_MODE_P (cmp_mode))
12417 {
12418 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
12419
12420 /* We may be reversing an unordered compare to a normal compare,
12421 which is not valid in general (we may convert a non-trapping
12422 condition into a trapping one); however, on i386 we currently
12423 emit all comparisons unordered. */
12424 code = reverse_condition_maybe_unordered (code);
12425 }
12426 else
12427 {
12428 code = reverse_condition (code);
12429 if (compare_code != UNKNOWN)
12430 compare_code = reverse_condition (compare_code);
12431 }
12432 }
12433
12434 if (compare_code != UNKNOWN)
12435 {
12436 /* notl op1 (if needed)
12437 sarl $31, op1
12438 andl (cf-ct), op1
12439 addl ct, op1
12440
12441 For x < 0 (resp. x <= -1) there will be no notl,
12442 so if possible swap the constants to get rid of the
12443 complement.
12444 True/false will be -1/0 while code below (store flag
12445 followed by decrement) is 0/-1, so the constants need
12446 to be exchanged once more. */
12447
12448 if (compare_code == GE || !cf)
12449 {
12450 code = reverse_condition (code);
12451 compare_code = LT;
12452 }
12453 else
12454 {
12455 HOST_WIDE_INT tmp = cf;
12456 cf = ct;
12457 ct = tmp;
12458 }
12459
12460 out = emit_store_flag (out, code, ix86_compare_op0,
12461 ix86_compare_op1, VOIDmode, 0, -1);
12462 }
12463 else
12464 {
12465 out = emit_store_flag (out, code, ix86_compare_op0,
12466 ix86_compare_op1, VOIDmode, 0, 1);
12467
12468 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
12469 copy_rtx (out), 1, OPTAB_DIRECT);
12470 }
12471
12472 out = expand_simple_binop (mode, AND, copy_rtx (out),
12473 gen_int_mode (cf - ct, mode),
12474 copy_rtx (out), 1, OPTAB_DIRECT);
12475 if (ct)
12476 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
12477 copy_rtx (out), 1, OPTAB_DIRECT);
12478 if (!rtx_equal_p (out, operands[0]))
12479 emit_move_insn (operands[0], copy_rtx (out));
12480
12481 return 1; /* DONE */
12482 }
12483 }
12484
12485 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
12486 {
12487 /* Try a few things more with specific constants and a variable. */
12488
12489 optab op;
12490 rtx var, orig_out, out, tmp;
12491
12492 if (BRANCH_COST <= 2)
12493 return 0; /* FAIL */
12494
12495 /* If one of the two operands is an interesting constant, load a
12496 constant with the above and mask it in with a logical operation. */
12497
12498 if (CONST_INT_P (operands[2]))
12499 {
12500 var = operands[3];
12501 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
12502 operands[3] = constm1_rtx, op = and_optab;
12503 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
12504 operands[3] = const0_rtx, op = ior_optab;
12505 else
12506 return 0; /* FAIL */
12507 }
12508 else if (CONST_INT_P (operands[3]))
12509 {
12510 var = operands[2];
12511 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
12512 operands[2] = constm1_rtx, op = and_optab;
12513 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
12514 operands[2] = const0_rtx, op = ior_optab;
12515 else
12516 return 0; /* FAIL */
12517 }
12518 else
12519 return 0; /* FAIL */
12520
12521 orig_out = operands[0];
12522 tmp = gen_reg_rtx (mode);
12523 operands[0] = tmp;
12524
12525 /* Recurse to get the constant loaded. */
12526 if (ix86_expand_int_movcc (operands) == 0)
12527 return 0; /* FAIL */
12528
12529 /* Mask in the interesting variable. */
12530 out = expand_binop (mode, op, var, tmp, orig_out, 0,
12531 OPTAB_WIDEN);
12532 if (!rtx_equal_p (out, orig_out))
12533 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
12534
12535 return 1; /* DONE */
12536 }
12537
12538 /*
12539 * For comparison with above,
12540 *
12541 * movl cf,dest
12542 * movl ct,tmp
12543 * cmpl op1,op2
12544 * cmovcc tmp,dest
12545 *
12546 * Size 15.
12547 */
12548
12549 if (! nonimmediate_operand (operands[2], mode))
12550 operands[2] = force_reg (mode, operands[2]);
12551 if (! nonimmediate_operand (operands[3], mode))
12552 operands[3] = force_reg (mode, operands[3]);
12553
12554 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
12555 {
12556 rtx tmp = gen_reg_rtx (mode);
12557 emit_move_insn (tmp, operands[3]);
12558 operands[3] = tmp;
12559 }
12560 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
12561 {
12562 rtx tmp = gen_reg_rtx (mode);
12563 emit_move_insn (tmp, operands[2]);
12564 operands[2] = tmp;
12565 }
12566
12567 if (! register_operand (operands[2], VOIDmode)
12568 && (mode == QImode
12569 || ! register_operand (operands[3], VOIDmode)))
12570 operands[2] = force_reg (mode, operands[2]);
12571
12572 if (mode == QImode
12573 && ! register_operand (operands[3], VOIDmode))
12574 operands[3] = force_reg (mode, operands[3]);
12575
12576 emit_insn (compare_seq);
12577 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12578 gen_rtx_IF_THEN_ELSE (mode,
12579 compare_op, operands[2],
12580 operands[3])));
12581 if (bypass_test)
12582 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
12583 gen_rtx_IF_THEN_ELSE (mode,
12584 bypass_test,
12585 copy_rtx (operands[3]),
12586 copy_rtx (operands[0]))));
12587 if (second_test)
12588 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
12589 gen_rtx_IF_THEN_ELSE (mode,
12590 second_test,
12591 copy_rtx (operands[2]),
12592 copy_rtx (operands[0]))));
12593
12594 return 1; /* DONE */
12595 }
12596
12597 /* Swap, force into registers, or otherwise massage the two operands
12598 to an sse comparison with a mask result. Thus we differ a bit from
12599 ix86_prepare_fp_compare_args which expects to produce a flags result.
12600
12601 The DEST operand exists to help determine whether to commute commutative
12602 operators. The POP0/POP1 operands are updated in place. The new
12603 comparison code is returned, or UNKNOWN if not implementable. */
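/* For example, the SSE compare instructions provide LT and LE predicates
   (cmpltss, cmpless) but not GT or GE, so "a > b" is handled by swapping
   the operands and testing "b < a"; LTGT and UNEQ have no single
   instruction and make this function return UNKNOWN.  */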
12604
12605 static enum rtx_code
12606 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
12607 rtx *pop0, rtx *pop1)
12608 {
12609 rtx tmp;
12610
12611 switch (code)
12612 {
12613 case LTGT:
12614 case UNEQ:
12615 /* We have no LTGT as an operator. We could implement it with
12616 NE & ORDERED, but this requires an extra temporary. It's
12617 not clear that it's worth it. */
12618 return UNKNOWN;
12619
12620 case LT:
12621 case LE:
12622 case UNGT:
12623 case UNGE:
12624 /* These are supported directly. */
12625 break;
12626
12627 case EQ:
12628 case NE:
12629 case UNORDERED:
12630 case ORDERED:
12631 /* For commutative operators, try to canonicalize the destination
12632 operand to be first in the comparison - this helps reload to
12633 avoid extra moves. */
12634 if (!dest || !rtx_equal_p (dest, *pop1))
12635 break;
12636 /* FALLTHRU */
12637
12638 case GE:
12639 case GT:
12640 case UNLE:
12641 case UNLT:
12642 /* These are not supported directly. Swap the comparison operands
12643 to transform into something that is supported. */
12644 tmp = *pop0;
12645 *pop0 = *pop1;
12646 *pop1 = tmp;
12647 code = swap_condition (code);
12648 break;
12649
12650 default:
12651 gcc_unreachable ();
12652 }
12653
12654 return code;
12655 }
12656
12657 /* Detect conditional moves that exactly match min/max operational
12658 semantics. Note that this is IEEE safe, as long as we don't
12659 interchange the operands.
12660
12661 Returns FALSE if this conditional move doesn't match a MIN/MAX,
12662 and TRUE if the operation is successful and instructions are emitted. */
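/* e.g. "a < b ? a : b" is recognized as a MIN here; like the SSE minss and
   minps instructions, the expansion returns the second operand when the
   comparison is unordered, which is why the operands must not be
   interchanged.  */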
12663
12664 static bool
12665 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
12666 rtx cmp_op1, rtx if_true, rtx if_false)
12667 {
12668 enum machine_mode mode;
12669 bool is_min;
12670 rtx tmp;
12671
12672 if (code == LT)
12673 ;
12674 else if (code == UNGE)
12675 {
12676 tmp = if_true;
12677 if_true = if_false;
12678 if_false = tmp;
12679 }
12680 else
12681 return false;
12682
12683 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
12684 is_min = true;
12685 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
12686 is_min = false;
12687 else
12688 return false;
12689
12690 mode = GET_MODE (dest);
12691
12692 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
12693 but MODE may be a vector mode and thus not appropriate. */
12694 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
12695 {
12696 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
12697 rtvec v;
12698
12699 if_true = force_reg (mode, if_true);
12700 v = gen_rtvec (2, if_true, if_false);
12701 tmp = gen_rtx_UNSPEC (mode, v, u);
12702 }
12703 else
12704 {
12705 code = is_min ? SMIN : SMAX;
12706 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
12707 }
12708
12709 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
12710 return true;
12711 }
12712
12713 /* Expand an sse vector comparison. Return the register with the result. */
12714
12715 static rtx
12716 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
12717 rtx op_true, rtx op_false)
12718 {
12719 enum machine_mode mode = GET_MODE (dest);
12720 rtx x;
12721
12722 cmp_op0 = force_reg (mode, cmp_op0);
12723 if (!nonimmediate_operand (cmp_op1, mode))
12724 cmp_op1 = force_reg (mode, cmp_op1);
12725
12726 if (optimize
12727 || reg_overlap_mentioned_p (dest, op_true)
12728 || reg_overlap_mentioned_p (dest, op_false))
12729 dest = gen_reg_rtx (mode);
12730
12731 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
12732 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12733
12734 return dest;
12735 }
12736
12737 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
12738 operations. This is used for both scalar and vector conditional moves. */
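/* Conceptually, with CMP a full-width mask, the expansion below computes
   dest = (cmp & op_true) | (~cmp & op_false);
   the first two cases merely drop the half whose arm is the zero vector.  */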
12739
12740 static void
12741 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
12742 {
12743 enum machine_mode mode = GET_MODE (dest);
12744 rtx t2, t3, x;
12745
12746 if (op_false == CONST0_RTX (mode))
12747 {
12748 op_true = force_reg (mode, op_true);
12749 x = gen_rtx_AND (mode, cmp, op_true);
12750 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12751 }
12752 else if (op_true == CONST0_RTX (mode))
12753 {
12754 op_false = force_reg (mode, op_false);
12755 x = gen_rtx_NOT (mode, cmp);
12756 x = gen_rtx_AND (mode, x, op_false);
12757 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12758 }
12759 else
12760 {
12761 op_true = force_reg (mode, op_true);
12762 op_false = force_reg (mode, op_false);
12763
12764 t2 = gen_reg_rtx (mode);
12765 if (optimize)
12766 t3 = gen_reg_rtx (mode);
12767 else
12768 t3 = dest;
12769
12770 x = gen_rtx_AND (mode, op_true, cmp);
12771 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
12772
12773 x = gen_rtx_NOT (mode, cmp);
12774 x = gen_rtx_AND (mode, x, op_false);
12775 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
12776
12777 x = gen_rtx_IOR (mode, t3, t2);
12778 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12779 }
12780 }
12781
12782 /* Expand a floating-point conditional move. Return true if successful. */
12783
12784 int
12785 ix86_expand_fp_movcc (rtx operands[])
12786 {
12787 enum machine_mode mode = GET_MODE (operands[0]);
12788 enum rtx_code code = GET_CODE (operands[1]);
12789 rtx tmp, compare_op, second_test, bypass_test;
12790
12791 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
12792 {
12793 enum machine_mode cmode;
12794
12795 /* Since we have no cmove for SSE registers, don't force bad register
12796 allocation just to gain access to it. Deny movcc when the
12797 comparison mode doesn't match the move mode. */
12798 cmode = GET_MODE (ix86_compare_op0);
12799 if (cmode == VOIDmode)
12800 cmode = GET_MODE (ix86_compare_op1);
12801 if (cmode != mode)
12802 return 0;
12803
12804 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
12805 &ix86_compare_op0,
12806 &ix86_compare_op1);
12807 if (code == UNKNOWN)
12808 return 0;
12809
12810 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
12811 ix86_compare_op1, operands[2],
12812 operands[3]))
12813 return 1;
12814
12815 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
12816 ix86_compare_op1, operands[2], operands[3]);
12817 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
12818 return 1;
12819 }
12820
12821 /* The floating point conditional move instructions don't directly
12822 support conditions resulting from a signed integer comparison. */
12823
12824 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
12825
12829 if (!fcmov_comparison_operator (compare_op, VOIDmode))
12830 {
12831 gcc_assert (!second_test && !bypass_test);
12832 tmp = gen_reg_rtx (QImode);
12833 ix86_expand_setcc (code, tmp);
12834 code = NE;
12835 ix86_compare_op0 = tmp;
12836 ix86_compare_op1 = const0_rtx;
12837 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
12838 }
12839 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
12840 {
12841 tmp = gen_reg_rtx (mode);
12842 emit_move_insn (tmp, operands[3]);
12843 operands[3] = tmp;
12844 }
12845 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
12846 {
12847 tmp = gen_reg_rtx (mode);
12848 emit_move_insn (tmp, operands[2]);
12849 operands[2] = tmp;
12850 }
12851
12852 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12853 gen_rtx_IF_THEN_ELSE (mode, compare_op,
12854 operands[2], operands[3])));
12855 if (bypass_test)
12856 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12857 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
12858 operands[3], operands[0])));
12859 if (second_test)
12860 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12861 gen_rtx_IF_THEN_ELSE (mode, second_test,
12862 operands[2], operands[0])));
12863
12864 return 1;
12865 }
12866
12867 /* Expand a floating-point vector conditional move; a vcond operation
12868 rather than a movcc operation. */
12869
12870 bool
12871 ix86_expand_fp_vcond (rtx operands[])
12872 {
12873 enum rtx_code code = GET_CODE (operands[3]);
12874 rtx cmp;
12875
12876 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
12877 &operands[4], &operands[5]);
12878 if (code == UNKNOWN)
12879 return false;
12880
12881 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
12882 operands[5], operands[1], operands[2]))
12883 return true;
12884
12885 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
12886 operands[1], operands[2]);
12887 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
12888 return true;
12889 }
12890
12891 /* Expand a signed/unsigned integral vector conditional move. */
12892
12893 bool
12894 ix86_expand_int_vcond (rtx operands[])
12895 {
12896 enum machine_mode mode = GET_MODE (operands[0]);
12897 enum rtx_code code = GET_CODE (operands[3]);
12898 bool negate = false;
12899 rtx x, cop0, cop1;
12900
12901 cop0 = operands[4];
12902 cop1 = operands[5];
12903
12904 /* Canonicalize the comparison to EQ, GT, GTU. */
12905 switch (code)
12906 {
12907 case EQ:
12908 case GT:
12909 case GTU:
12910 break;
12911
12912 case NE:
12913 case LE:
12914 case LEU:
12915 code = reverse_condition (code);
12916 negate = true;
12917 break;
12918
12919 case GE:
12920 case GEU:
12921 code = reverse_condition (code);
12922 negate = true;
12923 /* FALLTHRU */
12924
12925 case LT:
12926 case LTU:
12927 code = swap_condition (code);
12928 x = cop0, cop0 = cop1, cop1 = x;
12929 break;
12930
12931 default:
12932 gcc_unreachable ();
12933 }
12934
12935 /* Only SSE4.1/SSE4.2 supports V2DImode. */
12936 if (mode == V2DImode)
12937 {
12938 switch (code)
12939 {
12940 case EQ:
12941 /* SSE4.1 supports EQ. */
12942 if (!TARGET_SSE4_1)
12943 return false;
12944 break;
12945
12946 case GT:
12947 case GTU:
12948 /* SSE4.2 supports GT/GTU. */
12949 if (!TARGET_SSE4_2)
12950 return false;
12951 break;
12952
12953 default:
12954 gcc_unreachable ();
12955 }
12956 }
12957
12958 /* Unsigned parallel compare is not supported by the hardware. Play some
12959 tricks to turn this into a signed comparison against 0. */
12960 if (code == GTU)
12961 {
12962 cop0 = force_reg (mode, cop0);
12963
12964 switch (mode)
12965 {
12966 case V4SImode:
12967 case V2DImode:
12968 {
12969 rtx t1, t2, mask;
12970
12971 /* Perform a parallel modulo subtraction. */
12972 t1 = gen_reg_rtx (mode);
12973 emit_insn ((mode == V4SImode
12974 ? gen_subv4si3
12975 : gen_subv2di3) (t1, cop0, cop1));
12976
12977 /* Extract the original sign bit of op0. */
12978 mask = ix86_build_signbit_mask (GET_MODE_INNER (mode),
12979 true, false);
12980 t2 = gen_reg_rtx (mode);
12981 emit_insn ((mode == V4SImode
12982 ? gen_andv4si3
12983 : gen_andv2di3) (t2, cop0, mask));
12984
12985 /* XOR it back into the result of the subtraction. This results
12986 in the sign bit set iff we saw unsigned underflow. */
12987 x = gen_reg_rtx (mode);
12988 emit_insn ((mode == V4SImode
12989 ? gen_xorv4si3
12990 : gen_xorv2di3) (x, t1, t2));
12991
12992 code = GT;
12993 }
12994 break;
12995
12996 case V16QImode:
12997 case V8HImode:
12998 /* Perform a parallel unsigned saturating subtraction. */
12999 x = gen_reg_rtx (mode);
13000 emit_insn (gen_rtx_SET (VOIDmode, x,
13001 gen_rtx_US_MINUS (mode, cop0, cop1)));
13002
13003 code = EQ;
13004 negate = !negate;
13005 break;
13006
13007 default:
13008 gcc_unreachable ();
13009 }
13010
13011 cop0 = x;
13012 cop1 = CONST0_RTX (mode);
13013 }
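/* For the V16QImode/V8HImode path above, the identity used is
   a >u b <==> (a -us b) != 0, where "-us" is unsigned saturating
   subtraction (psubusb/psubusw).  E.g. for bytes a = 9, b = 5 the
   saturating difference is 4, which is nonzero, so a >u b holds;
   for a = 5, b = 9 it saturates to 0.  The EQ comparison against
   zero together with the flipped NEGATE implements the "!= 0".  */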
13014
13015 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
13016 operands[1+negate], operands[2-negate]);
13017
13018 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
13019 operands[2-negate]);
13020 return true;
13021 }
13022
13023 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
13024 true if we should do zero extension, else sign extension. HIGH_P is
13025 true if we want the N/2 high elements, else the low elements. */
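/* A worked example for the sign-extending case with V8HImode input
   (low half): for an element 0xfffe (-2) the comparison below yields
   the mask word 0xffff, and interleaving gives the V4SImode element
   0xfffffffe, i.e. -2 sign-extended to 32 bits; for 0x0003 the mask
   word is 0x0000 and the result is 0x00000003.  */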
13026
13027 void
13028 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
13029 {
13030 enum machine_mode imode = GET_MODE (operands[1]);
13031 rtx (*unpack)(rtx, rtx, rtx);
13032 rtx se, dest;
13033
13034 switch (imode)
13035 {
13036 case V16QImode:
13037 if (high_p)
13038 unpack = gen_vec_interleave_highv16qi;
13039 else
13040 unpack = gen_vec_interleave_lowv16qi;
13041 break;
13042 case V8HImode:
13043 if (high_p)
13044 unpack = gen_vec_interleave_highv8hi;
13045 else
13046 unpack = gen_vec_interleave_lowv8hi;
13047 break;
13048 case V4SImode:
13049 if (high_p)
13050 unpack = gen_vec_interleave_highv4si;
13051 else
13052 unpack = gen_vec_interleave_lowv4si;
13053 break;
13054 default:
13055 gcc_unreachable ();
13056 }
13057
13058 dest = gen_lowpart (imode, operands[0]);
13059
13060 if (unsigned_p)
13061 se = force_reg (imode, CONST0_RTX (imode));
13062 else
13063 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
13064 operands[1], pc_rtx, pc_rtx);
13065
13066 emit_insn (unpack (dest, operands[1], se));
13067 }
13068
13069 /* This function performs the same task as ix86_expand_sse_unpack,
13070 but with SSE4.1 instructions. */
13071
13072 void
13073 ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
13074 {
13075 enum machine_mode imode = GET_MODE (operands[1]);
13076 rtx (*unpack)(rtx, rtx);
13077 rtx src, dest;
13078
13079 switch (imode)
13080 {
13081 case V16QImode:
13082 if (unsigned_p)
13083 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
13084 else
13085 unpack = gen_sse4_1_extendv8qiv8hi2;
13086 break;
13087 case V8HImode:
13088 if (unsigned_p)
13089 unpack = gen_sse4_1_zero_extendv4hiv4si2;
13090 else
13091 unpack = gen_sse4_1_extendv4hiv4si2;
13092 break;
13093 case V4SImode:
13094 if (unsigned_p)
13095 unpack = gen_sse4_1_zero_extendv2siv2di2;
13096 else
13097 unpack = gen_sse4_1_extendv2siv2di2;
13098 break;
13099 default:
13100 gcc_unreachable ();
13101 }
13102
13103 dest = operands[0];
13104 if (high_p)
13105 {
13106 /* Shift higher 8 bytes to lower 8 bytes. */
13107 src = gen_reg_rtx (imode);
13108 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, src),
13109 gen_lowpart (TImode, operands[1]),
13110 GEN_INT (64)));
13111 }
13112 else
13113 src = operands[1];
13114
13115 emit_insn (unpack (dest, src));
13116 }
13117
13118 /* Expand conditional increment or decrement using adc/sbb instructions.
13119 The default case using setcc followed by the conditional move can be
13120 done by generic code. */
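/* For illustration, assuming A is in %eax, B in %ebx and X in %ecx,
   a typical source pattern handled here is

     if (a < b) x++;   (unsigned comparison)

   for which the carry-flag comparison below lets us emit roughly

     cmpl %ebx, %eax    # CF = (a < b)
     adcl $0, %ecx      # x += CF

   instead of a setcc followed by a conditional move.  */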
13121 int
13122 ix86_expand_int_addcc (rtx operands[])
13123 {
13124 enum rtx_code code = GET_CODE (operands[1]);
13125 rtx compare_op;
13126 rtx val = const0_rtx;
13127 bool fpcmp = false;
13128 enum machine_mode mode = GET_MODE (operands[0]);
13129
13130 if (operands[3] != const1_rtx
13131 && operands[3] != constm1_rtx)
13132 return 0;
13133 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
13134 ix86_compare_op1, &compare_op))
13135 return 0;
13136 code = GET_CODE (compare_op);
13137
13138 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
13139 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
13140 {
13141 fpcmp = true;
13142 code = ix86_fp_compare_code_to_integer (code);
13143 }
13144
13145 if (code != LTU)
13146 {
13147 val = constm1_rtx;
13148 if (fpcmp)
13149 PUT_CODE (compare_op,
13150 reverse_condition_maybe_unordered
13151 (GET_CODE (compare_op)));
13152 else
13153 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
13154 }
13155 PUT_MODE (compare_op, mode);
13156
13157 /* Construct either adc or sbb insn. */
13158 if ((code == LTU) == (operands[3] == constm1_rtx))
13159 {
13160 switch (GET_MODE (operands[0]))
13161 {
13162 case QImode:
13163 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
13164 break;
13165 case HImode:
13166 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
13167 break;
13168 case SImode:
13169 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
13170 break;
13171 case DImode:
13172 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
13173 break;
13174 default:
13175 gcc_unreachable ();
13176 }
13177 }
13178 else
13179 {
13180 switch (GET_MODE (operands[0]))
13181 {
13182 case QImode:
13183 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
13184 break;
13185 case HImode:
13186 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
13187 break;
13188 case SImode:
13189 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
13190 break;
13191 case DImode:
13192 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
13193 break;
13194 default:
13195 gcc_unreachable ();
13196 }
13197 }
13198 return 1; /* DONE */
13199 }
13200
13201
13202 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
13203 works for floating point parameters and non-offsettable memories.
13204 For pushes, it returns just stack offsets; the values will be saved
13205 in the right order. At most three parts are generated. */
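/* For example, on !TARGET_64BIT a DFmode operand is returned as two
   SImode parts and an XFmode operand as three; on TARGET_64BIT an
   XFmode or TFmode operand is returned as a DImode part followed by
   an SImode or DImode upper part respectively.  */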
13206
13207 static int
13208 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
13209 {
13210 int size;
13211
13212 if (!TARGET_64BIT)
13213 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
13214 else
13215 size = (GET_MODE_SIZE (mode) + 4) / 8;
13216
13217 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
13218 gcc_assert (size >= 2 && size <= 3);
13219
13220 /* Optimize constant pool reference to immediates. This is used by fp
13221 moves, which force all constants to memory to allow combining. */
13222 if (MEM_P (operand) && MEM_READONLY_P (operand))
13223 {
13224 rtx tmp = maybe_get_pool_constant (operand);
13225 if (tmp)
13226 operand = tmp;
13227 }
13228
13229 if (MEM_P (operand) && !offsettable_memref_p (operand))
13230 {
13231 /* The only non-offsettable memories we handle are pushes. */
13232 int ok = push_operand (operand, VOIDmode);
13233
13234 gcc_assert (ok);
13235
13236 operand = copy_rtx (operand);
13237 PUT_MODE (operand, Pmode);
13238 parts[0] = parts[1] = parts[2] = operand;
13239 return size;
13240 }
13241
13242 if (GET_CODE (operand) == CONST_VECTOR)
13243 {
13244 enum machine_mode imode = int_mode_for_mode (mode);
13245 /* Caution: if we looked through a constant pool memory above,
13246 the operand may actually have a different mode now. That's
13247 ok, since we want to pun this all the way back to an integer. */
13248 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
13249 gcc_assert (operand != NULL);
13250 mode = imode;
13251 }
13252
13253 if (!TARGET_64BIT)
13254 {
13255 if (mode == DImode)
13256 split_di (&operand, 1, &parts[0], &parts[1]);
13257 else
13258 {
13259 if (REG_P (operand))
13260 {
13261 gcc_assert (reload_completed);
13262 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
13263 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
13264 if (size == 3)
13265 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
13266 }
13267 else if (offsettable_memref_p (operand))
13268 {
13269 operand = adjust_address (operand, SImode, 0);
13270 parts[0] = operand;
13271 parts[1] = adjust_address (operand, SImode, 4);
13272 if (size == 3)
13273 parts[2] = adjust_address (operand, SImode, 8);
13274 }
13275 else if (GET_CODE (operand) == CONST_DOUBLE)
13276 {
13277 REAL_VALUE_TYPE r;
13278 long l[4];
13279
13280 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
13281 switch (mode)
13282 {
13283 case XFmode:
13284 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
13285 parts[2] = gen_int_mode (l[2], SImode);
13286 break;
13287 case DFmode:
13288 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
13289 break;
13290 default:
13291 gcc_unreachable ();
13292 }
13293 parts[1] = gen_int_mode (l[1], SImode);
13294 parts[0] = gen_int_mode (l[0], SImode);
13295 }
13296 else
13297 gcc_unreachable ();
13298 }
13299 }
13300 else
13301 {
13302 if (mode == TImode)
13303 split_ti (&operand, 1, &parts[0], &parts[1]);
13304 if (mode == XFmode || mode == TFmode)
13305 {
13306 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
13307 if (REG_P (operand))
13308 {
13309 gcc_assert (reload_completed);
13310 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
13311 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
13312 }
13313 else if (offsettable_memref_p (operand))
13314 {
13315 operand = adjust_address (operand, DImode, 0);
13316 parts[0] = operand;
13317 parts[1] = adjust_address (operand, upper_mode, 8);
13318 }
13319 else if (GET_CODE (operand) == CONST_DOUBLE)
13320 {
13321 REAL_VALUE_TYPE r;
13322 long l[4];
13323
13324 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
13325 real_to_target (l, &r, mode);
13326
13327 /* Do not use shift by 32 to avoid warning on 32bit systems. */
13328 if (HOST_BITS_PER_WIDE_INT >= 64)
13329 parts[0]
13330 = gen_int_mode
13331 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
13332 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
13333 DImode);
13334 else
13335 parts[0] = immed_double_const (l[0], l[1], DImode);
13336
13337 if (upper_mode == SImode)
13338 parts[1] = gen_int_mode (l[2], SImode);
13339 else if (HOST_BITS_PER_WIDE_INT >= 64)
13340 parts[1]
13341 = gen_int_mode
13342 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
13343 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
13344 DImode);
13345 else
13346 parts[1] = immed_double_const (l[2], l[3], DImode);
13347 }
13348 else
13349 gcc_unreachable ();
13350 }
13351 }
13352
13353 return size;
13354 }
13355
13356 /* Emit insns to perform a move or push of DI, DF, XF, and (on 64-bit
13357 targets) TI and TF values. The operands array is used as scratch
13358 storage: operands 2-4 receive the destination parts and operands 5-7
13359 the source parts, in the order in which the moves are emitted. */
13360
13361 void
13362 ix86_split_long_move (rtx operands[])
13363 {
13364 rtx part[2][3];
13365 int nparts;
13366 int push = 0;
13367 int collisions = 0;
13368 enum machine_mode mode = GET_MODE (operands[0]);
13369
13370 /* The DFmode expanders may ask us to move double.
13371 For 64bit target this is single move. By hiding the fact
13372 here we simplify i386.md splitters. */
13373 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
13374 {
13375 /* Optimize constant pool reference to immediates. This is used by
13376 fp moves, which force all constants to memory to allow combining. */
13377
13378 if (MEM_P (operands[1])
13379 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
13380 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
13381 operands[1] = get_pool_constant (XEXP (operands[1], 0));
13382 if (push_operand (operands[0], VOIDmode))
13383 {
13384 operands[0] = copy_rtx (operands[0]);
13385 PUT_MODE (operands[0], Pmode);
13386 }
13387 else
13388 operands[0] = gen_lowpart (DImode, operands[0]);
13389 operands[1] = gen_lowpart (DImode, operands[1]);
13390 emit_move_insn (operands[0], operands[1]);
13391 return;
13392 }
13393
13394 /* The only non-offsettable memory we handle is push. */
13395 if (push_operand (operands[0], VOIDmode))
13396 push = 1;
13397 else
13398 gcc_assert (!MEM_P (operands[0])
13399 || offsettable_memref_p (operands[0]));
13400
13401 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
13402 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
13403
13404 /* When emitting push, take care for source operands on the stack. */
13405 if (push && MEM_P (operands[1])
13406 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
13407 {
13408 if (nparts == 3)
13409 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
13410 XEXP (part[1][2], 0));
13411 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
13412 XEXP (part[1][1], 0));
13413 }
13414
13415 /* We need to do the copy in the right order in case an address register
13416 of the source overlaps the destination. */
13417 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
13418 {
13419 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
13420 collisions++;
13421 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
13422 collisions++;
13423 if (nparts == 3
13424 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
13425 collisions++;
13426
13427 /* Collision in the middle part can be handled by reordering. */
13428 if (collisions == 1 && nparts == 3
13429 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
13430 {
13431 rtx tmp;
13432 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
13433 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
13434 }
13435
13436 /* If there are more collisions, we can't handle it by reordering.
13437 Do an lea to the last part and use only one colliding move. */
13438 else if (collisions > 1)
13439 {
13440 rtx base;
13441
13442 collisions = 1;
13443
13444 base = part[0][nparts - 1];
13445
13446 /* Handle the case when the last part isn't valid for lea.
13447 Happens in 64-bit mode storing the 12-byte XFmode. */
13448 if (GET_MODE (base) != Pmode)
13449 base = gen_rtx_REG (Pmode, REGNO (base));
13450
13451 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
13452 part[1][0] = replace_equiv_address (part[1][0], base);
13453 part[1][1] = replace_equiv_address (part[1][1],
13454 plus_constant (base, UNITS_PER_WORD));
13455 if (nparts == 3)
13456 part[1][2] = replace_equiv_address (part[1][2],
13457 plus_constant (base, 8));
13458 }
13459 }
13460
13461 if (push)
13462 {
13463 if (!TARGET_64BIT)
13464 {
13465 if (nparts == 3)
13466 {
13467 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
13468 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
13469 emit_move_insn (part[0][2], part[1][2]);
13470 }
13471 }
13472 else
13473 {
13474 /* In 64-bit mode we don't have a 32-bit push available. If this is
13475 a register, it is OK - we will just use the larger counterpart. We also
13476 retype memory - this comes from an attempt to avoid a REX prefix on
13477 moves of the second half of a TFmode value. */
13478 if (GET_MODE (part[1][1]) == SImode)
13479 {
13480 switch (GET_CODE (part[1][1]))
13481 {
13482 case MEM:
13483 part[1][1] = adjust_address (part[1][1], DImode, 0);
13484 break;
13485
13486 case REG:
13487 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
13488 break;
13489
13490 default:
13491 gcc_unreachable ();
13492 }
13493
13494 if (GET_MODE (part[1][0]) == SImode)
13495 part[1][0] = part[1][1];
13496 }
13497 }
13498 emit_move_insn (part[0][1], part[1][1]);
13499 emit_move_insn (part[0][0], part[1][0]);
13500 return;
13501 }
13502
13503 /* Choose correct order to not overwrite the source before it is copied. */
13504 if ((REG_P (part[0][0])
13505 && REG_P (part[1][1])
13506 && (REGNO (part[0][0]) == REGNO (part[1][1])
13507 || (nparts == 3
13508 && REGNO (part[0][0]) == REGNO (part[1][2]))))
13509 || (collisions > 0
13510 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
13511 {
13512 if (nparts == 3)
13513 {
13514 operands[2] = part[0][2];
13515 operands[3] = part[0][1];
13516 operands[4] = part[0][0];
13517 operands[5] = part[1][2];
13518 operands[6] = part[1][1];
13519 operands[7] = part[1][0];
13520 }
13521 else
13522 {
13523 operands[2] = part[0][1];
13524 operands[3] = part[0][0];
13525 operands[5] = part[1][1];
13526 operands[6] = part[1][0];
13527 }
13528 }
13529 else
13530 {
13531 if (nparts == 3)
13532 {
13533 operands[2] = part[0][0];
13534 operands[3] = part[0][1];
13535 operands[4] = part[0][2];
13536 operands[5] = part[1][0];
13537 operands[6] = part[1][1];
13538 operands[7] = part[1][2];
13539 }
13540 else
13541 {
13542 operands[2] = part[0][0];
13543 operands[3] = part[0][1];
13544 operands[5] = part[1][0];
13545 operands[6] = part[1][1];
13546 }
13547 }
13548
13549 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
13550 if (optimize_size)
13551 {
13552 if (CONST_INT_P (operands[5])
13553 && operands[5] != const0_rtx
13554 && REG_P (operands[2]))
13555 {
13556 if (CONST_INT_P (operands[6])
13557 && INTVAL (operands[6]) == INTVAL (operands[5]))
13558 operands[6] = operands[2];
13559
13560 if (nparts == 3
13561 && CONST_INT_P (operands[7])
13562 && INTVAL (operands[7]) == INTVAL (operands[5]))
13563 operands[7] = operands[2];
13564 }
13565
13566 if (nparts == 3
13567 && CONST_INT_P (operands[6])
13568 && operands[6] != const0_rtx
13569 && REG_P (operands[3])
13570 && CONST_INT_P (operands[7])
13571 && INTVAL (operands[7]) == INTVAL (operands[6]))
13572 operands[7] = operands[3];
13573 }
13574
13575 emit_move_insn (operands[2], operands[5]);
13576 emit_move_insn (operands[3], operands[6]);
13577 if (nparts == 3)
13578 emit_move_insn (operands[4], operands[7]);
13579
13580 return;
13581 }
13582
13583 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
13584 left shift by a constant, either using a single shift or
13585 a sequence of add instructions. */
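/* For example, a left shift of an SImode piece by 2, when not
   optimizing for size and two adds cost no more than one constant
   shift, becomes

     addl %eax, %eax
     addl %eax, %eax

   (assuming the piece lives in %eax), while larger counts fall back
   to a single shift instruction.  */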
13586
13587 static void
13588 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
13589 {
13590 if (count == 1)
13591 {
13592 emit_insn ((mode == DImode
13593 ? gen_addsi3
13594 : gen_adddi3) (operand, operand, operand));
13595 }
13596 else if (!optimize_size
13597 && count * ix86_cost->add <= ix86_cost->shift_const)
13598 {
13599 int i;
13600 for (i=0; i<count; i++)
13601 {
13602 emit_insn ((mode == DImode
13603 ? gen_addsi3
13604 : gen_adddi3) (operand, operand, operand));
13605 }
13606 }
13607 else
13608 emit_insn ((mode == DImode
13609 ? gen_ashlsi3
13610 : gen_ashldi3) (operand, operand, GEN_INT (count)));
13611 }
13612
13613 void
13614 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
13615 {
13616 rtx low[2], high[2];
13617 int count;
13618 const int single_width = mode == DImode ? 32 : 64;
13619
13620 if (CONST_INT_P (operands[2]))
13621 {
13622 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
13623 count = INTVAL (operands[2]) & (single_width * 2 - 1);
13624
13625 if (count >= single_width)
13626 {
13627 emit_move_insn (high[0], low[1]);
13628 emit_move_insn (low[0], const0_rtx);
13629
13630 if (count > single_width)
13631 ix86_expand_ashl_const (high[0], count - single_width, mode);
13632 }
13633 else
13634 {
13635 if (!rtx_equal_p (operands[0], operands[1]))
13636 emit_move_insn (operands[0], operands[1]);
13637 emit_insn ((mode == DImode
13638 ? gen_x86_shld_1
13639 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
13640 ix86_expand_ashl_const (low[0], count, mode);
13641 }
13642 return;
13643 }
13644
13645 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13646
13647 if (operands[1] == const1_rtx)
13648 {
13649 /* Assuming we've chosen QImode-capable registers, 1 << N
13650 can be done with two 32/64-bit shifts, no branches, no cmoves. */
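/* Worked example for DImode 1 << N on a 32-bit target: after the
   if/else below, LOW holds 1 and HIGH holds 0 when bit 5 of the
   count is clear (bit 6 for TImode), and the other way around when
   it is set; the final pair of shifts by the count, which the
   hardware masks to the word size, moves the single bit into place.
   E.g. N = 3 gives low = 1 << 3, high = 0, while N = 35 gives
   low = 0, high = 1 << (35 & 31), i.e. bit 35 of the result.  */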
13651 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
13652 {
13653 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
13654
13655 ix86_expand_clear (low[0]);
13656 ix86_expand_clear (high[0]);
13657 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
13658
13659 d = gen_lowpart (QImode, low[0]);
13660 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
13661 s = gen_rtx_EQ (QImode, flags, const0_rtx);
13662 emit_insn (gen_rtx_SET (VOIDmode, d, s));
13663
13664 d = gen_lowpart (QImode, high[0]);
13665 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
13666 s = gen_rtx_NE (QImode, flags, const0_rtx);
13667 emit_insn (gen_rtx_SET (VOIDmode, d, s));
13668 }
13669
13670 /* Otherwise, we can get the same results by manually performing
13671 a bit extract operation on bit 5/6, and then performing the two
13672 shifts. The two methods of getting 0/1 into low/high are exactly
13673 the same size. Avoiding the shift in the bit extract case helps
13674 pentium4 a bit; no one else seems to care much either way. */
13675 else
13676 {
13677 rtx x;
13678
13679 if (TARGET_PARTIAL_REG_STALL && !optimize_size)
13680 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
13681 else
13682 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
13683 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
13684
13685 emit_insn ((mode == DImode
13686 ? gen_lshrsi3
13687 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
13688 emit_insn ((mode == DImode
13689 ? gen_andsi3
13690 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
13691 emit_move_insn (low[0], high[0]);
13692 emit_insn ((mode == DImode
13693 ? gen_xorsi3
13694 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
13695 }
13696
13697 emit_insn ((mode == DImode
13698 ? gen_ashlsi3
13699 : gen_ashldi3) (low[0], low[0], operands[2]));
13700 emit_insn ((mode == DImode
13701 ? gen_ashlsi3
13702 : gen_ashldi3) (high[0], high[0], operands[2]));
13703 return;
13704 }
13705
13706 if (operands[1] == constm1_rtx)
13707 {
13708 /* For -1 << N, we can avoid the shld instruction, because we
13709 know that we're shifting 0...31/63 ones into a -1. */
13710 emit_move_insn (low[0], constm1_rtx);
13711 if (optimize_size)
13712 emit_move_insn (high[0], low[0]);
13713 else
13714 emit_move_insn (high[0], constm1_rtx);
13715 }
13716 else
13717 {
13718 if (!rtx_equal_p (operands[0], operands[1]))
13719 emit_move_insn (operands[0], operands[1]);
13720
13721 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13722 emit_insn ((mode == DImode
13723 ? gen_x86_shld_1
13724 : gen_x86_64_shld) (high[0], low[0], operands[2]));
13725 }
13726
13727 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
13728
13729 if (TARGET_CMOVE && scratch)
13730 {
13731 ix86_expand_clear (scratch);
13732 emit_insn ((mode == DImode
13733 ? gen_x86_shift_adj_1
13734 : gen_x86_64_shift_adj) (high[0], low[0], operands[2], scratch));
13735 }
13736 else
13737 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
13738 }
13739
13740 void
13741 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
13742 {
13743 rtx low[2], high[2];
13744 int count;
13745 const int single_width = mode == DImode ? 32 : 64;
13746
13747 if (CONST_INT_P (operands[2]))
13748 {
13749 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
13750 count = INTVAL (operands[2]) & (single_width * 2 - 1);
13751
13752 if (count == single_width * 2 - 1)
13753 {
13754 emit_move_insn (high[0], high[1]);
13755 emit_insn ((mode == DImode
13756 ? gen_ashrsi3
13757 : gen_ashrdi3) (high[0], high[0],
13758 GEN_INT (single_width - 1)));
13759 emit_move_insn (low[0], high[0]);
13760
13761 }
13762 else if (count >= single_width)
13763 {
13764 emit_move_insn (low[0], high[1]);
13765 emit_move_insn (high[0], low[0]);
13766 emit_insn ((mode == DImode
13767 ? gen_ashrsi3
13768 : gen_ashrdi3) (high[0], high[0],
13769 GEN_INT (single_width - 1)));
13770 if (count > single_width)
13771 emit_insn ((mode == DImode
13772 ? gen_ashrsi3
13773 : gen_ashrdi3) (low[0], low[0],
13774 GEN_INT (count - single_width)));
13775 }
13776 else
13777 {
13778 if (!rtx_equal_p (operands[0], operands[1]))
13779 emit_move_insn (operands[0], operands[1]);
13780 emit_insn ((mode == DImode
13781 ? gen_x86_shrd_1
13782 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
13783 emit_insn ((mode == DImode
13784 ? gen_ashrsi3
13785 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
13786 }
13787 }
13788 else
13789 {
13790 if (!rtx_equal_p (operands[0], operands[1]))
13791 emit_move_insn (operands[0], operands[1]);
13792
13793 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13794
13795 emit_insn ((mode == DImode
13796 ? gen_x86_shrd_1
13797 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
13798 emit_insn ((mode == DImode
13799 ? gen_ashrsi3
13800 : gen_ashrdi3) (high[0], high[0], operands[2]));
13801
13802 if (TARGET_CMOVE && scratch)
13803 {
13804 emit_move_insn (scratch, high[0]);
13805 emit_insn ((mode == DImode
13806 ? gen_ashrsi3
13807 : gen_ashrdi3) (scratch, scratch,
13808 GEN_INT (single_width - 1)));
13809 emit_insn ((mode == DImode
13810 ? gen_x86_shift_adj_1
13811 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
13812 scratch));
13813 }
13814 else
13815 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
13816 }
13817 }
13818
13819 void
13820 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
13821 {
13822 rtx low[2], high[2];
13823 int count;
13824 const int single_width = mode == DImode ? 32 : 64;
13825
13826 if (CONST_INT_P (operands[2]))
13827 {
13828 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
13829 count = INTVAL (operands[2]) & (single_width * 2 - 1);
13830
13831 if (count >= single_width)
13832 {
13833 emit_move_insn (low[0], high[1]);
13834 ix86_expand_clear (high[0]);
13835
13836 if (count > single_width)
13837 emit_insn ((mode == DImode
13838 ? gen_lshrsi3
13839 : gen_lshrdi3) (low[0], low[0],
13840 GEN_INT (count - single_width)));
13841 }
13842 else
13843 {
13844 if (!rtx_equal_p (operands[0], operands[1]))
13845 emit_move_insn (operands[0], operands[1]);
13846 emit_insn ((mode == DImode
13847 ? gen_x86_shrd_1
13848 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
13849 emit_insn ((mode == DImode
13850 ? gen_lshrsi3
13851 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
13852 }
13853 }
13854 else
13855 {
13856 if (!rtx_equal_p (operands[0], operands[1]))
13857 emit_move_insn (operands[0], operands[1]);
13858
13859 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13860
13861 emit_insn ((mode == DImode
13862 ? gen_x86_shrd_1
13863 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
13864 emit_insn ((mode == DImode
13865 ? gen_lshrsi3
13866 : gen_lshrdi3) (high[0], high[0], operands[2]));
13867
13868 /* Heh. By reversing the arguments, we can reuse this pattern. */
13869 if (TARGET_CMOVE && scratch)
13870 {
13871 ix86_expand_clear (scratch);
13872 emit_insn ((mode == DImode
13873 ? gen_x86_shift_adj_1
13874 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
13875 scratch));
13876 }
13877 else
13878 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
13879 }
13880 }
13881
13882 /* Predict just emitted jump instruction to be taken with probability PROB. */
13883 static void
13884 predict_jump (int prob)
13885 {
13886 rtx insn = get_last_insn ();
13887 gcc_assert (JUMP_P (insn));
13888 REG_NOTES (insn)
13889 = gen_rtx_EXPR_LIST (REG_BR_PROB,
13890 GEN_INT (prob),
13891 REG_NOTES (insn));
13892 }
13893
13894 /* Helper function for the string operations below. Test whether VARIABLE
13895 is aligned to VALUE bytes; if so, jump to the returned label. */
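/* E.g. ix86_expand_aligntest (ptr, 4, ...) emits roughly
   "testl $4, %reg; je label" and returns the label, so code emitted
   between this call and emit_label runs only when bit 2 of PTR
   is set.  */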
13896 static rtx
13897 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
13898 {
13899 rtx label = gen_label_rtx ();
13900 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
13901 if (GET_MODE (variable) == DImode)
13902 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
13903 else
13904 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
13905 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
13906 1, label);
13907 if (epilogue)
13908 predict_jump (REG_BR_PROB_BASE * 50 / 100);
13909 else
13910 predict_jump (REG_BR_PROB_BASE * 90 / 100);
13911 return label;
13912 }
13913
13914 /* Decrease COUNTREG by VALUE. */
13915 static void
13916 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
13917 {
13918 if (GET_MODE (countreg) == DImode)
13919 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
13920 else
13921 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
13922 }
13923
13924 /* Zero extend possibly SImode EXP to Pmode register. */
13925 rtx
13926 ix86_zero_extend_to_Pmode (rtx exp)
13927 {
13928 rtx r;
13929 if (GET_MODE (exp) == VOIDmode)
13930 return force_reg (Pmode, exp);
13931 if (GET_MODE (exp) == Pmode)
13932 return copy_to_mode_reg (Pmode, exp);
13933 r = gen_reg_rtx (Pmode);
13934 emit_insn (gen_zero_extendsidi2 (r, exp));
13935 return r;
13936 }
13937
13938 /* Divide COUNTREG by SCALE. */
13939 static rtx
13940 scale_counter (rtx countreg, int scale)
13941 {
13942 rtx sc;
13943 rtx piece_size_mask;
13944
13945 if (scale == 1)
13946 return countreg;
13947 if (CONST_INT_P (countreg))
13948 return GEN_INT (INTVAL (countreg) / scale);
13949 gcc_assert (REG_P (countreg));
13950
13951 piece_size_mask = GEN_INT (scale - 1);
13952 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
13953 GEN_INT (exact_log2 (scale)),
13954 NULL, 1, OPTAB_DIRECT);
13955 return sc;
13956 }
13957
13958 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
13959 DImode for constant loop counts. */
13960
13961 static enum machine_mode
13962 counter_mode (rtx count_exp)
13963 {
13964 if (GET_MODE (count_exp) != VOIDmode)
13965 return GET_MODE (count_exp);
13966 if (GET_CODE (count_exp) != CONST_INT)
13967 return Pmode;
13968 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
13969 return DImode;
13970 return SImode;
13971 }
13972
13973 /* When SRCPTR is non-NULL, output a simple loop that moves memory
13974 pointed to by SRCPTR to DESTPTR in chunks of MODE, unrolled UNROLL times;
13975 the overall size is COUNT bytes. When SRCPTR is NULL, output the
13976 equivalent loop that sets memory to VALUE (assumed to be in MODE).
13977
13978 The size is rounded down to a whole number of chunks moved at once.
13979 SRCMEM and DESTMEM provide MEM rtxes to feed proper aliasing info. */
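/* As a rough sketch, for MODE == SImode and UNROLL == 4 the emitted
   code behaves like

     size = count & -16;
     for (iter = 0; iter < size; iter += 16)
       copy (or store VALUE into) 16 bytes at destptr + iter;
     destptr += size;  if (srcptr) srcptr += size;

   leaving the remaining count % 16 bytes to the caller's epilogue.  */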
13980
13981
13982 static void
13983 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
13984 rtx destptr, rtx srcptr, rtx value,
13985 rtx count, enum machine_mode mode, int unroll,
13986 int expected_size)
13987 {
13988 rtx out_label, top_label, iter, tmp;
13989 enum machine_mode iter_mode = counter_mode (count);
13990 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
13991 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
13992 rtx size;
13993 rtx x_addr;
13994 rtx y_addr;
13995 int i;
13996
13997 top_label = gen_label_rtx ();
13998 out_label = gen_label_rtx ();
13999 iter = gen_reg_rtx (iter_mode);
14000
14001 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
14002 NULL, 1, OPTAB_DIRECT);
14003 /* Those two should combine. */
14004 if (piece_size == const1_rtx)
14005 {
14006 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
14007 true, out_label);
14008 predict_jump (REG_BR_PROB_BASE * 10 / 100);
14009 }
14010 emit_move_insn (iter, const0_rtx);
14011
14012 emit_label (top_label);
14013
14014 tmp = convert_modes (Pmode, iter_mode, iter, true);
14015 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
14016 destmem = change_address (destmem, mode, x_addr);
14017
14018 if (srcmem)
14019 {
14020 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
14021 srcmem = change_address (srcmem, mode, y_addr);
14022
14023 /* When unrolling for chips that reorder memory reads and writes,
14024 we can save registers by using a single temporary.
14025 Also, using 4 temporaries is overkill in 32-bit mode. */
14026 if (!TARGET_64BIT && 0)
14027 {
14028 for (i = 0; i < unroll; i++)
14029 {
14030 if (i)
14031 {
14032 destmem =
14033 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
14034 srcmem =
14035 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
14036 }
14037 emit_move_insn (destmem, srcmem);
14038 }
14039 }
14040 else
14041 {
14042 rtx tmpreg[4];
14043 gcc_assert (unroll <= 4);
14044 for (i = 0; i < unroll; i++)
14045 {
14046 tmpreg[i] = gen_reg_rtx (mode);
14047 if (i)
14048 {
14049 srcmem =
14050 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
14051 }
14052 emit_move_insn (tmpreg[i], srcmem);
14053 }
14054 for (i = 0; i < unroll; i++)
14055 {
14056 if (i)
14057 {
14058 destmem =
14059 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
14060 }
14061 emit_move_insn (destmem, tmpreg[i]);
14062 }
14063 }
14064 }
14065 else
14066 for (i = 0; i < unroll; i++)
14067 {
14068 if (i)
14069 destmem =
14070 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
14071 emit_move_insn (destmem, value);
14072 }
14073
14074 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
14075 true, OPTAB_LIB_WIDEN);
14076 if (tmp != iter)
14077 emit_move_insn (iter, tmp);
14078
14079 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
14080 true, top_label);
14081 if (expected_size != -1)
14082 {
14083 expected_size /= GET_MODE_SIZE (mode) * unroll;
14084 if (expected_size == 0)
14085 predict_jump (0);
14086 else if (expected_size > REG_BR_PROB_BASE)
14087 predict_jump (REG_BR_PROB_BASE - 1);
14088 else
14089 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
14090 }
14091 else
14092 predict_jump (REG_BR_PROB_BASE * 80 / 100);
14093 iter = ix86_zero_extend_to_Pmode (iter);
14094 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
14095 true, OPTAB_LIB_WIDEN);
14096 if (tmp != destptr)
14097 emit_move_insn (destptr, tmp);
14098 if (srcptr)
14099 {
14100 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
14101 true, OPTAB_LIB_WIDEN);
14102 if (tmp != srcptr)
14103 emit_move_insn (srcptr, tmp);
14104 }
14105 emit_label (out_label);
14106 }
14107
14108 /* Output a "rep; mov" instruction.
14109 Arguments have the same meaning as for the previous function. */
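/* For instance, copying COUNT bytes with MODE == SImode emits
   COUNTREG = COUNT >> 2 followed by "rep movsl"; the DESTEXP and
   SRCEXP expressions (pointer + 4 * COUNTREG) describe the final
   values of the pointer registers for the rep_mov pattern.  */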
14110 static void
14111 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
14112 rtx destptr, rtx srcptr,
14113 rtx count,
14114 enum machine_mode mode)
14115 {
14116 rtx destexp;
14117 rtx srcexp;
14118 rtx countreg;
14119
14120 /* If the size is known to be divisible by 4, it is better to use SImode rep movs. */
14121 if (mode == QImode && CONST_INT_P (count)
14122 && !(INTVAL (count) & 3))
14123 mode = SImode;
14124
14125 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
14126 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
14127 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
14128 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
14129 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
14130 if (mode != QImode)
14131 {
14132 destexp = gen_rtx_ASHIFT (Pmode, countreg,
14133 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
14134 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
14135 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
14136 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
14137 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
14138 }
14139 else
14140 {
14141 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
14142 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
14143 }
14144 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
14145 destexp, srcexp));
14146 }
14147
14148 /* Output a "rep; stos" instruction.
14149 Arguments have the same meaning as for the previous function. */
14150 static void
14151 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
14152 rtx count,
14153 enum machine_mode mode)
14154 {
14155 rtx destexp;
14156 rtx countreg;
14157
14158 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
14159 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
14160 value = force_reg (mode, gen_lowpart (mode, value));
14161 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
14162 if (mode != QImode)
14163 {
14164 destexp = gen_rtx_ASHIFT (Pmode, countreg,
14165 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
14166 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
14167 }
14168 else
14169 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
14170 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
14171 }
14172
14173 static void
14174 emit_strmov (rtx destmem, rtx srcmem,
14175 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
14176 {
14177 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
14178 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
14179 emit_insn (gen_strmov (destptr, dest, srcptr, src));
14180 }
14181
14182 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
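/* Worked example: for a constant COUNT of 23 with MAX_SIZE == 32 on a
   64-bit target, the code below emits one 16-byte move (as two DImode
   moves), then a 4-byte, a 2-byte and a 1-byte move, covering
   16 + 4 + 2 + 1 = 23 trailing bytes.  */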
14183 static void
14184 expand_movmem_epilogue (rtx destmem, rtx srcmem,
14185 rtx destptr, rtx srcptr, rtx count, int max_size)
14186 {
14187 rtx src, dest;
14188 if (CONST_INT_P (count))
14189 {
14190 HOST_WIDE_INT countval = INTVAL (count);
14191 int offset = 0;
14192
14193 if ((countval & 0x10) && max_size > 16)
14194 {
14195 if (TARGET_64BIT)
14196 {
14197 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
14198 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
14199 }
14200 else
14201 gcc_unreachable ();
14202 offset += 16;
14203 }
14204 if ((countval & 0x08) && max_size > 8)
14205 {
14206 if (TARGET_64BIT)
14207 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
14208 else
14209 {
14210 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
14211 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
14212 }
14213 offset += 8;
14214 }
14215 if ((countval & 0x04) && max_size > 4)
14216 {
14217 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
14218 offset += 4;
14219 }
14220 if ((countval & 0x02) && max_size > 2)
14221 {
14222 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
14223 offset += 2;
14224 }
14225 if ((countval & 0x01) && max_size > 1)
14226 {
14227 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
14228 offset += 1;
14229 }
14230 return;
14231 }
14232 if (max_size > 8)
14233 {
14234 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
14235 count, 1, OPTAB_DIRECT);
14236 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
14237 count, QImode, 1, 4);
14238 return;
14239 }
14240
14241 /* When single stringop instructions are available, we can cheaply advance
14242 the dest and src pointers. Otherwise we save code size by maintaining an
14243 offset (zero is readily available from the preceding rep operation) and
14244 using x86 addressing modes. */
14245 if (TARGET_SINGLE_STRINGOP)
14246 {
14247 if (max_size > 4)
14248 {
14249 rtx label = ix86_expand_aligntest (count, 4, true);
14250 src = change_address (srcmem, SImode, srcptr);
14251 dest = change_address (destmem, SImode, destptr);
14252 emit_insn (gen_strmov (destptr, dest, srcptr, src));
14253 emit_label (label);
14254 LABEL_NUSES (label) = 1;
14255 }
14256 if (max_size > 2)
14257 {
14258 rtx label = ix86_expand_aligntest (count, 2, true);
14259 src = change_address (srcmem, HImode, srcptr);
14260 dest = change_address (destmem, HImode, destptr);
14261 emit_insn (gen_strmov (destptr, dest, srcptr, src));
14262 emit_label (label);
14263 LABEL_NUSES (label) = 1;
14264 }
14265 if (max_size > 1)
14266 {
14267 rtx label = ix86_expand_aligntest (count, 1, true);
14268 src = change_address (srcmem, QImode, srcptr);
14269 dest = change_address (destmem, QImode, destptr);
14270 emit_insn (gen_strmov (destptr, dest, srcptr, src));
14271 emit_label (label);
14272 LABEL_NUSES (label) = 1;
14273 }
14274 }
14275 else
14276 {
14277 rtx offset = force_reg (Pmode, const0_rtx);
14278 rtx tmp;
14279
14280 if (max_size > 4)
14281 {
14282 rtx label = ix86_expand_aligntest (count, 4, true);
14283 src = change_address (srcmem, SImode, srcptr);
14284 dest = change_address (destmem, SImode, destptr);
14285 emit_move_insn (dest, src);
14286 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
14287 true, OPTAB_LIB_WIDEN);
14288 if (tmp != offset)
14289 emit_move_insn (offset, tmp);
14290 emit_label (label);
14291 LABEL_NUSES (label) = 1;
14292 }
14293 if (max_size > 2)
14294 {
14295 rtx label = ix86_expand_aligntest (count, 2, true);
14296 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
14297 src = change_address (srcmem, HImode, tmp);
14298 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
14299 dest = change_address (destmem, HImode, tmp);
14300 emit_move_insn (dest, src);
14301 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
14302 true, OPTAB_LIB_WIDEN);
14303 if (tmp != offset)
14304 emit_move_insn (offset, tmp);
14305 emit_label (label);
14306 LABEL_NUSES (label) = 1;
14307 }
14308 if (max_size > 1)
14309 {
14310 rtx label = ix86_expand_aligntest (count, 1, true);
14311 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
14312 src = change_address (srcmem, QImode, tmp);
14313 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
14314 dest = change_address (destmem, QImode, tmp);
14315 emit_move_insn (dest, src);
14316 emit_label (label);
14317 LABEL_NUSES (label) = 1;
14318 }
14319 }
14320 }
14321
14322 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
14323 static void
14324 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
14325 rtx count, int max_size)
14326 {
14327 count =
14328 expand_simple_binop (counter_mode (count), AND, count,
14329 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
14330 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
14331 gen_lowpart (QImode, value), count, QImode,
14332 1, max_size / 2);
14333 }
14334
14335 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
14336 static void
14337 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
14338 {
14339 rtx dest;
14340
14341 if (CONST_INT_P (count))
14342 {
14343 HOST_WIDE_INT countval = INTVAL (count);
14344 int offset = 0;
14345
14346 if ((countval & 0x10) && max_size > 16)
14347 {
14348 if (TARGET_64BIT)
14349 {
14350 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
14351 emit_insn (gen_strset (destptr, dest, value));
14352 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
14353 emit_insn (gen_strset (destptr, dest, value));
14354 }
14355 else
14356 gcc_unreachable ();
14357 offset += 16;
14358 }
14359 if ((countval & 0x08) && max_size > 8)
14360 {
14361 if (TARGET_64BIT)
14362 {
14363 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
14364 emit_insn (gen_strset (destptr, dest, value));
14365 }
14366 else
14367 {
14368 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
14369 emit_insn (gen_strset (destptr, dest, value));
14370 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
14371 emit_insn (gen_strset (destptr, dest, value));
14372 }
14373 offset += 8;
14374 }
14375 if ((countval & 0x04) && max_size > 4)
14376 {
14377 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
14378 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
14379 offset += 4;
14380 }
14381 if ((countval & 0x02) && max_size > 2)
14382 {
14383 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
14384 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
14385 offset += 2;
14386 }
14387 if ((countval & 0x01) && max_size > 1)
14388 {
14389 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
14390 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
14391 offset += 1;
14392 }
14393 return;
14394 }
14395 if (max_size > 32)
14396 {
14397 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
14398 return;
14399 }
14400 if (max_size > 16)
14401 {
14402 rtx label = ix86_expand_aligntest (count, 16, true);
14403 if (TARGET_64BIT)
14404 {
14405 dest = change_address (destmem, DImode, destptr);
14406 emit_insn (gen_strset (destptr, dest, value));
14407 emit_insn (gen_strset (destptr, dest, value));
14408 }
14409 else
14410 {
14411 dest = change_address (destmem, SImode, destptr);
14412 emit_insn (gen_strset (destptr, dest, value));
14413 emit_insn (gen_strset (destptr, dest, value));
14414 emit_insn (gen_strset (destptr, dest, value));
14415 emit_insn (gen_strset (destptr, dest, value));
14416 }
14417 emit_label (label);
14418 LABEL_NUSES (label) = 1;
14419 }
14420 if (max_size > 8)
14421 {
14422 rtx label = ix86_expand_aligntest (count, 8, true);
14423 if (TARGET_64BIT)
14424 {
14425 dest = change_address (destmem, DImode, destptr);
14426 emit_insn (gen_strset (destptr, dest, value));
14427 }
14428 else
14429 {
14430 dest = change_address (destmem, SImode, destptr);
14431 emit_insn (gen_strset (destptr, dest, value));
14432 emit_insn (gen_strset (destptr, dest, value));
14433 }
14434 emit_label (label);
14435 LABEL_NUSES (label) = 1;
14436 }
14437 if (max_size > 4)
14438 {
14439 rtx label = ix86_expand_aligntest (count, 4, true);
14440 dest = change_address (destmem, SImode, destptr);
14441 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
14442 emit_label (label);
14443 LABEL_NUSES (label) = 1;
14444 }
14445 if (max_size > 2)
14446 {
14447 rtx label = ix86_expand_aligntest (count, 2, true);
14448 dest = change_address (destmem, HImode, destptr);
14449 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
14450 emit_label (label);
14451 LABEL_NUSES (label) = 1;
14452 }
14453 if (max_size > 1)
14454 {
14455 rtx label = ix86_expand_aligntest (count, 1, true);
14456 dest = change_address (destmem, QImode, destptr);
14457 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
14458 emit_label (label);
14459 LABEL_NUSES (label) = 1;
14460 }
14461 }
14462
14463 /* Copy enough bytes from SRC to DEST to align DEST, known to be aligned by ALIGN,
14464 to DESIRED_ALIGNMENT. */
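/* For example, going from ALIGN == 1 to DESIRED_ALIGNMENT == 4 emits
   two conditional copies: a QImode move taken when the destination
   address is odd and an HImode move taken when bit 1 of the address
   is set, each one decrementing COUNT accordingly.  */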
14465 static void
14466 expand_movmem_prologue (rtx destmem, rtx srcmem,
14467 rtx destptr, rtx srcptr, rtx count,
14468 int align, int desired_alignment)
14469 {
14470 if (align <= 1 && desired_alignment > 1)
14471 {
14472 rtx label = ix86_expand_aligntest (destptr, 1, false);
14473 srcmem = change_address (srcmem, QImode, srcptr);
14474 destmem = change_address (destmem, QImode, destptr);
14475 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
14476 ix86_adjust_counter (count, 1);
14477 emit_label (label);
14478 LABEL_NUSES (label) = 1;
14479 }
14480 if (align <= 2 && desired_alignment > 2)
14481 {
14482 rtx label = ix86_expand_aligntest (destptr, 2, false);
14483 srcmem = change_address (srcmem, HImode, srcptr);
14484 destmem = change_address (destmem, HImode, destptr);
14485 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
14486 ix86_adjust_counter (count, 2);
14487 emit_label (label);
14488 LABEL_NUSES (label) = 1;
14489 }
14490 if (align <= 4 && desired_alignment > 4)
14491 {
14492 rtx label = ix86_expand_aligntest (destptr, 4, false);
14493 srcmem = change_address (srcmem, SImode, srcptr);
14494 destmem = change_address (destmem, SImode, destptr);
14495 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
14496 ix86_adjust_counter (count, 4);
14497 emit_label (label);
14498 LABEL_NUSES (label) = 1;
14499 }
14500 gcc_assert (desired_alignment <= 8);
14501 }
14502
14503 /* Store enough bytes at DEST to align DEST, known to be aligned by ALIGN,
14504 to DESIRED_ALIGNMENT. */
14505 static void
14506 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
14507 int align, int desired_alignment)
14508 {
14509 if (align <= 1 && desired_alignment > 1)
14510 {
14511 rtx label = ix86_expand_aligntest (destptr, 1, false);
14512 destmem = change_address (destmem, QImode, destptr);
14513 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
14514 ix86_adjust_counter (count, 1);
14515 emit_label (label);
14516 LABEL_NUSES (label) = 1;
14517 }
14518 if (align <= 2 && desired_alignment > 2)
14519 {
14520 rtx label = ix86_expand_aligntest (destptr, 2, false);
14521 destmem = change_address (destmem, HImode, destptr);
14522 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
14523 ix86_adjust_counter (count, 2);
14524 emit_label (label);
14525 LABEL_NUSES (label) = 1;
14526 }
14527 if (align <= 4 && desired_alignment > 4)
14528 {
14529 rtx label = ix86_expand_aligntest (destptr, 4, false);
14530 destmem = change_address (destmem, SImode, destptr);
14531 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
14532 ix86_adjust_counter (count, 4);
14533 emit_label (label);
14534 LABEL_NUSES (label) = 1;
14535 }
14536 gcc_assert (desired_alignment <= 8);
14537 }
14538
14539 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
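/* For illustration, with a hypothetical cost table of
   {{256, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}} and
   EXPECTED_SIZE == 1000, the scan below stops at the second entry and
   returns rep_prefix_4_byte; with EXPECTED_SIZE == 100000 it reaches
   the libcall entry, so the copy is done out of line unless inlining
   is forced.  */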
14540 static enum stringop_alg
14541 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
14542 int *dynamic_check)
14543 {
14544 const struct stringop_algs * algs;
14545
14546 *dynamic_check = -1;
14547 if (memset)
14548 algs = &ix86_cost->memset[TARGET_64BIT != 0];
14549 else
14550 algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
14551 if (stringop_alg != no_stringop)
14552 return stringop_alg;
14553 /* rep; movq or rep; movl is the smallest variant. */
14554 else if (optimize_size)
14555 {
14556 if (!count || (count & 3))
14557 return rep_prefix_1_byte;
14558 else
14559 return rep_prefix_4_byte;
14560 }
14561 /* Very tiny blocks are best handled via the loop; REP is expensive to set up.
14562 */
14563 else if (expected_size != -1 && expected_size < 4)
14564 return loop_1_byte;
14565 else if (expected_size != -1)
14566 {
14567 unsigned int i;
14568 enum stringop_alg alg = libcall;
14569 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
14570 {
14571 gcc_assert (algs->size[i].max);
14572 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
14573 {
14574 if (algs->size[i].alg != libcall)
14575 alg = algs->size[i].alg;
14576 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
14577 last non-libcall inline algorithm. */
14578 if (TARGET_INLINE_ALL_STRINGOPS)
14579 {
14580 /* When the current size is best copied by a libcall,
14581 but we are still forced to inline, run the heuristic below
14582 that will pick code for medium-sized blocks. */
14583 if (alg != libcall)
14584 return alg;
14585 break;
14586 }
14587 else
14588 return algs->size[i].alg;
14589 }
14590 }
14591 gcc_assert (TARGET_INLINE_ALL_STRINGOPS);
14592 }
14593 /* When asked to inline the call anyway, try to pick a meaningful choice.
14594 We look for the maximal size of block that is faster to copy by hand and
14595 take blocks of at most that size, guessing that the average size will
14596 be roughly half of the block.
14597
14598 If this turns out to be bad, we might simply specify the preferred
14599 choice in ix86_costs. */
14600 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
14601 && algs->unknown_size == libcall)
14602 {
14603 int max = -1;
14604 enum stringop_alg alg;
14605 int i;
14606
14607 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
14608 if (algs->size[i].alg != libcall && algs->size[i].alg)
14609 max = algs->size[i].max;
14610 if (max == -1)
14611 max = 4096;
14612 alg = decide_alg (count, max / 2, memset, dynamic_check);
14613 gcc_assert (*dynamic_check == -1);
14614 gcc_assert (alg != libcall);
14615 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
14616 *dynamic_check = max;
14617 return alg;
14618 }
14619 return algs->unknown_size;
14620 }
14621
14622 /* Decide on alignment. We know that the operand is already aligned to ALIGN
14623 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
14624 static int
14625 decide_alignment (int align,
14626 enum stringop_alg alg,
14627 int expected_size)
14628 {
14629 int desired_align = 0;
14630 switch (alg)
14631 {
14632 case no_stringop:
14633 gcc_unreachable ();
14634 case loop:
14635 case unrolled_loop:
14636 desired_align = GET_MODE_SIZE (Pmode);
14637 break;
14638 case rep_prefix_8_byte:
14639 desired_align = 8;
14640 break;
14641 case rep_prefix_4_byte:
14642 /* PentiumPro has special logic triggering for 8-byte-aligned blocks,
14643 copying a whole cache line at once. */
14644 if (TARGET_PENTIUMPRO)
14645 desired_align = 8;
14646 else
14647 desired_align = 4;
14648 break;
14649 case rep_prefix_1_byte:
14650 /* PentiumPro has special logic triggering for 8-byte-aligned blocks,
14651 copying a whole cache line at once. */
14652 if (TARGET_PENTIUMPRO)
14653 desired_align = 8;
14654 else
14655 desired_align = 1;
14656 break;
14657 case loop_1_byte:
14658 desired_align = 1;
14659 break;
14660 case libcall:
14661 return 0;
14662 }
14663
14664 if (optimize_size)
14665 desired_align = 1;
14666 if (desired_align < align)
14667 desired_align = align;
14668 if (expected_size != -1 && expected_size < 4)
14669 desired_align = align;
14670 return desired_align;
14671 }
14672
14673 /* Return the smallest power of 2 greater than VAL. */
14674 static int
14675 smallest_pow2_greater_than (int val)
14676 {
14677 int ret = 1;
14678 while (ret <= val)
14679 ret <<= 1;
14680 return ret;
14681 }
14682
14683 /* Expand string move (memcpy) operation. Use i386 string operations when
14684 profitable. expand_clrmem contains similar code. The code depends upon
14685 architecture, block size and alignment, but always has the same
14686 overall structure:
14687
14688 1) Prologue guard: Conditional that jumps up to epilogues for small
14689 blocks that can be handled by epilogue alone. This is faster but
14690 also needed for correctness, since prologue assume the block is larger
14691 than the desired alignment.
14692
14693 Optional dynamic check for size and libcall for large
14694 blocks is emitted here too, with -minline-stringops-dynamically.
14695
14696 2) Prologue: copy first few bytes in order to get destination aligned
14697 to DESIRED_ALIGN. It is emitted only when ALIGN is less than
14698 DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be copied.
14699 We emit either a jump tree on power of two sized blocks, or a byte loop.
14700
14701 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
14702 with specified algorithm.
14703
14704 4) Epilogue: code copying tail of the block that is too small to be
14705 handled by main body (or up to size guarded by prologue guard). */
14706
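/* An illustrative sketch of the four steps, under assumed parameters
   (ALIGN == 1, a loop algorithm with SIZE_NEEDED == 4, DESIRED_ALIGN == 4,
   so EPILOGUE_SIZE_NEEDED becomes 4):

     if (count < 4) goto epilogue;             -- 1) prologue guard
     copy single bytes until dst is 4-aligned; -- 2) alignment prologue
     copy 4-byte chunks while count allows;    -- 3) main body
     epilogue: copy the remaining bytes;       -- 4) epilogue  */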
14707 int
14708 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
14709 rtx expected_align_exp, rtx expected_size_exp)
14710 {
14711 rtx destreg;
14712 rtx srcreg;
14713 rtx label = NULL;
14714 rtx tmp;
14715 rtx jump_around_label = NULL;
14716 HOST_WIDE_INT align = 1;
14717 unsigned HOST_WIDE_INT count = 0;
14718 HOST_WIDE_INT expected_size = -1;
14719 int size_needed = 0, epilogue_size_needed;
14720 int desired_align = 0;
14721 enum stringop_alg alg;
14722 int dynamic_check;
14723
14724 if (CONST_INT_P (align_exp))
14725 align = INTVAL (align_exp);
14726 /* i386 can do misaligned access at reasonably increased cost. */
14727 if (CONST_INT_P (expected_align_exp)
14728 && INTVAL (expected_align_exp) > align)
14729 align = INTVAL (expected_align_exp);
14730 if (CONST_INT_P (count_exp))
14731 count = expected_size = INTVAL (count_exp);
14732 if (CONST_INT_P (expected_size_exp) && count == 0)
14733 expected_size = INTVAL (expected_size_exp);
14734
14735 /* Step 0: Decide on preferred algorithm, desired alignment and
14736 size of chunks to be copied by main loop. */
14737
14738 alg = decide_alg (count, expected_size, false, &dynamic_check);
14739 desired_align = decide_alignment (align, alg, expected_size);
14740
14741 if (!TARGET_ALIGN_STRINGOPS)
14742 align = desired_align;
14743
14744 if (alg == libcall)
14745 return 0;
14746 gcc_assert (alg != no_stringop);
14747 if (!count)
14748 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
14749 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
14750 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
14751 switch (alg)
14752 {
14753 case libcall:
14754 case no_stringop:
14755 gcc_unreachable ();
14756 case loop:
14757 size_needed = GET_MODE_SIZE (Pmode);
14758 break;
14759 case unrolled_loop:
14760 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
14761 break;
14762 case rep_prefix_8_byte:
14763 size_needed = 8;
14764 break;
14765 case rep_prefix_4_byte:
14766 size_needed = 4;
14767 break;
14768 case rep_prefix_1_byte:
14769 case loop_1_byte:
14770 size_needed = 1;
14771 break;
14772 }
14773
14774 epilogue_size_needed = size_needed;
14775
14776 /* Step 1: Prologue guard. */
14777
14778 /* Alignment code needs count to be in register. */
14779 if (CONST_INT_P (count_exp) && desired_align > align)
14780 {
14781 enum machine_mode mode = SImode;
14782 if (TARGET_64BIT && (count & ~0xffffffff))
14783 mode = DImode;
14784 count_exp = force_reg (mode, count_exp);
14785 }
14786 gcc_assert (desired_align >= 1 && align >= 1);
14787
14788 /* Ensure that alignment prologue won't copy past end of block. */
14789 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
14790 {
14791 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
14792 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
14793 Make sure it is a power of 2. */
14794 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
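/* For instance, for the 64-bit unrolled loop (SIZE_NEEDED == 32) with
   DESIRED_ALIGN == 8 and ALIGN == 1 this is MAX (31, 7) rounded up to
   the next power of 2, i.e. 32, so blocks smaller than 32 bytes branch
   straight to the epilogue.  */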
14795
14796 label = gen_label_rtx ();
14797 emit_cmp_and_jump_insns (count_exp,
14798 GEN_INT (epilogue_size_needed),
14799 LTU, 0, counter_mode (count_exp), 1, label);
14800 if (GET_CODE (count_exp) == CONST_INT)
14801 ;
14802 else if (expected_size == -1 || expected_size < epilogue_size_needed)
14803 predict_jump (REG_BR_PROB_BASE * 60 / 100);
14804 else
14805 predict_jump (REG_BR_PROB_BASE * 20 / 100);
14806 }
14807 /* Emit code to decide on runtime whether library call or inline should be
14808 used. */
14809 if (dynamic_check != -1)
14810 {
14811 rtx hot_label = gen_label_rtx ();
14812 jump_around_label = gen_label_rtx ();
14813 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
14814 LEU, 0, GET_MODE (count_exp), 1, hot_label);
14815 predict_jump (REG_BR_PROB_BASE * 90 / 100);
14816 emit_block_move_via_libcall (dst, src, count_exp, false);
14817 emit_jump (jump_around_label);
14818 emit_label (hot_label);
14819 }
14820
14821 /* Step 2: Alignment prologue. */
14822
14823 if (desired_align > align)
14824 {
14825 /* Except for the first move in epilogue, we no longer know
14826 constant offset in aliasing info. It does not seem worth
14827 the pain to maintain it for the first move, so throw away
14828 the info early. */
14829 src = change_address (src, BLKmode, srcreg);
14830 dst = change_address (dst, BLKmode, destreg);
14831 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
14832 desired_align);
14833 }
14834 if (label && size_needed == 1)
14835 {
14836 emit_label (label);
14837 LABEL_NUSES (label) = 1;
14838 label = NULL;
14839 }
14840
14841 /* Step 3: Main loop. */
14842
14843 switch (alg)
14844 {
14845 case libcall:
14846 case no_stringop:
14847 gcc_unreachable ();
14848 case loop_1_byte:
14849 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
14850 count_exp, QImode, 1, expected_size);
14851 break;
14852 case loop:
14853 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
14854 count_exp, Pmode, 1, expected_size);
14855 break;
14856 case unrolled_loop:
14857 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
14858 registers for 4 temporaries anyway. */
14859 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
14860 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
14861 expected_size);
14862 break;
14863 case rep_prefix_8_byte:
14864 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
14865 DImode);
14866 break;
14867 case rep_prefix_4_byte:
14868 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
14869 SImode);
14870 break;
14871 case rep_prefix_1_byte:
14872 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
14873 QImode);
14874 break;
14875 }
14876 /* Properly adjust the offsets of src and dest memory for aliasing. */
14877 if (CONST_INT_P (count_exp))
14878 {
14879 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
14880 (count / size_needed) * size_needed);
14881 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
14882 (count / size_needed) * size_needed);
14883 }
14884 else
14885 {
14886 src = change_address (src, BLKmode, srcreg);
14887 dst = change_address (dst, BLKmode, destreg);
14888 }
14889
14890 /* Step 4: Epilogue to copy the remaining bytes. */
14891
14892 if (label)
14893 {
14894 /* When the main loop is done, COUNT_EXP might hold the original count,
14895 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
14896 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
14897 bytes. Compensate if needed. */
14898
14899 if (size_needed < epilogue_size_needed)
14900 {
14901 tmp =
14902 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
14903 GEN_INT (size_needed - 1), count_exp, 1,
14904 OPTAB_DIRECT);
14905 if (tmp != count_exp)
14906 emit_move_insn (count_exp, tmp);
14907 }
14908 emit_label (label);
14909 LABEL_NUSES (label) = 1;
14910 }
14911
14912 if (count_exp != const0_rtx && epilogue_size_needed > 1)
14913 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
14914 epilogue_size_needed);
14915 if (jump_around_label)
14916 emit_label (jump_around_label);
14917 return 1;
14918 }
14919
14920 /* Helper function for memset. For QImode value 0xXY produce
14921 0xXYXYXYXY of the width specified by MODE. This is essentially
14922 a * 0x01010101 (or the DImode analogue), but we can do slightly
14923 better than synth_mult by unwinding the sequence by hand on CPUs
14924 with slow multiply. */
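/* A quick worked example: duplicating the QImode value 0xab into SImode
   yields 0xabababab; the shift/or fallback below computes it as
     v |= v << 8;    giving 0x0000abab
     v |= v << 16;   giving 0xabababab
   which equals 0xab * 0x01010101.  */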
14925 static rtx
14926 promote_duplicated_reg (enum machine_mode mode, rtx val)
14927 {
14928 enum machine_mode valmode = GET_MODE (val);
14929 rtx tmp;
14930 int nops = mode == DImode ? 3 : 2;
14931
14932 gcc_assert (mode == SImode || mode == DImode);
14933 if (val == const0_rtx)
14934 return copy_to_mode_reg (mode, const0_rtx);
14935 if (CONST_INT_P (val))
14936 {
14937 HOST_WIDE_INT v = INTVAL (val) & 255;
14938
14939 v |= v << 8;
14940 v |= v << 16;
14941 if (mode == DImode)
14942 v |= (v << 16) << 16;
14943 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
14944 }
14945
14946 if (valmode == VOIDmode)
14947 valmode = QImode;
14948 if (valmode != QImode)
14949 val = gen_lowpart (QImode, val);
14950 if (mode == QImode)
14951 return val;
14952 if (!TARGET_PARTIAL_REG_STALL)
14953 nops--;
14954 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
14955 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
14956 <= (ix86_cost->shift_const + ix86_cost->add) * nops
14957 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
14958 {
14959 rtx reg = convert_modes (mode, QImode, val, true);
14960 tmp = promote_duplicated_reg (mode, const1_rtx);
14961 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
14962 OPTAB_DIRECT);
14963 }
14964 else
14965 {
14966 rtx reg = convert_modes (mode, QImode, val, true);
14967
14968 if (!TARGET_PARTIAL_REG_STALL)
14969 if (mode == SImode)
14970 emit_insn (gen_movsi_insv_1 (reg, reg));
14971 else
14972 emit_insn (gen_movdi_insv_1_rex64 (reg, reg));
14973 else
14974 {
14975 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
14976 NULL, 1, OPTAB_DIRECT);
14977 reg =
14978 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
14979 }
14980 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
14981 NULL, 1, OPTAB_DIRECT);
14982 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
14983 if (mode == SImode)
14984 return reg;
14985 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
14986 NULL, 1, OPTAB_DIRECT);
14987 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
14988 return reg;
14989 }
14990 }
14991
14992 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
14993 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
14994 alignment from ALIGN to DESIRED_ALIGN. */
14995 static rtx
14996 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
14997 {
14998 rtx promoted_val;
14999
15000 if (TARGET_64BIT
15001 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
15002 promoted_val = promote_duplicated_reg (DImode, val);
15003 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
15004 promoted_val = promote_duplicated_reg (SImode, val);
15005 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
15006 promoted_val = promote_duplicated_reg (HImode, val);
15007 else
15008 promoted_val = val;
15009
15010 return promoted_val;
15011 }
15012
15013 /* Expand string set operation (memset). Use i386 string operations when
15014 profitable. See ix86_expand_movmem comment for explanation of individual
15015 steps performed. */
15016 int
15017 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
15018 rtx expected_align_exp, rtx expected_size_exp)
15019 {
15020 rtx destreg;
15021 rtx label = NULL;
15022 rtx tmp;
15023 rtx jump_around_label = NULL;
15024 HOST_WIDE_INT align = 1;
15025 unsigned HOST_WIDE_INT count = 0;
15026 HOST_WIDE_INT expected_size = -1;
15027 int size_needed = 0, epilogue_size_needed;
15028 int desired_align = 0;
15029 enum stringop_alg alg;
15030 rtx promoted_val = NULL;
15031 bool force_loopy_epilogue = false;
15032 int dynamic_check;
15033
15034 if (CONST_INT_P (align_exp))
15035 align = INTVAL (align_exp);
15036 /* i386 can do misaligned access at reasonably increased cost. */
15037 if (CONST_INT_P (expected_align_exp)
15038 && INTVAL (expected_align_exp) > align)
15039 align = INTVAL (expected_align_exp);
15040 if (CONST_INT_P (count_exp))
15041 count = expected_size = INTVAL (count_exp);
15042 if (CONST_INT_P (expected_size_exp) && count == 0)
15043 expected_size = INTVAL (expected_size_exp);
15044
15045 /* Step 0: Decide on preferred algorithm, desired alignment and
15046 size of chunks to be copied by main loop. */
15047
15048 alg = decide_alg (count, expected_size, true, &dynamic_check);
15049 desired_align = decide_alignment (align, alg, expected_size);
15050
15051 if (!TARGET_ALIGN_STRINGOPS)
15052 align = desired_align;
15053
15054 if (alg == libcall)
15055 return 0;
15056 gcc_assert (alg != no_stringop);
15057 if (!count)
15058 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
15059 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
15060 switch (alg)
15061 {
15062 case libcall:
15063 case no_stringop:
15064 gcc_unreachable ();
15065 case loop:
15066 size_needed = GET_MODE_SIZE (Pmode);
15067 break;
15068 case unrolled_loop:
15069 size_needed = GET_MODE_SIZE (Pmode) * 4;
15070 break;
15071 case rep_prefix_8_byte:
15072 size_needed = 8;
15073 break;
15074 case rep_prefix_4_byte:
15075 size_needed = 4;
15076 break;
15077 case rep_prefix_1_byte:
15078 case loop_1_byte:
15079 size_needed = 1;
15080 break;
15081 }
15082 epilogue_size_needed = size_needed;
15083
15084 /* Step 1: Prologue guard. */
15085
15086 /* Alignment code needs count to be in register. */
15087 if (CONST_INT_P (count_exp) && desired_align > align)
15088 {
15089 enum machine_mode mode = SImode;
15090 if (TARGET_64BIT && (count & ~0xffffffff))
15091 mode = DImode;
15092 count_exp = force_reg (mode, count_exp);
15093 }
15094 /* Do the cheap promotion to allow better CSE across the
15095 main loop and epilogue (i.e. one load of the big constant in
15096 front of all code). */
15097 if (CONST_INT_P (val_exp))
15098 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
15099 desired_align, align);
15100 /* Ensure that alignment prologue won't copy past end of block. */
15101 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
15102 {
15103 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
15104 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
15105 Make sure it is a power of 2. */
15106 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
15107
15108 /* To improve performance of small blocks, we jump around the VAL
15109 promoting code. This means that if the promoted VAL is not constant,
15110 we might not use it in the epilogue and have to use the byte
15111 loop variant. */
15112 if (epilogue_size_needed > 2 && !promoted_val)
15113 force_loopy_epilogue = true;
15114 label = gen_label_rtx ();
15115 emit_cmp_and_jump_insns (count_exp,
15116 GEN_INT (epilogue_size_needed),
15117 LTU, 0, counter_mode (count_exp), 1, label);
15118 if (GET_CODE (count_exp) == CONST_INT)
15119 ;
15120 else if (expected_size == -1 || expected_size <= epilogue_size_needed)
15121 predict_jump (REG_BR_PROB_BASE * 60 / 100);
15122 else
15123 predict_jump (REG_BR_PROB_BASE * 20 / 100);
15124 }
15125 if (dynamic_check != -1)
15126 {
15127 rtx hot_label = gen_label_rtx ();
15128 jump_around_label = gen_label_rtx ();
15129 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
15130 LEU, 0, counter_mode (count_exp), 1, hot_label);
15131 predict_jump (REG_BR_PROB_BASE * 90 / 100);
15132 set_storage_via_libcall (dst, count_exp, val_exp, false);
15133 emit_jump (jump_around_label);
15134 emit_label (hot_label);
15135 }
15136
15137 /* Step 2: Alignment prologue. */
15138
15139 /* Do the expensive promotion once we have branched off the small blocks. */
15140 if (!promoted_val)
15141 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
15142 desired_align, align);
15143 gcc_assert (desired_align >= 1 && align >= 1);
15144
15145 if (desired_align > align)
15146 {
15147 /* Except for the first move in epilogue, we no longer know
15148 constant offset in aliasing info. It does not seem worth
15149 the pain to maintain it for the first move, so throw away
15150 the info early. */
15151 dst = change_address (dst, BLKmode, destreg);
15152 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
15153 desired_align);
15154 }
15155 if (label && size_needed == 1)
15156 {
15157 emit_label (label);
15158 LABEL_NUSES (label) = 1;
15159 label = NULL;
15160 }
15161
15162 /* Step 3: Main loop. */
15163
15164 switch (alg)
15165 {
15166 case libcall:
15167 case no_stringop:
15168 gcc_unreachable ();
15169 case loop_1_byte:
15170 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
15171 count_exp, QImode, 1, expected_size);
15172 break;
15173 case loop:
15174 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
15175 count_exp, Pmode, 1, expected_size);
15176 break;
15177 case unrolled_loop:
15178 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
15179 count_exp, Pmode, 4, expected_size);
15180 break;
15181 case rep_prefix_8_byte:
15182 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
15183 DImode);
15184 break;
15185 case rep_prefix_4_byte:
15186 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
15187 SImode);
15188 break;
15189 case rep_prefix_1_byte:
15190 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
15191 QImode);
15192 break;
15193 }
15194 /* Properly adjust the offset of the dest memory for aliasing. */
15195 if (CONST_INT_P (count_exp))
15196 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
15197 (count / size_needed) * size_needed);
15198 else
15199 dst = change_address (dst, BLKmode, destreg);
15200
15201 /* Step 4: Epilogue to copy the remaining bytes. */
15202
15203 if (label)
15204 {
15205 /* When the main loop is done, COUNT_EXP might hold the original count,
15206 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
15207 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
15208 bytes. Compensate if needed. */
15209
15210 if (size_needed < desired_align - align)
15211 {
15212 tmp =
15213 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
15214 GEN_INT (size_needed - 1), count_exp, 1,
15215 OPTAB_DIRECT);
15216 size_needed = desired_align - align + 1;
15217 if (tmp != count_exp)
15218 emit_move_insn (count_exp, tmp);
15219 }
15220 emit_label (label);
15221 LABEL_NUSES (label) = 1;
15222 }
15223 if (count_exp != const0_rtx && epilogue_size_needed > 1)
15224 {
15225 if (force_loopy_epilogue)
15226 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
15227 size_needed);
15228 else
15229 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
15230 size_needed);
15231 }
15232 if (jump_around_label)
15233 emit_label (jump_around_label);
15234 return 1;
15235 }
15236
15237 /* Expand the appropriate insns for doing strlen if not just doing
15238 repnz; scasb
15239
15240 out = result, initialized with the start address
15241 align_rtx = alignment of the address.
15242 scratch = scratch register, initialized with the start address when
15243 not aligned, otherwise undefined
15244
15245 This is just the body. It needs the initializations mentioned above and
15246 some address computing at the end. These things are done in i386.md. */
15247
15248 static void
15249 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
15250 {
15251 int align;
15252 rtx tmp;
15253 rtx align_2_label = NULL_RTX;
15254 rtx align_3_label = NULL_RTX;
15255 rtx align_4_label = gen_label_rtx ();
15256 rtx end_0_label = gen_label_rtx ();
15257 rtx mem;
15258 rtx tmpreg = gen_reg_rtx (SImode);
15259 rtx scratch = gen_reg_rtx (SImode);
15260 rtx cmp;
15261
15262 align = 0;
15263 if (CONST_INT_P (align_rtx))
15264 align = INTVAL (align_rtx);
15265
15266 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
15267
15268 /* Is there a known alignment and is it less than 4? */
15269 if (align < 4)
15270 {
15271 rtx scratch1 = gen_reg_rtx (Pmode);
15272 emit_move_insn (scratch1, out);
15273 /* Is there a known alignment and is it not 2? */
15274 if (align != 2)
15275 {
15276 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
15277 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
15278
15279 /* Leave just the 3 lower bits. */
15280 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
15281 NULL_RTX, 0, OPTAB_WIDEN);
15282
15283 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
15284 Pmode, 1, align_4_label);
15285 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
15286 Pmode, 1, align_2_label);
15287 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
15288 Pmode, 1, align_3_label);
15289 }
15290 else
15291 {
15292 /* Since the alignment is 2, we have to check 2 or 0 bytes;
15293 check whether it is aligned to a 4-byte boundary. */
15294
15295 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
15296 NULL_RTX, 0, OPTAB_WIDEN);
15297
15298 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
15299 Pmode, 1, align_4_label);
15300 }
15301
15302 mem = change_address (src, QImode, out);
15303
15304 /* Now compare the bytes. */
15305
15306 /* Compare the first n unaligned bytes on a byte-per-byte basis. */
15307 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
15308 QImode, 1, end_0_label);
15309
15310 /* Increment the address. */
15311 if (TARGET_64BIT)
15312 emit_insn (gen_adddi3 (out, out, const1_rtx));
15313 else
15314 emit_insn (gen_addsi3 (out, out, const1_rtx));
15315
15316 /* Not needed with an alignment of 2 */
15317 if (align != 2)
15318 {
15319 emit_label (align_2_label);
15320
15321 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
15322 end_0_label);
15323
15324 if (TARGET_64BIT)
15325 emit_insn (gen_adddi3 (out, out, const1_rtx));
15326 else
15327 emit_insn (gen_addsi3 (out, out, const1_rtx));
15328
15329 emit_label (align_3_label);
15330 }
15331
15332 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
15333 end_0_label);
15334
15335 if (TARGET_64BIT)
15336 emit_insn (gen_adddi3 (out, out, const1_rtx));
15337 else
15338 emit_insn (gen_addsi3 (out, out, const1_rtx));
15339 }
15340
15341 /* Generate loop to check 4 bytes at a time. It is not a good idea to
15342 align this loop; doing so only makes the program larger and does not
15343 speed it up. */
15344 emit_label (align_4_label);
15345
15346 mem = change_address (src, SImode, out);
15347 emit_move_insn (scratch, mem);
15348 if (TARGET_64BIT)
15349 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
15350 else
15351 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
15352
15353 /* This formula yields a nonzero result iff one of the bytes is zero.
15354 This saves three branches inside the loop and many cycles. */
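/* A worked example of that formula, for scratch == 0x12340078 (whose
   second-lowest byte is zero):
     scratch + (-0x01010101)     == 0x1132ff77
     ~scratch                    == 0xedcbff87
     the AND of those two        == 0x0102ff07
     ... & 0x80808080            == 0x00008000  (nonzero: a zero byte)
   whereas for 0x12345678 the final AND is 0.  */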
15355
15356 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
15357 emit_insn (gen_one_cmplsi2 (scratch, scratch));
15358 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
15359 emit_insn (gen_andsi3 (tmpreg, tmpreg,
15360 gen_int_mode (0x80808080, SImode)));
15361 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
15362 align_4_label);
15363
15364 if (TARGET_CMOVE)
15365 {
15366 rtx reg = gen_reg_rtx (SImode);
15367 rtx reg2 = gen_reg_rtx (Pmode);
15368 emit_move_insn (reg, tmpreg);
15369 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
15370
15371 /* If zero is not in the first two bytes, move two bytes forward. */
15372 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
15373 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
15374 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
15375 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
15376 gen_rtx_IF_THEN_ELSE (SImode, tmp,
15377 reg,
15378 tmpreg)));
15379 /* Emit lea manually to avoid clobbering of flags. */
15380 emit_insn (gen_rtx_SET (SImode, reg2,
15381 gen_rtx_PLUS (Pmode, out, const2_rtx)));
15382
15383 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
15384 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
15385 emit_insn (gen_rtx_SET (VOIDmode, out,
15386 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
15387 reg2,
15388 out)));
15389
15390 }
15391 else
15392 {
15393 rtx end_2_label = gen_label_rtx ();
15394 /* Is zero in the first two bytes? */
15395
15396 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
15397 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
15398 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
15399 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
15400 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
15401 pc_rtx);
15402 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
15403 JUMP_LABEL (tmp) = end_2_label;
15404
15405 /* Not in the first two. Move two bytes forward. */
15406 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
15407 if (TARGET_64BIT)
15408 emit_insn (gen_adddi3 (out, out, const2_rtx));
15409 else
15410 emit_insn (gen_addsi3 (out, out, const2_rtx));
15411
15412 emit_label (end_2_label);
15413
15414 }
15415
15416 /* Avoid branch in fixing the byte. */
15417 tmpreg = gen_lowpart (QImode, tmpreg);
15418 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
15419 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
15420 if (TARGET_64BIT)
15421 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
15422 else
15423 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
15424
15425 emit_label (end_0_label);
15426 }
15427
15428 /* Expand strlen. */
15429
15430 int
15431 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
15432 {
15433 rtx addr, scratch1, scratch2, scratch3, scratch4;
15434
15435 /* The generic case of strlen expander is long. Avoid its
15436 expansion unless TARGET_INLINE_ALL_STRINGOPS. */
15437
15438 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
15439 && !TARGET_INLINE_ALL_STRINGOPS
15440 && !optimize_size
15441 && (!CONST_INT_P (align) || INTVAL (align) < 4))
15442 return 0;
15443
15444 addr = force_reg (Pmode, XEXP (src, 0));
15445 scratch1 = gen_reg_rtx (Pmode);
15446
15447 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
15448 && !optimize_size)
15449 {
15450 /* Well it seems that some optimizer does not combine a call like
15451 foo(strlen(bar), strlen(bar));
15452 when the move and the subtraction are done here. It does calculate
15453 the length just once when these instructions are done inside of
15454 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
15455 often used and I use one fewer register for the lifetime of
15456 output_strlen_unroll() this is better. */
15457
15458 emit_move_insn (out, addr);
15459
15460 ix86_expand_strlensi_unroll_1 (out, src, align);
15461
15462 /* strlensi_unroll_1 returns the address of the zero at the end of
15463 the string, like memchr(), so compute the length by subtracting
15464 the start address. */
15465 if (TARGET_64BIT)
15466 emit_insn (gen_subdi3 (out, out, addr));
15467 else
15468 emit_insn (gen_subsi3 (out, out, addr));
15469 }
15470 else
15471 {
15472 rtx unspec;
15473 scratch2 = gen_reg_rtx (Pmode);
15474 scratch3 = gen_reg_rtx (Pmode);
15475 scratch4 = force_reg (Pmode, constm1_rtx);
15476
15477 emit_move_insn (scratch3, addr);
15478 eoschar = force_reg (QImode, eoschar);
15479
15480 src = replace_equiv_address_nv (src, scratch3);
15481
15482 /* If .md starts supporting :P, this can be done in .md. */
15483 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
15484 scratch4), UNSPEC_SCAS);
15485 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
15486 if (TARGET_64BIT)
15487 {
15488 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
15489 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
15490 }
15491 else
15492 {
15493 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
15494 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
15495 }
15496 }
15497 return 1;
15498 }
15499
15500 /* For a given symbol (function) construct code to compute the address of
15501 its PLT entry in the large x86-64 PIC model. */
15502 rtx
15503 construct_plt_address (rtx symbol)
15504 {
15505 rtx tmp = gen_reg_rtx (Pmode);
15506 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
15507
15508 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
15509 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
15510
15511 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
15512 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
15513 return tmp;
15514 }
15515
15516 void
15517 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
15518 rtx callarg2 ATTRIBUTE_UNUSED,
15519 rtx pop, int sibcall)
15520 {
15521 rtx use = NULL, call;
15522
15523 if (pop == const0_rtx)
15524 pop = NULL;
15525 gcc_assert (!TARGET_64BIT || !pop);
15526
15527 if (TARGET_MACHO && !TARGET_64BIT)
15528 {
15529 #if TARGET_MACHO
15530 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
15531 fnaddr = machopic_indirect_call_target (fnaddr);
15532 #endif
15533 }
15534 else
15535 {
15536 /* Static functions and indirect calls don't need the pic register. */
15537 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
15538 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
15539 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
15540 use_reg (&use, pic_offset_table_rtx);
15541 }
15542
15543 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
15544 {
15545 rtx al = gen_rtx_REG (QImode, 0);
15546 emit_move_insn (al, callarg2);
15547 use_reg (&use, al);
15548 }
15549
15550 if (ix86_cmodel == CM_LARGE_PIC
15551 && GET_CODE (fnaddr) == MEM
15552 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
15553 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
15554 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
15555 else if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
15556 {
15557 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
15558 fnaddr = gen_rtx_MEM (QImode, fnaddr);
15559 }
15560 if (sibcall && TARGET_64BIT
15561 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
15562 {
15563 rtx addr;
15564 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
15565 fnaddr = gen_rtx_REG (Pmode, R11_REG);
15566 emit_move_insn (fnaddr, addr);
15567 fnaddr = gen_rtx_MEM (QImode, fnaddr);
15568 }
15569
15570 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
15571 if (retval)
15572 call = gen_rtx_SET (VOIDmode, retval, call);
15573 if (pop)
15574 {
15575 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
15576 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
15577 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
15578 }
15579
15580 call = emit_call_insn (call);
15581 if (use)
15582 CALL_INSN_FUNCTION_USAGE (call) = use;
15583 }
15584
15585 \f
15586 /* Clear stack slot assignments remembered from previous functions.
15587 This is called from INIT_EXPANDERS once before RTL is emitted for each
15588 function. */
15589
15590 static struct machine_function *
15591 ix86_init_machine_status (void)
15592 {
15593 struct machine_function *f;
15594
15595 f = GGC_CNEW (struct machine_function);
15596 f->use_fast_prologue_epilogue_nregs = -1;
15597 f->tls_descriptor_call_expanded_p = 0;
15598
15599 return f;
15600 }
15601
15602 /* Return a MEM corresponding to a stack slot with mode MODE.
15603 Allocate a new slot if necessary.
15604
15605 The RTL for a function can have several slots available: N is
15606 which slot to use. */
15607
15608 rtx
15609 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
15610 {
15611 struct stack_local_entry *s;
15612
15613 gcc_assert (n < MAX_386_STACK_LOCALS);
15614
15615 for (s = ix86_stack_locals; s; s = s->next)
15616 if (s->mode == mode && s->n == n)
15617 return copy_rtx (s->rtl);
15618
15619 s = (struct stack_local_entry *)
15620 ggc_alloc (sizeof (struct stack_local_entry));
15621 s->n = n;
15622 s->mode = mode;
15623 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
15624
15625 s->next = ix86_stack_locals;
15626 ix86_stack_locals = s;
15627 return s->rtl;
15628 }
15629
15630 /* Construct the SYMBOL_REF for the tls_get_addr function. */
15631
15632 static GTY(()) rtx ix86_tls_symbol;
15633 rtx
15634 ix86_tls_get_addr (void)
15635 {
15636
15637 if (!ix86_tls_symbol)
15638 {
15639 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
15640 (TARGET_ANY_GNU_TLS
15641 && !TARGET_64BIT)
15642 ? "___tls_get_addr"
15643 : "__tls_get_addr");
15644 }
15645
15646 return ix86_tls_symbol;
15647 }
15648
15649 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
15650
15651 static GTY(()) rtx ix86_tls_module_base_symbol;
15652 rtx
15653 ix86_tls_module_base (void)
15654 {
15655
15656 if (!ix86_tls_module_base_symbol)
15657 {
15658 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
15659 "_TLS_MODULE_BASE_");
15660 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
15661 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
15662 }
15663
15664 return ix86_tls_module_base_symbol;
15665 }
15666 \f
15667 /* Calculate the length of the memory address in the instruction
15668 encoding. Does not include the one-byte modrm, opcode, or prefix. */
15669
15670 int
15671 memory_address_length (rtx addr)
15672 {
15673 struct ix86_address parts;
15674 rtx base, index, disp;
15675 int len;
15676 int ok;
15677
15678 if (GET_CODE (addr) == PRE_DEC
15679 || GET_CODE (addr) == POST_INC
15680 || GET_CODE (addr) == PRE_MODIFY
15681 || GET_CODE (addr) == POST_MODIFY)
15682 return 0;
15683
15684 ok = ix86_decompose_address (addr, &parts);
15685 gcc_assert (ok);
15686
15687 if (parts.base && GET_CODE (parts.base) == SUBREG)
15688 parts.base = SUBREG_REG (parts.base);
15689 if (parts.index && GET_CODE (parts.index) == SUBREG)
15690 parts.index = SUBREG_REG (parts.index);
15691
15692 base = parts.base;
15693 index = parts.index;
15694 disp = parts.disp;
15695 len = 0;
15696
15697 /* Rule of thumb:
15698 - esp as the base always wants an index,
15699 - ebp as the base always wants a displacement. */
15700
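/* As a concrete illustration (32-bit encodings): (%eax) needs only the
   modrm byte, so len stays 0; (%esp) needs an extra SIB byte and (%ebp)
   an 8-bit displacement, so both yield len == 1; a bare symbolic address
   is a 4-byte displacement, yielding len == 4.  */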
15701 /* Register Indirect. */
15702 if (base && !index && !disp)
15703 {
15704 /* esp (for its index) and ebp (for its displacement) need
15705 the two-byte modrm form. */
15706 if (addr == stack_pointer_rtx
15707 || addr == arg_pointer_rtx
15708 || addr == frame_pointer_rtx
15709 || addr == hard_frame_pointer_rtx)
15710 len = 1;
15711 }
15712
15713 /* Direct Addressing. */
15714 else if (disp && !base && !index)
15715 len = 4;
15716
15717 else
15718 {
15719 /* Find the length of the displacement constant. */
15720 if (disp)
15721 {
15722 if (base && satisfies_constraint_K (disp))
15723 len = 1;
15724 else
15725 len = 4;
15726 }
15727 /* ebp always wants a displacement. */
15728 else if (base == hard_frame_pointer_rtx)
15729 len = 1;
15730
15731 /* An index requires the two-byte modrm form.... */
15732 if (index
15733 /* ...like esp, which always wants an index. */
15734 || base == stack_pointer_rtx
15735 || base == arg_pointer_rtx
15736 || base == frame_pointer_rtx)
15737 len += 1;
15738 }
15739
15740 return len;
15741 }
15742
15743 /* Compute default value for "length_immediate" attribute. When SHORTFORM
15744 is set, expect that the insn has an 8-bit immediate alternative. */
15745 int
15746 ix86_attr_length_immediate_default (rtx insn, int shortform)
15747 {
15748 int len = 0;
15749 int i;
15750 extract_insn_cached (insn);
15751 for (i = recog_data.n_operands - 1; i >= 0; --i)
15752 if (CONSTANT_P (recog_data.operand[i]))
15753 {
15754 gcc_assert (!len);
15755 if (shortform && satisfies_constraint_K (recog_data.operand[i]))
15756 len = 1;
15757 else
15758 {
15759 switch (get_attr_mode (insn))
15760 {
15761 case MODE_QI:
15762 len+=1;
15763 break;
15764 case MODE_HI:
15765 len+=2;
15766 break;
15767 case MODE_SI:
15768 len+=4;
15769 break;
15770 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
15771 case MODE_DI:
15772 len+=4;
15773 break;
15774 default:
15775 fatal_insn ("unknown insn mode", insn);
15776 }
15777 }
15778 }
15779 return len;
15780 }
15781 /* Compute default value for "length_address" attribute. */
15782 int
15783 ix86_attr_length_address_default (rtx insn)
15784 {
15785 int i;
15786
15787 if (get_attr_type (insn) == TYPE_LEA)
15788 {
15789 rtx set = PATTERN (insn);
15790
15791 if (GET_CODE (set) == PARALLEL)
15792 set = XVECEXP (set, 0, 0);
15793
15794 gcc_assert (GET_CODE (set) == SET);
15795
15796 return memory_address_length (SET_SRC (set));
15797 }
15798
15799 extract_insn_cached (insn);
15800 for (i = recog_data.n_operands - 1; i >= 0; --i)
15801 if (MEM_P (recog_data.operand[i]))
15802 {
15803 return memory_address_length (XEXP (recog_data.operand[i], 0));
15804 break;
15805 }
15806 return 0;
15807 }
15808 \f
15809 /* Return the maximum number of instructions a cpu can issue. */
15810
15811 static int
15812 ix86_issue_rate (void)
15813 {
15814 switch (ix86_tune)
15815 {
15816 case PROCESSOR_PENTIUM:
15817 case PROCESSOR_K6:
15818 return 2;
15819
15820 case PROCESSOR_PENTIUMPRO:
15821 case PROCESSOR_PENTIUM4:
15822 case PROCESSOR_ATHLON:
15823 case PROCESSOR_K8:
15824 case PROCESSOR_AMDFAM10:
15825 case PROCESSOR_NOCONA:
15826 case PROCESSOR_GENERIC32:
15827 case PROCESSOR_GENERIC64:
15828 return 3;
15829
15830 case PROCESSOR_CORE2:
15831 return 4;
15832
15833 default:
15834 return 1;
15835 }
15836 }
15837
15838 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
15839 by DEP_INSN and nothing else set by DEP_INSN. */
15840
15841 static int
15842 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
15843 {
15844 rtx set, set2;
15845
15846 /* Simplify the test for uninteresting insns. */
15847 if (insn_type != TYPE_SETCC
15848 && insn_type != TYPE_ICMOV
15849 && insn_type != TYPE_FCMOV
15850 && insn_type != TYPE_IBR)
15851 return 0;
15852
15853 if ((set = single_set (dep_insn)) != 0)
15854 {
15855 set = SET_DEST (set);
15856 set2 = NULL_RTX;
15857 }
15858 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
15859 && XVECLEN (PATTERN (dep_insn), 0) == 2
15860 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
15861 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
15862 {
15863 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
15864 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
15865 }
15866 else
15867 return 0;
15868
15869 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
15870 return 0;
15871
15872 /* This test is true if the dependent insn reads the flags but
15873 not any other potentially set register. */
15874 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
15875 return 0;
15876
15877 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
15878 return 0;
15879
15880 return 1;
15881 }
15882
15883 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
15884 address with operands set by DEP_INSN. */
15885
15886 static int
15887 ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
15888 {
15889 rtx addr;
15890
15891 if (insn_type == TYPE_LEA
15892 && TARGET_PENTIUM)
15893 {
15894 addr = PATTERN (insn);
15895
15896 if (GET_CODE (addr) == PARALLEL)
15897 addr = XVECEXP (addr, 0, 0);
15898
15899 gcc_assert (GET_CODE (addr) == SET);
15900
15901 addr = SET_SRC (addr);
15902 }
15903 else
15904 {
15905 int i;
15906 extract_insn_cached (insn);
15907 for (i = recog_data.n_operands - 1; i >= 0; --i)
15908 if (MEM_P (recog_data.operand[i]))
15909 {
15910 addr = XEXP (recog_data.operand[i], 0);
15911 goto found;
15912 }
15913 return 0;
15914 found:;
15915 }
15916
15917 return modified_in_p (addr, dep_insn);
15918 }
15919
15920 static int
15921 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
15922 {
15923 enum attr_type insn_type, dep_insn_type;
15924 enum attr_memory memory;
15925 rtx set, set2;
15926 int dep_insn_code_number;
15927
15928 /* Anti and output dependencies have zero cost on all CPUs. */
15929 if (REG_NOTE_KIND (link) != 0)
15930 return 0;
15931
15932 dep_insn_code_number = recog_memoized (dep_insn);
15933
15934 /* If we can't recognize the insns, we can't really do anything. */
15935 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
15936 return cost;
15937
15938 insn_type = get_attr_type (insn);
15939 dep_insn_type = get_attr_type (dep_insn);
15940
15941 switch (ix86_tune)
15942 {
15943 case PROCESSOR_PENTIUM:
15944 /* Address Generation Interlock adds a cycle of latency. */
15945 if (ix86_agi_dependent (insn, dep_insn, insn_type))
15946 cost += 1;
15947
15948 /* ??? Compares pair with jump/setcc. */
15949 if (ix86_flags_dependent (insn, dep_insn, insn_type))
15950 cost = 0;
15951
15952 /* Floating point stores require value to be ready one cycle earlier. */
15953 if (insn_type == TYPE_FMOV
15954 && get_attr_memory (insn) == MEMORY_STORE
15955 && !ix86_agi_dependent (insn, dep_insn, insn_type))
15956 cost += 1;
15957 break;
15958
15959 case PROCESSOR_PENTIUMPRO:
15960 memory = get_attr_memory (insn);
15961
15962 /* INT->FP conversion is expensive. */
15963 if (get_attr_fp_int_src (dep_insn))
15964 cost += 5;
15965
15966 /* There is one cycle extra latency between an FP op and a store. */
15967 if (insn_type == TYPE_FMOV
15968 && (set = single_set (dep_insn)) != NULL_RTX
15969 && (set2 = single_set (insn)) != NULL_RTX
15970 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
15971 && MEM_P (SET_DEST (set2)))
15972 cost += 1;
15973
15974 /* Show the ability of the reorder buffer to hide the latency of a load
15975 by executing it in parallel with the previous instruction, in case the
15976 previous instruction is not needed to compute the address. */
15977 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
15978 && !ix86_agi_dependent (insn, dep_insn, insn_type))
15979 {
15980 /* Claim moves to take one cycle, as the core can issue one load
15981 at a time and the next load can start a cycle later. */
15982 if (dep_insn_type == TYPE_IMOV
15983 || dep_insn_type == TYPE_FMOV)
15984 cost = 1;
15985 else if (cost > 1)
15986 cost--;
15987 }
15988 break;
15989
15990 case PROCESSOR_K6:
15991 memory = get_attr_memory (insn);
15992
15993 /* The esp dependency is resolved before the instruction is really
15994 finished. */
15995 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
15996 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
15997 return 1;
15998
15999 /* INT->FP conversion is expensive. */
16000 if (get_attr_fp_int_src (dep_insn))
16001 cost += 5;
16002
16003 /* Show the ability of the reorder buffer to hide the latency of a load
16004 by executing it in parallel with the previous instruction, in case the
16005 previous instruction is not needed to compute the address. */
16006 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
16007 && !ix86_agi_dependent (insn, dep_insn, insn_type))
16008 {
16009 /* Claim moves to take one cycle, as the core can issue one load
16010 at a time and the next load can start a cycle later. */
16011 if (dep_insn_type == TYPE_IMOV
16012 || dep_insn_type == TYPE_FMOV)
16013 cost = 1;
16014 else if (cost > 2)
16015 cost -= 2;
16016 else
16017 cost = 1;
16018 }
16019 break;
16020
16021 case PROCESSOR_ATHLON:
16022 case PROCESSOR_K8:
16023 case PROCESSOR_AMDFAM10:
16024 case PROCESSOR_GENERIC32:
16025 case PROCESSOR_GENERIC64:
16026 memory = get_attr_memory (insn);
16027
16028 /* Show the ability of the reorder buffer to hide the latency of a load
16029 by executing it in parallel with the previous instruction, in case the
16030 previous instruction is not needed to compute the address. */
16031 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
16032 && !ix86_agi_dependent (insn, dep_insn, insn_type))
16033 {
16034 enum attr_unit unit = get_attr_unit (insn);
16035 int loadcost = 3;
16036
16037 /* Because of the difference between the length of integer and
16038 floating unit pipeline preparation stages, the memory operands
16039 for floating point are cheaper.
16040
16041 ??? For Athlon the difference is most probably 2. */
16042 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
16043 loadcost = 3;
16044 else
16045 loadcost = TARGET_ATHLON ? 2 : 0;
16046
16047 if (cost >= loadcost)
16048 cost -= loadcost;
16049 else
16050 cost = 0;
16051 }
16052
16053 default:
16054 break;
16055 }
16056
16057 return cost;
16058 }
16059
16060 /* How many alternative schedules to try. This should be as wide as the
16061 scheduling freedom in the DFA, but no wider. Making this value too
16062 large results in extra work for the scheduler. */
16063
16064 static int
16065 ia32_multipass_dfa_lookahead (void)
16066 {
16067 if (ix86_tune == PROCESSOR_PENTIUM)
16068 return 2;
16069
16070 if (ix86_tune == PROCESSOR_PENTIUMPRO
16071 || ix86_tune == PROCESSOR_K6)
16072 return 1;
16073
16074 else
16075 return 0;
16076 }
16077
16078 \f
16079 /* Compute the alignment given to a constant that is being placed in memory.
16080 EXP is the constant and ALIGN is the alignment that the object would
16081 ordinarily have.
16082 The value of this function is used instead of that alignment to align
16083 the object. */
16084
16085 int
16086 ix86_constant_alignment (tree exp, int align)
16087 {
16088 if (TREE_CODE (exp) == REAL_CST)
16089 {
16090 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
16091 return 64;
16092 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
16093 return 128;
16094 }
16095 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
16096 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
16097 return BITS_PER_WORD;
16098
16099 return align;
16100 }
16101
16102 /* Compute the alignment for a static variable.
16103 TYPE is the data type, and ALIGN is the alignment that
16104 the object would ordinarily have. The value of this function is used
16105 instead of that alignment to align the object. */
16106
16107 int
16108 ix86_data_alignment (tree type, int align)
16109 {
16110 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
16111
16112 if (AGGREGATE_TYPE_P (type)
16113 && TYPE_SIZE (type)
16114 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
16115 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
16116 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
16117 && align < max_align)
16118 align = max_align;
16119
16120 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
16121 to a 16-byte boundary. */
16122 if (TARGET_64BIT)
16123 {
16124 if (AGGREGATE_TYPE_P (type)
16125 && TYPE_SIZE (type)
16126 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
16127 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
16128 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
16129 return 128;
16130 }
16131
16132 if (TREE_CODE (type) == ARRAY_TYPE)
16133 {
16134 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
16135 return 64;
16136 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
16137 return 128;
16138 }
16139 else if (TREE_CODE (type) == COMPLEX_TYPE)
16140 {
16141
16142 if (TYPE_MODE (type) == DCmode && align < 64)
16143 return 64;
16144 if (TYPE_MODE (type) == XCmode && align < 128)
16145 return 128;
16146 }
16147 else if ((TREE_CODE (type) == RECORD_TYPE
16148 || TREE_CODE (type) == UNION_TYPE
16149 || TREE_CODE (type) == QUAL_UNION_TYPE)
16150 && TYPE_FIELDS (type))
16151 {
16152 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
16153 return 64;
16154 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
16155 return 128;
16156 }
16157 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
16158 || TREE_CODE (type) == INTEGER_TYPE)
16159 {
16160 if (TYPE_MODE (type) == DFmode && align < 64)
16161 return 64;
16162 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
16163 return 128;
16164 }
16165
16166 return align;
16167 }
16168
16169 /* Compute the alignment for a local variable.
16170 TYPE is the data type, and ALIGN is the alignment that
16171 the object would ordinarily have. The value of this macro is used
16172 instead of that alignment to align the object. */
16173
16174 int
16175 ix86_local_alignment (tree type, int align)
16176 {
16177 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
16178 to a 16-byte boundary. */
16179 if (TARGET_64BIT)
16180 {
16181 if (AGGREGATE_TYPE_P (type)
16182 && TYPE_SIZE (type)
16183 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
16184 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
16185 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
16186 return 128;
16187 }
16188 if (TREE_CODE (type) == ARRAY_TYPE)
16189 {
16190 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
16191 return 64;
16192 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
16193 return 128;
16194 }
16195 else if (TREE_CODE (type) == COMPLEX_TYPE)
16196 {
16197 if (TYPE_MODE (type) == DCmode && align < 64)
16198 return 64;
16199 if (TYPE_MODE (type) == XCmode && align < 128)
16200 return 128;
16201 }
16202 else if ((TREE_CODE (type) == RECORD_TYPE
16203 || TREE_CODE (type) == UNION_TYPE
16204 || TREE_CODE (type) == QUAL_UNION_TYPE)
16205 && TYPE_FIELDS (type))
16206 {
16207 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
16208 return 64;
16209 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
16210 return 128;
16211 }
16212 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
16213 || TREE_CODE (type) == INTEGER_TYPE)
16214 {
16215
16216 if (TYPE_MODE (type) == DFmode && align < 64)
16217 return 64;
16218 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
16219 return 128;
16220 }
16221 return align;
16222 }
16223 \f
16224 /* Emit RTL insns to initialize the variable parts of a trampoline.
16225 FNADDR is an RTX for the address of the function's pure code.
16226 CXT is an RTX for the static chain value for the function. */
16227 void
16228 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
16229 {
16230 if (!TARGET_64BIT)
16231 {
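/* The moves below store two instructions into the trampoline:
     b9 <imm32>   movl $CXT, %ecx
     e9 <rel32>   jmp  FNADDR
   The jmp ends 10 bytes into the trampoline, hence the
   plus_constant (tramp, 10) in the displacement computation.  */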
16232 /* Compute offset from the end of the jmp to the target function. */
16233 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
16234 plus_constant (tramp, 10),
16235 NULL_RTX, 1, OPTAB_DIRECT);
16236 emit_move_insn (gen_rtx_MEM (QImode, tramp),
16237 gen_int_mode (0xb9, QImode));
16238 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
16239 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
16240 gen_int_mode (0xe9, QImode));
16241 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
16242 }
16243 else
16244 {
16245 int offset = 0;
16246 /* Try to load address using shorter movl instead of movabs.
16247 We may want to support movq for kernel mode, but kernel does not use
16248 trampolines at the moment. */
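/* The constants emitted below are stored little-endian, so 0xbb49
   becomes the bytes 49 bb and the trampoline assembles roughly to:
     41 bb <imm32>    movl   $FNADDR, %r11d  (short form, when the
                                              address fits in 32 bits)
     or 49 bb <imm64> movabs $FNADDR, %r11   (long form)
     49 ba <imm64>    movabs $CXT, %r10
     49 ff e3         jmp    *%r11  */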
16249 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
16250 {
16251 fnaddr = copy_to_mode_reg (DImode, fnaddr);
16252 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
16253 gen_int_mode (0xbb41, HImode));
16254 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
16255 gen_lowpart (SImode, fnaddr));
16256 offset += 6;
16257 }
16258 else
16259 {
16260 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
16261 gen_int_mode (0xbb49, HImode));
16262 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
16263 fnaddr);
16264 offset += 10;
16265 }
16266 /* Load static chain using movabs to r10. */
16267 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
16268 gen_int_mode (0xba49, HImode));
16269 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
16270 cxt);
16271 offset += 10;
16272 /* Jump to r11. */
16273 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
16274 gen_int_mode (0xff49, HImode));
16275 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
16276 gen_int_mode (0xe3, QImode));
16277 offset += 3;
16278 gcc_assert (offset <= TRAMPOLINE_SIZE);
16279 }
16280
16281 #ifdef ENABLE_EXECUTE_STACK
16282 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
16283 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
16284 #endif
16285 }
16286 \f
16287 /* Codes for all the SSE/MMX builtins. */
16288 enum ix86_builtins
16289 {
16290 IX86_BUILTIN_ADDPS,
16291 IX86_BUILTIN_ADDSS,
16292 IX86_BUILTIN_DIVPS,
16293 IX86_BUILTIN_DIVSS,
16294 IX86_BUILTIN_MULPS,
16295 IX86_BUILTIN_MULSS,
16296 IX86_BUILTIN_SUBPS,
16297 IX86_BUILTIN_SUBSS,
16298
16299 IX86_BUILTIN_CMPEQPS,
16300 IX86_BUILTIN_CMPLTPS,
16301 IX86_BUILTIN_CMPLEPS,
16302 IX86_BUILTIN_CMPGTPS,
16303 IX86_BUILTIN_CMPGEPS,
16304 IX86_BUILTIN_CMPNEQPS,
16305 IX86_BUILTIN_CMPNLTPS,
16306 IX86_BUILTIN_CMPNLEPS,
16307 IX86_BUILTIN_CMPNGTPS,
16308 IX86_BUILTIN_CMPNGEPS,
16309 IX86_BUILTIN_CMPORDPS,
16310 IX86_BUILTIN_CMPUNORDPS,
16311 IX86_BUILTIN_CMPEQSS,
16312 IX86_BUILTIN_CMPLTSS,
16313 IX86_BUILTIN_CMPLESS,
16314 IX86_BUILTIN_CMPNEQSS,
16315 IX86_BUILTIN_CMPNLTSS,
16316 IX86_BUILTIN_CMPNLESS,
16317 IX86_BUILTIN_CMPNGTSS,
16318 IX86_BUILTIN_CMPNGESS,
16319 IX86_BUILTIN_CMPORDSS,
16320 IX86_BUILTIN_CMPUNORDSS,
16321
16322 IX86_BUILTIN_COMIEQSS,
16323 IX86_BUILTIN_COMILTSS,
16324 IX86_BUILTIN_COMILESS,
16325 IX86_BUILTIN_COMIGTSS,
16326 IX86_BUILTIN_COMIGESS,
16327 IX86_BUILTIN_COMINEQSS,
16328 IX86_BUILTIN_UCOMIEQSS,
16329 IX86_BUILTIN_UCOMILTSS,
16330 IX86_BUILTIN_UCOMILESS,
16331 IX86_BUILTIN_UCOMIGTSS,
16332 IX86_BUILTIN_UCOMIGESS,
16333 IX86_BUILTIN_UCOMINEQSS,
16334
16335 IX86_BUILTIN_CVTPI2PS,
16336 IX86_BUILTIN_CVTPS2PI,
16337 IX86_BUILTIN_CVTSI2SS,
16338 IX86_BUILTIN_CVTSI642SS,
16339 IX86_BUILTIN_CVTSS2SI,
16340 IX86_BUILTIN_CVTSS2SI64,
16341 IX86_BUILTIN_CVTTPS2PI,
16342 IX86_BUILTIN_CVTTSS2SI,
16343 IX86_BUILTIN_CVTTSS2SI64,
16344
16345 IX86_BUILTIN_MAXPS,
16346 IX86_BUILTIN_MAXSS,
16347 IX86_BUILTIN_MINPS,
16348 IX86_BUILTIN_MINSS,
16349
16350 IX86_BUILTIN_LOADUPS,
16351 IX86_BUILTIN_STOREUPS,
16352 IX86_BUILTIN_MOVSS,
16353
16354 IX86_BUILTIN_MOVHLPS,
16355 IX86_BUILTIN_MOVLHPS,
16356 IX86_BUILTIN_LOADHPS,
16357 IX86_BUILTIN_LOADLPS,
16358 IX86_BUILTIN_STOREHPS,
16359 IX86_BUILTIN_STORELPS,
16360
16361 IX86_BUILTIN_MASKMOVQ,
16362 IX86_BUILTIN_MOVMSKPS,
16363 IX86_BUILTIN_PMOVMSKB,
16364
16365 IX86_BUILTIN_MOVNTPS,
16366 IX86_BUILTIN_MOVNTQ,
16367
16368 IX86_BUILTIN_LOADDQU,
16369 IX86_BUILTIN_STOREDQU,
16370
16371 IX86_BUILTIN_PACKSSWB,
16372 IX86_BUILTIN_PACKSSDW,
16373 IX86_BUILTIN_PACKUSWB,
16374
16375 IX86_BUILTIN_PADDB,
16376 IX86_BUILTIN_PADDW,
16377 IX86_BUILTIN_PADDD,
16378 IX86_BUILTIN_PADDQ,
16379 IX86_BUILTIN_PADDSB,
16380 IX86_BUILTIN_PADDSW,
16381 IX86_BUILTIN_PADDUSB,
16382 IX86_BUILTIN_PADDUSW,
16383 IX86_BUILTIN_PSUBB,
16384 IX86_BUILTIN_PSUBW,
16385 IX86_BUILTIN_PSUBD,
16386 IX86_BUILTIN_PSUBQ,
16387 IX86_BUILTIN_PSUBSB,
16388 IX86_BUILTIN_PSUBSW,
16389 IX86_BUILTIN_PSUBUSB,
16390 IX86_BUILTIN_PSUBUSW,
16391
16392 IX86_BUILTIN_PAND,
16393 IX86_BUILTIN_PANDN,
16394 IX86_BUILTIN_POR,
16395 IX86_BUILTIN_PXOR,
16396
16397 IX86_BUILTIN_PAVGB,
16398 IX86_BUILTIN_PAVGW,
16399
16400 IX86_BUILTIN_PCMPEQB,
16401 IX86_BUILTIN_PCMPEQW,
16402 IX86_BUILTIN_PCMPEQD,
16403 IX86_BUILTIN_PCMPGTB,
16404 IX86_BUILTIN_PCMPGTW,
16405 IX86_BUILTIN_PCMPGTD,
16406
16407 IX86_BUILTIN_PMADDWD,
16408
16409 IX86_BUILTIN_PMAXSW,
16410 IX86_BUILTIN_PMAXUB,
16411 IX86_BUILTIN_PMINSW,
16412 IX86_BUILTIN_PMINUB,
16413
16414 IX86_BUILTIN_PMULHUW,
16415 IX86_BUILTIN_PMULHW,
16416 IX86_BUILTIN_PMULLW,
16417
16418 IX86_BUILTIN_PSADBW,
16419 IX86_BUILTIN_PSHUFW,
16420
16421 IX86_BUILTIN_PSLLW,
16422 IX86_BUILTIN_PSLLD,
16423 IX86_BUILTIN_PSLLQ,
16424 IX86_BUILTIN_PSRAW,
16425 IX86_BUILTIN_PSRAD,
16426 IX86_BUILTIN_PSRLW,
16427 IX86_BUILTIN_PSRLD,
16428 IX86_BUILTIN_PSRLQ,
16429 IX86_BUILTIN_PSLLWI,
16430 IX86_BUILTIN_PSLLDI,
16431 IX86_BUILTIN_PSLLQI,
16432 IX86_BUILTIN_PSRAWI,
16433 IX86_BUILTIN_PSRADI,
16434 IX86_BUILTIN_PSRLWI,
16435 IX86_BUILTIN_PSRLDI,
16436 IX86_BUILTIN_PSRLQI,
16437
16438 IX86_BUILTIN_PUNPCKHBW,
16439 IX86_BUILTIN_PUNPCKHWD,
16440 IX86_BUILTIN_PUNPCKHDQ,
16441 IX86_BUILTIN_PUNPCKLBW,
16442 IX86_BUILTIN_PUNPCKLWD,
16443 IX86_BUILTIN_PUNPCKLDQ,
16444
16445 IX86_BUILTIN_SHUFPS,
16446
16447 IX86_BUILTIN_RCPPS,
16448 IX86_BUILTIN_RCPSS,
16449 IX86_BUILTIN_RSQRTPS,
16450 IX86_BUILTIN_RSQRTSS,
16451 IX86_BUILTIN_SQRTPS,
16452 IX86_BUILTIN_SQRTSS,
16453
16454 IX86_BUILTIN_UNPCKHPS,
16455 IX86_BUILTIN_UNPCKLPS,
16456
16457 IX86_BUILTIN_ANDPS,
16458 IX86_BUILTIN_ANDNPS,
16459 IX86_BUILTIN_ORPS,
16460 IX86_BUILTIN_XORPS,
16461
16462 IX86_BUILTIN_EMMS,
16463 IX86_BUILTIN_LDMXCSR,
16464 IX86_BUILTIN_STMXCSR,
16465 IX86_BUILTIN_SFENCE,
16466
16467 /* 3DNow! Original */
16468 IX86_BUILTIN_FEMMS,
16469 IX86_BUILTIN_PAVGUSB,
16470 IX86_BUILTIN_PF2ID,
16471 IX86_BUILTIN_PFACC,
16472 IX86_BUILTIN_PFADD,
16473 IX86_BUILTIN_PFCMPEQ,
16474 IX86_BUILTIN_PFCMPGE,
16475 IX86_BUILTIN_PFCMPGT,
16476 IX86_BUILTIN_PFMAX,
16477 IX86_BUILTIN_PFMIN,
16478 IX86_BUILTIN_PFMUL,
16479 IX86_BUILTIN_PFRCP,
16480 IX86_BUILTIN_PFRCPIT1,
16481 IX86_BUILTIN_PFRCPIT2,
16482 IX86_BUILTIN_PFRSQIT1,
16483 IX86_BUILTIN_PFRSQRT,
16484 IX86_BUILTIN_PFSUB,
16485 IX86_BUILTIN_PFSUBR,
16486 IX86_BUILTIN_PI2FD,
16487 IX86_BUILTIN_PMULHRW,
16488
16489 /* 3DNow! Athlon Extensions */
16490 IX86_BUILTIN_PF2IW,
16491 IX86_BUILTIN_PFNACC,
16492 IX86_BUILTIN_PFPNACC,
16493 IX86_BUILTIN_PI2FW,
16494 IX86_BUILTIN_PSWAPDSI,
16495 IX86_BUILTIN_PSWAPDSF,
16496
16497 /* SSE2 */
16498 IX86_BUILTIN_ADDPD,
16499 IX86_BUILTIN_ADDSD,
16500 IX86_BUILTIN_DIVPD,
16501 IX86_BUILTIN_DIVSD,
16502 IX86_BUILTIN_MULPD,
16503 IX86_BUILTIN_MULSD,
16504 IX86_BUILTIN_SUBPD,
16505 IX86_BUILTIN_SUBSD,
16506
16507 IX86_BUILTIN_CMPEQPD,
16508 IX86_BUILTIN_CMPLTPD,
16509 IX86_BUILTIN_CMPLEPD,
16510 IX86_BUILTIN_CMPGTPD,
16511 IX86_BUILTIN_CMPGEPD,
16512 IX86_BUILTIN_CMPNEQPD,
16513 IX86_BUILTIN_CMPNLTPD,
16514 IX86_BUILTIN_CMPNLEPD,
16515 IX86_BUILTIN_CMPNGTPD,
16516 IX86_BUILTIN_CMPNGEPD,
16517 IX86_BUILTIN_CMPORDPD,
16518 IX86_BUILTIN_CMPUNORDPD,
16519 IX86_BUILTIN_CMPEQSD,
16520 IX86_BUILTIN_CMPLTSD,
16521 IX86_BUILTIN_CMPLESD,
16522 IX86_BUILTIN_CMPNEQSD,
16523 IX86_BUILTIN_CMPNLTSD,
16524 IX86_BUILTIN_CMPNLESD,
16525 IX86_BUILTIN_CMPORDSD,
16526 IX86_BUILTIN_CMPUNORDSD,
16527
16528 IX86_BUILTIN_COMIEQSD,
16529 IX86_BUILTIN_COMILTSD,
16530 IX86_BUILTIN_COMILESD,
16531 IX86_BUILTIN_COMIGTSD,
16532 IX86_BUILTIN_COMIGESD,
16533 IX86_BUILTIN_COMINEQSD,
16534 IX86_BUILTIN_UCOMIEQSD,
16535 IX86_BUILTIN_UCOMILTSD,
16536 IX86_BUILTIN_UCOMILESD,
16537 IX86_BUILTIN_UCOMIGTSD,
16538 IX86_BUILTIN_UCOMIGESD,
16539 IX86_BUILTIN_UCOMINEQSD,
16540
16541 IX86_BUILTIN_MAXPD,
16542 IX86_BUILTIN_MAXSD,
16543 IX86_BUILTIN_MINPD,
16544 IX86_BUILTIN_MINSD,
16545
16546 IX86_BUILTIN_ANDPD,
16547 IX86_BUILTIN_ANDNPD,
16548 IX86_BUILTIN_ORPD,
16549 IX86_BUILTIN_XORPD,
16550
16551 IX86_BUILTIN_SQRTPD,
16552 IX86_BUILTIN_SQRTSD,
16553
16554 IX86_BUILTIN_UNPCKHPD,
16555 IX86_BUILTIN_UNPCKLPD,
16556
16557 IX86_BUILTIN_SHUFPD,
16558
16559 IX86_BUILTIN_LOADUPD,
16560 IX86_BUILTIN_STOREUPD,
16561 IX86_BUILTIN_MOVSD,
16562
16563 IX86_BUILTIN_LOADHPD,
16564 IX86_BUILTIN_LOADLPD,
16565
16566 IX86_BUILTIN_CVTDQ2PD,
16567 IX86_BUILTIN_CVTDQ2PS,
16568
16569 IX86_BUILTIN_CVTPD2DQ,
16570 IX86_BUILTIN_CVTPD2PI,
16571 IX86_BUILTIN_CVTPD2PS,
16572 IX86_BUILTIN_CVTTPD2DQ,
16573 IX86_BUILTIN_CVTTPD2PI,
16574
16575 IX86_BUILTIN_CVTPI2PD,
16576 IX86_BUILTIN_CVTSI2SD,
16577 IX86_BUILTIN_CVTSI642SD,
16578
16579 IX86_BUILTIN_CVTSD2SI,
16580 IX86_BUILTIN_CVTSD2SI64,
16581 IX86_BUILTIN_CVTSD2SS,
16582 IX86_BUILTIN_CVTSS2SD,
16583 IX86_BUILTIN_CVTTSD2SI,
16584 IX86_BUILTIN_CVTTSD2SI64,
16585
16586 IX86_BUILTIN_CVTPS2DQ,
16587 IX86_BUILTIN_CVTPS2PD,
16588 IX86_BUILTIN_CVTTPS2DQ,
16589
16590 IX86_BUILTIN_MOVNTI,
16591 IX86_BUILTIN_MOVNTPD,
16592 IX86_BUILTIN_MOVNTDQ,
16593
16594 /* SSE2 MMX */
16595 IX86_BUILTIN_MASKMOVDQU,
16596 IX86_BUILTIN_MOVMSKPD,
16597 IX86_BUILTIN_PMOVMSKB128,
16598
16599 IX86_BUILTIN_PACKSSWB128,
16600 IX86_BUILTIN_PACKSSDW128,
16601 IX86_BUILTIN_PACKUSWB128,
16602
16603 IX86_BUILTIN_PADDB128,
16604 IX86_BUILTIN_PADDW128,
16605 IX86_BUILTIN_PADDD128,
16606 IX86_BUILTIN_PADDQ128,
16607 IX86_BUILTIN_PADDSB128,
16608 IX86_BUILTIN_PADDSW128,
16609 IX86_BUILTIN_PADDUSB128,
16610 IX86_BUILTIN_PADDUSW128,
16611 IX86_BUILTIN_PSUBB128,
16612 IX86_BUILTIN_PSUBW128,
16613 IX86_BUILTIN_PSUBD128,
16614 IX86_BUILTIN_PSUBQ128,
16615 IX86_BUILTIN_PSUBSB128,
16616 IX86_BUILTIN_PSUBSW128,
16617 IX86_BUILTIN_PSUBUSB128,
16618 IX86_BUILTIN_PSUBUSW128,
16619
16620 IX86_BUILTIN_PAND128,
16621 IX86_BUILTIN_PANDN128,
16622 IX86_BUILTIN_POR128,
16623 IX86_BUILTIN_PXOR128,
16624
16625 IX86_BUILTIN_PAVGB128,
16626 IX86_BUILTIN_PAVGW128,
16627
16628 IX86_BUILTIN_PCMPEQB128,
16629 IX86_BUILTIN_PCMPEQW128,
16630 IX86_BUILTIN_PCMPEQD128,
16631 IX86_BUILTIN_PCMPGTB128,
16632 IX86_BUILTIN_PCMPGTW128,
16633 IX86_BUILTIN_PCMPGTD128,
16634
16635 IX86_BUILTIN_PMADDWD128,
16636
16637 IX86_BUILTIN_PMAXSW128,
16638 IX86_BUILTIN_PMAXUB128,
16639 IX86_BUILTIN_PMINSW128,
16640 IX86_BUILTIN_PMINUB128,
16641
16642 IX86_BUILTIN_PMULUDQ,
16643 IX86_BUILTIN_PMULUDQ128,
16644 IX86_BUILTIN_PMULHUW128,
16645 IX86_BUILTIN_PMULHW128,
16646 IX86_BUILTIN_PMULLW128,
16647
16648 IX86_BUILTIN_PSADBW128,
16649 IX86_BUILTIN_PSHUFHW,
16650 IX86_BUILTIN_PSHUFLW,
16651 IX86_BUILTIN_PSHUFD,
16652
16653 IX86_BUILTIN_PSLLDQI128,
16654 IX86_BUILTIN_PSLLWI128,
16655 IX86_BUILTIN_PSLLDI128,
16656 IX86_BUILTIN_PSLLQI128,
16657 IX86_BUILTIN_PSRAWI128,
16658 IX86_BUILTIN_PSRADI128,
16659 IX86_BUILTIN_PSRLDQI128,
16660 IX86_BUILTIN_PSRLWI128,
16661 IX86_BUILTIN_PSRLDI128,
16662 IX86_BUILTIN_PSRLQI128,
16663
16664 IX86_BUILTIN_PSLLDQ128,
16665 IX86_BUILTIN_PSLLW128,
16666 IX86_BUILTIN_PSLLD128,
16667 IX86_BUILTIN_PSLLQ128,
16668 IX86_BUILTIN_PSRAW128,
16669 IX86_BUILTIN_PSRAD128,
16670 IX86_BUILTIN_PSRLW128,
16671 IX86_BUILTIN_PSRLD128,
16672 IX86_BUILTIN_PSRLQ128,
16673
16674 IX86_BUILTIN_PUNPCKHBW128,
16675 IX86_BUILTIN_PUNPCKHWD128,
16676 IX86_BUILTIN_PUNPCKHDQ128,
16677 IX86_BUILTIN_PUNPCKHQDQ128,
16678 IX86_BUILTIN_PUNPCKLBW128,
16679 IX86_BUILTIN_PUNPCKLWD128,
16680 IX86_BUILTIN_PUNPCKLDQ128,
16681 IX86_BUILTIN_PUNPCKLQDQ128,
16682
16683 IX86_BUILTIN_CLFLUSH,
16684 IX86_BUILTIN_MFENCE,
16685 IX86_BUILTIN_LFENCE,
16686
16687 /* Prescott New Instructions (SSE3). */
16688 IX86_BUILTIN_ADDSUBPS,
16689 IX86_BUILTIN_HADDPS,
16690 IX86_BUILTIN_HSUBPS,
16691 IX86_BUILTIN_MOVSHDUP,
16692 IX86_BUILTIN_MOVSLDUP,
16693 IX86_BUILTIN_ADDSUBPD,
16694 IX86_BUILTIN_HADDPD,
16695 IX86_BUILTIN_HSUBPD,
16696 IX86_BUILTIN_LDDQU,
16697
16698 IX86_BUILTIN_MONITOR,
16699 IX86_BUILTIN_MWAIT,
16700
16701 /* SSSE3. */
16702 IX86_BUILTIN_PHADDW,
16703 IX86_BUILTIN_PHADDD,
16704 IX86_BUILTIN_PHADDSW,
16705 IX86_BUILTIN_PHSUBW,
16706 IX86_BUILTIN_PHSUBD,
16707 IX86_BUILTIN_PHSUBSW,
16708 IX86_BUILTIN_PMADDUBSW,
16709 IX86_BUILTIN_PMULHRSW,
16710 IX86_BUILTIN_PSHUFB,
16711 IX86_BUILTIN_PSIGNB,
16712 IX86_BUILTIN_PSIGNW,
16713 IX86_BUILTIN_PSIGND,
16714 IX86_BUILTIN_PALIGNR,
16715 IX86_BUILTIN_PABSB,
16716 IX86_BUILTIN_PABSW,
16717 IX86_BUILTIN_PABSD,
16718
16719 IX86_BUILTIN_PHADDW128,
16720 IX86_BUILTIN_PHADDD128,
16721 IX86_BUILTIN_PHADDSW128,
16722 IX86_BUILTIN_PHSUBW128,
16723 IX86_BUILTIN_PHSUBD128,
16724 IX86_BUILTIN_PHSUBSW128,
16725 IX86_BUILTIN_PMADDUBSW128,
16726 IX86_BUILTIN_PMULHRSW128,
16727 IX86_BUILTIN_PSHUFB128,
16728 IX86_BUILTIN_PSIGNB128,
16729 IX86_BUILTIN_PSIGNW128,
16730 IX86_BUILTIN_PSIGND128,
16731 IX86_BUILTIN_PALIGNR128,
16732 IX86_BUILTIN_PABSB128,
16733 IX86_BUILTIN_PABSW128,
16734 IX86_BUILTIN_PABSD128,
16735
16736 /* AMDFAM10 - SSE4A New Instructions. */
16737 IX86_BUILTIN_MOVNTSD,
16738 IX86_BUILTIN_MOVNTSS,
16739 IX86_BUILTIN_EXTRQI,
16740 IX86_BUILTIN_EXTRQ,
16741 IX86_BUILTIN_INSERTQI,
16742 IX86_BUILTIN_INSERTQ,
16743
16744 /* SSE4.1. */
16745 IX86_BUILTIN_BLENDPD,
16746 IX86_BUILTIN_BLENDPS,
16747 IX86_BUILTIN_BLENDVPD,
16748 IX86_BUILTIN_BLENDVPS,
16749 IX86_BUILTIN_PBLENDVB128,
16750 IX86_BUILTIN_PBLENDW128,
16751
16752 IX86_BUILTIN_DPPD,
16753 IX86_BUILTIN_DPPS,
16754
16755 IX86_BUILTIN_INSERTPS128,
16756
16757 IX86_BUILTIN_MOVNTDQA,
16758 IX86_BUILTIN_MPSADBW128,
16759 IX86_BUILTIN_PACKUSDW128,
16760 IX86_BUILTIN_PCMPEQQ,
16761 IX86_BUILTIN_PHMINPOSUW128,
16762
16763 IX86_BUILTIN_PMAXSB128,
16764 IX86_BUILTIN_PMAXSD128,
16765 IX86_BUILTIN_PMAXUD128,
16766 IX86_BUILTIN_PMAXUW128,
16767
16768 IX86_BUILTIN_PMINSB128,
16769 IX86_BUILTIN_PMINSD128,
16770 IX86_BUILTIN_PMINUD128,
16771 IX86_BUILTIN_PMINUW128,
16772
16773 IX86_BUILTIN_PMOVSXBW128,
16774 IX86_BUILTIN_PMOVSXBD128,
16775 IX86_BUILTIN_PMOVSXBQ128,
16776 IX86_BUILTIN_PMOVSXWD128,
16777 IX86_BUILTIN_PMOVSXWQ128,
16778 IX86_BUILTIN_PMOVSXDQ128,
16779
16780 IX86_BUILTIN_PMOVZXBW128,
16781 IX86_BUILTIN_PMOVZXBD128,
16782 IX86_BUILTIN_PMOVZXBQ128,
16783 IX86_BUILTIN_PMOVZXWD128,
16784 IX86_BUILTIN_PMOVZXWQ128,
16785 IX86_BUILTIN_PMOVZXDQ128,
16786
16787 IX86_BUILTIN_PMULDQ128,
16788 IX86_BUILTIN_PMULLD128,
16789
16790 IX86_BUILTIN_ROUNDPD,
16791 IX86_BUILTIN_ROUNDPS,
16792 IX86_BUILTIN_ROUNDSD,
16793 IX86_BUILTIN_ROUNDSS,
16794
16795 IX86_BUILTIN_PTESTZ,
16796 IX86_BUILTIN_PTESTC,
16797 IX86_BUILTIN_PTESTNZC,
16798
16799 IX86_BUILTIN_VEC_INIT_V2SI,
16800 IX86_BUILTIN_VEC_INIT_V4HI,
16801 IX86_BUILTIN_VEC_INIT_V8QI,
16802 IX86_BUILTIN_VEC_EXT_V2DF,
16803 IX86_BUILTIN_VEC_EXT_V2DI,
16804 IX86_BUILTIN_VEC_EXT_V4SF,
16805 IX86_BUILTIN_VEC_EXT_V4SI,
16806 IX86_BUILTIN_VEC_EXT_V8HI,
16807 IX86_BUILTIN_VEC_EXT_V2SI,
16808 IX86_BUILTIN_VEC_EXT_V4HI,
16809 IX86_BUILTIN_VEC_EXT_V16QI,
16810 IX86_BUILTIN_VEC_SET_V2DI,
16811 IX86_BUILTIN_VEC_SET_V4SF,
16812 IX86_BUILTIN_VEC_SET_V4SI,
16813 IX86_BUILTIN_VEC_SET_V8HI,
16814 IX86_BUILTIN_VEC_SET_V4HI,
16815 IX86_BUILTIN_VEC_SET_V16QI,
16816
16817 /* SSE4.2. */
16818 IX86_BUILTIN_CRC32QI,
16819 IX86_BUILTIN_CRC32HI,
16820 IX86_BUILTIN_CRC32SI,
16821 IX86_BUILTIN_CRC32DI,
16822
16823 IX86_BUILTIN_PCMPESTRI128,
16824 IX86_BUILTIN_PCMPESTRM128,
16825 IX86_BUILTIN_PCMPESTRA128,
16826 IX86_BUILTIN_PCMPESTRC128,
16827 IX86_BUILTIN_PCMPESTRO128,
16828 IX86_BUILTIN_PCMPESTRS128,
16829 IX86_BUILTIN_PCMPESTRZ128,
16830 IX86_BUILTIN_PCMPISTRI128,
16831 IX86_BUILTIN_PCMPISTRM128,
16832 IX86_BUILTIN_PCMPISTRA128,
16833 IX86_BUILTIN_PCMPISTRC128,
16834 IX86_BUILTIN_PCMPISTRO128,
16835 IX86_BUILTIN_PCMPISTRS128,
16836 IX86_BUILTIN_PCMPISTRZ128,
16837
16838 IX86_BUILTIN_PCMPGTQ,
16839
16840 /* TFmode support builtins. */
16841 IX86_BUILTIN_INFQ,
16842 IX86_BUILTIN_FABSQ,
16843 IX86_BUILTIN_COPYSIGNQ,
16844
16845 IX86_BUILTIN_MAX
16846 };
16847
16848 /* Table for the ix86 builtin decls. */
16849 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
16850
16851 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Do so
16852  * only if ix86_isa_flags includes one of the bits in MASK. Stores the
16853  * function decl in the ix86_builtins array.
16854  * Returns the function decl, or NULL_TREE if the builtin was not added. */
16855
16856 static inline tree
16857 def_builtin (int mask, const char *name, tree type, enum ix86_builtins code)
16858 {
16859 tree decl = NULL_TREE;
16860
16861 if (mask & ix86_isa_flags
16862 && (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT))
16863 {
16864 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
16865 NULL, NULL_TREE);
16866 ix86_builtins[(int) code] = decl;
16867 }
16868
16869 return decl;
16870 }
16871
16872 /* Like def_builtin, but also marks the function decl "const". */
16873
16874 static inline tree
16875 def_builtin_const (int mask, const char *name, tree type,
16876 enum ix86_builtins code)
16877 {
16878 tree decl = def_builtin (mask, name, type, code);
16879 if (decl)
16880 TREE_READONLY (decl) = 1;
16881 return decl;
16882 }
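
/* Illustrative sketch, added for this edit (not part of the original
   file): how a single builtin would be registered through the helpers
   above.  The real registrations happen in ix86_init_mmx_sse_builtins
   below, mostly by looping over the bdesc_* tables; the function name
   here is hypothetical, the calls are the real APIs used later on.  */

static void
example_register_addps (void)
{
  /* Build the V4SF (V4SF, V4SF) function type, exactly as done later
     in this file.  */
  tree V4SF_type_node
    = build_vector_type_for_mode (float_type_node, V4SFmode);
  tree v4sf_ftype_v4sf_v4sf
    = build_function_type_list (V4SF_type_node,
                                V4SF_type_node, V4SF_type_node, NULL_TREE);

  /* Create __builtin_ia32_addps, guarded by -msse, and mark it const
     since it has no side effects.  */
  def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_addps",
                     v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ADDPS);
}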
16883
16884 /* Bits for builtin_description.flag. */
16885
16886 /* Set when we don't support the comparison natively; the operands of
16887    the comparison must be swapped in order to support it. */
16888 #define BUILTIN_DESC_SWAP_OPERANDS 1
16889
16890 struct builtin_description
16891 {
16892 const unsigned int mask;
16893 const enum insn_code icode;
16894 const char *const name;
16895 const enum ix86_builtins code;
16896 const enum rtx_code comparison;
16897 const int flag;
16898 };
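
/* Editorial example, added for clarity (not in the original source):
   the COMPARISON and FLAG fields drive the comparison expanders.  The
   bdesc_2arg entry below, repeated from later in this file, implements
   __builtin_ia32_cmpgtps as LT with BUILTIN_DESC_SWAP_OPERANDS, i.e.
   "a > b" is emitted as "b < a", since cmpps has no native GT/GE
   predicates:

     { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3,
       "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS,
       LT, BUILTIN_DESC_SWAP_OPERANDS },  */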
16899
16900 static const struct builtin_description bdesc_comi[] =
16901 {
16902 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
16903 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
16904 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
16905 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
16906 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
16907 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
16908 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
16909 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
16910 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
16911 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
16912 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
16913 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
16914 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
16915 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
16916 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
16917 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
16918 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
16919 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
16920 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
16921 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
16922 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
16923 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
16924 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
16925 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
16926 };
16927
16928 static const struct builtin_description bdesc_ptest[] =
16929 {
16930 /* SSE4.1 */
16931 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, 0 },
16932 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, 0 },
16933 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, 0 },
16934 };
16935
16936 static const struct builtin_description bdesc_pcmpestr[] =
16937 {
16938 /* SSE4.2 */
16939 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
16940 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
16941 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
16942 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
16943 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
16944 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
16945 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
16946 };
16947
16948 static const struct builtin_description bdesc_pcmpistr[] =
16949 {
16950 /* SSE4.2 */
16951 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
16952 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
16953 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
16954 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
16955 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
16956 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
16957 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
16958 };
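
/* Note added for clarity (best-effort reading; verify against the
   pcmpestr/pcmpistr expanders later in this file): for the ...a, ...c,
   ...o, ...s and ...z variants above, the FLAG field does not hold
   BUILTIN_DESC_SWAP_OPERANDS but a CC mode (CCAmode, CCCmode, ...),
   which the expander uses to read the corresponding condition out of
   FLAGS_REG so that the builtin returns that single flag as an int.  */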
16959
16960 static const struct builtin_description bdesc_crc32[] =
16961 {
16962 /* SSE4.2 */
16963 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32qi, 0, IX86_BUILTIN_CRC32QI, UNKNOWN, 0 },
16964 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32hi, 0, IX86_BUILTIN_CRC32HI, UNKNOWN, 0 },
16965 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32si, 0, IX86_BUILTIN_CRC32SI, UNKNOWN, 0 },
16966 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, 0, IX86_BUILTIN_CRC32DI, UNKNOWN, 0 },
16967 };
16968
16969 /* SSE builtins with 3 arguments whose last argument must be an immediate or xmm0; an illustrative use follows the table below. */
16970 static const struct builtin_description bdesc_sse_3arg[] =
16971 {
16972 /* SSE4.1 */
16973 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, 0 },
16974 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, 0 },
16975 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, 0 },
16976 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, 0 },
16977 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, 0 },
16978 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, 0 },
16979 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, 0 },
16980 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, 0 },
16981 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, 0 },
16982 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, 0 },
16983 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_roundsd, 0, IX86_BUILTIN_ROUNDSD, UNKNOWN, 0 },
16984 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_roundss, 0, IX86_BUILTIN_ROUNDSS, UNKNOWN, 0 },
16985 };
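
/* Illustrative user-level sketch, added for this edit (not part of the
   original file): these builtins are normally reached through the
   SSE4.1 intrinsics in smmintrin.h, and the final operand must fold to
   a compile-time constant (or be forced into xmm0 for the blendv
   forms).  For example, _mm_blend_pd expands to __builtin_ia32_blendpd:

     #include <smmintrin.h>

     __m128d
     blend_lo_hi (__m128d a, __m128d b)
     {
       return _mm_blend_pd (a, b, 0x2);
     }

   With mask 0x2 the result is { a[0], b[1] }.  A mask that is not a
   compile-time constant is rejected when the builtin is expanded.  */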
16986
16987 static const struct builtin_description bdesc_2arg[] =
16988 {
16989 /* SSE */
16990 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, 0 },
16991 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, 0 },
16992 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, 0 },
16993 { OPTION_MASK_ISA_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, 0 },
16994 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, 0 },
16995 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, 0 },
16996 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, 0 },
16997 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, 0 },
16998
16999 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
17000 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
17001 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
17002 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, BUILTIN_DESC_SWAP_OPERANDS },
17003 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, BUILTIN_DESC_SWAP_OPERANDS },
17004 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
17005 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 },
17006 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 },
17007 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 },
17008 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, BUILTIN_DESC_SWAP_OPERANDS },
17009 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, BUILTIN_DESC_SWAP_OPERANDS },
17010 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 },
17011 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
17012 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
17013 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
17014 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
17015 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 },
17016 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 },
17017 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 },
17018 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, BUILTIN_DESC_SWAP_OPERANDS },
17019 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, BUILTIN_DESC_SWAP_OPERANDS },
17020 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, 0 },
17021
17022 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, 0 },
17023 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, 0 },
17024 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, 0 },
17025 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, 0 },
17026
17027 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, 0 },
17028 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, 0 },
17029 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, 0 },
17030 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, 0 },
17031
17032 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, 0 },
17033 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, 0 },
17034 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, 0 },
17035 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, 0 },
17036 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, 0 },
17037
17038 /* MMX */
17039 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, 0 },
17040 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, 0 },
17041 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, 0 },
17042 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, 0 },
17043 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, 0 },
17044 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, 0 },
17045 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, 0 },
17046 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, 0 },
17047
17048 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, 0 },
17049 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, 0 },
17050 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, 0 },
17051 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, 0 },
17052 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, 0 },
17053 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, 0 },
17054 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, 0 },
17055 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, 0 },
17056
17057 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, 0 },
17058 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, 0 },
17059 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, 0 },
17060
17061 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, 0 },
17062 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, 0 },
17063 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, 0 },
17064 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, 0 },
17065
17066 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, 0 },
17067 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, 0 },
17068
17069 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, 0 },
17070 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, 0 },
17071 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, 0 },
17072 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, 0 },
17073 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, 0 },
17074 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, 0 },
17075
17076 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, 0 },
17077 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, 0 },
17078 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, 0 },
17079 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, 0 },
17080
17081 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, 0 },
17082 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, 0 },
17083 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, 0 },
17084 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, 0 },
17085 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, 0 },
17086 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, 0 },
17087
17088 /* Special. */
17089 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, UNKNOWN, 0 },
17090 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, UNKNOWN, 0 },
17091 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, UNKNOWN, 0 },
17092
17093 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, UNKNOWN, 0 },
17094 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, UNKNOWN, 0 },
17095 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, UNKNOWN, 0 },
17096
17097 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, UNKNOWN, 0 },
17098 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, UNKNOWN, 0 },
17099 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, UNKNOWN, 0 },
17100 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, UNKNOWN, 0 },
17101 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, UNKNOWN, 0 },
17102 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, UNKNOWN, 0 },
17103
17104 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, UNKNOWN, 0 },
17105 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, UNKNOWN, 0 },
17106 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, UNKNOWN, 0 },
17107 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, UNKNOWN, 0 },
17108 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, UNKNOWN, 0 },
17109 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, UNKNOWN, 0 },
17110
17111 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, UNKNOWN, 0 },
17112 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, UNKNOWN, 0 },
17113 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, UNKNOWN, 0 },
17114 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, UNKNOWN, 0 },
17115
17116 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, UNKNOWN, 0 },
17117 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, UNKNOWN, 0 },
17118
17119 /* SSE2 */
17120 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, 0 },
17121 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, 0 },
17122 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, 0 },
17123 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, 0 },
17124 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, 0 },
17125 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, 0 },
17126 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, 0 },
17127 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, 0 },
17128
17129 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
17130 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
17131 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
17132 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, BUILTIN_DESC_SWAP_OPERANDS },
17133 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, BUILTIN_DESC_SWAP_OPERANDS },
17134 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
17135 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 },
17136 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 },
17137 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 },
17138 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, BUILTIN_DESC_SWAP_OPERANDS },
17139 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, BUILTIN_DESC_SWAP_OPERANDS },
17140 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 },
17141 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
17142 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
17143 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
17144 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
17145 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 },
17146 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 },
17147 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 },
17148 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 },
17149
17150 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, 0 },
17151 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, 0 },
17152 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, 0 },
17153 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, 0 },
17154
17155 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, 0 },
17156 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, 0 },
17157 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, 0 },
17158 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, 0 },
17159
17160 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, 0 },
17161 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, 0 },
17162 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, 0 },
17163
17164 /* SSE2 MMX */
17165 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, 0 },
17166 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, 0 },
17167 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, 0 },
17168 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, 0 },
17169 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, 0 },
17170 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, 0 },
17171 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, 0 },
17172 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, 0 },
17173
17174 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, 0 },
17175 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, 0 },
17176 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, 0 },
17177 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, 0 },
17178 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, 0 },
17179 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, 0 },
17180 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, 0 },
17181 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, 0 },
17182
17183 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, 0 },
17184 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, 0 },
17185
17186 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, 0 },
17187 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, 0 },
17188 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, 0 },
17189 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, 0 },
17190
17191 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, 0 },
17192 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, 0 },
17193
17194 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, 0 },
17195 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, 0 },
17196 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, 0 },
17197 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, 0 },
17198 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, 0 },
17199 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, 0 },
17200
17201 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, 0 },
17202 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, 0 },
17203 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, 0 },
17204 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, 0 },
17205
17206 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, 0 },
17207 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, 0 },
17208 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, 0 },
17209 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, 0 },
17210 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, 0 },
17211 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, 0 },
17212 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, 0 },
17213 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, 0 },
17214
17215 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, 0 },
17216 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, 0 },
17217 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, 0 },
17218
17219 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, 0 },
17220 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, UNKNOWN, 0 },
17221
17222 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, UNKNOWN, 0 },
17223 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, UNKNOWN, 0 },
17224
17225 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, UNKNOWN, 0 },
17226 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, UNKNOWN, 0 },
17227 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, UNKNOWN, 0 },
17228
17229 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, UNKNOWN, 0 },
17230 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, UNKNOWN, 0 },
17231 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, UNKNOWN, 0 },
17232
17233 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, UNKNOWN, 0 },
17234 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, UNKNOWN, 0 },
17235
17236 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, UNKNOWN, 0 },
17237
17238 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, UNKNOWN, 0 },
17239 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, UNKNOWN, 0 },
17240 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, UNKNOWN, 0 },
17241 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, UNKNOWN, 0 },
17242
17243 /* SSE3 */
17244 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, 0 },
17245 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, 0 },
17246 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, 0 },
17247 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, 0 },
17248 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, 0 },
17249 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, 0 },
17250
17251 /* SSSE3 */
17252 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, 0 },
17253 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, 0 },
17254 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, 0 },
17255 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, 0 },
17256 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, 0 },
17257 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, 0 },
17258 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, 0 },
17259 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, 0 },
17260 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, 0 },
17261 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, 0 },
17262 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, 0 },
17263 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, 0 },
17264 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubswv8hi3, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, 0 },
17265 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubswv4hi3, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, 0 },
17266 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, 0 },
17267 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, 0 },
17268 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, 0 },
17269 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, 0 },
17270 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, 0 },
17271 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, 0 },
17272 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, 0 },
17273 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, 0 },
17274 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, 0 },
17275 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, 0 },
17276
17277 /* SSE4.1 */
17278 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, 0 },
17279 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, 0 },
17280 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, 0 },
17281 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, 0 },
17282 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, 0 },
17283 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, 0 },
17284 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, 0 },
17285 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, 0 },
17286 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, 0 },
17287 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, 0 },
17288 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, 0, IX86_BUILTIN_PMULDQ128, UNKNOWN, 0 },
17289 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, 0 },
17290
17291 /* SSE4.2 */
17292 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, 0 },
17293 };
17294
17295 static const struct builtin_description bdesc_1arg[] =
17296 {
17297 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, UNKNOWN, 0 },
17298 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, UNKNOWN, 0 },
17299
17300 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, UNKNOWN, 0 },
17301 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, UNKNOWN, 0 },
17302 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, UNKNOWN, 0 },
17303
17304 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, UNKNOWN, 0 },
17305 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, UNKNOWN, 0 },
17306 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, UNKNOWN, 0 },
17307 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, UNKNOWN, 0 },
17308 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, UNKNOWN, 0 },
17309 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, 0 },
17310
17311 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, UNKNOWN, 0 },
17312 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, UNKNOWN, 0 },
17313
17314 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, UNKNOWN, 0 },
17315
17316 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, UNKNOWN, 0 },
17317 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, UNKNOWN, 0 },
17318
17319 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, UNKNOWN, 0 },
17320 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, UNKNOWN, 0 },
17321 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, UNKNOWN, 0 },
17322 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, 0 },
17323 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, UNKNOWN, 0 },
17324
17325 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, UNKNOWN, 0 },
17326
17327 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, UNKNOWN, 0 },
17328 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, UNKNOWN, 0 },
17329 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, UNKNOWN, 0 },
17330 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, 0 },
17331
17332 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, UNKNOWN, 0 },
17333 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, UNKNOWN, 0 },
17334 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, 0 },
17335
17336 /* SSE3 */
17337 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, 0 },
17338 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, 0 },
17339
17340 /* SSSE3 */
17341 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, 0 },
17342 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, 0 },
17343 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, 0 },
17344 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, 0 },
17345 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, 0 },
17346 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, 0 },
17347
17348 /* SSE4.1 */
17349 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv8qiv8hi2, 0, IX86_BUILTIN_PMOVSXBW128, UNKNOWN, 0 },
17350 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4qiv4si2, 0, IX86_BUILTIN_PMOVSXBD128, UNKNOWN, 0 },
17351 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2qiv2di2, 0, IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, 0 },
17352 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4hiv4si2, 0, IX86_BUILTIN_PMOVSXWD128, UNKNOWN, 0 },
17353 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2hiv2di2, 0, IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, 0 },
17354 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2siv2di2, 0, IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, 0 },
17355 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, 0, IX86_BUILTIN_PMOVZXBW128, UNKNOWN, 0 },
17356 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, 0, IX86_BUILTIN_PMOVZXBD128, UNKNOWN, 0 },
17357 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, 0, IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, 0 },
17358 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, 0, IX86_BUILTIN_PMOVZXWD128, UNKNOWN, 0 },
17359 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, 0, IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, 0 },
17360 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, 0, IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, 0 },
17361 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, 0 },
17362
17363 /* Builtins listed here as taking 1 arg, but actually taking a constant smaller than 8 bits as the 2nd arg. */
17364 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_roundpd, 0, IX86_BUILTIN_ROUNDPD, UNKNOWN, 0 },
17365 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_roundps, 0, IX86_BUILTIN_ROUNDPS, UNKNOWN, 0 },
17366 };
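
/* Editorial sketch, added for orientation (the actual loops appear
   later in ix86_init_mmx_sse_builtins): each bdesc_* table above is
   walked and every named entry is registered, with the function type
   picked from the operand modes of d->icode.  Roughly:

     for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
       {
         if (d->name == 0)
           continue;
         def_builtin_const (d->mask, d->name, type_for_entry, d->code);
       }

   "type_for_entry" stands in for the per-entry type selection.  */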
17367
17368 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
17369 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
17370 builtins. */
17371 static void
17372 ix86_init_mmx_sse_builtins (void)
17373 {
17374 const struct builtin_description * d;
17375 size_t i;
17376
17377 tree V16QI_type_node = build_vector_type_for_mode (char_type_node, V16QImode);
17378 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
17379 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
17380 tree V2DI_type_node
17381 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
17382 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
17383 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
17384 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
17385 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
17386 tree V8QI_type_node = build_vector_type_for_mode (char_type_node, V8QImode);
17387 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
17388
17389 tree pchar_type_node = build_pointer_type (char_type_node);
17390 tree pcchar_type_node = build_pointer_type (
17391 build_type_variant (char_type_node, 1, 0));
17392 tree pfloat_type_node = build_pointer_type (float_type_node);
17393 tree pcfloat_type_node = build_pointer_type (
17394 build_type_variant (float_type_node, 1, 0));
17395 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
17396 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
17397 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
17398
17399 /* Comparisons. */
17400 tree int_ftype_v4sf_v4sf
17401 = build_function_type_list (integer_type_node,
17402 V4SF_type_node, V4SF_type_node, NULL_TREE);
17403 tree v4si_ftype_v4sf_v4sf
17404 = build_function_type_list (V4SI_type_node,
17405 V4SF_type_node, V4SF_type_node, NULL_TREE);
17406 /* MMX/SSE/integer conversions. */
17407 tree int_ftype_v4sf
17408 = build_function_type_list (integer_type_node,
17409 V4SF_type_node, NULL_TREE);
17410 tree int64_ftype_v4sf
17411 = build_function_type_list (long_long_integer_type_node,
17412 V4SF_type_node, NULL_TREE);
17413 tree int_ftype_v8qi
17414 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
17415 tree v4sf_ftype_v4sf_int
17416 = build_function_type_list (V4SF_type_node,
17417 V4SF_type_node, integer_type_node, NULL_TREE);
17418 tree v4sf_ftype_v4sf_int64
17419 = build_function_type_list (V4SF_type_node,
17420 V4SF_type_node, long_long_integer_type_node,
17421 NULL_TREE);
17422 tree v4sf_ftype_v4sf_v2si
17423 = build_function_type_list (V4SF_type_node,
17424 V4SF_type_node, V2SI_type_node, NULL_TREE);
17425
17426 /* Miscellaneous. */
17427 tree v8qi_ftype_v4hi_v4hi
17428 = build_function_type_list (V8QI_type_node,
17429 V4HI_type_node, V4HI_type_node, NULL_TREE);
17430 tree v4hi_ftype_v2si_v2si
17431 = build_function_type_list (V4HI_type_node,
17432 V2SI_type_node, V2SI_type_node, NULL_TREE);
17433 tree v4sf_ftype_v4sf_v4sf_int
17434 = build_function_type_list (V4SF_type_node,
17435 V4SF_type_node, V4SF_type_node,
17436 integer_type_node, NULL_TREE);
17437 tree v2si_ftype_v4hi_v4hi
17438 = build_function_type_list (V2SI_type_node,
17439 V4HI_type_node, V4HI_type_node, NULL_TREE);
17440 tree v4hi_ftype_v4hi_int
17441 = build_function_type_list (V4HI_type_node,
17442 V4HI_type_node, integer_type_node, NULL_TREE);
17443 tree v4hi_ftype_v4hi_di
17444 = build_function_type_list (V4HI_type_node,
17445 V4HI_type_node, long_long_unsigned_type_node,
17446 NULL_TREE);
17447 tree v2si_ftype_v2si_di
17448 = build_function_type_list (V2SI_type_node,
17449 V2SI_type_node, long_long_unsigned_type_node,
17450 NULL_TREE);
17451 tree void_ftype_void
17452 = build_function_type (void_type_node, void_list_node);
17453 tree void_ftype_unsigned
17454 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
17455 tree void_ftype_unsigned_unsigned
17456 = build_function_type_list (void_type_node, unsigned_type_node,
17457 unsigned_type_node, NULL_TREE);
17458 tree void_ftype_pcvoid_unsigned_unsigned
17459 = build_function_type_list (void_type_node, const_ptr_type_node,
17460 unsigned_type_node, unsigned_type_node,
17461 NULL_TREE);
17462 tree unsigned_ftype_void
17463 = build_function_type (unsigned_type_node, void_list_node);
17464 tree v2si_ftype_v4sf
17465 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
17466 /* Loads/stores. */
17467 tree void_ftype_v8qi_v8qi_pchar
17468 = build_function_type_list (void_type_node,
17469 V8QI_type_node, V8QI_type_node,
17470 pchar_type_node, NULL_TREE);
17471 tree v4sf_ftype_pcfloat
17472 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
17473 /* @@@ the type is bogus; the pointer really addresses two floats, not a V2SI. */
17474 tree v4sf_ftype_v4sf_pv2si
17475 = build_function_type_list (V4SF_type_node,
17476 V4SF_type_node, pv2si_type_node, NULL_TREE);
17477 tree void_ftype_pv2si_v4sf
17478 = build_function_type_list (void_type_node,
17479 pv2si_type_node, V4SF_type_node, NULL_TREE);
17480 tree void_ftype_pfloat_v4sf
17481 = build_function_type_list (void_type_node,
17482 pfloat_type_node, V4SF_type_node, NULL_TREE);
17483 tree void_ftype_pdi_di
17484 = build_function_type_list (void_type_node,
17485 pdi_type_node, long_long_unsigned_type_node,
17486 NULL_TREE);
17487 tree void_ftype_pv2di_v2di
17488 = build_function_type_list (void_type_node,
17489 pv2di_type_node, V2DI_type_node, NULL_TREE);
17490 /* Normal vector unops. */
17491 tree v4sf_ftype_v4sf
17492 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
17493 tree v16qi_ftype_v16qi
17494 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
17495 tree v8hi_ftype_v8hi
17496 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
17497 tree v4si_ftype_v4si
17498 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
17499 tree v8qi_ftype_v8qi
17500 = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
17501 tree v4hi_ftype_v4hi
17502 = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
17503
17504 /* Normal vector binops. */
17505 tree v4sf_ftype_v4sf_v4sf
17506 = build_function_type_list (V4SF_type_node,
17507 V4SF_type_node, V4SF_type_node, NULL_TREE);
17508 tree v8qi_ftype_v8qi_v8qi
17509 = build_function_type_list (V8QI_type_node,
17510 V8QI_type_node, V8QI_type_node, NULL_TREE);
17511 tree v4hi_ftype_v4hi_v4hi
17512 = build_function_type_list (V4HI_type_node,
17513 V4HI_type_node, V4HI_type_node, NULL_TREE);
17514 tree v2si_ftype_v2si_v2si
17515 = build_function_type_list (V2SI_type_node,
17516 V2SI_type_node, V2SI_type_node, NULL_TREE);
17517 tree di_ftype_di_di
17518 = build_function_type_list (long_long_unsigned_type_node,
17519 long_long_unsigned_type_node,
17520 long_long_unsigned_type_node, NULL_TREE);
17521
17522 tree di_ftype_di_di_int
17523 = build_function_type_list (long_long_unsigned_type_node,
17524 long_long_unsigned_type_node,
17525 long_long_unsigned_type_node,
17526 integer_type_node, NULL_TREE);
17527
17528 tree v2si_ftype_v2sf
17529 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
17530 tree v2sf_ftype_v2si
17531 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
17532 tree v2si_ftype_v2si
17533 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
17534 tree v2sf_ftype_v2sf
17535 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
17536 tree v2sf_ftype_v2sf_v2sf
17537 = build_function_type_list (V2SF_type_node,
17538 V2SF_type_node, V2SF_type_node, NULL_TREE);
17539 tree v2si_ftype_v2sf_v2sf
17540 = build_function_type_list (V2SI_type_node,
17541 V2SF_type_node, V2SF_type_node, NULL_TREE);
17542 tree pint_type_node = build_pointer_type (integer_type_node);
17543 tree pdouble_type_node = build_pointer_type (double_type_node);
17544 tree pcdouble_type_node = build_pointer_type (
17545 build_type_variant (double_type_node, 1, 0));
17546 tree int_ftype_v2df_v2df
17547 = build_function_type_list (integer_type_node,
17548 V2DF_type_node, V2DF_type_node, NULL_TREE);
17549
17550 tree void_ftype_pcvoid
17551 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
17552 tree v4sf_ftype_v4si
17553 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
17554 tree v4si_ftype_v4sf
17555 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
17556 tree v2df_ftype_v4si
17557 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
17558 tree v4si_ftype_v2df
17559 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
17560 tree v2si_ftype_v2df
17561 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
17562 tree v4sf_ftype_v2df
17563 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
17564 tree v2df_ftype_v2si
17565 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
17566 tree v2df_ftype_v4sf
17567 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
17568 tree int_ftype_v2df
17569 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
17570 tree int64_ftype_v2df
17571 = build_function_type_list (long_long_integer_type_node,
17572 V2DF_type_node, NULL_TREE);
17573 tree v2df_ftype_v2df_int
17574 = build_function_type_list (V2DF_type_node,
17575 V2DF_type_node, integer_type_node, NULL_TREE);
17576 tree v2df_ftype_v2df_int64
17577 = build_function_type_list (V2DF_type_node,
17578 V2DF_type_node, long_long_integer_type_node,
17579 NULL_TREE);
17580 tree v4sf_ftype_v4sf_v2df
17581 = build_function_type_list (V4SF_type_node,
17582 V4SF_type_node, V2DF_type_node, NULL_TREE);
17583 tree v2df_ftype_v2df_v4sf
17584 = build_function_type_list (V2DF_type_node,
17585 V2DF_type_node, V4SF_type_node, NULL_TREE);
17586 tree v2df_ftype_v2df_v2df_int
17587 = build_function_type_list (V2DF_type_node,
17588 V2DF_type_node, V2DF_type_node,
17589 integer_type_node,
17590 NULL_TREE);
17591 tree v2df_ftype_v2df_pcdouble
17592 = build_function_type_list (V2DF_type_node,
17593 V2DF_type_node, pcdouble_type_node, NULL_TREE);
17594 tree void_ftype_pdouble_v2df
17595 = build_function_type_list (void_type_node,
17596 pdouble_type_node, V2DF_type_node, NULL_TREE);
17597 tree void_ftype_pint_int
17598 = build_function_type_list (void_type_node,
17599 pint_type_node, integer_type_node, NULL_TREE);
17600 tree void_ftype_v16qi_v16qi_pchar
17601 = build_function_type_list (void_type_node,
17602 V16QI_type_node, V16QI_type_node,
17603 pchar_type_node, NULL_TREE);
17604 tree v2df_ftype_pcdouble
17605 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
17606 tree v2df_ftype_v2df_v2df
17607 = build_function_type_list (V2DF_type_node,
17608 V2DF_type_node, V2DF_type_node, NULL_TREE);
17609 tree v16qi_ftype_v16qi_v16qi
17610 = build_function_type_list (V16QI_type_node,
17611 V16QI_type_node, V16QI_type_node, NULL_TREE);
17612 tree v8hi_ftype_v8hi_v8hi
17613 = build_function_type_list (V8HI_type_node,
17614 V8HI_type_node, V8HI_type_node, NULL_TREE);
17615 tree v4si_ftype_v4si_v4si
17616 = build_function_type_list (V4SI_type_node,
17617 V4SI_type_node, V4SI_type_node, NULL_TREE);
17618 tree v2di_ftype_v2di_v2di
17619 = build_function_type_list (V2DI_type_node,
17620 V2DI_type_node, V2DI_type_node, NULL_TREE);
17621 tree v2di_ftype_v2df_v2df
17622 = build_function_type_list (V2DI_type_node,
17623 V2DF_type_node, V2DF_type_node, NULL_TREE);
17624 tree v2df_ftype_v2df
17625 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
17626 tree v2di_ftype_v2di_int
17627 = build_function_type_list (V2DI_type_node,
17628 V2DI_type_node, integer_type_node, NULL_TREE);
17629 tree v2di_ftype_v2di_v2di_int
17630 = build_function_type_list (V2DI_type_node, V2DI_type_node,
17631 V2DI_type_node, integer_type_node, NULL_TREE);
17632 tree v4si_ftype_v4si_int
17633 = build_function_type_list (V4SI_type_node,
17634 V4SI_type_node, integer_type_node, NULL_TREE);
17635 tree v8hi_ftype_v8hi_int
17636 = build_function_type_list (V8HI_type_node,
17637 V8HI_type_node, integer_type_node, NULL_TREE);
17638 tree v4si_ftype_v8hi_v8hi
17639 = build_function_type_list (V4SI_type_node,
17640 V8HI_type_node, V8HI_type_node, NULL_TREE);
17641 tree di_ftype_v8qi_v8qi
17642 = build_function_type_list (long_long_unsigned_type_node,
17643 V8QI_type_node, V8QI_type_node, NULL_TREE);
17644 tree di_ftype_v2si_v2si
17645 = build_function_type_list (long_long_unsigned_type_node,
17646 V2SI_type_node, V2SI_type_node, NULL_TREE);
17647 tree v2di_ftype_v16qi_v16qi
17648 = build_function_type_list (V2DI_type_node,
17649 V16QI_type_node, V16QI_type_node, NULL_TREE);
17650 tree v2di_ftype_v4si_v4si
17651 = build_function_type_list (V2DI_type_node,
17652 V4SI_type_node, V4SI_type_node, NULL_TREE);
17653 tree int_ftype_v16qi
17654 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
17655 tree v16qi_ftype_pcchar
17656 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
17657 tree void_ftype_pchar_v16qi
17658 = build_function_type_list (void_type_node,
17659 pchar_type_node, V16QI_type_node, NULL_TREE);
17660
17661 tree v2di_ftype_v2di_unsigned_unsigned
17662 = build_function_type_list (V2DI_type_node, V2DI_type_node,
17663 unsigned_type_node, unsigned_type_node,
17664 NULL_TREE);
17665 tree v2di_ftype_v2di_v2di_unsigned_unsigned
17666 = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node,
17667 unsigned_type_node, unsigned_type_node,
17668 NULL_TREE);
17669 tree v2di_ftype_v2di_v16qi
17670 = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node,
17671 NULL_TREE);
17672 tree v2df_ftype_v2df_v2df_v2df
17673 = build_function_type_list (V2DF_type_node,
17674 V2DF_type_node, V2DF_type_node,
17675 V2DF_type_node, NULL_TREE);
17676 tree v4sf_ftype_v4sf_v4sf_v4sf
17677 = build_function_type_list (V4SF_type_node,
17678 V4SF_type_node, V4SF_type_node,
17679 V4SF_type_node, NULL_TREE);
17680 tree v8hi_ftype_v16qi
17681 = build_function_type_list (V8HI_type_node, V16QI_type_node,
17682 NULL_TREE);
17683 tree v4si_ftype_v16qi
17684 = build_function_type_list (V4SI_type_node, V16QI_type_node,
17685 NULL_TREE);
17686 tree v2di_ftype_v16qi
17687 = build_function_type_list (V2DI_type_node, V16QI_type_node,
17688 NULL_TREE);
17689 tree v4si_ftype_v8hi
17690 = build_function_type_list (V4SI_type_node, V8HI_type_node,
17691 NULL_TREE);
17692 tree v2di_ftype_v8hi
17693 = build_function_type_list (V2DI_type_node, V8HI_type_node,
17694 NULL_TREE);
17695 tree v2di_ftype_v4si
17696 = build_function_type_list (V2DI_type_node, V4SI_type_node,
17697 NULL_TREE);
17698 tree v2di_ftype_pv2di
17699 = build_function_type_list (V2DI_type_node, pv2di_type_node,
17700 NULL_TREE);
17701 tree v16qi_ftype_v16qi_v16qi_int
17702 = build_function_type_list (V16QI_type_node, V16QI_type_node,
17703 V16QI_type_node, integer_type_node,
17704 NULL_TREE);
17705 tree v16qi_ftype_v16qi_v16qi_v16qi
17706 = build_function_type_list (V16QI_type_node, V16QI_type_node,
17707 V16QI_type_node, V16QI_type_node,
17708 NULL_TREE);
17709 tree v8hi_ftype_v8hi_v8hi_int
17710 = build_function_type_list (V8HI_type_node, V8HI_type_node,
17711 V8HI_type_node, integer_type_node,
17712 NULL_TREE);
17713 tree v4si_ftype_v4si_v4si_int
17714 = build_function_type_list (V4SI_type_node, V4SI_type_node,
17715 V4SI_type_node, integer_type_node,
17716 NULL_TREE);
17717 tree int_ftype_v2di_v2di
17718 = build_function_type_list (integer_type_node,
17719 V2DI_type_node, V2DI_type_node,
17720 NULL_TREE);
17721 tree int_ftype_v16qi_int_v16qi_int_int
17722 = build_function_type_list (integer_type_node,
17723 V16QI_type_node,
17724 integer_type_node,
17725 V16QI_type_node,
17726 integer_type_node,
17727 integer_type_node,
17728 NULL_TREE);
17729 tree v16qi_ftype_v16qi_int_v16qi_int_int
17730 = build_function_type_list (V16QI_type_node,
17731 V16QI_type_node,
17732 integer_type_node,
17733 V16QI_type_node,
17734 integer_type_node,
17735 integer_type_node,
17736 NULL_TREE);
17737 tree int_ftype_v16qi_v16qi_int
17738 = build_function_type_list (integer_type_node,
17739 V16QI_type_node,
17740 V16QI_type_node,
17741 integer_type_node,
17742 NULL_TREE);
17743 tree ftype;
17744
17745 /* The __float80 type. */
17746 if (TYPE_MODE (long_double_type_node) == XFmode)
17747 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
17748 "__float80");
17749 else
17750 {
17751 /* long double is not the 80-bit extended type here, so make a distinct 80-bit type for __float80. */
17752 tree float80_type_node = make_node (REAL_TYPE);
17753
17754 TYPE_PRECISION (float80_type_node) = 80;
17755 layout_type (float80_type_node);
17756 (*lang_hooks.types.register_builtin_type) (float80_type_node,
17757 "__float80");
17758 }
17759
17760 if (TARGET_64BIT)
17761 {
17762 tree float128_type_node = make_node (REAL_TYPE);
17763
17764 TYPE_PRECISION (float128_type_node) = 128;
17765 layout_type (float128_type_node);
17766 (*lang_hooks.types.register_builtin_type) (float128_type_node,
17767 "__float128");
17768
17769 /* TFmode support builtins. */
17770 ftype = build_function_type (float128_type_node,
17771 void_list_node);
17772 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_infq", ftype, IX86_BUILTIN_INFQ);
17773
17774 ftype = build_function_type_list (float128_type_node,
17775 float128_type_node,
17776 NULL_TREE);
17777 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_fabsq", ftype, IX86_BUILTIN_FABSQ);
17778
17779 ftype = build_function_type_list (float128_type_node,
17780 float128_type_node,
17781 float128_type_node,
17782 NULL_TREE);
17783 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_copysignq", ftype, IX86_BUILTIN_COPYSIGNQ);
17784 }
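
  /* Illustrative (user-level) sketch of the TFmode builtins registered
     above on 64-bit targets; this is not part of the initialization logic
     itself:

	 __float128 x = __builtin_infq ();
	 __float128 y = __builtin_fabsq (x);
	 __float128 z = __builtin_copysignq (y, x);

     __builtin_infq takes no arguments, __builtin_fabsq takes one
     __float128 and __builtin_copysignq takes two, matching the function
     types built just above.  */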
17785
17786 /* Add all SSE builtins that are more or less simple operations on
17787 three operands. */
17788 for (i = 0, d = bdesc_sse_3arg;
17789 i < ARRAY_SIZE (bdesc_sse_3arg);
17790 i++, d++)
17791 {
17792 /* Derive the builtin type from operand 1; the result (operand 0) can
17793 have a different mode for the mask-generating compares.  */
17794 enum machine_mode mode;
17795 tree type;
17796
17797 if (d->name == 0)
17798 continue;
17799 mode = insn_data[d->icode].operand[1].mode;
17800
17801 switch (mode)
17802 {
17803 case V16QImode:
17804 type = v16qi_ftype_v16qi_v16qi_int;
17805 break;
17806 case V8HImode:
17807 type = v8hi_ftype_v8hi_v8hi_int;
17808 break;
17809 case V4SImode:
17810 type = v4si_ftype_v4si_v4si_int;
17811 break;
17812 case V2DImode:
17813 type = v2di_ftype_v2di_v2di_int;
17814 break;
17815 case V2DFmode:
17816 type = v2df_ftype_v2df_v2df_int;
17817 break;
17818 case V4SFmode:
17819 type = v4sf_ftype_v4sf_v4sf_int;
17820 break;
17821 default:
17822 gcc_unreachable ();
17823 }
17824
17825 /* Override for variable blends. */
17826 switch (d->icode)
17827 {
17828 case CODE_FOR_sse4_1_blendvpd:
17829 type = v2df_ftype_v2df_v2df_v2df;
17830 break;
17831 case CODE_FOR_sse4_1_blendvps:
17832 type = v4sf_ftype_v4sf_v4sf_v4sf;
17833 break;
17834 case CODE_FOR_sse4_1_pblendvb:
17835 type = v16qi_ftype_v16qi_v16qi_v16qi;
17836 break;
17837 default:
17838 break;
17839 }
17840
17841 def_builtin (d->mask, d->name, type, d->code);
17842 }
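
  /* For instance, an SSE4.1 entry in bdesc_sse_3arg whose insn pattern has
     V2DF operands (hypothetically, the blendpd pattern) ends up registered
     by the loop above roughly as

	 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_blendpd",
		      v2df_ftype_v2df_v2df_int, IX86_BUILTIN_BLENDPD);

     i.e. two vector operands plus an 8-bit immediate selector.  */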
17843
17844 /* Add all builtins that are more or less simple operations on two
17845 operands. */
17846 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
17847 {
17848 /* Derive the builtin type from operand 1; the result (operand 0) can
17849 have a different mode for the mask-generating compares.  */
17850 enum machine_mode mode;
17851 tree type;
17852
17853 if (d->name == 0)
17854 continue;
17855 mode = insn_data[d->icode].operand[1].mode;
17856
17857 switch (mode)
17858 {
17859 case V16QImode:
17860 type = v16qi_ftype_v16qi_v16qi;
17861 break;
17862 case V8HImode:
17863 type = v8hi_ftype_v8hi_v8hi;
17864 break;
17865 case V4SImode:
17866 type = v4si_ftype_v4si_v4si;
17867 break;
17868 case V2DImode:
17869 type = v2di_ftype_v2di_v2di;
17870 break;
17871 case V2DFmode:
17872 type = v2df_ftype_v2df_v2df;
17873 break;
17874 case V4SFmode:
17875 type = v4sf_ftype_v4sf_v4sf;
17876 break;
17877 case V8QImode:
17878 type = v8qi_ftype_v8qi_v8qi;
17879 break;
17880 case V4HImode:
17881 type = v4hi_ftype_v4hi_v4hi;
17882 break;
17883 case V2SImode:
17884 type = v2si_ftype_v2si_v2si;
17885 break;
17886 case DImode:
17887 type = di_ftype_di_di;
17888 break;
17889
17890 default:
17891 gcc_unreachable ();
17892 }
17893
17894 /* Override for comparisons. */
17895 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
17896 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
17897 type = v4si_ftype_v4sf_v4sf;
17898
17899 if (d->icode == CODE_FOR_sse2_maskcmpv2df3
17900 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
17901 type = v2di_ftype_v2df_v2df;
17902
17903 def_builtin (d->mask, d->name, type, d->code);
17904 }
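
  /* The comparison override above reflects the fact that the SSE mask
     compare patterns produce an element-wise all-ones/all-zeros mask rather
     than a floating-point value: e.g. comparing {1,2,3,4} < {4,3,2,1}
     yields the V4SI mask {-1,-1,0,0} (illustrative values only).  */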
17905
17906 /* Add all builtins that are more or less simple operations on 1 operand. */
17907 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
17908 {
17909 enum machine_mode mode;
17910 tree type;
17911
17912 if (d->name == 0)
17913 continue;
17914 mode = insn_data[d->icode].operand[1].mode;
17915
17916 switch (mode)
17917 {
17918 case V16QImode:
17919 type = v16qi_ftype_v16qi;
17920 break;
17921 case V8HImode:
17922 type = v8hi_ftype_v8hi;
17923 break;
17924 case V4SImode:
17925 type = v4si_ftype_v4si;
17926 break;
17927 case V2DFmode:
17928 type = v2df_ftype_v2df;
17929 break;
17930 case V4SFmode:
17931 type = v4sf_ftype_v4sf;
17932 break;
17933 case V8QImode:
17934 type = v8qi_ftype_v8qi;
17935 break;
17936 case V4HImode:
17937 type = v4hi_ftype_v4hi;
17938 break;
17939 case V2SImode:
17940 type = v2si_ftype_v2si;
17941 break;
17942
17943 default:
17944 gcc_unreachable ();
17945 }
17946
17947 def_builtin (d->mask, d->name, type, d->code);
17948 }
17949
17950 /* pcmpestr[im] insns. */
17951 for (i = 0, d = bdesc_pcmpestr;
17952 i < ARRAY_SIZE (bdesc_pcmpestr);
17953 i++, d++)
17954 {
17955 if (d->code == IX86_BUILTIN_PCMPESTRM128)
17956 ftype = v16qi_ftype_v16qi_int_v16qi_int_int;
17957 else
17958 ftype = int_ftype_v16qi_int_v16qi_int_int;
17959 def_builtin (d->mask, d->name, ftype, d->code);
17960 }
17961
17962 /* pcmpistr[im] insns. */
17963 for (i = 0, d = bdesc_pcmpistr;
17964 i < ARRAY_SIZE (bdesc_pcmpistr);
17965 i++, d++)
17966 {
17967 if (d->code == IX86_BUILTIN_PCMPISTRM128)
17968 ftype = v16qi_ftype_v16qi_v16qi_int;
17969 else
17970 ftype = int_ftype_v16qi_v16qi_int;
17971 def_builtin (d->mask, d->name, ftype, d->code);
17972 }
17973
17974 /* Add the remaining MMX insns with somewhat more complicated types. */
17975 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
17976 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
17977 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
17978 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
17979
17980 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
17981 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
17982 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
17983
17984 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
17985 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
17986
17987 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
17988 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
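
  /* Illustrative use of one of the MMX shift builtins above (user-level
     sketch, not part of this file's logic): the shift count is a 64-bit
     value, so shifting each halfword of X left by three looks like

	 __v4hi y = __builtin_ia32_psllw (x, 3LL);

     where __v4hi stands for the user-visible V4HImode vector type.  */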
17989
17990 /* comi/ucomi insns. */
17991 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
17992 if (d->mask == OPTION_MASK_ISA_SSE2)
17993 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
17994 else
17995 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
17996
17997 /* ptest insns. */
17998 for (i = 0, d = bdesc_ptest; i < ARRAY_SIZE (bdesc_ptest); i++, d++)
17999 def_builtin (d->mask, d->name, int_ftype_v2di_v2di, d->code);
18000
18001 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
18002 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
18003 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
18004
18005 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
18006 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
18007 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
18008 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
18009 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
18010 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
18011 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
18012 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
18013 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
18014 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
18015 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
18016
18017 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
18018
18019 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
18020 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
18021
18022 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
18023 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
18024 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
18025 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
18026
18027 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
18028 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
18029 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
18030 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
18031
18032 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
18033
18034 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
18035
18036 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
18037 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
18038 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
18039 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
18040 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
18041 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
18042
18043 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
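
  /* Illustrative use (sketch): __builtin_ia32_shufps (a, b, 0x1b) picks the
     four result elements according to the 2-bit fields of the immediate;
     with A == B, the selector 0x1b simply reverses the element order.  */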
18044
18045 /* Original 3DNow! */
18046 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
18047 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
18048 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
18049 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
18050 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
18051 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
18052 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
18053 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
18054 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
18055 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
18056 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
18057 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
18058 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
18059 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
18060 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
18061 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
18062 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
18063 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
18064 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
18065 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
18066
18067 /* 3DNow! extension as used in the Athlon CPU. */
18068 def_builtin (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
18069 def_builtin (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
18070 def_builtin (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
18071 def_builtin (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
18072 def_builtin (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
18073 def_builtin (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
18074
18075 /* SSE2 */
18076 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
18077
18078 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
18079 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
18080
18081 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
18082 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);
18083
18084 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
18085 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
18086 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
18087 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
18088 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
18089
18090 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
18091 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
18092 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
18093 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
18094
18095 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
18096 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
18097
18098 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
18099
18100 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
18101 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
18102
18103 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
18104 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
18105 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
18106 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
18107 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
18108
18109 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
18110
18111 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
18112 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
18113 def_builtin_const (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
18114 def_builtin_const (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
18115
18116 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
18117 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
18118 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
18119
18120 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
18121 def_builtin_const (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
18122 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
18123 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
18124
18125 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
18126 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
18127 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
18128
18129 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
18130 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
18131
18132 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
18133 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
18134
18135 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
18136 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
18137 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
18138 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
18139 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSLLW128);
18140 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSLLD128);
18141 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
18142
18143 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
18144 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
18145 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
18146 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
18147 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRLW128);
18148 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRLD128);
18149 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
18150
18151 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
18152 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
18153 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRAW128);
18154 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRAD128);
18155
18156 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
18157
18158 /* Prescott New Instructions (SSE3). */
18159 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor", void_ftype_pcvoid_unsigned_unsigned, IX86_BUILTIN_MONITOR);
18160 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait", void_ftype_unsigned_unsigned, IX86_BUILTIN_MWAIT);
18161 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_lddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
18162
18163 /* SSSE3. */
18164 def_builtin (OPTION_MASK_ISA_SSSE3, "__builtin_ia32_palignr128", v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PALIGNR128);
18165 def_builtin (OPTION_MASK_ISA_SSSE3, "__builtin_ia32_palignr", di_ftype_di_di_int, IX86_BUILTIN_PALIGNR);
18166
18167 /* SSE4.1. */
18168 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_movntdqa", v2di_ftype_pv2di, IX86_BUILTIN_MOVNTDQA);
18169 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxbw128", v8hi_ftype_v16qi, IX86_BUILTIN_PMOVSXBW128);
18170 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxbd128", v4si_ftype_v16qi, IX86_BUILTIN_PMOVSXBD128);
18171 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxbq128", v2di_ftype_v16qi, IX86_BUILTIN_PMOVSXBQ128);
18172 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxwd128", v4si_ftype_v8hi, IX86_BUILTIN_PMOVSXWD128);
18173 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxwq128", v2di_ftype_v8hi, IX86_BUILTIN_PMOVSXWQ128);
18174 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxdq128", v2di_ftype_v4si, IX86_BUILTIN_PMOVSXDQ128);
18175 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxbw128", v8hi_ftype_v16qi, IX86_BUILTIN_PMOVZXBW128);
18176 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxbd128", v4si_ftype_v16qi, IX86_BUILTIN_PMOVZXBD128);
18177 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxbq128", v2di_ftype_v16qi, IX86_BUILTIN_PMOVZXBQ128);
18178 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxwd128", v4si_ftype_v8hi, IX86_BUILTIN_PMOVZXWD128);
18179 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxwq128", v2di_ftype_v8hi, IX86_BUILTIN_PMOVZXWQ128);
18180 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxdq128", v2di_ftype_v4si, IX86_BUILTIN_PMOVZXDQ128);
18181 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmuldq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULDQ128);
18182 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_roundpd", v2df_ftype_v2df_int, IX86_BUILTIN_ROUNDPD);
18183 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_roundps", v4sf_ftype_v4sf_int, IX86_BUILTIN_ROUNDPS);
18184 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_roundsd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_ROUNDSD);
18185 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_roundss", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_ROUNDSS);
18186
18187 /* SSE4.2. */
18188 ftype = build_function_type_list (unsigned_type_node,
18189 unsigned_type_node,
18190 unsigned_char_type_node,
18191 NULL_TREE);
18192 def_builtin (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32qi", ftype, IX86_BUILTIN_CRC32QI);
18193 ftype = build_function_type_list (unsigned_type_node,
18194 unsigned_type_node,
18195 short_unsigned_type_node,
18196 NULL_TREE);
18197 def_builtin (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32hi", ftype, IX86_BUILTIN_CRC32HI);
18198 ftype = build_function_type_list (unsigned_type_node,
18199 unsigned_type_node,
18200 unsigned_type_node,
18201 NULL_TREE);
18202 def_builtin (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32si", ftype, IX86_BUILTIN_CRC32SI);
18203 ftype = build_function_type_list (long_long_unsigned_type_node,
18204 long_long_unsigned_type_node,
18205 long_long_unsigned_type_node,
18206 NULL_TREE);
18207 def_builtin (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32di", ftype, IX86_BUILTIN_CRC32DI);
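
  /* Illustrative use of the crc32 builtins defined above (user-level
     sketch): accumulating a CRC value one byte at a time,

	 unsigned int crc = 0xffffffff;
	 crc = __builtin_ia32_crc32qi (crc, byte);

     with crc32hi/si/di covering the wider input widths.  */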
18208
18209 /* AMDFAM10 SSE4A builtins. */
18210 def_builtin (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_movntsd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTSD);
18211 def_builtin (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_movntss", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTSS);
18212 def_builtin (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_extrqi", v2di_ftype_v2di_unsigned_unsigned, IX86_BUILTIN_EXTRQI);
18213 def_builtin (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_extrq", v2di_ftype_v2di_v16qi, IX86_BUILTIN_EXTRQ);
18214 def_builtin (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_insertqi", v2di_ftype_v2di_v2di_unsigned_unsigned, IX86_BUILTIN_INSERTQI);
18215 def_builtin (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_insertq", v2di_ftype_v2di_v2di, IX86_BUILTIN_INSERTQ);
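
  /* Illustrative use (sketch): __builtin_ia32_movntss (&dst, v) stores the
     low SFmode element of V to *DST with a non-temporal (cache-bypassing)
     store; movntsd is the DFmode analogue.  */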
18216
18217 /* Access to the vec_init patterns. */
18218 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
18219 integer_type_node, NULL_TREE);
18220 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si", ftype, IX86_BUILTIN_VEC_INIT_V2SI);
18221
18222 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
18223 short_integer_type_node,
18224 short_integer_type_node,
18225 short_integer_type_node, NULL_TREE);
18226 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi", ftype, IX86_BUILTIN_VEC_INIT_V4HI);
18227
18228 ftype = build_function_type_list (V8QI_type_node, char_type_node,
18229 char_type_node, char_type_node,
18230 char_type_node, char_type_node,
18231 char_type_node, char_type_node,
18232 char_type_node, NULL_TREE);
18233 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi", ftype, IX86_BUILTIN_VEC_INIT_V8QI);
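
  /* Illustrative use (sketch): __builtin_ia32_vec_init_v2si (1, 2) builds
     the V2SI vector {1, 2}; the v4hi and v8qi variants take four and eight
     scalar arguments respectively.  */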
18234
18235 /* Access to the vec_extract patterns. */
18236 ftype = build_function_type_list (double_type_node, V2DF_type_node,
18237 integer_type_node, NULL_TREE);
18238 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df", ftype, IX86_BUILTIN_VEC_EXT_V2DF);
18239
18240 ftype = build_function_type_list (long_long_integer_type_node,
18241 V2DI_type_node, integer_type_node,
18242 NULL_TREE);
18243 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di", ftype, IX86_BUILTIN_VEC_EXT_V2DI);
18244
18245 ftype = build_function_type_list (float_type_node, V4SF_type_node,
18246 integer_type_node, NULL_TREE);
18247 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf", ftype, IX86_BUILTIN_VEC_EXT_V4SF);
18248
18249 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
18250 integer_type_node, NULL_TREE);
18251 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si", ftype, IX86_BUILTIN_VEC_EXT_V4SI);
18252
18253 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
18254 integer_type_node, NULL_TREE);
18255 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi", ftype, IX86_BUILTIN_VEC_EXT_V8HI);
18256
18257 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
18258 integer_type_node, NULL_TREE);
18259 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_ext_v4hi", ftype, IX86_BUILTIN_VEC_EXT_V4HI);
18260
18261 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
18262 integer_type_node, NULL_TREE);
18263 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si", ftype, IX86_BUILTIN_VEC_EXT_V2SI);
18264
18265 ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
18266 integer_type_node, NULL_TREE);
18267 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi", ftype, IX86_BUILTIN_VEC_EXT_V16QI);
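
  /* Illustrative use (sketch): given a V2DF value V,

	 double lo = __builtin_ia32_vec_ext_v2df (v, 0);

     extracts element 0; the other vec_ext builtins follow the same
     (vector, element-index) form.  */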
18268
18269 /* Access to the vec_set patterns. */
18270 ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
18271 intDI_type_node,
18272 integer_type_node, NULL_TREE);
18273 def_builtin (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_vec_set_v2di", ftype, IX86_BUILTIN_VEC_SET_V2DI);
18274
18275 ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
18276 float_type_node,
18277 integer_type_node, NULL_TREE);
18278 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf", ftype, IX86_BUILTIN_VEC_SET_V4SF);
18279
18280 ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
18281 intSI_type_node,
18282 integer_type_node, NULL_TREE);
18283 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si", ftype, IX86_BUILTIN_VEC_SET_V4SI);
18284
18285 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
18286 intHI_type_node,
18287 integer_type_node, NULL_TREE);
18288 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi", ftype, IX86_BUILTIN_VEC_SET_V8HI);
18289
18290 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
18291 intHI_type_node,
18292 integer_type_node, NULL_TREE);
18293 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_set_v4hi", ftype, IX86_BUILTIN_VEC_SET_V4HI);
18294
18295 ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
18296 intQI_type_node,
18297 integer_type_node, NULL_TREE);
18298 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi", ftype, IX86_BUILTIN_VEC_SET_V16QI);
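
  /* Illustrative use (sketch): __builtin_ia32_vec_set_v8hi (v, 42, 3)
     returns a copy of the V8HI value V with element 3 replaced by 42.  */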
18299 }
18300
18301 static void
18302 ix86_init_builtins (void)
18303 {
18304 if (TARGET_MMX)
18305 ix86_init_mmx_sse_builtins ();
18306 }
18307
18308 /* Errors in the source file can cause expand_expr to return const0_rtx
18309 where we expect a vector.  To avoid crashing, substitute the all-zeros
18310 constant of the expected vector mode.  */
18311 static rtx
18312 safe_vector_operand (rtx x, enum machine_mode mode)
18313 {
18314 if (x == const0_rtx)
18315 x = CONST0_RTX (mode);
18316 return x;
18317 }
18318
18319 /* Subroutine of ix86_expand_builtin to take care of SSE insns with
18320 4 operands.  The third argument must be an 8-bit immediate (only 4
18321 bits for roundsd/roundss), or xmm0 for the variable blend insns.  */
18322
18323 static rtx
18324 ix86_expand_sse_4_operands_builtin (enum insn_code icode, tree exp,
18325 rtx target)
18326 {
18327 rtx pat;
18328 tree arg0 = CALL_EXPR_ARG (exp, 0);
18329 tree arg1 = CALL_EXPR_ARG (exp, 1);
18330 tree arg2 = CALL_EXPR_ARG (exp, 2);
18331 rtx op0 = expand_normal (arg0);
18332 rtx op1 = expand_normal (arg1);
18333 rtx op2 = expand_normal (arg2);
18334 enum machine_mode tmode = insn_data[icode].operand[0].mode;
18335 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
18336 enum machine_mode mode2 = insn_data[icode].operand[2].mode;
18337 enum machine_mode mode3 = insn_data[icode].operand[3].mode;
18338
18339 if (VECTOR_MODE_P (mode1))
18340 op0 = safe_vector_operand (op0, mode1);
18341 if (VECTOR_MODE_P (mode2))
18342 op1 = safe_vector_operand (op1, mode2);
18343 if (VECTOR_MODE_P (mode3))
18344 op2 = safe_vector_operand (op2, mode3);
18345
18346 if (optimize
18347 || target == 0
18348 || GET_MODE (target) != tmode
18349 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18350 target = gen_reg_rtx (tmode);
18351
18352 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18353 op0 = copy_to_mode_reg (mode1, op0);
18354 if ((optimize && !register_operand (op1, mode2))
18355 || !(*insn_data[icode].operand[2].predicate) (op1, mode2))
18356 op1 = copy_to_mode_reg (mode2, op1);
18357
18358 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
18359 switch (icode)
18360 {
18361 case CODE_FOR_sse4_1_blendvpd:
18362 case CODE_FOR_sse4_1_blendvps:
18363 case CODE_FOR_sse4_1_pblendvb:
18364 op2 = copy_to_mode_reg (mode3, op2);
18365 break;
18366
18367 case CODE_FOR_sse4_1_roundsd:
18368 case CODE_FOR_sse4_1_roundss:
18369 error ("the third argument must be a 4-bit immediate");
18370 return const0_rtx;
18371
18372 default:
18373 error ("the third argument must be an 8-bit immediate");
18374 return const0_rtx;
18375 }
18376
18377 pat = GEN_FCN (icode) (target, op0, op1, op2);
18378 if (! pat)
18379 return 0;
18380 emit_insn (pat);
18381 return target;
18382 }
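
/* Sketch of how this helper is driven (presumably from the bdesc_sse_3arg
   walk in ix86_expand_builtin; illustrative only):

     return ix86_expand_sse_4_operands_builtin (CODE_FOR_sse4_1_roundsd,
						 exp, target);

   For roundsd/roundss the immediate check above additionally restricts the
   third argument to 4 bits.  */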
18383
18384 /* Subroutine of ix86_expand_builtin to take care of crc32 insns. */
18385
18386 static rtx
18387 ix86_expand_crc32 (enum insn_code icode, tree exp, rtx target)
18388 {
18389 rtx pat;
18390 tree arg0 = CALL_EXPR_ARG (exp, 0);
18391 tree arg1 = CALL_EXPR_ARG (exp, 1);
18392 rtx op0 = expand_normal (arg0);
18393 rtx op1 = expand_normal (arg1);
18394 enum machine_mode tmode = insn_data[icode].operand[0].mode;
18395 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
18396 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
18397
18398 if (optimize
18399 || !target
18400 || GET_MODE (target) != tmode
18401 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18402 target = gen_reg_rtx (tmode);
18403
18404 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
18405 op0 = copy_to_mode_reg (mode0, op0);
18406 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
18407 {
18408 op1 = copy_to_reg (op1);
18409 op1 = simplify_gen_subreg (mode1, op1, GET_MODE (op1), 0);
18410 }
18411
18412 pat = GEN_FCN (icode) (target, op0, op1);
18413 if (! pat)
18414 return 0;
18415 emit_insn (pat);
18416 return target;
18417 }
18418
18419 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
18420
18421 static rtx
18422 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
18423 {
18424 rtx pat, xops[3];
18425 tree arg0 = CALL_EXPR_ARG (exp, 0);
18426 tree arg1 = CALL_EXPR_ARG (exp, 1);
18427 rtx op0 = expand_normal (arg0);
18428 rtx op1 = expand_normal (arg1);
18429 enum machine_mode tmode = insn_data[icode].operand[0].mode;
18430 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
18431 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
18432
18433 if (VECTOR_MODE_P (mode0))
18434 op0 = safe_vector_operand (op0, mode0);
18435 if (VECTOR_MODE_P (mode1))
18436 op1 = safe_vector_operand (op1, mode1);
18437
18438 if (optimize || !target
18439 || GET_MODE (target) != tmode
18440 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18441 target = gen_reg_rtx (tmode);
18442
18443 if (GET_MODE (op1) == SImode && mode1 == TImode)
18444 {
18445 rtx x = gen_reg_rtx (V4SImode);
18446 emit_insn (gen_sse2_loadd (x, op1));
18447 op1 = gen_lowpart (TImode, x);
18448 }
18449
18450 /* The insn must want input operands in the same modes as the
18451 result. */
18452 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
18453 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
18454
18455 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
18456 op0 = copy_to_mode_reg (mode0, op0);
18457 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
18458 op1 = copy_to_mode_reg (mode1, op1);
18459
18460 /* ??? Using ix86_fixup_binary_operands is problematic when
18461 we've got mismatched modes. Fake it. */
18462
18463 xops[0] = target;
18464 xops[1] = op0;
18465 xops[2] = op1;
18466
18467 if (tmode == mode0 && tmode == mode1)
18468 {
18469 target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
18470 op0 = xops[1];
18471 op1 = xops[2];
18472 }
18473 else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
18474 {
18475 op0 = force_reg (mode0, op0);
18476 op1 = force_reg (mode1, op1);
18477 target = gen_reg_rtx (tmode);
18478 }
18479
18480 pat = GEN_FCN (icode) (target, op0, op1);
18481 if (! pat)
18482 return 0;
18483 emit_insn (pat);
18484 return target;
18485 }
18486
18487 /* Subroutine of ix86_expand_builtin to take care of stores. */
18488
18489 static rtx
18490 ix86_expand_store_builtin (enum insn_code icode, tree exp)
18491 {
18492 rtx pat;
18493 tree arg0 = CALL_EXPR_ARG (exp, 0);
18494 tree arg1 = CALL_EXPR_ARG (exp, 1);
18495 rtx op0 = expand_normal (arg0);
18496 rtx op1 = expand_normal (arg1);
18497 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
18498 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
18499
18500 if (VECTOR_MODE_P (mode1))
18501 op1 = safe_vector_operand (op1, mode1);
18502
18503 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
18504 op1 = copy_to_mode_reg (mode1, op1);
18505
18506 pat = GEN_FCN (icode) (op0, op1);
18507 if (pat)
18508 emit_insn (pat);
18509 return 0;
18510 }
18511
18512 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
18513
18514 static rtx
18515 ix86_expand_unop_builtin (enum insn_code icode, tree exp,
18516 rtx target, int do_load)
18517 {
18518 rtx pat;
18519 tree arg0 = CALL_EXPR_ARG (exp, 0);
18520 rtx op0 = expand_normal (arg0);
18521 enum machine_mode tmode = insn_data[icode].operand[0].mode;
18522 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
18523
18524 if (optimize || !target
18525 || GET_MODE (target) != tmode
18526 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18527 target = gen_reg_rtx (tmode);
18528 if (do_load)
18529 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
18530 else
18531 {
18532 if (VECTOR_MODE_P (mode0))
18533 op0 = safe_vector_operand (op0, mode0);
18534
18535 if ((optimize && !register_operand (op0, mode0))
18536 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
18537 op0 = copy_to_mode_reg (mode0, op0);
18538 }
18539
18540 switch (icode)
18541 {
18542 case CODE_FOR_sse4_1_roundpd:
18543 case CODE_FOR_sse4_1_roundps:
18544 {
18545 tree arg1 = CALL_EXPR_ARG (exp, 1);
18546 rtx op1 = expand_normal (arg1);
18547 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
18548
18549 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
18550 {
18551 error ("the second argument must be a 4-bit immediate");
18552 return const0_rtx;
18553 }
18554 pat = GEN_FCN (icode) (target, op0, op1);
18555 }
18556 break;
18557 default:
18558 pat = GEN_FCN (icode) (target, op0);
18559 break;
18560 }
18561
18562 if (! pat)
18563 return 0;
18564 emit_insn (pat);
18565 return target;
18566 }
18567
18568 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
18569 sqrtss, rsqrtss, rcpss. */
18570
18571 static rtx
18572 ix86_expand_unop1_builtin (enum insn_code icode, tree exp, rtx target)
18573 {
18574 rtx pat;
18575 tree arg0 = CALL_EXPR_ARG (exp, 0);
18576 rtx op1, op0 = expand_normal (arg0);
18577 enum machine_mode tmode = insn_data[icode].operand[0].mode;
18578 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
18579
18580 if (optimize || !target
18581 || GET_MODE (target) != tmode
18582 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18583 target = gen_reg_rtx (tmode);
18584
18585 if (VECTOR_MODE_P (mode0))
18586 op0 = safe_vector_operand (op0, mode0);
18587
18588 if ((optimize && !register_operand (op0, mode0))
18589 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
18590 op0 = copy_to_mode_reg (mode0, op0);
18591
18592 op1 = op0;
18593 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
18594 op1 = copy_to_mode_reg (mode0, op1);
18595
18596 pat = GEN_FCN (icode) (target, op0, op1);
18597 if (! pat)
18598 return 0;
18599 emit_insn (pat);
18600 return target;
18601 }
18602
18603 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
18604
18605 static rtx
18606 ix86_expand_sse_compare (const struct builtin_description *d, tree exp,
18607 rtx target)
18608 {
18609 rtx pat;
18610 tree arg0 = CALL_EXPR_ARG (exp, 0);
18611 tree arg1 = CALL_EXPR_ARG (exp, 1);
18612 rtx op0 = expand_normal (arg0);
18613 rtx op1 = expand_normal (arg1);
18614 rtx op2;
18615 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
18616 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
18617 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
18618 enum rtx_code comparison = d->comparison;
18619
18620 if (VECTOR_MODE_P (mode0))
18621 op0 = safe_vector_operand (op0, mode0);
18622 if (VECTOR_MODE_P (mode1))
18623 op1 = safe_vector_operand (op1, mode1);
18624
18625 /* Swap operands if we have a comparison that isn't available in
18626 hardware. */
18627 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
18628 {
18629 rtx tmp = gen_reg_rtx (mode1);
18630 emit_move_insn (tmp, op1);
18631 op1 = op0;
18632 op0 = tmp;
18633 }
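
  /* E.g. (sketch) a "greater than" builtin, which has no direct SSE compare
     encoding, is described in its table entry with the LT rtx code plus
     BUILTIN_DESC_SWAP_OPERANDS, so the swap above turns "a > b" into
     "b < a".  */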
18634
18635 if (optimize || !target
18636 || GET_MODE (target) != tmode
18637 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
18638 target = gen_reg_rtx (tmode);
18639
18640 if ((optimize && !register_operand (op0, mode0))
18641 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
18642 op0 = copy_to_mode_reg (mode0, op0);
18643 if ((optimize && !register_operand (op1, mode1))
18644 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
18645 op1 = copy_to_mode_reg (mode1, op1);
18646
18647 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
18648 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
18649 if (! pat)
18650 return 0;
18651 emit_insn (pat);
18652 return target;
18653 }
18654
18655 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
18656
18657 static rtx
18658 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
18659 rtx target)
18660 {
18661 rtx pat;
18662 tree arg0 = CALL_EXPR_ARG (exp, 0);
18663 tree arg1 = CALL_EXPR_ARG (exp, 1);
18664 rtx op0 = expand_normal (arg0);
18665 rtx op1 = expand_normal (arg1);
18666 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
18667 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
18668 enum rtx_code comparison = d->comparison;
18669
18670 if (VECTOR_MODE_P (mode0))
18671 op0 = safe_vector_operand (op0, mode0);
18672 if (VECTOR_MODE_P (mode1))
18673 op1 = safe_vector_operand (op1, mode1);
18674
18675 /* Swap operands if we have a comparison that isn't available in
18676 hardware. */
18677 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
18678 {
18679 rtx tmp = op1;
18680 op1 = op0;
18681 op0 = tmp;
18682 }
18683
18684 target = gen_reg_rtx (SImode);
18685 emit_move_insn (target, const0_rtx);
18686 target = gen_rtx_SUBREG (QImode, target, 0);
18687
18688 if ((optimize && !register_operand (op0, mode0))
18689 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
18690 op0 = copy_to_mode_reg (mode0, op0);
18691 if ((optimize && !register_operand (op1, mode1))
18692 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
18693 op1 = copy_to_mode_reg (mode1, op1);
18694
18695 pat = GEN_FCN (d->icode) (op0, op1);
18696 if (! pat)
18697 return 0;
18698 emit_insn (pat);
18699 emit_insn (gen_rtx_SET (VOIDmode,
18700 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
18701 gen_rtx_fmt_ee (comparison, QImode,
18702 SET_DEST (pat),
18703 const0_rtx)));
18704
18705 return SUBREG_REG (target);
18706 }
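/* Illustrative sketch (not part of the original source): how a comi builtin
   reaches ix86_expand_sse_comi above.  The intrinsic name below is quoted
   from xmmintrin.h from memory and should be treated as an assumption.

     #include <xmmintrin.h>

     int
     scalar_equal (__m128 a, __m128 b)
     {
       return _mm_comieq_ss (a, b);
     }

   The expander above emits the flag-setting comparison and then materializes
   the result into the QImode low part of an SImode pseudo, which is what the
   SUBREG_REG return hands back to the caller.  */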
18707
18708 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
18709
18710 static rtx
18711 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
18712 rtx target)
18713 {
18714 rtx pat;
18715 tree arg0 = CALL_EXPR_ARG (exp, 0);
18716 tree arg1 = CALL_EXPR_ARG (exp, 1);
18717 rtx op0 = expand_normal (arg0);
18718 rtx op1 = expand_normal (arg1);
18719 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
18720 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
18721 enum rtx_code comparison = d->comparison;
18722
18723 if (VECTOR_MODE_P (mode0))
18724 op0 = safe_vector_operand (op0, mode0);
18725 if (VECTOR_MODE_P (mode1))
18726 op1 = safe_vector_operand (op1, mode1);
18727
18728 target = gen_reg_rtx (SImode);
18729 emit_move_insn (target, const0_rtx);
18730 target = gen_rtx_SUBREG (QImode, target, 0);
18731
18732 if ((optimize && !register_operand (op0, mode0))
18733 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
18734 op0 = copy_to_mode_reg (mode0, op0);
18735 if ((optimize && !register_operand (op1, mode1))
18736 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
18737 op1 = copy_to_mode_reg (mode1, op1);
18738
18739 pat = GEN_FCN (d->icode) (op0, op1);
18740 if (! pat)
18741 return 0;
18742 emit_insn (pat);
18743 emit_insn (gen_rtx_SET (VOIDmode,
18744 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
18745 gen_rtx_fmt_ee (comparison, QImode,
18746 SET_DEST (pat),
18747 const0_rtx)));
18748
18749 return SUBREG_REG (target);
18750 }
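/* Illustrative sketch (not part of the original source): a ptest builtin as
   it reaches ix86_expand_sse_ptest above.  The SSE4.1 intrinsic name below is
   an assumption quoted from smmintrin.h from memory.

     #include <smmintrin.h>

     int
     all_masked_bits_zero (__m128i mask, __m128i v)
     {
       return _mm_testz_si128 (mask, v);
     }

   The expander emits the ptest pattern and converts the resulting flags into
   a 0/1 integer in the same way the comi expander does.  */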
18751
18752 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
18753
18754 static rtx
18755 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
18756 tree exp, rtx target)
18757 {
18758 rtx pat;
18759 tree arg0 = CALL_EXPR_ARG (exp, 0);
18760 tree arg1 = CALL_EXPR_ARG (exp, 1);
18761 tree arg2 = CALL_EXPR_ARG (exp, 2);
18762 tree arg3 = CALL_EXPR_ARG (exp, 3);
18763 tree arg4 = CALL_EXPR_ARG (exp, 4);
18764 rtx scratch0, scratch1;
18765 rtx op0 = expand_normal (arg0);
18766 rtx op1 = expand_normal (arg1);
18767 rtx op2 = expand_normal (arg2);
18768 rtx op3 = expand_normal (arg3);
18769 rtx op4 = expand_normal (arg4);
18770 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
18771
18772 tmode0 = insn_data[d->icode].operand[0].mode;
18773 tmode1 = insn_data[d->icode].operand[1].mode;
18774 modev2 = insn_data[d->icode].operand[2].mode;
18775 modei3 = insn_data[d->icode].operand[3].mode;
18776 modev4 = insn_data[d->icode].operand[4].mode;
18777 modei5 = insn_data[d->icode].operand[5].mode;
18778 modeimm = insn_data[d->icode].operand[6].mode;
18779
18780 if (VECTOR_MODE_P (modev2))
18781 op0 = safe_vector_operand (op0, modev2);
18782 if (VECTOR_MODE_P (modev4))
18783 op2 = safe_vector_operand (op2, modev4);
18784
18785 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
18786 op0 = copy_to_mode_reg (modev2, op0);
18787 if (! (*insn_data[d->icode].operand[3].predicate) (op1, modei3))
18788 op1 = copy_to_mode_reg (modei3, op1);
18789 if ((optimize && !register_operand (op2, modev4))
18790 || !(*insn_data[d->icode].operand[4].predicate) (op2, modev4))
18791 op2 = copy_to_mode_reg (modev4, op2);
18792 if (! (*insn_data[d->icode].operand[5].predicate) (op3, modei5))
18793 op3 = copy_to_mode_reg (modei5, op3);
18794
18795 if (! (*insn_data[d->icode].operand[6].predicate) (op4, modeimm))
18796 {
18797 error ("the fifth argument must be an 8-bit immediate");
18798 return const0_rtx;
18799 }
18800
18801 if (d->code == IX86_BUILTIN_PCMPESTRI128)
18802 {
18803 if (optimize || !target
18804 || GET_MODE (target) != tmode0
18805 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
18806 target = gen_reg_rtx (tmode0);
18807
18808 scratch1 = gen_reg_rtx (tmode1);
18809
18810 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
18811 }
18812 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
18813 {
18814 if (optimize || !target
18815 || GET_MODE (target) != tmode1
18816 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
18817 target = gen_reg_rtx (tmode1);
18818
18819 scratch0 = gen_reg_rtx (tmode0);
18820
18821 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
18822 }
18823 else
18824 {
18825 gcc_assert (d->flag);
18826
18827 scratch0 = gen_reg_rtx (tmode0);
18828 scratch1 = gen_reg_rtx (tmode1);
18829
18830 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
18831 }
18832
18833 if (! pat)
18834 return 0;
18835
18836 emit_insn (pat);
18837
18838 if (d->flag)
18839 {
18840 target = gen_reg_rtx (SImode);
18841 emit_move_insn (target, const0_rtx);
18842 target = gen_rtx_SUBREG (QImode, target, 0);
18843
18844 emit_insn
18845 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
18846 gen_rtx_fmt_ee (EQ, QImode,
18847 gen_rtx_REG ((enum machine_mode) d->flag,
18848 FLAGS_REG),
18849 const0_rtx)));
18850 return SUBREG_REG (target);
18851 }
18852 else
18853 return target;
18854 }
18855
18856
18857 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
18858
18859 static rtx
18860 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
18861 tree exp, rtx target)
18862 {
18863 rtx pat;
18864 tree arg0 = CALL_EXPR_ARG (exp, 0);
18865 tree arg1 = CALL_EXPR_ARG (exp, 1);
18866 tree arg2 = CALL_EXPR_ARG (exp, 2);
18867 rtx scratch0, scratch1;
18868 rtx op0 = expand_normal (arg0);
18869 rtx op1 = expand_normal (arg1);
18870 rtx op2 = expand_normal (arg2);
18871 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
18872
18873 tmode0 = insn_data[d->icode].operand[0].mode;
18874 tmode1 = insn_data[d->icode].operand[1].mode;
18875 modev2 = insn_data[d->icode].operand[2].mode;
18876 modev3 = insn_data[d->icode].operand[3].mode;
18877 modeimm = insn_data[d->icode].operand[4].mode;
18878
18879 if (VECTOR_MODE_P (modev2))
18880 op0 = safe_vector_operand (op0, modev2);
18881 if (VECTOR_MODE_P (modev3))
18882 op1 = safe_vector_operand (op1, modev3);
18883
18884 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
18885 op0 = copy_to_mode_reg (modev2, op0);
18886 if ((optimize && !register_operand (op1, modev3))
18887 || !(*insn_data[d->icode].operand[3].predicate) (op1, modev3))
18888 op1 = copy_to_mode_reg (modev3, op1);
18889
18890 if (! (*insn_data[d->icode].operand[4].predicate) (op2, modeimm))
18891 {
18892 error ("the third argument must be an 8-bit immediate");
18893 return const0_rtx;
18894 }
18895
18896 if (d->code == IX86_BUILTIN_PCMPISTRI128)
18897 {
18898 if (optimize || !target
18899 || GET_MODE (target) != tmode0
18900 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
18901 target = gen_reg_rtx (tmode0);
18902
18903 scratch1 = gen_reg_rtx (tmode1);
18904
18905 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
18906 }
18907 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
18908 {
18909 if (optimize || !target
18910 || GET_MODE (target) != tmode1
18911 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
18912 target = gen_reg_rtx (tmode1);
18913
18914 scratch0 = gen_reg_rtx (tmode0);
18915
18916 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
18917 }
18918 else
18919 {
18920 gcc_assert (d->flag);
18921
18922 scratch0 = gen_reg_rtx (tmode0);
18923 scratch1 = gen_reg_rtx (tmode1);
18924
18925 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
18926 }
18927
18928 if (! pat)
18929 return 0;
18930
18931 emit_insn (pat);
18932
18933 if (d->flag)
18934 {
18935 target = gen_reg_rtx (SImode);
18936 emit_move_insn (target, const0_rtx);
18937 target = gen_rtx_SUBREG (QImode, target, 0);
18938
18939 emit_insn
18940 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
18941 gen_rtx_fmt_ee (EQ, QImode,
18942 gen_rtx_REG ((enum machine_mode) d->flag,
18943 FLAGS_REG),
18944 const0_rtx)));
18945 return SUBREG_REG (target);
18946 }
18947 else
18948 return target;
18949 }
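/* Illustrative sketch (not part of the original source): a pcmpistr builtin
   as it reaches ix86_expand_sse_pcmpistr above.  The intrinsic and _SIDD_*
   macro names are assumptions quoted from the SSE4.2 header from memory.

     #include <nmmintrin.h>

     int
     find_first_equal_byte (__m128i a, __m128i b)
     {
       return _mm_cmpistri (a, b, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_EACH);
     }

   The third argument must be a compile-time constant; a non-constant value
   trips the "must be an 8-bit immediate" diagnostic above.  */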
18950
18951 /* Return the integer constant in ARG. Constrain it to be in the range
18952 of the subparts of VEC_TYPE; issue an error if not. */
18953
18954 static int
18955 get_element_number (tree vec_type, tree arg)
18956 {
18957 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
18958
18959 if (!host_integerp (arg, 1)
18960 || (elt = tree_low_cst (arg, 1), elt > max))
18961 {
18962 error ("selector must be an integer constant in the range 0..%wi", max);
18963 return 0;
18964 }
18965
18966 return elt;
18967 }
18968
18969 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
18970 ix86_expand_vector_init. We DO have language-level syntax for this, in
18971 the form of (type){ init-list }. Except that since we can't place emms
18972 instructions from inside the compiler, we can't allow the use of MMX
18973 registers unless the user explicitly asks for it. So we do *not* define
18974 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
18975 we have builtins invoked by mmintrin.h that give us license to emit
18976 these sorts of instructions. */
18977
18978 static rtx
18979 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
18980 {
18981 enum machine_mode tmode = TYPE_MODE (type);
18982 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
18983 int i, n_elt = GET_MODE_NUNITS (tmode);
18984 rtvec v = rtvec_alloc (n_elt);
18985
18986 gcc_assert (VECTOR_MODE_P (tmode));
18987 gcc_assert (call_expr_nargs (exp) == n_elt);
18988
18989 for (i = 0; i < n_elt; ++i)
18990 {
18991 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
18992 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
18993 }
18994
18995 if (!target || !register_operand (target, tmode))
18996 target = gen_reg_rtx (tmode);
18997
18998 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
18999 return target;
19000 }
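/* Illustrative sketch (not part of the original source): the user-level path
   into ix86_expand_vec_init_builtin.  The intrinsic and builtin names are
   assumptions quoted from mmintrin.h from memory.

     #include <mmintrin.h>

     __m64
     make_pair (int lo, int hi)
     {
       return _mm_set_pi32 (hi, lo);
     }

   _mm_set_pi32 wraps __builtin_ia32_vec_init_v2si.  Because a builtin is used
   rather than a (type){ init-list } constructor, MMX registers only come into
   play when the user includes the MMX header and thereby takes responsibility
   for issuing emms.  */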
19001
19002 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
19003 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
19004 had a language-level syntax for referencing vector elements. */
19005
19006 static rtx
19007 ix86_expand_vec_ext_builtin (tree exp, rtx target)
19008 {
19009 enum machine_mode tmode, mode0;
19010 tree arg0, arg1;
19011 int elt;
19012 rtx op0;
19013
19014 arg0 = CALL_EXPR_ARG (exp, 0);
19015 arg1 = CALL_EXPR_ARG (exp, 1);
19016
19017 op0 = expand_normal (arg0);
19018 elt = get_element_number (TREE_TYPE (arg0), arg1);
19019
19020 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
19021 mode0 = TYPE_MODE (TREE_TYPE (arg0));
19022 gcc_assert (VECTOR_MODE_P (mode0));
19023
19024 op0 = force_reg (mode0, op0);
19025
19026 if (optimize || !target || !register_operand (target, tmode))
19027 target = gen_reg_rtx (tmode);
19028
19029 ix86_expand_vector_extract (true, target, op0, elt);
19030
19031 return target;
19032 }
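/* Illustrative sketch (not part of the original source): the user-level path
   into ix86_expand_vec_ext_builtin.  The intrinsic name is an assumption
   quoted from emmintrin.h from memory.

     #include <emmintrin.h>

     int
     third_halfword (__m128i v)
     {
       return _mm_extract_epi16 (v, 2);
     }

   The selector is checked by get_element_number above: for a V8HI vector it
   must be a constant in the range 0..7, otherwise the "selector must be an
   integer constant" error is emitted and element 0 is used.  */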
19033
19034 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
19035 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
19036 a language-level syntax for referencing vector elements. */
19037
19038 static rtx
19039 ix86_expand_vec_set_builtin (tree exp)
19040 {
19041 enum machine_mode tmode, mode1;
19042 tree arg0, arg1, arg2;
19043 int elt;
19044 rtx op0, op1, target;
19045
19046 arg0 = CALL_EXPR_ARG (exp, 0);
19047 arg1 = CALL_EXPR_ARG (exp, 1);
19048 arg2 = CALL_EXPR_ARG (exp, 2);
19049
19050 tmode = TYPE_MODE (TREE_TYPE (arg0));
19051 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
19052 gcc_assert (VECTOR_MODE_P (tmode));
19053
19054 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
19055 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
19056 elt = get_element_number (TREE_TYPE (arg0), arg2);
19057
19058 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
19059 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
19060
19061 op0 = force_reg (tmode, op0);
19062 op1 = force_reg (mode1, op1);
19063
19064 /* OP0 is the source of these builtin functions and shouldn't be
19065 modified. Create a copy, use it and return it as target. */
19066 target = gen_reg_rtx (tmode);
19067 emit_move_insn (target, op0);
19068 ix86_expand_vector_set (true, target, op1, elt);
19069
19070 return target;
19071 }
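/* Illustrative sketch (not part of the original source): the user-level path
   into ix86_expand_vec_set_builtin.  The intrinsic name is an assumption
   quoted from emmintrin.h from memory.

     #include <emmintrin.h>

     __m128i
     replace_third_halfword (__m128i v, int x)
     {
       return _mm_insert_epi16 (v, x, 2);
     }

   As the comment in the expander notes, the input vector itself is never
   modified: the builtin copies it into a fresh register, sets the element
   there and returns the copy.  */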
19072
19073 /* Expand an expression EXP that calls a built-in function,
19074 with result going to TARGET if that's convenient
19075 (and in mode MODE if that's convenient).
19076 SUBTARGET may be used as the target for computing one of EXP's operands.
19077 IGNORE is nonzero if the value is to be ignored. */
19078
19079 static rtx
19080 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
19081 enum machine_mode mode ATTRIBUTE_UNUSED,
19082 int ignore ATTRIBUTE_UNUSED)
19083 {
19084 const struct builtin_description *d;
19085 size_t i;
19086 enum insn_code icode;
19087 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
19088 tree arg0, arg1, arg2, arg3;
19089 rtx op0, op1, op2, op3, pat;
19090 enum machine_mode tmode, mode0, mode1, mode2, mode3, mode4;
19091 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
19092
19093 switch (fcode)
19094 {
19095 case IX86_BUILTIN_EMMS:
19096 emit_insn (gen_mmx_emms ());
19097 return 0;
19098
19099 case IX86_BUILTIN_SFENCE:
19100 emit_insn (gen_sse_sfence ());
19101 return 0;
19102
19103 case IX86_BUILTIN_MASKMOVQ:
19104 case IX86_BUILTIN_MASKMOVDQU:
19105 icode = (fcode == IX86_BUILTIN_MASKMOVQ
19106 ? CODE_FOR_mmx_maskmovq
19107 : CODE_FOR_sse2_maskmovdqu);
19108 /* Note the arg order is different from the operand order. */
19109 arg1 = CALL_EXPR_ARG (exp, 0);
19110 arg2 = CALL_EXPR_ARG (exp, 1);
19111 arg0 = CALL_EXPR_ARG (exp, 2);
19112 op0 = expand_normal (arg0);
19113 op1 = expand_normal (arg1);
19114 op2 = expand_normal (arg2);
19115 mode0 = insn_data[icode].operand[0].mode;
19116 mode1 = insn_data[icode].operand[1].mode;
19117 mode2 = insn_data[icode].operand[2].mode;
19118
19119 op0 = force_reg (Pmode, op0);
19120 op0 = gen_rtx_MEM (mode1, op0);
19121
19122 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
19123 op0 = copy_to_mode_reg (mode0, op0);
19124 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
19125 op1 = copy_to_mode_reg (mode1, op1);
19126 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
19127 op2 = copy_to_mode_reg (mode2, op2);
19128 pat = GEN_FCN (icode) (op0, op1, op2);
19129 if (! pat)
19130 return 0;
19131 emit_insn (pat);
19132 return 0;
19133
19134 case IX86_BUILTIN_SQRTSS:
19135 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, exp, target);
19136 case IX86_BUILTIN_RSQRTSS:
19137 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, exp, target);
19138 case IX86_BUILTIN_RCPSS:
19139 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, exp, target);
19140
19141 case IX86_BUILTIN_LOADUPS:
19142 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, exp, target, 1);
19143
19144 case IX86_BUILTIN_STOREUPS:
19145 return ix86_expand_store_builtin (CODE_FOR_sse_movups, exp);
19146
19147 case IX86_BUILTIN_LOADHPS:
19148 case IX86_BUILTIN_LOADLPS:
19149 case IX86_BUILTIN_LOADHPD:
19150 case IX86_BUILTIN_LOADLPD:
19151 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
19152 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
19153 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
19154 : CODE_FOR_sse2_loadlpd);
19155 arg0 = CALL_EXPR_ARG (exp, 0);
19156 arg1 = CALL_EXPR_ARG (exp, 1);
19157 op0 = expand_normal (arg0);
19158 op1 = expand_normal (arg1);
19159 tmode = insn_data[icode].operand[0].mode;
19160 mode0 = insn_data[icode].operand[1].mode;
19161 mode1 = insn_data[icode].operand[2].mode;
19162
19163 op0 = force_reg (mode0, op0);
19164 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
19165 if (optimize || target == 0
19166 || GET_MODE (target) != tmode
19167 || !register_operand (target, tmode))
19168 target = gen_reg_rtx (tmode);
19169 pat = GEN_FCN (icode) (target, op0, op1);
19170 if (! pat)
19171 return 0;
19172 emit_insn (pat);
19173 return target;
19174
19175 case IX86_BUILTIN_STOREHPS:
19176 case IX86_BUILTIN_STORELPS:
19177 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
19178 : CODE_FOR_sse_storelps);
19179 arg0 = CALL_EXPR_ARG (exp, 0);
19180 arg1 = CALL_EXPR_ARG (exp, 1);
19181 op0 = expand_normal (arg0);
19182 op1 = expand_normal (arg1);
19183 mode0 = insn_data[icode].operand[0].mode;
19184 mode1 = insn_data[icode].operand[1].mode;
19185
19186 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
19187 op1 = force_reg (mode1, op1);
19188
19189 pat = GEN_FCN (icode) (op0, op1);
19190 if (! pat)
19191 return 0;
19192 emit_insn (pat);
19193 return const0_rtx;
19194
19195 case IX86_BUILTIN_MOVNTPS:
19196 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, exp);
19197 case IX86_BUILTIN_MOVNTQ:
19198 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, exp);
19199
19200 case IX86_BUILTIN_LDMXCSR:
19201 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
19202 target = assign_386_stack_local (SImode, SLOT_TEMP);
19203 emit_move_insn (target, op0);
19204 emit_insn (gen_sse_ldmxcsr (target));
19205 return 0;
19206
19207 case IX86_BUILTIN_STMXCSR:
19208 target = assign_386_stack_local (SImode, SLOT_TEMP);
19209 emit_insn (gen_sse_stmxcsr (target));
19210 return copy_to_mode_reg (SImode, target);
19211
19212 case IX86_BUILTIN_SHUFPS:
19213 case IX86_BUILTIN_SHUFPD:
19214 icode = (fcode == IX86_BUILTIN_SHUFPS
19215 ? CODE_FOR_sse_shufps
19216 : CODE_FOR_sse2_shufpd);
19217 arg0 = CALL_EXPR_ARG (exp, 0);
19218 arg1 = CALL_EXPR_ARG (exp, 1);
19219 arg2 = CALL_EXPR_ARG (exp, 2);
19220 op0 = expand_normal (arg0);
19221 op1 = expand_normal (arg1);
19222 op2 = expand_normal (arg2);
19223 tmode = insn_data[icode].operand[0].mode;
19224 mode0 = insn_data[icode].operand[1].mode;
19225 mode1 = insn_data[icode].operand[2].mode;
19226 mode2 = insn_data[icode].operand[3].mode;
19227
19228 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
19229 op0 = copy_to_mode_reg (mode0, op0);
19230 if ((optimize && !register_operand (op1, mode1))
19231 || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
19232 op1 = copy_to_mode_reg (mode1, op1);
19233 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
19234 {
19235 /* @@@ better error message */
19236 error ("mask must be an immediate");
19237 return gen_reg_rtx (tmode);
19238 }
19239 if (optimize || target == 0
19240 || GET_MODE (target) != tmode
19241 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19242 target = gen_reg_rtx (tmode);
19243 pat = GEN_FCN (icode) (target, op0, op1, op2);
19244 if (! pat)
19245 return 0;
19246 emit_insn (pat);
19247 return target;
19248
19249 case IX86_BUILTIN_PSHUFW:
19250 case IX86_BUILTIN_PSHUFD:
19251 case IX86_BUILTIN_PSHUFHW:
19252 case IX86_BUILTIN_PSHUFLW:
19253 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
19254 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
19255 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
19256 : CODE_FOR_mmx_pshufw);
19257 arg0 = CALL_EXPR_ARG (exp, 0);
19258 arg1 = CALL_EXPR_ARG (exp, 1);
19259 op0 = expand_normal (arg0);
19260 op1 = expand_normal (arg1);
19261 tmode = insn_data[icode].operand[0].mode;
19262 mode1 = insn_data[icode].operand[1].mode;
19263 mode2 = insn_data[icode].operand[2].mode;
19264
19265 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19266 op0 = copy_to_mode_reg (mode1, op0);
19267 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
19268 {
19269 /* @@@ better error message */
19270 error ("mask must be an immediate");
19271 return const0_rtx;
19272 }
19273 if (target == 0
19274 || GET_MODE (target) != tmode
19275 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19276 target = gen_reg_rtx (tmode);
19277 pat = GEN_FCN (icode) (target, op0, op1);
19278 if (! pat)
19279 return 0;
19280 emit_insn (pat);
19281 return target;
19282
19283 case IX86_BUILTIN_PSLLWI128:
19284 icode = CODE_FOR_ashlv8hi3;
19285 goto do_pshifti;
19286 case IX86_BUILTIN_PSLLDI128:
19287 icode = CODE_FOR_ashlv4si3;
19288 goto do_pshifti;
19289 case IX86_BUILTIN_PSLLQI128:
19290 icode = CODE_FOR_ashlv2di3;
19291 goto do_pshifti;
19292 case IX86_BUILTIN_PSRAWI128:
19293 icode = CODE_FOR_ashrv8hi3;
19294 goto do_pshifti;
19295 case IX86_BUILTIN_PSRADI128:
19296 icode = CODE_FOR_ashrv4si3;
19297 goto do_pshifti;
19298 case IX86_BUILTIN_PSRLWI128:
19299 icode = CODE_FOR_lshrv8hi3;
19300 goto do_pshifti;
19301 case IX86_BUILTIN_PSRLDI128:
19302 icode = CODE_FOR_lshrv4si3;
19303 goto do_pshifti;
19304 case IX86_BUILTIN_PSRLQI128:
19305 icode = CODE_FOR_lshrv2di3;
19306 goto do_pshifti;
19307 do_pshifti:
19308 arg0 = CALL_EXPR_ARG (exp, 0);
19309 arg1 = CALL_EXPR_ARG (exp, 1);
19310 op0 = expand_normal (arg0);
19311 op1 = expand_normal (arg1);
19312
19313 if (!CONST_INT_P (op1))
19314 {
19315 error ("shift must be an immediate");
19316 return const0_rtx;
19317 }
19318 if (INTVAL (op1) < 0 || INTVAL (op1) > 255)
19319 op1 = GEN_INT (255);
19320
19321 tmode = insn_data[icode].operand[0].mode;
19322 mode1 = insn_data[icode].operand[1].mode;
19323 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19324 op0 = copy_to_reg (op0);
19325
19326 target = gen_reg_rtx (tmode);
19327 pat = GEN_FCN (icode) (target, op0, op1);
19328 if (!pat)
19329 return 0;
19330 emit_insn (pat);
19331 return target;
19332
19333 case IX86_BUILTIN_PSLLW128:
19334 icode = CODE_FOR_ashlv8hi3;
19335 goto do_pshift;
19336 case IX86_BUILTIN_PSLLD128:
19337 icode = CODE_FOR_ashlv4si3;
19338 goto do_pshift;
19339 case IX86_BUILTIN_PSLLQ128:
19340 icode = CODE_FOR_ashlv2di3;
19341 goto do_pshift;
19342 case IX86_BUILTIN_PSRAW128:
19343 icode = CODE_FOR_ashrv8hi3;
19344 goto do_pshift;
19345 case IX86_BUILTIN_PSRAD128:
19346 icode = CODE_FOR_ashrv4si3;
19347 goto do_pshift;
19348 case IX86_BUILTIN_PSRLW128:
19349 icode = CODE_FOR_lshrv8hi3;
19350 goto do_pshift;
19351 case IX86_BUILTIN_PSRLD128:
19352 icode = CODE_FOR_lshrv4si3;
19353 goto do_pshift;
19354 case IX86_BUILTIN_PSRLQ128:
19355 icode = CODE_FOR_lshrv2di3;
19356 goto do_pshift;
19357 do_pshift:
19358 arg0 = CALL_EXPR_ARG (exp, 0);
19359 arg1 = CALL_EXPR_ARG (exp, 1);
19360 op0 = expand_normal (arg0);
19361 op1 = expand_normal (arg1);
19362
19363 tmode = insn_data[icode].operand[0].mode;
19364 mode1 = insn_data[icode].operand[1].mode;
19365
19366 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19367 op0 = copy_to_reg (op0);
19368
19369 op1 = simplify_gen_subreg (TImode, op1, GET_MODE (op1), 0);
19370 if (! (*insn_data[icode].operand[2].predicate) (op1, TImode))
19371 op1 = copy_to_reg (op1);
19372
19373 target = gen_reg_rtx (tmode);
19374 pat = GEN_FCN (icode) (target, op0, op1);
19375 if (!pat)
19376 return 0;
19377 emit_insn (pat);
19378 return target;
19379
19380 case IX86_BUILTIN_PSLLDQI128:
19381 case IX86_BUILTIN_PSRLDQI128:
19382 icode = (fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
19383 : CODE_FOR_sse2_lshrti3);
19384 arg0 = CALL_EXPR_ARG (exp, 0);
19385 arg1 = CALL_EXPR_ARG (exp, 1);
19386 op0 = expand_normal (arg0);
19387 op1 = expand_normal (arg1);
19388 tmode = insn_data[icode].operand[0].mode;
19389 mode1 = insn_data[icode].operand[1].mode;
19390 mode2 = insn_data[icode].operand[2].mode;
19391
19392 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19393 {
19394 op0 = copy_to_reg (op0);
19395 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
19396 }
19397 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
19398 {
19399 error ("shift must be an immediate");
19400 return const0_rtx;
19401 }
19402 target = gen_reg_rtx (V2DImode);
19403 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0),
19404 op0, op1);
19405 if (! pat)
19406 return 0;
19407 emit_insn (pat);
19408 return target;
19409
19410 case IX86_BUILTIN_FEMMS:
19411 emit_insn (gen_mmx_femms ());
19412 return NULL_RTX;
19413
19414 case IX86_BUILTIN_PAVGUSB:
19415 return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, exp, target);
19416
19417 case IX86_BUILTIN_PF2ID:
19418 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, exp, target, 0);
19419
19420 case IX86_BUILTIN_PFACC:
19421 return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, exp, target);
19422
19423 case IX86_BUILTIN_PFADD:
19424 return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, exp, target);
19425
19426 case IX86_BUILTIN_PFCMPEQ:
19427 return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, exp, target);
19428
19429 case IX86_BUILTIN_PFCMPGE:
19430 return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, exp, target);
19431
19432 case IX86_BUILTIN_PFCMPGT:
19433 return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, exp, target);
19434
19435 case IX86_BUILTIN_PFMAX:
19436 return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, exp, target);
19437
19438 case IX86_BUILTIN_PFMIN:
19439 return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, exp, target);
19440
19441 case IX86_BUILTIN_PFMUL:
19442 return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, exp, target);
19443
19444 case IX86_BUILTIN_PFRCP:
19445 return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, exp, target, 0);
19446
19447 case IX86_BUILTIN_PFRCPIT1:
19448 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, exp, target);
19449
19450 case IX86_BUILTIN_PFRCPIT2:
19451 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, exp, target);
19452
19453 case IX86_BUILTIN_PFRSQIT1:
19454 return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, exp, target);
19455
19456 case IX86_BUILTIN_PFRSQRT:
19457 return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, exp, target, 0);
19458
19459 case IX86_BUILTIN_PFSUB:
19460 return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, exp, target);
19461
19462 case IX86_BUILTIN_PFSUBR:
19463 return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, exp, target);
19464
19465 case IX86_BUILTIN_PI2FD:
19466 return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, exp, target, 0);
19467
19468 case IX86_BUILTIN_PMULHRW:
19469 return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, exp, target);
19470
19471 case IX86_BUILTIN_PF2IW:
19472 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, exp, target, 0);
19473
19474 case IX86_BUILTIN_PFNACC:
19475 return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, exp, target);
19476
19477 case IX86_BUILTIN_PFPNACC:
19478 return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, exp, target);
19479
19480 case IX86_BUILTIN_PI2FW:
19481 return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, exp, target, 0);
19482
19483 case IX86_BUILTIN_PSWAPDSI:
19484 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, exp, target, 0);
19485
19486 case IX86_BUILTIN_PSWAPDSF:
19487 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, exp, target, 0);
19488
19489 case IX86_BUILTIN_SQRTSD:
19490 return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, exp, target);
19491 case IX86_BUILTIN_LOADUPD:
19492 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, exp, target, 1);
19493 case IX86_BUILTIN_STOREUPD:
19494 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, exp);
19495
19496 case IX86_BUILTIN_MFENCE:
19497 emit_insn (gen_sse2_mfence ());
19498 return 0;
19499 case IX86_BUILTIN_LFENCE:
19500 emit_insn (gen_sse2_lfence ());
19501 return 0;
19502
19503 case IX86_BUILTIN_CLFLUSH:
19504 arg0 = CALL_EXPR_ARG (exp, 0);
19505 op0 = expand_normal (arg0);
19506 icode = CODE_FOR_sse2_clflush;
19507 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
19508 op0 = copy_to_mode_reg (Pmode, op0);
19509
19510 emit_insn (gen_sse2_clflush (op0));
19511 return 0;
19512
19513 case IX86_BUILTIN_MOVNTPD:
19514 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, exp);
19515 case IX86_BUILTIN_MOVNTDQ:
19516 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, exp);
19517 case IX86_BUILTIN_MOVNTI:
19518 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, exp);
19519
19520 case IX86_BUILTIN_LOADDQU:
19521 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, exp, target, 1);
19522 case IX86_BUILTIN_STOREDQU:
19523 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, exp);
19524
19525 case IX86_BUILTIN_MONITOR:
19526 arg0 = CALL_EXPR_ARG (exp, 0);
19527 arg1 = CALL_EXPR_ARG (exp, 1);
19528 arg2 = CALL_EXPR_ARG (exp, 2);
19529 op0 = expand_normal (arg0);
19530 op1 = expand_normal (arg1);
19531 op2 = expand_normal (arg2);
19532 if (!REG_P (op0))
19533 op0 = copy_to_mode_reg (Pmode, op0);
19534 if (!REG_P (op1))
19535 op1 = copy_to_mode_reg (SImode, op1);
19536 if (!REG_P (op2))
19537 op2 = copy_to_mode_reg (SImode, op2);
19538 if (!TARGET_64BIT)
19539 emit_insn (gen_sse3_monitor (op0, op1, op2));
19540 else
19541 emit_insn (gen_sse3_monitor64 (op0, op1, op2));
19542 return 0;
19543
19544 case IX86_BUILTIN_MWAIT:
19545 arg0 = CALL_EXPR_ARG (exp, 0);
19546 arg1 = CALL_EXPR_ARG (exp, 1);
19547 op0 = expand_normal (arg0);
19548 op1 = expand_normal (arg1);
19549 if (!REG_P (op0))
19550 op0 = copy_to_mode_reg (SImode, op0);
19551 if (!REG_P (op1))
19552 op1 = copy_to_mode_reg (SImode, op1);
19553 emit_insn (gen_sse3_mwait (op0, op1));
19554 return 0;
19555
19556 case IX86_BUILTIN_LDDQU:
19557 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, exp,
19558 target, 1);
19559
19560 case IX86_BUILTIN_PALIGNR:
19561 case IX86_BUILTIN_PALIGNR128:
19562 if (fcode == IX86_BUILTIN_PALIGNR)
19563 {
19564 icode = CODE_FOR_ssse3_palignrdi;
19565 mode = DImode;
19566 }
19567 else
19568 {
19569 icode = CODE_FOR_ssse3_palignrti;
19570 mode = V2DImode;
19571 }
19572 arg0 = CALL_EXPR_ARG (exp, 0);
19573 arg1 = CALL_EXPR_ARG (exp, 1);
19574 arg2 = CALL_EXPR_ARG (exp, 2);
19575 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL);
19576 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, EXPAND_NORMAL);
19577 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, EXPAND_NORMAL);
19578 tmode = insn_data[icode].operand[0].mode;
19579 mode1 = insn_data[icode].operand[1].mode;
19580 mode2 = insn_data[icode].operand[2].mode;
19581 mode3 = insn_data[icode].operand[3].mode;
19582
19583 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19584 {
19585 op0 = copy_to_reg (op0);
19586 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
19587 }
19588 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
19589 {
19590 op1 = copy_to_reg (op1);
19591 op1 = simplify_gen_subreg (mode2, op1, GET_MODE (op1), 0);
19592 }
19593 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
19594 {
19595 error ("shift must be an immediate");
19596 return const0_rtx;
19597 }
19598 target = gen_reg_rtx (mode);
19599 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, mode, 0),
19600 op0, op1, op2);
19601 if (! pat)
19602 return 0;
19603 emit_insn (pat);
19604 return target;
19605
19606 case IX86_BUILTIN_MOVNTDQA:
19607 return ix86_expand_unop_builtin (CODE_FOR_sse4_1_movntdqa, exp,
19608 target, 1);
19609
19610 case IX86_BUILTIN_MOVNTSD:
19611 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv2df, exp);
19612
19613 case IX86_BUILTIN_MOVNTSS:
19614 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv4sf, exp);
19615
19616 case IX86_BUILTIN_INSERTQ:
19617 case IX86_BUILTIN_EXTRQ:
19618 icode = (fcode == IX86_BUILTIN_EXTRQ
19619 ? CODE_FOR_sse4a_extrq
19620 : CODE_FOR_sse4a_insertq);
19621 arg0 = CALL_EXPR_ARG (exp, 0);
19622 arg1 = CALL_EXPR_ARG (exp, 1);
19623 op0 = expand_normal (arg0);
19624 op1 = expand_normal (arg1);
19625 tmode = insn_data[icode].operand[0].mode;
19626 mode1 = insn_data[icode].operand[1].mode;
19627 mode2 = insn_data[icode].operand[2].mode;
19628 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19629 op0 = copy_to_mode_reg (mode1, op0);
19630 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
19631 op1 = copy_to_mode_reg (mode2, op1);
19632 if (optimize || target == 0
19633 || GET_MODE (target) != tmode
19634 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19635 target = gen_reg_rtx (tmode);
19636 pat = GEN_FCN (icode) (target, op0, op1);
19637 if (! pat)
19638 return NULL_RTX;
19639 emit_insn (pat);
19640 return target;
19641
19642 case IX86_BUILTIN_EXTRQI:
19643 icode = CODE_FOR_sse4a_extrqi;
19644 arg0 = CALL_EXPR_ARG (exp, 0);
19645 arg1 = CALL_EXPR_ARG (exp, 1);
19646 arg2 = CALL_EXPR_ARG (exp, 2);
19647 op0 = expand_normal (arg0);
19648 op1 = expand_normal (arg1);
19649 op2 = expand_normal (arg2);
19650 tmode = insn_data[icode].operand[0].mode;
19651 mode1 = insn_data[icode].operand[1].mode;
19652 mode2 = insn_data[icode].operand[2].mode;
19653 mode3 = insn_data[icode].operand[3].mode;
19654 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19655 op0 = copy_to_mode_reg (mode1, op0);
19656 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
19657 {
19658 error ("index mask must be an immediate");
19659 return gen_reg_rtx (tmode);
19660 }
19661 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
19662 {
19663 error ("length mask must be an immediate");
19664 return gen_reg_rtx (tmode);
19665 }
19666 if (optimize || target == 0
19667 || GET_MODE (target) != tmode
19668 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19669 target = gen_reg_rtx (tmode);
19670 pat = GEN_FCN (icode) (target, op0, op1, op2);
19671 if (! pat)
19672 return NULL_RTX;
19673 emit_insn (pat);
19674 return target;
19675
19676 case IX86_BUILTIN_INSERTQI:
19677 icode = CODE_FOR_sse4a_insertqi;
19678 arg0 = CALL_EXPR_ARG (exp, 0);
19679 arg1 = CALL_EXPR_ARG (exp, 1);
19680 arg2 = CALL_EXPR_ARG (exp, 2);
19681 arg3 = CALL_EXPR_ARG (exp, 3);
19682 op0 = expand_normal (arg0);
19683 op1 = expand_normal (arg1);
19684 op2 = expand_normal (arg2);
19685 op3 = expand_normal (arg3);
19686 tmode = insn_data[icode].operand[0].mode;
19687 mode1 = insn_data[icode].operand[1].mode;
19688 mode2 = insn_data[icode].operand[2].mode;
19689 mode3 = insn_data[icode].operand[3].mode;
19690 mode4 = insn_data[icode].operand[4].mode;
19691
19692 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19693 op0 = copy_to_mode_reg (mode1, op0);
19694
19695 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
19696 op1 = copy_to_mode_reg (mode2, op1);
19697
19698 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
19699 {
19700 error ("index mask must be an immediate");
19701 return gen_reg_rtx (tmode);
19702 }
19703 if (! (*insn_data[icode].operand[4].predicate) (op3, mode4))
19704 {
19705 error ("length mask must be an immediate");
19706 return gen_reg_rtx (tmode);
19707 }
19708 if (optimize || target == 0
19709 || GET_MODE (target) != tmode
19710 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19711 target = gen_reg_rtx (tmode);
19712 pat = GEN_FCN (icode) (target, op0, op1, op2, op3);
19713 if (! pat)
19714 return NULL_RTX;
19715 emit_insn (pat);
19716 return target;
19717
19718 case IX86_BUILTIN_VEC_INIT_V2SI:
19719 case IX86_BUILTIN_VEC_INIT_V4HI:
19720 case IX86_BUILTIN_VEC_INIT_V8QI:
19721 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
19722
19723 case IX86_BUILTIN_VEC_EXT_V2DF:
19724 case IX86_BUILTIN_VEC_EXT_V2DI:
19725 case IX86_BUILTIN_VEC_EXT_V4SF:
19726 case IX86_BUILTIN_VEC_EXT_V4SI:
19727 case IX86_BUILTIN_VEC_EXT_V8HI:
19728 case IX86_BUILTIN_VEC_EXT_V2SI:
19729 case IX86_BUILTIN_VEC_EXT_V4HI:
19730 case IX86_BUILTIN_VEC_EXT_V16QI:
19731 return ix86_expand_vec_ext_builtin (exp, target);
19732
19733 case IX86_BUILTIN_VEC_SET_V2DI:
19734 case IX86_BUILTIN_VEC_SET_V4SF:
19735 case IX86_BUILTIN_VEC_SET_V4SI:
19736 case IX86_BUILTIN_VEC_SET_V8HI:
19737 case IX86_BUILTIN_VEC_SET_V4HI:
19738 case IX86_BUILTIN_VEC_SET_V16QI:
19739 return ix86_expand_vec_set_builtin (exp);
19740
19741 case IX86_BUILTIN_INFQ:
19742 {
19743 REAL_VALUE_TYPE inf;
19744 rtx tmp;
19745
19746 real_inf (&inf);
19747 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
19748
19749 tmp = validize_mem (force_const_mem (mode, tmp));
19750
19751 if (target == 0)
19752 target = gen_reg_rtx (mode);
19753
19754 emit_move_insn (target, tmp);
19755 return target;
19756 }
19757
19758 case IX86_BUILTIN_FABSQ:
19759 return ix86_expand_unop_builtin (CODE_FOR_abstf2, exp, target, 0);
19760
19761 case IX86_BUILTIN_COPYSIGNQ:
19762 return ix86_expand_binop_builtin (CODE_FOR_copysigntf3, exp, target);
19763
19764 default:
19765 break;
19766 }
19767
19768 for (i = 0, d = bdesc_sse_3arg;
19769 i < ARRAY_SIZE (bdesc_sse_3arg);
19770 i++, d++)
19771 if (d->code == fcode)
19772 return ix86_expand_sse_4_operands_builtin (d->icode, exp,
19773 target);
19774
19775 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
19776 if (d->code == fcode)
19777 {
19778 /* Compares are treated specially. */
19779 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
19780 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
19781 || d->icode == CODE_FOR_sse2_maskcmpv2df3
19782 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
19783 return ix86_expand_sse_compare (d, exp, target);
19784
19785 return ix86_expand_binop_builtin (d->icode, exp, target);
19786 }
19787
19788 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
19789 if (d->code == fcode)
19790 return ix86_expand_unop_builtin (d->icode, exp, target, 0);
19791
19792 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
19793 if (d->code == fcode)
19794 return ix86_expand_sse_comi (d, exp, target);
19795
19796 for (i = 0, d = bdesc_ptest; i < ARRAY_SIZE (bdesc_ptest); i++, d++)
19797 if (d->code == fcode)
19798 return ix86_expand_sse_ptest (d, exp, target);
19799
19800 for (i = 0, d = bdesc_crc32; i < ARRAY_SIZE (bdesc_crc32); i++, d++)
19801 if (d->code == fcode)
19802 return ix86_expand_crc32 (d->icode, exp, target);
19803
19804 for (i = 0, d = bdesc_pcmpestr;
19805 i < ARRAY_SIZE (bdesc_pcmpestr);
19806 i++, d++)
19807 if (d->code == fcode)
19808 return ix86_expand_sse_pcmpestr (d, exp, target);
19809
19810 for (i = 0, d = bdesc_pcmpistr;
19811 i < ARRAY_SIZE (bdesc_pcmpistr);
19812 i++, d++)
19813 if (d->code == fcode)
19814 return ix86_expand_sse_pcmpistr (d, exp, target);
19815
19816 gcc_unreachable ();
19817 }
19818
19819 /* Returns a function decl for a vectorized version of the builtin function
19820 with builtin function code FN and the result vector type TYPE, or NULL_TREE
19821 if it is not available. */
19822
19823 static tree
19824 ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
19825 tree type_in)
19826 {
19827 enum machine_mode in_mode, out_mode;
19828 int in_n, out_n;
19829
19830 if (TREE_CODE (type_out) != VECTOR_TYPE
19831 || TREE_CODE (type_in) != VECTOR_TYPE)
19832 return NULL_TREE;
19833
19834 out_mode = TYPE_MODE (TREE_TYPE (type_out));
19835 out_n = TYPE_VECTOR_SUBPARTS (type_out);
19836 in_mode = TYPE_MODE (TREE_TYPE (type_in));
19837 in_n = TYPE_VECTOR_SUBPARTS (type_in);
19838
19839 switch (fn)
19840 {
19841 case BUILT_IN_SQRT:
19842 if (out_mode == DFmode && out_n == 2
19843 && in_mode == DFmode && in_n == 2)
19844 return ix86_builtins[IX86_BUILTIN_SQRTPD];
19845 return NULL_TREE;
19846
19847 case BUILT_IN_SQRTF:
19848 if (out_mode == SFmode && out_n == 4
19849 && in_mode == SFmode && in_n == 4)
19850 return ix86_builtins[IX86_BUILTIN_SQRTPS];
19851 return NULL_TREE;
19852
19853 case BUILT_IN_LRINTF:
19854 if (out_mode == SImode && out_n == 4
19855 && in_mode == SFmode && in_n == 4)
19856 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
19857 return NULL_TREE;
19858
19859 default:
19860 ;
19861 }
19862
19863 return NULL_TREE;
19864 }
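/* Illustrative sketch (not part of the original source): the kind of loop the
   vectorizer hands to ix86_builtin_vectorized_function.  The flag set is an
   assumption; sqrt vectorization normally also requires errno handling to be
   disabled (e.g. -fno-math-errno or -ffast-math).

     compiled with: -O2 -msse2 -ftree-vectorize -fno-math-errno

     void
     vec_sqrt (double *a, const double *b, int n)
     {
       int i;
       for (i = 0; i < n; i++)
         a[i] = __builtin_sqrt (b[i]);
     }

   Here BUILT_IN_SQRT with V2DF input and output types is mapped to
   IX86_BUILTIN_SQRTPD, so the loop body becomes a sqrtpd instruction.  */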
19865
19866 /* Returns a decl of a function that implements conversion of the
19867 input vector of type TYPE, or NULL_TREE if it is not available. */
19868
19869 static tree
19870 ix86_builtin_conversion (unsigned int code, tree type)
19871 {
19872 if (TREE_CODE (type) != VECTOR_TYPE)
19873 return NULL_TREE;
19874
19875 switch (code)
19876 {
19877 case FLOAT_EXPR:
19878 switch (TYPE_MODE (type))
19879 {
19880 case V4SImode:
19881 return ix86_builtins[IX86_BUILTIN_CVTDQ2PS];
19882 default:
19883 return NULL_TREE;
19884 }
19885
19886 case FIX_TRUNC_EXPR:
19887 switch (TYPE_MODE (type))
19888 {
19889 case V4SFmode:
19890 return ix86_builtins[IX86_BUILTIN_CVTTPS2DQ];
19891 default:
19892 return NULL_TREE;
19893 }
19894 default:
19895 return NULL_TREE;
19896
19897 }
19898 }
19899
19900 /* Store OPERAND to memory after reload has completed. This means
19901 that we can't easily use assign_stack_local. */
19902 rtx
19903 ix86_force_to_memory (enum machine_mode mode, rtx operand)
19904 {
19905 rtx result;
19906
19907 gcc_assert (reload_completed);
19908 if (TARGET_RED_ZONE)
19909 {
19910 result = gen_rtx_MEM (mode,
19911 gen_rtx_PLUS (Pmode,
19912 stack_pointer_rtx,
19913 GEN_INT (-RED_ZONE_SIZE)));
19914 emit_move_insn (result, operand);
19915 }
19916 else if (!TARGET_RED_ZONE && TARGET_64BIT)
19917 {
19918 switch (mode)
19919 {
19920 case HImode:
19921 case SImode:
19922 operand = gen_lowpart (DImode, operand);
19923 /* FALLTHRU */
19924 case DImode:
19925 emit_insn (
19926 gen_rtx_SET (VOIDmode,
19927 gen_rtx_MEM (DImode,
19928 gen_rtx_PRE_DEC (DImode,
19929 stack_pointer_rtx)),
19930 operand));
19931 break;
19932 default:
19933 gcc_unreachable ();
19934 }
19935 result = gen_rtx_MEM (mode, stack_pointer_rtx);
19936 }
19937 else
19938 {
19939 switch (mode)
19940 {
19941 case DImode:
19942 {
19943 rtx operands[2];
19944 split_di (&operand, 1, operands, operands + 1);
19945 emit_insn (
19946 gen_rtx_SET (VOIDmode,
19947 gen_rtx_MEM (SImode,
19948 gen_rtx_PRE_DEC (Pmode,
19949 stack_pointer_rtx)),
19950 operands[1]));
19951 emit_insn (
19952 gen_rtx_SET (VOIDmode,
19953 gen_rtx_MEM (SImode,
19954 gen_rtx_PRE_DEC (Pmode,
19955 stack_pointer_rtx)),
19956 operands[0]));
19957 }
19958 break;
19959 case HImode:
19960 /* Store HImodes as SImodes. */
19961 operand = gen_lowpart (SImode, operand);
19962 /* FALLTHRU */
19963 case SImode:
19964 emit_insn (
19965 gen_rtx_SET (VOIDmode,
19966 gen_rtx_MEM (GET_MODE (operand),
19967 gen_rtx_PRE_DEC (SImode,
19968 stack_pointer_rtx)),
19969 operand));
19970 break;
19971 default:
19972 gcc_unreachable ();
19973 }
19974 result = gen_rtx_MEM (mode, stack_pointer_rtx);
19975 }
19976 return result;
19977 }
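/* Illustrative sketch (not part of the original source): on x86-64 with the
   red zone available, ix86_force_to_memory spills below the stack pointer
   instead of pushing, roughly

     (set (mem:DI (plus:DI (reg:DI sp) (const_int -128))) (reg:DI op))

   where 128 is assumed to be RED_ZONE_SIZE on the x86-64 ABI.  Without a red
   zone the operand is pushed with pre-decrement addressing, and
   ix86_free_from_memory below releases the slot again.  */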
19978
19979 /* Free the operand from memory. */
19980 void
19981 ix86_free_from_memory (enum machine_mode mode)
19982 {
19983 if (!TARGET_RED_ZONE)
19984 {
19985 int size;
19986
19987 if (mode == DImode || TARGET_64BIT)
19988 size = 8;
19989 else
19990 size = 4;
19991 /* Use LEA to deallocate stack space. In peephole2 it will be converted
19992 to a pop or add instruction if registers are available. */
19993 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
19994 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
19995 GEN_INT (size))));
19996 }
19997 }
19998
19999 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
20000 QImode must go into class Q_REGS.
20001 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
20002 movdf to do mem-to-mem moves through integer regs. */
20003 enum reg_class
20004 ix86_preferred_reload_class (rtx x, enum reg_class regclass)
20005 {
20006 enum machine_mode mode = GET_MODE (x);
20007
20008 /* We're only allowed to return a subclass of CLASS. Many of the
20009 following checks fail for NO_REGS, so eliminate that early. */
20010 if (regclass == NO_REGS)
20011 return NO_REGS;
20012
20013 /* All classes can load zeros. */
20014 if (x == CONST0_RTX (mode))
20015 return regclass;
20016
20017 /* Force constants into memory if we are loading a (nonzero) constant into
20018 an MMX or SSE register. This is because there are no MMX/SSE instructions
20019 to load from a constant. */
20020 if (CONSTANT_P (x)
20021 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
20022 return NO_REGS;
20023
20024 /* Prefer SSE regs only, if we can use them for math. */
20025 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
20026 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
20027
20028 /* Floating-point constants need more complex checks. */
20029 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
20030 {
20031 /* General regs can load everything. */
20032 if (reg_class_subset_p (regclass, GENERAL_REGS))
20033 return regclass;
20034
20035 /* Floats can load 0 and 1 plus some others. Note that we eliminated
20036 zero above. We only want to wind up preferring 80387 registers if
20037 we plan on doing computation with them. */
20038 if (TARGET_80387
20039 && standard_80387_constant_p (x))
20040 {
20041 /* Limit class to non-sse. */
20042 if (regclass == FLOAT_SSE_REGS)
20043 return FLOAT_REGS;
20044 if (regclass == FP_TOP_SSE_REGS)
20045 return FP_TOP_REG;
20046 if (regclass == FP_SECOND_SSE_REGS)
20047 return FP_SECOND_REG;
20048 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
20049 return regclass;
20050 }
20051
20052 return NO_REGS;
20053 }
20054
20055 /* Generally when we see PLUS here, it's the function invariant
20056 (plus soft-fp const_int). Which can only be computed into general
20057 regs. */
20058 if (GET_CODE (x) == PLUS)
20059 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
20060
20061 /* QImode constants are easy to load, but non-constant QImode data
20062 must go into Q_REGS. */
20063 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
20064 {
20065 if (reg_class_subset_p (regclass, Q_REGS))
20066 return regclass;
20067 if (reg_class_subset_p (Q_REGS, regclass))
20068 return Q_REGS;
20069 return NO_REGS;
20070 }
20071
20072 return regclass;
20073 }
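/* Illustrative worked example (not part of the original source) of the
   preferences above:

     ix86_preferred_reload_class (CONST0_RTX (V4SFmode), SSE_REGS)
       -> SSE_REGS   (all classes can load zeros)

     ix86_preferred_reload_class (a nonzero CONST_DOUBLE, SSE_REGS)
       -> NO_REGS    (the constant is forced into the constant pool, since
                      there is no SSE instruction that loads an immediate)  */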
20074
20075 /* Discourage putting floating-point values in SSE registers unless
20076 SSE math is being used, and likewise for the 387 registers. */
20077 enum reg_class
20078 ix86_preferred_output_reload_class (rtx x, enum reg_class regclass)
20079 {
20080 enum machine_mode mode = GET_MODE (x);
20081
20082 /* Restrict the output reload class to the register bank that we are doing
20083 math on. If we would like not to return a subset of CLASS, reject this
20084 alternative: if reload cannot do this, it will still use its choice. */
20085 mode = GET_MODE (x);
20086 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
20087 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
20088
20089 if (X87_FLOAT_MODE_P (mode))
20090 {
20091 if (regclass == FP_TOP_SSE_REGS)
20092 return FP_TOP_REG;
20093 else if (regclass == FP_SECOND_SSE_REGS)
20094 return FP_SECOND_REG;
20095 else
20096 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
20097 }
20098
20099 return regclass;
20100 }
20101
20102 /* If we are copying between general and FP registers, we need a memory
20103 location. The same is true for SSE and MMX registers.
20104
20105 The macro can't work reliably when one of the CLASSES is a class containing
20106 registers from multiple units (SSE, MMX, integer). We avoid this by never
20107 combining those units in single alternative in the machine description.
20108 Ensure that this constraint holds to avoid unexpected surprises.
20109
20110 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
20111 enforce these sanity checks. */
20112
20113 int
20114 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
20115 enum machine_mode mode, int strict)
20116 {
20117 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
20118 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
20119 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
20120 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
20121 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
20122 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
20123 {
20124 gcc_assert (!strict);
20125 return true;
20126 }
20127
20128 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
20129 return true;
20130
20131 /* ??? This is a lie. We do have moves between mmx/general and between
20132 mmx/sse2. But by saying we need secondary memory we discourage the
20133 register allocator from using the mmx registers unless needed. */
20134 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
20135 return true;
20136
20137 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
20138 {
20139 /* SSE1 doesn't have any direct moves from other classes. */
20140 if (!TARGET_SSE2)
20141 return true;
20142
20143 /* If the target says that inter-unit moves are more expensive
20144 than moving through memory, then don't generate them. */
20145 if (!TARGET_INTER_UNIT_MOVES)
20146 return true;
20147
20148 /* Between SSE and general, we have moves no larger than word size. */
20149 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
20150 return true;
20151 }
20152
20153 return false;
20154 }
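/* Illustrative worked example (not part of the original source):

     ix86_secondary_memory_needed (GENERAL_REGS, SSE_REGS, V4SFmode, 1)
       -> true

   because the classes differ in SSE-ness and a 16-byte mode is wider than
   UNITS_PER_WORD, so the copy must be staged through a stack slot even when
   inter-unit moves are otherwise allowed.  */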
20155
20156 /* Return true if the registers in CLASS cannot represent the change from
20157 modes FROM to TO. */
20158
20159 bool
20160 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
20161 enum reg_class regclass)
20162 {
20163 if (from == to)
20164 return false;
20165
20166 /* x87 registers can't do subreg at all, as all values are reformatted
20167 to extended precision. */
20168 if (MAYBE_FLOAT_CLASS_P (regclass))
20169 return true;
20170
20171 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
20172 {
20173 /* Vector registers do not support QI or HImode loads. If we don't
20174 disallow a change to these modes, reload will assume it's ok to
20175 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
20176 the vec_dupv4hi pattern. */
20177 if (GET_MODE_SIZE (from) < 4)
20178 return true;
20179
20180 /* Vector registers do not support subreg with nonzero offsets, which
20181 are otherwise valid for integer registers. Since we can't see
20182 whether we have a nonzero offset from here, prohibit all
20183 nonparadoxical subregs changing size. */
20184 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
20185 return true;
20186 }
20187
20188 return false;
20189 }
20190
20191 /* Return the cost of moving data from a register in class CLASS1 to
20192 one in class CLASS2.
20193
20194 It is not required that the cost always equal 2 when FROM is the same as TO;
20195 on some machines it is expensive to move between registers if they are not
20196 general registers. */
20197
20198 int
20199 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
20200 enum reg_class class2)
20201 {
20202 /* In case we require secondary memory, compute cost of the store followed
20203 by load. In order to avoid bad register allocation choices, we need
20204 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
20205
20206 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
20207 {
20208 int cost = 1;
20209
20210 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
20211 MEMORY_MOVE_COST (mode, class1, 1));
20212 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
20213 MEMORY_MOVE_COST (mode, class2, 1));
20214
20215 /* In the case of copying from a general-purpose register we may emit
20216 multiple stores followed by a single load, causing a memory-size
20217 mismatch stall. Count this as an arbitrarily high cost of 20. */
20218 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
20219 cost += 20;
20220
20221 /* In the case of FP/MMX moves, the registers actually overlap, and we
20222 have to switch modes in order to treat them differently. */
20223 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
20224 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
20225 cost += 20;
20226
20227 return cost;
20228 }
20229
20230 /* Moves between SSE/MMX and integer unit are expensive. */
20231 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
20232 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
20233 return ix86_cost->mmxsse_to_integer;
20234 if (MAYBE_FLOAT_CLASS_P (class1))
20235 return ix86_cost->fp_move;
20236 if (MAYBE_SSE_CLASS_P (class1))
20237 return ix86_cost->sse_move;
20238 if (MAYBE_MMX_CLASS_P (class1))
20239 return ix86_cost->mmx_move;
20240 return 2;
20241 }
20242
20243 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
20244
20245 bool
20246 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
20247 {
20248 /* Flags, and only flags, can hold CCmode values. */
20249 if (CC_REGNO_P (regno))
20250 return GET_MODE_CLASS (mode) == MODE_CC;
20251 if (GET_MODE_CLASS (mode) == MODE_CC
20252 || GET_MODE_CLASS (mode) == MODE_RANDOM
20253 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
20254 return 0;
20255 if (FP_REGNO_P (regno))
20256 return VALID_FP_MODE_P (mode);
20257 if (SSE_REGNO_P (regno))
20258 {
20259 /* We implement the move patterns for all vector modes into and
20260 out of SSE registers, even when no operation instructions
20261 are available. */
20262 return (VALID_SSE_REG_MODE (mode)
20263 || VALID_SSE2_REG_MODE (mode)
20264 || VALID_MMX_REG_MODE (mode)
20265 || VALID_MMX_REG_MODE_3DNOW (mode));
20266 }
20267 if (MMX_REGNO_P (regno))
20268 {
20269 /* We implement the move patterns for 3DNOW modes even in MMX mode,
20270 so if the register is available at all, then we can move data of
20271 the given mode into or out of it. */
20272 return (VALID_MMX_REG_MODE (mode)
20273 || VALID_MMX_REG_MODE_3DNOW (mode));
20274 }
20275
20276 if (mode == QImode)
20277 {
20278 /* Take care with QImode values - they can be in non-QI regs,
20279 but then they do cause partial register stalls. */
20280 if (regno < 4 || TARGET_64BIT)
20281 return 1;
20282 if (!TARGET_PARTIAL_REG_STALL)
20283 return 1;
20284 return reload_in_progress || reload_completed;
20285 }
20286 /* We handle both integer and floats in the general purpose registers. */
20287 else if (VALID_INT_MODE_P (mode))
20288 return 1;
20289 else if (VALID_FP_MODE_P (mode))
20290 return 1;
20291 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
20292 on to use that value in smaller contexts, this can easily force a
20293 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
20294 supporting DImode, allow it. */
20295 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
20296 return 1;
20297
20298 return 0;
20299 }
20300
20301 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
20302 tieable integer mode. */
20303
20304 static bool
20305 ix86_tieable_integer_mode_p (enum machine_mode mode)
20306 {
20307 switch (mode)
20308 {
20309 case HImode:
20310 case SImode:
20311 return true;
20312
20313 case QImode:
20314 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
20315
20316 case DImode:
20317 return TARGET_64BIT;
20318
20319 default:
20320 return false;
20321 }
20322 }
20323
20324 /* Return true if MODE1 is accessible in a register that can hold MODE2
20325 without copying. That is, all register classes that can hold MODE2
20326 can also hold MODE1. */
20327
20328 bool
20329 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
20330 {
20331 if (mode1 == mode2)
20332 return true;
20333
20334 if (ix86_tieable_integer_mode_p (mode1)
20335 && ix86_tieable_integer_mode_p (mode2))
20336 return true;
20337
20338 /* MODE2 being XFmode implies fp stack or general regs, which means we
20339 can tie any smaller floating point modes to it. Note that we do not
20340 tie this with TFmode. */
20341 if (mode2 == XFmode)
20342 return mode1 == SFmode || mode1 == DFmode;
20343
20344 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
20345 that we can tie it with SFmode. */
20346 if (mode2 == DFmode)
20347 return mode1 == SFmode;
20348
20349 /* If MODE2 is only appropriate for an SSE register, then tie with
20350 any other mode acceptable to SSE registers. */
20351 if (GET_MODE_SIZE (mode2) == 16
20352 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
20353 return (GET_MODE_SIZE (mode1) == 16
20354 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
20355
20356 /* If MODE2 is appropriate for an MMX register, then tie
20357 with any other mode acceptable to MMX registers. */
20358 if (GET_MODE_SIZE (mode2) == 8
20359 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
20360 return (GET_MODE_SIZE (mode1) == 8
20361 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
20362
20363 return false;
20364 }
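/* Illustrative worked examples (not part of the original source):

     ix86_modes_tieable_p (SFmode, XFmode)  -> true
       (XFmode implies x87 or general regs, all of which can hold SFmode)

     ix86_modes_tieable_p (DImode, SImode)  -> true only for TARGET_64BIT
       (DImode is a tieable integer mode only on 64-bit targets)  */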
20365
20366 /* Return the cost of moving data of mode M between a
20367 register and memory. A value of 2 is the default; this cost is
20368 relative to those in `REGISTER_MOVE_COST'.
20369
20370 If moving between registers and memory is more expensive than
20371 between two registers, you should define this macro to express the
20372 relative cost.
20373
20374 Also model the increased cost of moving QImode registers in
20375 non-Q_REGS classes.
20376 */
20377 int
20378 ix86_memory_move_cost (enum machine_mode mode, enum reg_class regclass, int in)
20379 {
20380 if (FLOAT_CLASS_P (regclass))
20381 {
20382 int index;
20383 switch (mode)
20384 {
20385 case SFmode:
20386 index = 0;
20387 break;
20388 case DFmode:
20389 index = 1;
20390 break;
20391 case XFmode:
20392 index = 2;
20393 break;
20394 default:
20395 return 100;
20396 }
20397 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
20398 }
20399 if (SSE_CLASS_P (regclass))
20400 {
20401 int index;
20402 switch (GET_MODE_SIZE (mode))
20403 {
20404 case 4:
20405 index = 0;
20406 break;
20407 case 8:
20408 index = 1;
20409 break;
20410 case 16:
20411 index = 2;
20412 break;
20413 default:
20414 return 100;
20415 }
20416 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
20417 }
20418 if (MMX_CLASS_P (regclass))
20419 {
20420 int index;
20421 switch (GET_MODE_SIZE (mode))
20422 {
20423 case 4:
20424 index = 0;
20425 break;
20426 case 8:
20427 index = 1;
20428 break;
20429 default:
20430 return 100;
20431 }
20432 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
20433 }
20434 switch (GET_MODE_SIZE (mode))
20435 {
20436 case 1:
20437 if (in)
20438 return (Q_CLASS_P (regclass) ? ix86_cost->int_load[0]
20439 : ix86_cost->movzbl_load);
20440 else
20441 return (Q_CLASS_P (regclass) ? ix86_cost->int_store[0]
20442 : ix86_cost->int_store[0] + 4);
20443 break;
20444 case 2:
20445 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
20446 default:
20447 /* Compute the number of 32-bit moves needed. TFmode is moved as XFmode. */
20448 if (mode == TFmode)
20449 mode = XFmode;
20450 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
20451 * (((int) GET_MODE_SIZE (mode)
20452 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
20453 }
20454 }
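/* Worked example (illustrative; it assumes a 32-bit target where
   UNITS_PER_WORD is 4): a TFmode move to or from GENERAL_REGS falls
   through to the default case, is treated as XFmode (12 bytes), and so
   costs ix86_cost->int_load[2] (or int_store[2]) times 3, i.e. three
   32-bit memory moves. A QImode store from a non-Q class costs
   int_store[0] + 4, reflecting that such a register has no directly
   addressable byte part. */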
20455
20456 /* Compute a (partial) cost for rtx X. Return true if the complete
20457 cost has been computed, and false if subexpressions should be
20458 scanned. In either case, *TOTAL contains the cost result. */
20459
20460 static bool
20461 ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total)
20462 {
20463 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
20464 enum machine_mode mode = GET_MODE (x);
20465
20466 switch (code)
20467 {
20468 case CONST_INT:
20469 case CONST:
20470 case LABEL_REF:
20471 case SYMBOL_REF:
20472 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
20473 *total = 3;
20474 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
20475 *total = 2;
20476 else if (flag_pic && SYMBOLIC_CONST (x)
20477 && (!TARGET_64BIT
20478 || (GET_CODE (x) != LABEL_REF
20479 && (GET_CODE (x) != SYMBOL_REF
20480 || !SYMBOL_REF_LOCAL_P (x)))))
20481 *total = 1;
20482 else
20483 *total = 0;
20484 return true;
20485
20486 case CONST_DOUBLE:
20487 if (mode == VOIDmode)
20488 *total = 0;
20489 else
20490 switch (standard_80387_constant_p (x))
20491 {
20492 case 1: /* 0.0 */
20493 *total = 1;
20494 break;
20495 default: /* Other constants */
20496 *total = 2;
20497 break;
20498 case 0:
20499 case -1:
20500 /* Start with (MEM (SYMBOL_REF)), since that's where
20501 it'll probably end up. Add a penalty for size. */
20502 *total = (COSTS_N_INSNS (1)
20503 + (flag_pic != 0 && !TARGET_64BIT)
20504 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
20505 break;
20506 }
20507 return true;
20508
20509 case ZERO_EXTEND:
20510 /* The zero extension is often completely free on x86_64, so make
20511 it as cheap as possible. */
20512 if (TARGET_64BIT && mode == DImode
20513 && GET_MODE (XEXP (x, 0)) == SImode)
20514 *total = 1;
20515 else if (TARGET_ZERO_EXTEND_WITH_AND)
20516 *total = ix86_cost->add;
20517 else
20518 *total = ix86_cost->movzx;
20519 return false;
20520
20521 case SIGN_EXTEND:
20522 *total = ix86_cost->movsx;
20523 return false;
20524
20525 case ASHIFT:
20526 if (CONST_INT_P (XEXP (x, 1))
20527 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
20528 {
20529 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
20530 if (value == 1)
20531 {
20532 *total = ix86_cost->add;
20533 return false;
20534 }
20535 if ((value == 2 || value == 3)
20536 && ix86_cost->lea <= ix86_cost->shift_const)
20537 {
20538 *total = ix86_cost->lea;
20539 return false;
20540 }
20541 }
20542 /* FALLTHRU */
20543
20544 case ROTATE:
20545 case ASHIFTRT:
20546 case LSHIFTRT:
20547 case ROTATERT:
20548 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
20549 {
20550 if (CONST_INT_P (XEXP (x, 1)))
20551 {
20552 if (INTVAL (XEXP (x, 1)) > 32)
20553 *total = ix86_cost->shift_const + COSTS_N_INSNS (2);
20554 else
20555 *total = ix86_cost->shift_const * 2;
20556 }
20557 else
20558 {
20559 if (GET_CODE (XEXP (x, 1)) == AND)
20560 *total = ix86_cost->shift_var * 2;
20561 else
20562 *total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
20563 }
20564 }
20565 else
20566 {
20567 if (CONST_INT_P (XEXP (x, 1)))
20568 *total = ix86_cost->shift_const;
20569 else
20570 *total = ix86_cost->shift_var;
20571 }
20572 return false;
20573
20574 case MULT:
20575 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
20576 {
20577 /* ??? SSE scalar cost should be used here. */
20578 *total = ix86_cost->fmul;
20579 return false;
20580 }
20581 else if (X87_FLOAT_MODE_P (mode))
20582 {
20583 *total = ix86_cost->fmul;
20584 return false;
20585 }
20586 else if (FLOAT_MODE_P (mode))
20587 {
20588 /* ??? SSE vector cost should be used here. */
20589 *total = ix86_cost->fmul;
20590 return false;
20591 }
20592 else
20593 {
20594 rtx op0 = XEXP (x, 0);
20595 rtx op1 = XEXP (x, 1);
20596 int nbits;
20597 if (CONST_INT_P (XEXP (x, 1)))
20598 {
20599 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
20600 for (nbits = 0; value != 0; value &= value - 1)
20601 nbits++;
20602 }
20603 else
20604 /* This is arbitrary. */
20605 nbits = 7;
20606
20607 /* Compute costs correctly for widening multiplication. */
20608 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
20609 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
20610 == GET_MODE_SIZE (mode))
20611 {
20612 int is_mulwiden = 0;
20613 enum machine_mode inner_mode = GET_MODE (op0);
20614
20615 if (GET_CODE (op0) == GET_CODE (op1))
20616 is_mulwiden = 1, op1 = XEXP (op1, 0);
20617 else if (CONST_INT_P (op1))
20618 {
20619 if (GET_CODE (op0) == SIGN_EXTEND)
20620 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
20621 == INTVAL (op1);
20622 else
20623 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
20624 }
20625
20626 if (is_mulwiden)
20627 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
20628 }
20629
20630 *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
20631 + nbits * ix86_cost->mult_bit
20632 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));
20633
20634 return true;
20635 }
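      /* Illustration of the integer branch above: the loop counts the set
         bits of a constant multiplier by repeatedly clearing the lowest one
         (value &= value - 1), so a multiply by 10 (binary 1010) gives
         nbits = 2 and an estimated cost of mult_init[MODE_INDEX (mode)]
         + 2 * mult_bit plus the operand costs. For a widening multiply such
         as (mult:DI (sign_extend:DI (reg:SI)) (sign_extend:DI (reg:SI)))
         the extensions are stripped and the cost is charged for the
         narrower SImode multiply instead. */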
20636
20637 case DIV:
20638 case UDIV:
20639 case MOD:
20640 case UMOD:
20641 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
20642 /* ??? SSE cost should be used here. */
20643 *total = ix86_cost->fdiv;
20644 else if (X87_FLOAT_MODE_P (mode))
20645 *total = ix86_cost->fdiv;
20646 else if (FLOAT_MODE_P (mode))
20647 /* ??? SSE vector cost should be used here. */
20648 *total = ix86_cost->fdiv;
20649 else
20650 *total = ix86_cost->divide[MODE_INDEX (mode)];
20651 return false;
20652
20653 case PLUS:
20654 if (GET_MODE_CLASS (mode) == MODE_INT
20655 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
20656 {
20657 if (GET_CODE (XEXP (x, 0)) == PLUS
20658 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
20659 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
20660 && CONSTANT_P (XEXP (x, 1)))
20661 {
20662 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
20663 if (val == 2 || val == 4 || val == 8)
20664 {
20665 *total = ix86_cost->lea;
20666 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
20667 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
20668 outer_code);
20669 *total += rtx_cost (XEXP (x, 1), outer_code);
20670 return true;
20671 }
20672 }
20673 else if (GET_CODE (XEXP (x, 0)) == MULT
20674 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
20675 {
20676 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
20677 if (val == 2 || val == 4 || val == 8)
20678 {
20679 *total = ix86_cost->lea;
20680 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
20681 *total += rtx_cost (XEXP (x, 1), outer_code);
20682 return true;
20683 }
20684 }
20685 else if (GET_CODE (XEXP (x, 0)) == PLUS)
20686 {
20687 *total = ix86_cost->lea;
20688 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
20689 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
20690 *total += rtx_cost (XEXP (x, 1), outer_code);
20691 return true;
20692 }
20693 }
20694 /* FALLTHRU */
20695
20696 case MINUS:
20697 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
20698 {
20699 /* ??? SSE cost should be used here. */
20700 *total = ix86_cost->fadd;
20701 return false;
20702 }
20703 else if (X87_FLOAT_MODE_P (mode))
20704 {
20705 *total = ix86_cost->fadd;
20706 return false;
20707 }
20708 else if (FLOAT_MODE_P (mode))
20709 {
20710 /* ??? SSE vector cost should be used here. */
20711 *total = ix86_cost->fadd;
20712 return false;
20713 }
20714 /* FALLTHRU */
20715
20716 case AND:
20717 case IOR:
20718 case XOR:
20719 if (!TARGET_64BIT && mode == DImode)
20720 {
20721 *total = (ix86_cost->add * 2
20722 + (rtx_cost (XEXP (x, 0), outer_code)
20723 << (GET_MODE (XEXP (x, 0)) != DImode))
20724 + (rtx_cost (XEXP (x, 1), outer_code)
20725 << (GET_MODE (XEXP (x, 1)) != DImode)));
20726 return true;
20727 }
20728 /* FALLTHRU */
20729
20730 case NEG:
20731 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
20732 {
20733 /* ??? SSE cost should be used here. */
20734 *total = ix86_cost->fchs;
20735 return false;
20736 }
20737 else if (X87_FLOAT_MODE_P (mode))
20738 {
20739 *total = ix86_cost->fchs;
20740 return false;
20741 }
20742 else if (FLOAT_MODE_P (mode))
20743 {
20744 /* ??? SSE vector cost should be used here. */
20745 *total = ix86_cost->fchs;
20746 return false;
20747 }
20748 /* FALLTHRU */
20749
20750 case NOT:
20751 if (!TARGET_64BIT && mode == DImode)
20752 *total = ix86_cost->add * 2;
20753 else
20754 *total = ix86_cost->add;
20755 return false;
20756
20757 case COMPARE:
20758 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
20759 && XEXP (XEXP (x, 0), 1) == const1_rtx
20760 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
20761 && XEXP (x, 1) == const0_rtx)
20762 {
20763 /* This kind of construct is implemented using test[bwl].
20764 Treat it as if we had an AND. */
20765 *total = (ix86_cost->add
20766 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
20767 + rtx_cost (const1_rtx, outer_code));
20768 return true;
20769 }
20770 return false;
20771
20772 case FLOAT_EXTEND:
20773 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
20774 *total = 0;
20775 return false;
20776
20777 case ABS:
20778 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
20779 /* ??? SSE cost should be used here. */
20780 *total = ix86_cost->fabs;
20781 else if (X87_FLOAT_MODE_P (mode))
20782 *total = ix86_cost->fabs;
20783 else if (FLOAT_MODE_P (mode))
20784 /* ??? SSE vector cost should be used here. */
20785 *total = ix86_cost->fabs;
20786 return false;
20787
20788 case SQRT:
20789 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
20790 /* ??? SSE cost should be used here. */
20791 *total = ix86_cost->fsqrt;
20792 else if (X87_FLOAT_MODE_P (mode))
20793 *total = ix86_cost->fsqrt;
20794 else if (FLOAT_MODE_P (mode))
20795 /* ??? SSE vector cost should be used here. */
20796 *total = ix86_cost->fsqrt;
20797 return false;
20798
20799 case UNSPEC:
20800 if (XINT (x, 1) == UNSPEC_TP)
20801 *total = 0;
20802 return false;
20803
20804 default:
20805 return false;
20806 }
20807 }
20808
20809 #if TARGET_MACHO
20810
20811 static int current_machopic_label_num;
20812
20813 /* Given a symbol name and its associated stub, write out the
20814 definition of the stub. */
20815
20816 void
20817 machopic_output_stub (FILE *file, const char *symb, const char *stub)
20818 {
20819 unsigned int length;
20820 char *binder_name, *symbol_name, lazy_ptr_name[32];
20821 int label = ++current_machopic_label_num;
20822
20823 /* For 64-bit we shouldn't get here. */
20824 gcc_assert (!TARGET_64BIT);
20825
20826 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
20827 symb = (*targetm.strip_name_encoding) (symb);
20828
20829 length = strlen (stub);
20830 binder_name = alloca (length + 32);
20831 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
20832
20833 length = strlen (symb);
20834 symbol_name = alloca (length + 32);
20835 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
20836
20837 sprintf (lazy_ptr_name, "L%d$lz", label);
20838
20839 if (MACHOPIC_PURE)
20840 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
20841 else
20842 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
20843
20844 fprintf (file, "%s:\n", stub);
20845 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
20846
20847 if (MACHOPIC_PURE)
20848 {
20849 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
20850 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
20851 fprintf (file, "\tjmp\t*%%edx\n");
20852 }
20853 else
20854 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
20855
20856 fprintf (file, "%s:\n", binder_name);
20857
20858 if (MACHOPIC_PURE)
20859 {
20860 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
20861 fprintf (file, "\tpushl\t%%eax\n");
20862 }
20863 else
20864 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
20865
20866 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
20867
20868 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
20869 fprintf (file, "%s:\n", lazy_ptr_name);
20870 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
20871 fprintf (file, "\t.long %s\n", binder_name);
20872 }
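/* Illustrative output (the stub, binder and symbol names below are
   hypothetical; the real ones come from GEN_BINDER_NAME_FOR_STUB and
   GEN_SYMBOL_NAME_FOR_SYMBOL): for symbol "_foo" and label number 1,
   the MACHOPIC_PURE path emits roughly

	L_foo$stub:
		.indirect_symbol _foo
		call	LPC$1
	LPC$1:	popl	%eax
		movl	L1$lz-LPC$1(%eax),%edx
		jmp	*%edx
	L_foo$stub_binder:
		lea	L1$lz-LPC$1(%eax),%eax
		pushl	%eax
		jmp	dyld_stub_binding_helper
	L1$lz:
		.indirect_symbol _foo
		.long	L_foo$stub_binder

   with the lazy pointer placed in the lazy symbol pointer section. */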
20873
20874 void
20875 darwin_x86_file_end (void)
20876 {
20877 darwin_file_end ();
20878 ix86_file_end ();
20879 }
20880 #endif /* TARGET_MACHO */
20881
20882 /* Order the registers for the register allocator. */
20883
20884 void
20885 x86_order_regs_for_local_alloc (void)
20886 {
20887 int pos = 0;
20888 int i;
20889
20890 /* First allocate the local general purpose registers. */
20891 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
20892 if (GENERAL_REGNO_P (i) && call_used_regs[i])
20893 reg_alloc_order [pos++] = i;
20894
20895 /* Global general purpose registers. */
20896 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
20897 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
20898 reg_alloc_order [pos++] = i;
20899
20900 /* x87 registers come first in case we are doing FP math
20901 using them. */
20902 if (!TARGET_SSE_MATH)
20903 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
20904 reg_alloc_order [pos++] = i;
20905
20906 /* SSE registers. */
20907 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
20908 reg_alloc_order [pos++] = i;
20909 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
20910 reg_alloc_order [pos++] = i;
20911
20912 /* x87 registers. */
20913 if (TARGET_SSE_MATH)
20914 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
20915 reg_alloc_order [pos++] = i;
20916
20917 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
20918 reg_alloc_order [pos++] = i;
20919
20920 /* Initialize the rest of the array, as we do not allocate some
20921 registers at all. */
20922 while (pos < FIRST_PSEUDO_REGISTER)
20923 reg_alloc_order [pos++] = 0;
20924 }
20925
20926 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
20927 struct attribute_spec.handler. */
20928 static tree
20929 ix86_handle_struct_attribute (tree *node, tree name,
20930 tree args ATTRIBUTE_UNUSED,
20931 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
20932 {
20933 tree *type = NULL;
20934 if (DECL_P (*node))
20935 {
20936 if (TREE_CODE (*node) == TYPE_DECL)
20937 type = &TREE_TYPE (*node);
20938 }
20939 else
20940 type = node;
20941
20942 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
20943 || TREE_CODE (*type) == UNION_TYPE)))
20944 {
20945 warning (OPT_Wattributes, "%qs attribute ignored",
20946 IDENTIFIER_POINTER (name));
20947 *no_add_attrs = true;
20948 }
20949
20950 else if ((is_attribute_p ("ms_struct", name)
20951 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
20952 || ((is_attribute_p ("gcc_struct", name)
20953 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
20954 {
20955 warning (OPT_Wattributes, "%qs incompatible attribute ignored",
20956 IDENTIFIER_POINTER (name));
20957 *no_add_attrs = true;
20958 }
20959
20960 return NULL_TREE;
20961 }
20962
20963 static bool
20964 ix86_ms_bitfield_layout_p (tree record_type)
20965 {
20966 return (TARGET_MS_BITFIELD_LAYOUT &&
20967 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
20968 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
20969 }
20970
20971 /* Returns an expression indicating where the this parameter is
20972 located on entry to the FUNCTION. */
20973
20974 static rtx
20975 x86_this_parameter (tree function)
20976 {
20977 tree type = TREE_TYPE (function);
20978 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
20979
20980 if (TARGET_64BIT)
20981 {
20982 const int *parm_regs;
20983
20984 if (TARGET_64BIT_MS_ABI)
20985 parm_regs = x86_64_ms_abi_int_parameter_registers;
20986 else
20987 parm_regs = x86_64_int_parameter_registers;
20988 return gen_rtx_REG (DImode, parm_regs[aggr]);
20989 }
20990
20991 if (ix86_function_regparm (type, function) > 0
20992 && !type_has_variadic_args_p (type))
20993 {
20994 int regno = 0;
20995 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
20996 regno = 2;
20997 return gen_rtx_REG (SImode, regno);
20998 }
20999
21000 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
21001 }
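/* For example (illustrative): under the SysV 64-bit ABI,
   x86_64_int_parameter_registers[0] is %rdi, so `this' arrives in %rdi,
   or in %rsi when a hidden aggregate-return pointer occupies the first
   slot. On 32-bit targets without register parameters, `this' is read
   from the stack at 4(%esp), or 8(%esp) with a hidden return slot. */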
21002
21003 /* Determine whether x86_output_mi_thunk can succeed. */
21004
21005 static bool
21006 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
21007 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
21008 HOST_WIDE_INT vcall_offset, tree function)
21009 {
21010 /* 64-bit can handle anything. */
21011 if (TARGET_64BIT)
21012 return true;
21013
21014 /* For 32-bit, everything's fine if we have one free register. */
21015 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
21016 return true;
21017
21018 /* Need a free register for vcall_offset. */
21019 if (vcall_offset)
21020 return false;
21021
21022 /* Need a free register for GOT references. */
21023 if (flag_pic && !(*targetm.binds_local_p) (function))
21024 return false;
21025
21026 /* Otherwise ok. */
21027 return true;
21028 }
21029
21030 /* Output the assembler code for a thunk function. THUNK_DECL is the
21031 declaration for the thunk function itself, FUNCTION is the decl for
21032 the target function. DELTA is an immediate constant offset to be
21033 added to THIS. If VCALL_OFFSET is nonzero, the word at
21034 *(*this + vcall_offset) should be added to THIS. */
21035
21036 static void
21037 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
21038 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
21039 HOST_WIDE_INT vcall_offset, tree function)
21040 {
21041 rtx xops[3];
21042 rtx this_param = x86_this_parameter (function);
21043 rtx this_reg, tmp;
21044
21045 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
21046 pull it in now and let DELTA benefit. */
21047 if (REG_P (this_param))
21048 this_reg = this_param;
21049 else if (vcall_offset)
21050 {
21051 /* Put the this parameter into %eax. */
21052 xops[0] = this_param;
21053 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
21054 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
21055 }
21056 else
21057 this_reg = NULL_RTX;
21058
21059 /* Adjust the this parameter by a fixed constant. */
21060 if (delta)
21061 {
21062 xops[0] = GEN_INT (delta);
21063 xops[1] = this_reg ? this_reg : this_param;
21064 if (TARGET_64BIT)
21065 {
21066 if (!x86_64_general_operand (xops[0], DImode))
21067 {
21068 tmp = gen_rtx_REG (DImode, R10_REG);
21069 xops[1] = tmp;
21070 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
21071 xops[0] = tmp;
21072 xops[1] = this_param;
21073 }
21074 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
21075 }
21076 else
21077 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
21078 }
21079
21080 /* Adjust the this parameter by a value stored in the vtable. */
21081 if (vcall_offset)
21082 {
21083 if (TARGET_64BIT)
21084 tmp = gen_rtx_REG (DImode, R10_REG);
21085 else
21086 {
21087 int tmp_regno = 2 /* ECX */;
21088 if (lookup_attribute ("fastcall",
21089 TYPE_ATTRIBUTES (TREE_TYPE (function))))
21090 tmp_regno = 0 /* EAX */;
21091 tmp = gen_rtx_REG (SImode, tmp_regno);
21092 }
21093
21094 xops[0] = gen_rtx_MEM (Pmode, this_reg);
21095 xops[1] = tmp;
21096 if (TARGET_64BIT)
21097 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
21098 else
21099 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
21100
21101 /* Adjust the this parameter. */
21102 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
21103 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
21104 {
21105 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
21106 xops[0] = GEN_INT (vcall_offset);
21107 xops[1] = tmp2;
21108 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
21109 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
21110 }
21111 xops[1] = this_reg;
21112 if (TARGET_64BIT)
21113 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
21114 else
21115 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
21116 }
21117
21118 /* If necessary, drop THIS back to its stack slot. */
21119 if (this_reg && this_reg != this_param)
21120 {
21121 xops[0] = this_reg;
21122 xops[1] = this_param;
21123 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
21124 }
21125
21126 xops[0] = XEXP (DECL_RTL (function), 0);
21127 if (TARGET_64BIT)
21128 {
21129 if (!flag_pic || (*targetm.binds_local_p) (function))
21130 output_asm_insn ("jmp\t%P0", xops);
21131 /* All thunks should be in the same object as their target,
21132 and thus binds_local_p should be true. */
21133 else if (TARGET_64BIT_MS_ABI)
21134 gcc_unreachable ();
21135 else
21136 {
21137 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
21138 tmp = gen_rtx_CONST (Pmode, tmp);
21139 tmp = gen_rtx_MEM (QImode, tmp);
21140 xops[0] = tmp;
21141 output_asm_insn ("jmp\t%A0", xops);
21142 }
21143 }
21144 else
21145 {
21146 if (!flag_pic || (*targetm.binds_local_p) (function))
21147 output_asm_insn ("jmp\t%P0", xops);
21148 else
21149 #if TARGET_MACHO
21150 if (TARGET_MACHO)
21151 {
21152 rtx sym_ref = XEXP (DECL_RTL (function), 0);
21153 tmp = (gen_rtx_SYMBOL_REF
21154 (Pmode,
21155 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
21156 tmp = gen_rtx_MEM (QImode, tmp);
21157 xops[0] = tmp;
21158 output_asm_insn ("jmp\t%0", xops);
21159 }
21160 else
21161 #endif /* TARGET_MACHO */
21162 {
21163 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
21164 output_set_got (tmp, NULL_RTX);
21165
21166 xops[1] = tmp;
21167 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
21168 output_asm_insn ("jmp\t{*}%1", xops);
21169 }
21170 }
21171 }
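/* Sketch of the emitted code (illustrative; non-PIC ia32 with `this' on
   the stack): a thunk with DELTA = -4, VCALL_OFFSET = 0 and target `f'
   comes out as

	addl	$-4, 4(%esp)
	jmp	f

   while a nonzero VCALL_OFFSET additionally loads `this' into %eax,
   fetches the vtable pointer into %ecx (or %eax for fastcall), adds the
   word at VCALL_OFFSET in the vtable to `this', and then jumps. */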
21172
21173 static void
21174 x86_file_start (void)
21175 {
21176 default_file_start ();
21177 #if TARGET_MACHO
21178 darwin_file_start ();
21179 #endif
21180 if (X86_FILE_START_VERSION_DIRECTIVE)
21181 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
21182 if (X86_FILE_START_FLTUSED)
21183 fputs ("\t.global\t__fltused\n", asm_out_file);
21184 if (ix86_asm_dialect == ASM_INTEL)
21185 fputs ("\t.intel_syntax\n", asm_out_file);
21186 }
21187
21188 int
21189 x86_field_alignment (tree field, int computed)
21190 {
21191 enum machine_mode mode;
21192 tree type = TREE_TYPE (field);
21193
21194 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
21195 return computed;
21196 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
21197 ? get_inner_array_type (type) : type);
21198 if (mode == DFmode || mode == DCmode
21199 || GET_MODE_CLASS (mode) == MODE_INT
21200 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
21201 return MIN (32, computed);
21202 return computed;
21203 }
21204
21205 /* Output assembler code to FILE to increment profiler label # LABELNO
21206 for profiling a function entry. */
21207 void
21208 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
21209 {
21210 if (TARGET_64BIT)
21211 {
21212 #ifndef NO_PROFILE_COUNTERS
21213 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
21214 #endif
21215
21216 if (!TARGET_64BIT_MS_ABI && flag_pic)
21217 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
21218 else
21219 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
21220 }
21221 else if (flag_pic)
21222 {
21223 #ifndef NO_PROFILE_COUNTERS
21224 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
21225 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
21226 #endif
21227 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
21228 }
21229 else
21230 {
21231 #ifndef NO_PROFILE_COUNTERS
21232 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
21233 PROFILE_COUNT_REGISTER);
21234 #endif
21235 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
21236 }
21237 }
21238
21239 /* We don't have exact information about the insn sizes, but we may assume
21240 quite safely that we are informed about all 1 byte insns and memory
21241 address sizes. This is enough to eliminate unnecessary padding in
21242 99% of cases. */
21243
21244 static int
21245 min_insn_size (rtx insn)
21246 {
21247 int l = 0;
21248
21249 if (!INSN_P (insn) || !active_insn_p (insn))
21250 return 0;
21251
21252 /* Discard alignments we've emitted, and jump instructions. */
21253 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
21254 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
21255 return 0;
21256 if (JUMP_P (insn)
21257 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
21258 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
21259 return 0;
21260
21261 /* Important case - calls are always 5 bytes.
21262 It is common to have many calls in a row. */
21263 if (CALL_P (insn)
21264 && symbolic_reference_mentioned_p (PATTERN (insn))
21265 && !SIBLING_CALL_P (insn))
21266 return 5;
21267 if (get_attr_length (insn) <= 1)
21268 return 1;
21269
21270 /* For normal instructions we may rely on the sizes of addresses
21271 and the presence of a symbol to require 4 bytes of encoding.
21272 This is not the case for jumps where references are PC relative. */
21273 if (!JUMP_P (insn))
21274 {
21275 l = get_attr_length_address (insn);
21276 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
21277 l = 4;
21278 }
21279 if (l)
21280 return 1+l;
21281 else
21282 return 2;
21283 }
21284
21285 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
21286 window. */
21287
21288 static void
21289 ix86_avoid_jump_misspredicts (void)
21290 {
21291 rtx insn, start = get_insns ();
21292 int nbytes = 0, njumps = 0;
21293 int isjump = 0;
21294
21295 /* Look for all minimal intervals of instructions containing 4 jumps.
21296 The intervals are bounded by START and INSN. NBYTES is the total
21297 size of instructions in the interval including INSN and not including
21298 START. When NBYTES is smaller than 16 bytes, it is possible
21299 that the end of START and INSN ends up in the same 16-byte page.
21300
21301 The smallest offset in the page at which INSN can start is the case
21302 where START ends at offset 0. The offset of INSN is then NBYTES - sizeof (INSN).
21303 We add a p2align to the 16-byte window with maxskip 17 - NBYTES + sizeof (INSN).
21304 */
21305 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
21306 {
21307
21308 nbytes += min_insn_size (insn);
21309 if (dump_file)
21310 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
21311 INSN_UID (insn), min_insn_size (insn));
21312 if ((JUMP_P (insn)
21313 && GET_CODE (PATTERN (insn)) != ADDR_VEC
21314 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
21315 || CALL_P (insn))
21316 njumps++;
21317 else
21318 continue;
21319
21320 while (njumps > 3)
21321 {
21322 start = NEXT_INSN (start);
21323 if ((JUMP_P (start)
21324 && GET_CODE (PATTERN (start)) != ADDR_VEC
21325 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
21326 || CALL_P (start))
21327 njumps--, isjump = 1;
21328 else
21329 isjump = 0;
21330 nbytes -= min_insn_size (start);
21331 }
21332 gcc_assert (njumps >= 0);
21333 if (dump_file)
21334 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
21335 INSN_UID (start), INSN_UID (insn), nbytes);
21336
21337 if (njumps == 3 && isjump && nbytes < 16)
21338 {
21339 int padsize = 15 - nbytes + min_insn_size (insn);
21340
21341 if (dump_file)
21342 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
21343 INSN_UID (insn), padsize);
21344 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
21345 }
21346 }
21347 }
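/* Worked example (numbers are illustrative): if the window from START to
   INSN holds a fourth branch while NBYTES is 12 and INSN itself is
   estimated at 2 bytes, all four branches could share one 16-byte fetch
   block, so an align of 15 - 12 + 2 = 5 bytes is emitted before INSN to
   push it into the next block. */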
21348
21349 /* AMD Athlon works faster
21350 when RET is not the destination of a conditional jump or directly preceded
21351 by another jump instruction. We avoid the penalty by inserting a NOP just
21352 before the RET instruction in such cases. */
21353 static void
21354 ix86_pad_returns (void)
21355 {
21356 edge e;
21357 edge_iterator ei;
21358
21359 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
21360 {
21361 basic_block bb = e->src;
21362 rtx ret = BB_END (bb);
21363 rtx prev;
21364 bool replace = false;
21365
21366 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
21367 || !maybe_hot_bb_p (bb))
21368 continue;
21369 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
21370 if (active_insn_p (prev) || LABEL_P (prev))
21371 break;
21372 if (prev && LABEL_P (prev))
21373 {
21374 edge e;
21375 edge_iterator ei;
21376
21377 FOR_EACH_EDGE (e, ei, bb->preds)
21378 if (EDGE_FREQUENCY (e) && e->src->index >= 0
21379 && !(e->flags & EDGE_FALLTHRU))
21380 replace = true;
21381 }
21382 if (!replace)
21383 {
21384 prev = prev_active_insn (ret);
21385 if (prev
21386 && ((JUMP_P (prev) && any_condjump_p (prev))
21387 || CALL_P (prev)))
21388 replace = true;
21389 /* Empty functions get a branch mispredict even when the jump destination
21390 is not visible to us. */
21391 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
21392 replace = true;
21393 }
21394 if (replace)
21395 {
21396 emit_insn_before (gen_return_internal_long (), ret);
21397 delete_insn (ret);
21398 }
21399 }
21400 }
21401
21402 /* Implement machine specific optimizations. We implement padding of returns
21403 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
21404 static void
21405 ix86_reorg (void)
21406 {
21407 if (TARGET_PAD_RETURNS && optimize && !optimize_size)
21408 ix86_pad_returns ();
21409 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
21410 ix86_avoid_jump_misspredicts ();
21411 }
21412
21413 /* Return nonzero when a QImode register that must be represented via a REX
21414 prefix is used. */
21415 bool
21416 x86_extended_QIreg_mentioned_p (rtx insn)
21417 {
21418 int i;
21419 extract_insn_cached (insn);
21420 for (i = 0; i < recog_data.n_operands; i++)
21421 if (REG_P (recog_data.operand[i])
21422 && REGNO (recog_data.operand[i]) >= 4)
21423 return true;
21424 return false;
21425 }
21426
21427 /* Return nonzero when P points to a register encoded via a REX prefix.
21428 Called via for_each_rtx. */
21429 static int
21430 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
21431 {
21432 unsigned int regno;
21433 if (!REG_P (*p))
21434 return 0;
21435 regno = REGNO (*p);
21436 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
21437 }
21438
21439 /* Return true when INSN mentions a register that must be encoded using a REX
21440 prefix. */
21441 bool
21442 x86_extended_reg_mentioned_p (rtx insn)
21443 {
21444 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
21445 }
21446
21447 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
21448 optabs would emit if we didn't have TFmode patterns. */
21449
21450 void
21451 x86_emit_floatuns (rtx operands[2])
21452 {
21453 rtx neglab, donelab, i0, i1, f0, in, out;
21454 enum machine_mode mode, inmode;
21455
21456 inmode = GET_MODE (operands[1]);
21457 gcc_assert (inmode == SImode || inmode == DImode);
21458
21459 out = operands[0];
21460 in = force_reg (inmode, operands[1]);
21461 mode = GET_MODE (out);
21462 neglab = gen_label_rtx ();
21463 donelab = gen_label_rtx ();
21464 f0 = gen_reg_rtx (mode);
21465
21466 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
21467
21468 expand_float (out, in, 0);
21469
21470 emit_jump_insn (gen_jump (donelab));
21471 emit_barrier ();
21472
21473 emit_label (neglab);
21474
21475 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
21476 1, OPTAB_DIRECT);
21477 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
21478 1, OPTAB_DIRECT);
21479 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
21480
21481 expand_float (f0, i0, 0);
21482
21483 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
21484
21485 emit_label (donelab);
21486 }
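/* The negative branch computes (x >> 1) | (x & 1), converts that
   nonnegative value to FP and doubles it; ORing the low bit back in acts
   as a sticky bit, so the doubled result rounds the same way a direct
   unsigned conversion of x would. For example (illustrative), with
   x = 0x8000000000000003 we get i0 = 0x4000000000000001, and f0 + f0
   equals the correctly rounded double value of x. */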
21487 \f
21488 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
21489 with all elements equal to VAR. Return true if successful. */
21490
21491 static bool
21492 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
21493 rtx target, rtx val)
21494 {
21495 enum machine_mode smode, wsmode, wvmode;
21496 rtx x;
21497
21498 switch (mode)
21499 {
21500 case V2SImode:
21501 case V2SFmode:
21502 if (!mmx_ok)
21503 return false;
21504 /* FALLTHRU */
21505
21506 case V2DFmode:
21507 case V2DImode:
21508 case V4SFmode:
21509 case V4SImode:
21510 val = force_reg (GET_MODE_INNER (mode), val);
21511 x = gen_rtx_VEC_DUPLICATE (mode, val);
21512 emit_insn (gen_rtx_SET (VOIDmode, target, x));
21513 return true;
21514
21515 case V4HImode:
21516 if (!mmx_ok)
21517 return false;
21518 if (TARGET_SSE || TARGET_3DNOW_A)
21519 {
21520 val = gen_lowpart (SImode, val);
21521 x = gen_rtx_TRUNCATE (HImode, val);
21522 x = gen_rtx_VEC_DUPLICATE (mode, x);
21523 emit_insn (gen_rtx_SET (VOIDmode, target, x));
21524 return true;
21525 }
21526 else
21527 {
21528 smode = HImode;
21529 wsmode = SImode;
21530 wvmode = V2SImode;
21531 goto widen;
21532 }
21533
21534 case V8QImode:
21535 if (!mmx_ok)
21536 return false;
21537 smode = QImode;
21538 wsmode = HImode;
21539 wvmode = V4HImode;
21540 goto widen;
21541 case V8HImode:
21542 if (TARGET_SSE2)
21543 {
21544 rtx tmp1, tmp2;
21545 /* Extend HImode to SImode using a paradoxical SUBREG. */
21546 tmp1 = gen_reg_rtx (SImode);
21547 emit_move_insn (tmp1, gen_lowpart (SImode, val));
21548 /* Insert the SImode value as low element of V4SImode vector. */
21549 tmp2 = gen_reg_rtx (V4SImode);
21550 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
21551 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
21552 CONST0_RTX (V4SImode),
21553 const1_rtx);
21554 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
21555 /* Cast the V4SImode vector back to a V8HImode vector. */
21556 tmp1 = gen_reg_rtx (V8HImode);
21557 emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
21558 /* Duplicate the low short through the whole low SImode word. */
21559 emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
21560 /* Cast the V8HImode vector back to a V4SImode vector. */
21561 tmp2 = gen_reg_rtx (V4SImode);
21562 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
21563 /* Replicate the low element of the V4SImode vector. */
21564 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
21565 /* Cast the V4SImode vector back to V8HImode, and store in target. */
21566 emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
21567 return true;
21568 }
21569 smode = HImode;
21570 wsmode = SImode;
21571 wvmode = V4SImode;
21572 goto widen;
21573 case V16QImode:
21574 if (TARGET_SSE2)
21575 {
21576 rtx tmp1, tmp2;
21577 /* Extend QImode to SImode using a paradoxical SUBREG. */
21578 tmp1 = gen_reg_rtx (SImode);
21579 emit_move_insn (tmp1, gen_lowpart (SImode, val));
21580 /* Insert the SImode value as low element of V4SImode vector. */
21581 tmp2 = gen_reg_rtx (V4SImode);
21582 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
21583 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
21584 CONST0_RTX (V4SImode),
21585 const1_rtx);
21586 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
21587 /* Cast the V4SImode vector back to a V16QImode vector. */
21588 tmp1 = gen_reg_rtx (V16QImode);
21589 emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
21590 /* Duplicate the low byte through the whole low SImode word. */
21591 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
21592 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
21593 /* Cast the V16QImode vector back to a V4SImode vector. */
21594 tmp2 = gen_reg_rtx (V4SImode);
21595 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
21596 /* Replicate the low element of the V4SImode vector. */
21597 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
21598 /* Cast the V4SImode vector back to V16QImode, and store in target. */
21599 emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
21600 return true;
21601 }
21602 smode = QImode;
21603 wsmode = HImode;
21604 wvmode = V8HImode;
21605 goto widen;
21606 widen:
21607 /* Replicate the value once into the next wider mode and recurse. */
21608 val = convert_modes (wsmode, smode, val, true);
21609 x = expand_simple_binop (wsmode, ASHIFT, val,
21610 GEN_INT (GET_MODE_BITSIZE (smode)),
21611 NULL_RTX, 1, OPTAB_LIB_WIDEN);
21612 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
21613
21614 x = gen_reg_rtx (wvmode);
21615 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
21616 gcc_unreachable ();
21617 emit_move_insn (target, gen_lowpart (mode, x));
21618 return true;
21619
21620 default:
21621 return false;
21622 }
21623 }
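/* The `widen' path above builds a broadcast by doubling the scalar: for
   example (illustrative), a QImode value 0xab becomes the HImode value
   (0xab << 8) | 0xab = 0xabab, which is then broadcast in the wider
   vector mode and reinterpreted, so every byte of the final vector is
   still 0xab. */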
21624
21625 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
21626 whose ONE_VAR element is VAR, and other elements are zero. Return true
21627 if successful. */
21628
21629 static bool
21630 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
21631 rtx target, rtx var, int one_var)
21632 {
21633 enum machine_mode vsimode;
21634 rtx new_target;
21635 rtx x, tmp;
21636
21637 switch (mode)
21638 {
21639 case V2SFmode:
21640 case V2SImode:
21641 if (!mmx_ok)
21642 return false;
21643 /* FALLTHRU */
21644
21645 case V2DFmode:
21646 case V2DImode:
21647 if (one_var != 0)
21648 return false;
21649 var = force_reg (GET_MODE_INNER (mode), var);
21650 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
21651 emit_insn (gen_rtx_SET (VOIDmode, target, x));
21652 return true;
21653
21654 case V4SFmode:
21655 case V4SImode:
21656 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
21657 new_target = gen_reg_rtx (mode);
21658 else
21659 new_target = target;
21660 var = force_reg (GET_MODE_INNER (mode), var);
21661 x = gen_rtx_VEC_DUPLICATE (mode, var);
21662 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
21663 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
21664 if (one_var != 0)
21665 {
21666 /* We need to shuffle the value to the correct position, so
21667 create a new pseudo to store the intermediate result. */
21668
21669 /* With SSE2, we can use the integer shuffle insns. */
21670 if (mode != V4SFmode && TARGET_SSE2)
21671 {
21672 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
21673 GEN_INT (1),
21674 GEN_INT (one_var == 1 ? 0 : 1),
21675 GEN_INT (one_var == 2 ? 0 : 1),
21676 GEN_INT (one_var == 3 ? 0 : 1)));
21677 if (target != new_target)
21678 emit_move_insn (target, new_target);
21679 return true;
21680 }
21681
21682 /* Otherwise convert the intermediate result to V4SFmode and
21683 use the SSE1 shuffle instructions. */
21684 if (mode != V4SFmode)
21685 {
21686 tmp = gen_reg_rtx (V4SFmode);
21687 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
21688 }
21689 else
21690 tmp = new_target;
21691
21692 emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp,
21693 GEN_INT (1),
21694 GEN_INT (one_var == 1 ? 0 : 1),
21695 GEN_INT (one_var == 2 ? 0+4 : 1+4),
21696 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
21697
21698 if (mode != V4SFmode)
21699 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
21700 else if (tmp != target)
21701 emit_move_insn (target, tmp);
21702 }
21703 else if (target != new_target)
21704 emit_move_insn (target, new_target);
21705 return true;
21706
21707 case V8HImode:
21708 case V16QImode:
21709 vsimode = V4SImode;
21710 goto widen;
21711 case V4HImode:
21712 case V8QImode:
21713 if (!mmx_ok)
21714 return false;
21715 vsimode = V2SImode;
21716 goto widen;
21717 widen:
21718 if (one_var != 0)
21719 return false;
21720
21721 /* Zero extend the variable element to SImode and recurse. */
21722 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
21723
21724 x = gen_reg_rtx (vsimode);
21725 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
21726 var, one_var))
21727 gcc_unreachable ();
21728
21729 emit_move_insn (target, gen_lowpart (mode, x));
21730 return true;
21731
21732 default:
21733 return false;
21734 }
21735 }
21736
21737 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
21738 consisting of the values in VALS. It is known that all elements
21739 except ONE_VAR are constants. Return true if successful. */
21740
21741 static bool
21742 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
21743 rtx target, rtx vals, int one_var)
21744 {
21745 rtx var = XVECEXP (vals, 0, one_var);
21746 enum machine_mode wmode;
21747 rtx const_vec, x;
21748
21749 const_vec = copy_rtx (vals);
21750 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
21751 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
21752
21753 switch (mode)
21754 {
21755 case V2DFmode:
21756 case V2DImode:
21757 case V2SFmode:
21758 case V2SImode:
21759 /* For the two element vectors, it's just as easy to use
21760 the general case. */
21761 return false;
21762
21763 case V4SFmode:
21764 case V4SImode:
21765 case V8HImode:
21766 case V4HImode:
21767 break;
21768
21769 case V16QImode:
21770 wmode = V8HImode;
21771 goto widen;
21772 case V8QImode:
21773 wmode = V4HImode;
21774 goto widen;
21775 widen:
21776 /* There's no way to set one QImode entry easily. Combine
21777 the variable value with its adjacent constant value, and
21778 promote to an HImode set. */
21779 x = XVECEXP (vals, 0, one_var ^ 1);
21780 if (one_var & 1)
21781 {
21782 var = convert_modes (HImode, QImode, var, true);
21783 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
21784 NULL_RTX, 1, OPTAB_LIB_WIDEN);
21785 x = GEN_INT (INTVAL (x) & 0xff);
21786 }
21787 else
21788 {
21789 var = convert_modes (HImode, QImode, var, true);
21790 x = gen_int_mode (INTVAL (x) << 8, HImode);
21791 }
21792 if (x != const0_rtx)
21793 var = expand_simple_binop (HImode, IOR, var, x, var,
21794 1, OPTAB_LIB_WIDEN);
21795
21796 x = gen_reg_rtx (wmode);
21797 emit_move_insn (x, gen_lowpart (wmode, const_vec));
21798 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
21799
21800 emit_move_insn (target, gen_lowpart (mode, x));
21801 return true;
21802
21803 default:
21804 return false;
21805 }
21806
21807 emit_move_insn (target, const_vec);
21808 ix86_expand_vector_set (mmx_ok, target, var, one_var);
21809 return true;
21810 }
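/* Illustration of the QImode `widen' path: when the variable byte sits at
   an even index, it stays in the low half of an HImode element and its
   constant odd neighbour is shifted into the high half, e.g.
   var | (0x7f << 8) where 0x7f is just an example constant; the combined
   HImode value is then inserted at index one_var >> 1 with
   ix86_expand_vector_set. */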
21811
21812 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
21813 all values variable, and none identical. */
21814
21815 static void
21816 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
21817 rtx target, rtx vals)
21818 {
21819 enum machine_mode half_mode = GET_MODE_INNER (mode);
21820 rtx op0 = NULL, op1 = NULL;
21821 bool use_vec_concat = false;
21822
21823 switch (mode)
21824 {
21825 case V2SFmode:
21826 case V2SImode:
21827 if (!mmx_ok && !TARGET_SSE)
21828 break;
21829 /* FALLTHRU */
21830
21831 case V2DFmode:
21832 case V2DImode:
21833 /* For the two element vectors, we always implement VEC_CONCAT. */
21834 op0 = XVECEXP (vals, 0, 0);
21835 op1 = XVECEXP (vals, 0, 1);
21836 use_vec_concat = true;
21837 break;
21838
21839 case V4SFmode:
21840 half_mode = V2SFmode;
21841 goto half;
21842 case V4SImode:
21843 half_mode = V2SImode;
21844 goto half;
21845 half:
21846 {
21847 rtvec v;
21848
21849 /* For V4SF and V4SI, we implement a concat of two V2 vectors.
21850 Recurse to load the two halves. */
21851
21852 op0 = gen_reg_rtx (half_mode);
21853 v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
21854 ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));
21855
21856 op1 = gen_reg_rtx (half_mode);
21857 v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
21858 ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));
21859
21860 use_vec_concat = true;
21861 }
21862 break;
21863
21864 case V8HImode:
21865 case V16QImode:
21866 case V4HImode:
21867 case V8QImode:
21868 break;
21869
21870 default:
21871 gcc_unreachable ();
21872 }
21873
21874 if (use_vec_concat)
21875 {
21876 if (!register_operand (op0, half_mode))
21877 op0 = force_reg (half_mode, op0);
21878 if (!register_operand (op1, half_mode))
21879 op1 = force_reg (half_mode, op1);
21880
21881 emit_insn (gen_rtx_SET (VOIDmode, target,
21882 gen_rtx_VEC_CONCAT (mode, op0, op1)));
21883 }
21884 else
21885 {
21886 int i, j, n_elts, n_words, n_elt_per_word;
21887 enum machine_mode inner_mode;
21888 rtx words[4], shift;
21889
21890 inner_mode = GET_MODE_INNER (mode);
21891 n_elts = GET_MODE_NUNITS (mode);
21892 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
21893 n_elt_per_word = n_elts / n_words;
21894 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
21895
21896 for (i = 0; i < n_words; ++i)
21897 {
21898 rtx word = NULL_RTX;
21899
21900 for (j = 0; j < n_elt_per_word; ++j)
21901 {
21902 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
21903 elt = convert_modes (word_mode, inner_mode, elt, true);
21904
21905 if (j == 0)
21906 word = elt;
21907 else
21908 {
21909 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
21910 word, 1, OPTAB_LIB_WIDEN);
21911 word = expand_simple_binop (word_mode, IOR, word, elt,
21912 word, 1, OPTAB_LIB_WIDEN);
21913 }
21914 }
21915
21916 words[i] = word;
21917 }
21918
21919 if (n_words == 1)
21920 emit_move_insn (target, gen_lowpart (mode, words[0]));
21921 else if (n_words == 2)
21922 {
21923 rtx tmp = gen_reg_rtx (mode);
21924 emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
21925 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
21926 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
21927 emit_move_insn (target, tmp);
21928 }
21929 else if (n_words == 4)
21930 {
21931 rtx tmp = gen_reg_rtx (V4SImode);
21932 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
21933 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
21934 emit_move_insn (target, gen_lowpart (mode, tmp));
21935 }
21936 else
21937 gcc_unreachable ();
21938 }
21939 }
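/* In the word-packing fallback above, each word gathers its elements from
   the highest index down: for a V4HImode vector {a, b, c, d} on a 32-bit
   target (illustrative), words[0] becomes (b << 16) | a and words[1]
   becomes (d << 16) | c, and the two words are then moved into the low
   and high halves of a fresh pseudo. */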
21940
21941 /* Initialize vector TARGET via VALS. Suppress the use of MMX
21942 instructions unless MMX_OK is true. */
21943
21944 void
21945 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
21946 {
21947 enum machine_mode mode = GET_MODE (target);
21948 enum machine_mode inner_mode = GET_MODE_INNER (mode);
21949 int n_elts = GET_MODE_NUNITS (mode);
21950 int n_var = 0, one_var = -1;
21951 bool all_same = true, all_const_zero = true;
21952 int i;
21953 rtx x;
21954
21955 for (i = 0; i < n_elts; ++i)
21956 {
21957 x = XVECEXP (vals, 0, i);
21958 if (!CONSTANT_P (x))
21959 n_var++, one_var = i;
21960 else if (x != CONST0_RTX (inner_mode))
21961 all_const_zero = false;
21962 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
21963 all_same = false;
21964 }
21965
21966 /* Constants are best loaded from the constant pool. */
21967 if (n_var == 0)
21968 {
21969 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
21970 return;
21971 }
21972
21973 /* If all values are identical, broadcast the value. */
21974 if (all_same
21975 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
21976 XVECEXP (vals, 0, 0)))
21977 return;
21978
21979 /* Values where only one field is non-constant are best loaded from
21980 the pool and overwritten via move later. */
21981 if (n_var == 1)
21982 {
21983 if (all_const_zero
21984 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
21985 XVECEXP (vals, 0, one_var),
21986 one_var))
21987 return;
21988
21989 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
21990 return;
21991 }
21992
21993 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
21994 }
21995
21996 void
21997 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
21998 {
21999 enum machine_mode mode = GET_MODE (target);
22000 enum machine_mode inner_mode = GET_MODE_INNER (mode);
22001 bool use_vec_merge = false;
22002 rtx tmp;
22003
22004 switch (mode)
22005 {
22006 case V2SFmode:
22007 case V2SImode:
22008 if (mmx_ok)
22009 {
22010 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
22011 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
22012 if (elt == 0)
22013 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
22014 else
22015 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
22016 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
22017 return;
22018 }
22019 break;
22020
22021 case V2DImode:
22022 use_vec_merge = TARGET_SSE4_1;
22023 if (use_vec_merge)
22024 break;
22025
22026 case V2DFmode:
22027 {
22028 rtx op0, op1;
22029
22030 /* For the two element vectors, we implement a VEC_CONCAT with
22031 the extraction of the other element. */
22032
22033 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
22034 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
22035
22036 if (elt == 0)
22037 op0 = val, op1 = tmp;
22038 else
22039 op0 = tmp, op1 = val;
22040
22041 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
22042 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
22043 }
22044 return;
22045
22046 case V4SFmode:
22047 use_vec_merge = TARGET_SSE4_1;
22048 if (use_vec_merge)
22049 break;
22050
22051 switch (elt)
22052 {
22053 case 0:
22054 use_vec_merge = true;
22055 break;
22056
22057 case 1:
22058 /* tmp = target = A B C D */
22059 tmp = copy_to_reg (target);
22060 /* target = A A B B */
22061 emit_insn (gen_sse_unpcklps (target, target, target));
22062 /* target = X A B B */
22063 ix86_expand_vector_set (false, target, val, 0);
22064 /* target = A X C D */
22065 emit_insn (gen_sse_shufps_1 (target, target, tmp,
22066 GEN_INT (1), GEN_INT (0),
22067 GEN_INT (2+4), GEN_INT (3+4)));
22068 return;
22069
22070 case 2:
22071 /* tmp = target = A B C D */
22072 tmp = copy_to_reg (target);
22073 /* tmp = X B C D */
22074 ix86_expand_vector_set (false, tmp, val, 0);
22075 /* target = A B X D */
22076 emit_insn (gen_sse_shufps_1 (target, target, tmp,
22077 GEN_INT (0), GEN_INT (1),
22078 GEN_INT (0+4), GEN_INT (3+4)));
22079 return;
22080
22081 case 3:
22082 /* tmp = target = A B C D */
22083 tmp = copy_to_reg (target);
22084 /* tmp = X B C D */
22085 ix86_expand_vector_set (false, tmp, val, 0);
22086 /* target = A B C X */
22087 emit_insn (gen_sse_shufps_1 (target, target, tmp,
22088 GEN_INT (0), GEN_INT (1),
22089 GEN_INT (2+4), GEN_INT (0+4)));
22090 return;
22091
22092 default:
22093 gcc_unreachable ();
22094 }
22095 break;
22096
22097 case V4SImode:
22098 use_vec_merge = TARGET_SSE4_1;
22099 if (use_vec_merge)
22100 break;
22101
22102 /* Element 0 handled by vec_merge below. */
22103 if (elt == 0)
22104 {
22105 use_vec_merge = true;
22106 break;
22107 }
22108
22109 if (TARGET_SSE2)
22110 {
22111 /* With SSE2, use integer shuffles to swap element 0 and ELT,
22112 store into element 0, then shuffle them back. */
22113
22114 rtx order[4];
22115
22116 order[0] = GEN_INT (elt);
22117 order[1] = const1_rtx;
22118 order[2] = const2_rtx;
22119 order[3] = GEN_INT (3);
22120 order[elt] = const0_rtx;
22121
22122 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
22123 order[1], order[2], order[3]));
22124
22125 ix86_expand_vector_set (false, target, val, 0);
22126
22127 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
22128 order[1], order[2], order[3]));
22129 }
22130 else
22131 {
22132 /* For SSE1, we have to reuse the V4SF code. */
22133 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
22134 gen_lowpart (SFmode, val), elt);
22135 }
22136 return;
22137
22138 case V8HImode:
22139 use_vec_merge = TARGET_SSE2;
22140 break;
22141 case V4HImode:
22142 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
22143 break;
22144
22145 case V16QImode:
22146 use_vec_merge = TARGET_SSE4_1;
22147 break;
22148
22149 case V8QImode:
22150 default:
22151 break;
22152 }
22153
22154 if (use_vec_merge)
22155 {
22156 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
22157 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
22158 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
22159 }
22160 else
22161 {
22162 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
22163
22164 emit_move_insn (mem, target);
22165
22166 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
22167 emit_move_insn (tmp, val);
22168
22169 emit_move_insn (target, mem);
22170 }
22171 }
22172
22173 void
22174 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
22175 {
22176 enum machine_mode mode = GET_MODE (vec);
22177 enum machine_mode inner_mode = GET_MODE_INNER (mode);
22178 bool use_vec_extr = false;
22179 rtx tmp;
22180
22181 switch (mode)
22182 {
22183 case V2SImode:
22184 case V2SFmode:
22185 if (!mmx_ok)
22186 break;
22187 /* FALLTHRU */
22188
22189 case V2DFmode:
22190 case V2DImode:
22191 use_vec_extr = true;
22192 break;
22193
22194 case V4SFmode:
22195 use_vec_extr = TARGET_SSE4_1;
22196 if (use_vec_extr)
22197 break;
22198
22199 switch (elt)
22200 {
22201 case 0:
22202 tmp = vec;
22203 break;
22204
22205 case 1:
22206 case 3:
22207 tmp = gen_reg_rtx (mode);
22208 emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
22209 GEN_INT (elt), GEN_INT (elt),
22210 GEN_INT (elt+4), GEN_INT (elt+4)));
22211 break;
22212
22213 case 2:
22214 tmp = gen_reg_rtx (mode);
22215 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
22216 break;
22217
22218 default:
22219 gcc_unreachable ();
22220 }
22221 vec = tmp;
22222 use_vec_extr = true;
22223 elt = 0;
22224 break;
22225
22226 case V4SImode:
22227 use_vec_extr = TARGET_SSE4_1;
22228 if (use_vec_extr)
22229 break;
22230
22231 if (TARGET_SSE2)
22232 {
22233 switch (elt)
22234 {
22235 case 0:
22236 tmp = vec;
22237 break;
22238
22239 case 1:
22240 case 3:
22241 tmp = gen_reg_rtx (mode);
22242 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
22243 GEN_INT (elt), GEN_INT (elt),
22244 GEN_INT (elt), GEN_INT (elt)));
22245 break;
22246
22247 case 2:
22248 tmp = gen_reg_rtx (mode);
22249 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
22250 break;
22251
22252 default:
22253 gcc_unreachable ();
22254 }
22255 vec = tmp;
22256 use_vec_extr = true;
22257 elt = 0;
22258 }
22259 else
22260 {
22261 /* For SSE1, we have to reuse the V4SF code. */
22262 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
22263 gen_lowpart (V4SFmode, vec), elt);
22264 return;
22265 }
22266 break;
22267
22268 case V8HImode:
22269 use_vec_extr = TARGET_SSE2;
22270 break;
22271 case V4HImode:
22272 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
22273 break;
22274
22275 case V16QImode:
22276 use_vec_extr = TARGET_SSE4_1;
22277 break;
22278
22279 case V8QImode:
22280 /* ??? Could extract the appropriate HImode element and shift. */
22281 default:
22282 break;
22283 }
22284
22285 if (use_vec_extr)
22286 {
22287 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
22288 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
22289
22290 /* Let the rtl optimizers know about the zero extension performed. */
22291 if (inner_mode == QImode || inner_mode == HImode)
22292 {
22293 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
22294 target = gen_lowpart (SImode, target);
22295 }
22296
22297 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
22298 }
22299 else
22300 {
22301 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
22302
22303 emit_move_insn (mem, vec);
22304
22305 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
22306 emit_move_insn (target, tmp);
22307 }
22308 }
22309
22310 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
22311 pattern to reduce; DEST is the destination; IN is the input vector. */
22312
22313 void
22314 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
22315 {
22316 rtx tmp1, tmp2, tmp3;
22317
22318 tmp1 = gen_reg_rtx (V4SFmode);
22319 tmp2 = gen_reg_rtx (V4SFmode);
22320 tmp3 = gen_reg_rtx (V4SFmode);
22321
22322 emit_insn (gen_sse_movhlps (tmp1, in, in));
22323 emit_insn (fn (tmp2, tmp1, in));
22324
22325 emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
22326 GEN_INT (1), GEN_INT (1),
22327 GEN_INT (1+4), GEN_INT (1+4)));
22328 emit_insn (fn (dest, tmp2, tmp3));
22329 }
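/* Dataflow sketch (illustrative, for IN = {a, b, c, d} and a commutative
   binary FN): movhlps copies the high pair into the low pair, so tmp1
   starts with {c, d}; tmp2 = FN (tmp1, IN) then holds {FN(a,c), FN(b,d)}
   in its low pair; the shufps broadcasts element 1, giving FN(b,d) in
   element 0 of tmp3; and the final FN leaves the full reduction
   FN(FN(a,c), FN(b,d)) in element 0 of DEST. */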
22330 \f
22331 /* Target hook for scalar_mode_supported_p. */
22332 static bool
22333 ix86_scalar_mode_supported_p (enum machine_mode mode)
22334 {
22335 if (DECIMAL_FLOAT_MODE_P (mode))
22336 return true;
22337 else if (mode == TFmode)
22338 return TARGET_64BIT;
22339 else
22340 return default_scalar_mode_supported_p (mode);
22341 }
22342
22343 /* Implements target hook vector_mode_supported_p. */
22344 static bool
22345 ix86_vector_mode_supported_p (enum machine_mode mode)
22346 {
22347 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
22348 return true;
22349 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
22350 return true;
22351 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
22352 return true;
22353 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
22354 return true;
22355 return false;
22356 }
22357
22358 /* Worker function for TARGET_MD_ASM_CLOBBERS.
22359
22360 We do this in the new i386 backend to maintain source compatibility
22361 with the old cc0-based compiler. */
22362
22363 static tree
22364 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
22365 tree inputs ATTRIBUTE_UNUSED,
22366 tree clobbers)
22367 {
22368 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
22369 clobbers);
22370 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
22371 clobbers);
22372 return clobbers;
22373 }
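/* In effect, every GCC inline asm statement on i386 is treated as if its
   clobber list also named the condition codes and the x87 status word:

     asm ("..." : outputs : inputs : "memory");

   behaves as if "flags" and "fpsr" had been appended to the clobbers.  This
   keeps asm sources written for the old cc0-based compiler working.
   Illustrative note only.  */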
22374
22375 /* Implements the target hook targetm.encode_section_info.  This
22376 is not used by NetWare.  */
22377
22378 static void ATTRIBUTE_UNUSED
22379 ix86_encode_section_info (tree decl, rtx rtl, int first)
22380 {
22381 default_encode_section_info (decl, rtl, first);
22382
22383 if (TREE_CODE (decl) == VAR_DECL
22384 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
22385 && ix86_in_large_data_p (decl))
22386 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
22387 }
22388
22389 /* Worker function for REVERSE_CONDITION. */
22390
22391 enum rtx_code
22392 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
22393 {
22394 return (mode != CCFPmode && mode != CCFPUmode
22395 ? reverse_condition (code)
22396 : reverse_condition_maybe_unordered (code));
22397 }
22398
22399 /* Output code to perform an x87 FP register move, from OPERANDS[1]
22400 to OPERANDS[0]. */
22401
22402 const char *
22403 output_387_reg_move (rtx insn, rtx *operands)
22404 {
22405 if (REG_P (operands[0]))
22406 {
22407 if (REG_P (operands[1])
22408 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
22409 {
22410 if (REGNO (operands[0]) == FIRST_STACK_REG)
22411 return output_387_ffreep (operands, 0);
22412 return "fstp\t%y0";
22413 }
22414 if (STACK_TOP_P (operands[0]))
22415 return "fld%z1\t%y1";
22416 return "fst\t%y0";
22417 }
22418 else if (MEM_P (operands[0]))
22419 {
22420 gcc_assert (REG_P (operands[1]));
22421 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
22422 return "fstp%z0\t%y0";
22423 else
22424 {
22425 /* There is no non-popping store to memory for XFmode.
22426 So if we need one, follow the store with a load. */
22427 if (GET_MODE (operands[0]) == XFmode)
22428 return "fstp%z0\t%y0\n\tfld%z0\t%y0";
22429 else
22430 return "fst%z0\t%y0";
22431 }
22432 }
22433 else
22434 gcc_unreachable ();
22435 }
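/* Summary of the templates chosen above (illustrative): a dying source
   register is stored with a popping fstp (or an ffreep sequence when the
   destination is %st(0)); a load into %st(0) uses fld; a store to memory
   uses fst, or fstp followed by fld for XFmode, which has no non-popping
   memory store.  */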
22436
22437 /* Output code to perform a conditional jump to LABEL if the C2 flag in
22438 the FP status register is set.  */
22439
22440 void
22441 ix86_emit_fp_unordered_jump (rtx label)
22442 {
22443 rtx reg = gen_reg_rtx (HImode);
22444 rtx temp;
22445
22446 emit_insn (gen_x86_fnstsw_1 (reg));
22447
22448 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_size))
22449 {
22450 emit_insn (gen_x86_sahf_1 (reg));
22451
22452 temp = gen_rtx_REG (CCmode, FLAGS_REG);
22453 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
22454 }
22455 else
22456 {
22457 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
22458
22459 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
22460 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
22461 }
22462
22463 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
22464 gen_rtx_LABEL_REF (VOIDmode, label),
22465 pc_rtx);
22466 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
22467
22468 emit_jump_insn (temp);
22469 predict_jump (REG_BR_PROB_BASE * 10 / 100);
22470 }
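/* Background, for illustration: after fnstsw the x87 status word sits in a
   16-bit register; C2 is bit 10, i.e. bit 2 of the upper byte, hence the
   test against 0x04 on the non-SAHF path.  On the SAHF path the upper byte
   is copied into EFLAGS, where C2 lands in PF, which is what the UNORDERED
   condition on the CCmode flags register checks.  */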
22471
22472 /* Output code to perform an XFmode log1p calculation.  */
22473
22474 void ix86_emit_i387_log1p (rtx op0, rtx op1)
22475 {
22476 rtx label1 = gen_label_rtx ();
22477 rtx label2 = gen_label_rtx ();
22478
22479 rtx tmp = gen_reg_rtx (XFmode);
22480 rtx tmp2 = gen_reg_rtx (XFmode);
22481
22482 emit_insn (gen_absxf2 (tmp, op1));
22483 emit_insn (gen_cmpxf (tmp,
22484 CONST_DOUBLE_FROM_REAL_VALUE (
22485 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
22486 XFmode)));
22487 emit_jump_insn (gen_bge (label1));
22488
22489 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
22490 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
22491 emit_jump (label2);
22492
22493 emit_label (label1);
22494 emit_move_insn (tmp, CONST1_RTX (XFmode));
22495 emit_insn (gen_addxf3 (tmp, op1, tmp));
22496 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
22497 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
22498
22499 emit_label (label2);
22500 }
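/* Explanatory note: the threshold 0.29289321881345247... used above is
   1 - sqrt(2)/2, the largest |x| for which the fyl2xp1 instruction is
   specified to give an accurate result.  Within that range ln(1+x) is
   computed directly as fldln2 * log2 (1 + x) via fyl2xp1; outside it, 1 is
   added explicitly and fyl2x is used instead.  */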
22501
22502 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
22503
22504 static void ATTRIBUTE_UNUSED
22505 i386_solaris_elf_named_section (const char *name, unsigned int flags,
22506 tree decl)
22507 {
22508 /* With Binutils 2.15, the "@unwind" marker must be specified on
22509 every occurrence of the ".eh_frame" section, not just the first
22510 one. */
22511 if (TARGET_64BIT
22512 && strcmp (name, ".eh_frame") == 0)
22513 {
22514 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
22515 flags & SECTION_WRITE ? "aw" : "a");
22516 return;
22517 }
22518 default_elf_asm_named_section (name, flags, decl);
22519 }
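/* For example, this emits a directive of the form

     .section  .eh_frame,"a",@unwind

   (or "aw" when the section is writable) instead of the default form, which
   would omit the @unwind marker.  */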
22520
22521 /* Return the mangling of TYPE if it is an extended fundamental type. */
22522
22523 static const char *
22524 ix86_mangle_fundamental_type (tree type)
22525 {
22526 switch (TYPE_MODE (type))
22527 {
22528 case TFmode:
22529 /* __float128 is "g". */
22530 return "g";
22531 case XFmode:
22532 /* "long double" or __float80 is "e". */
22533 return "e";
22534 default:
22535 return NULL;
22536 }
22537 }
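/* Under the Itanium C++ ABI these manglings mean that, for example,

     void f (long double);   // mangles as _Z1fe
     void f (__float128);    // overload mangles as _Z1fg

   Example declarations are for illustration only.  */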
22538
22539 /* For 32-bit code we can avoid PIC register setup by calling the
22540 hidden function __stack_chk_fail_local instead of calling
22541 __stack_chk_fail directly.  64-bit code doesn't need to set up any PIC
22542 register, so it is better to call __stack_chk_fail directly.  */
22543
22544 static tree
22545 ix86_stack_protect_fail (void)
22546 {
22547 return TARGET_64BIT
22548 ? default_external_stack_protect_fail ()
22549 : default_hidden_stack_protect_fail ();
22550 }
22551
22552 /* Select a format to encode pointers in exception handling data. CODE
22553 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
22554 true if the symbol may be affected by dynamic relocations.
22555
22556 ??? All x86 object file formats are capable of representing this.
22557 After all, the relocation needed is the same as for the call insn.
22558 Whether a particular assembler actually accepts such a relocation
22559 remains to be seen.  */
22560 int
22561 asm_preferred_eh_data_format (int code, int global)
22562 {
22563 if (flag_pic)
22564 {
22565 int type = DW_EH_PE_sdata8;
22566 if (!TARGET_64BIT
22567 || ix86_cmodel == CM_SMALL_PIC
22568 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
22569 type = DW_EH_PE_sdata4;
22570 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
22571 }
22572 if (ix86_cmodel == CM_SMALL
22573 || (ix86_cmodel == CM_MEDIUM && code))
22574 return DW_EH_PE_udata4;
22575 return DW_EH_PE_absptr;
22576 }
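/* For example (illustrative): with -fpic on 32-bit targets this selects
   DW_EH_PE_pcrel | DW_EH_PE_sdata4, plus DW_EH_PE_indirect for symbols that
   may be affected by dynamic relocations; without -fpic, DW_EH_PE_udata4 is
   used for the small code model and DW_EH_PE_absptr otherwise.  */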
22577 \f
22578 /* Expand copysign from SIGN to the positive value ABS_VALUE,
22579 storing the result in RESULT.  If MASK is non-null, it is a mask that
22580 masks out the sign bit.  */
22581 static void
22582 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
22583 {
22584 enum machine_mode mode = GET_MODE (sign);
22585 rtx sgn = gen_reg_rtx (mode);
22586 if (mask == NULL_RTX)
22587 {
22588 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
22589 if (!VECTOR_MODE_P (mode))
22590 {
22591 /* We need to generate a scalar mode mask in this case. */
22592 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
22593 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
22594 mask = gen_reg_rtx (mode);
22595 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
22596 }
22597 }
22598 else
22599 mask = gen_rtx_NOT (mode, mask);
22600 emit_insn (gen_rtx_SET (VOIDmode, sgn,
22601 gen_rtx_AND (mode, mask, sign)));
22602 emit_insn (gen_rtx_SET (VOIDmode, result,
22603 gen_rtx_IOR (mode, abs_value, sgn)));
22604 }
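/* In scalar terms the sequence above computes, roughly,

     result = abs_value | (sign & SIGNBIT_MASK);

   where SIGNBIT_MASK has only the sign bit set.  The caller may instead pass
   the complementary mask, which is inverted here first.  Sketch only.  */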
22605
22606 /* Expand fabs (OP0) and return a new rtx that holds the result. The
22607 mask for masking out the sign-bit is stored in *SMASK, if that is
22608 non-null. */
22609 static rtx
22610 ix86_expand_sse_fabs (rtx op0, rtx *smask)
22611 {
22612 enum machine_mode mode = GET_MODE (op0);
22613 rtx xa, mask;
22614
22615 xa = gen_reg_rtx (mode);
22616 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
22617 if (!VECTOR_MODE_P (mode))
22618 {
22619 /* We need to generate a scalar mode mask in this case. */
22620 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
22621 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
22622 mask = gen_reg_rtx (mode);
22623 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
22624 }
22625 emit_insn (gen_rtx_SET (VOIDmode, xa,
22626 gen_rtx_AND (mode, op0, mask)));
22627
22628 if (smask)
22629 *smask = mask;
22630
22631 return xa;
22632 }
22633
22634 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
22635 swapping the operands if SWAP_OPERANDS is true. The expanded
22636 code is a forward jump to a newly created label in case the
22637 comparison is true. The generated label rtx is returned. */
22638 static rtx
22639 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
22640 bool swap_operands)
22641 {
22642 rtx label, tmp;
22643
22644 if (swap_operands)
22645 {
22646 tmp = op0;
22647 op0 = op1;
22648 op1 = tmp;
22649 }
22650
22651 label = gen_label_rtx ();
22652 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
22653 emit_insn (gen_rtx_SET (VOIDmode, tmp,
22654 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
22655 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
22656 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
22657 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
22658 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
22659 JUMP_LABEL (tmp) = label;
22660
22661 return label;
22662 }
22663
22664 /* Expand a mask-generating SSE comparison instruction comparing OP0 with OP1
22665 using comparison code CODE.  Operands are swapped for the comparison if
22666 SWAP_OPERANDS is true.  Returns an rtx for the generated mask.  */
22667 static rtx
22668 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
22669 bool swap_operands)
22670 {
22671 enum machine_mode mode = GET_MODE (op0);
22672 rtx mask = gen_reg_rtx (mode);
22673
22674 if (swap_operands)
22675 {
22676 rtx tmp = op0;
22677 op0 = op1;
22678 op1 = tmp;
22679 }
22680
22681 if (mode == DFmode)
22682 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
22683 gen_rtx_fmt_ee (code, mode, op0, op1)));
22684 else
22685 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
22686 gen_rtx_fmt_ee (code, mode, op0, op1)));
22687
22688 return mask;
22689 }
22690
22691 /* Generate and return an rtx of mode MODE holding 2**n, where n is the number
22692 of mantissa bits of MODE, which must be DFmode (n = 52) or SFmode (n = 23).  */
22693 static rtx
22694 ix86_gen_TWO52 (enum machine_mode mode)
22695 {
22696 REAL_VALUE_TYPE TWO52r;
22697 rtx TWO52;
22698
22699 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
22700 TWO52 = const_double_from_real_value (TWO52r, mode);
22701 TWO52 = force_reg (mode, TWO52);
22702
22703 return TWO52;
22704 }
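/* Explanatory note: the constant is used for the classic "add and subtract
   2**52" trick.  For 0 <= xa < 2**52 (DFmode), xa + 2**52 has no fractional
   bits, so the addition itself rounds xa to an integer in the current
   rounding mode, and subtracting 2**52 recovers that integer.  E.g.
   3.7 + 2**52 rounds to 4 + 2**52, and the subtraction yields 4.0.
   Analogously with 2**23 for SFmode.  */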
22705
22706 /* Expand SSE sequence for computing lround from OP1 storing
22707 into OP0. */
22708 void
22709 ix86_expand_lround (rtx op0, rtx op1)
22710 {
22711 /* C code for the stuff we're doing below:
22712 tmp = op1 + copysign (nextafter (0.5, 0.0), op1);
22713 return (long)tmp;
22714 */
22715 enum machine_mode mode = GET_MODE (op1);
22716 const struct real_format *fmt;
22717 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
22718 rtx adj;
22719
22720 /* load nextafter (0.5, 0.0) */
22721 fmt = REAL_MODE_FORMAT (mode);
22722 real_2expN (&half_minus_pred_half, -(fmt->p) - 1);
22723 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
22724
22725 /* adj = copysign (0.5, op1) */
22726 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
22727 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
22728
22729 /* adj = op1 + adj */
22730 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
22731
22732 /* op0 = (imode)adj */
22733 expand_fix (op0, adj, 0);
22734 }
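/* Why nextafter (0.5, 0.0) rather than 0.5 (explanatory note, assuming
   round-to-nearest): for the largest double below 0.5
   (x = 0.49999999999999994), x + 0.5 rounds up to 1.0, so lround would
   return 1 instead of 0.  Adding the predecessor of 0.5 keeps the sum below
   1.0, and truncation then gives the correctly rounded result.  */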
22735
22736 /* Expand SSE2 sequence for computing lfloor or lceil from OP1 storing
22737 into OP0.  */
22738 void
22739 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
22740 {
22741 /* C code for the stuff we're doing below (for do_floor):
22742 xi = (long)op1;
22743 xi -= (double)xi > op1 ? 1 : 0;
22744 return xi;
22745 */
22746 enum machine_mode fmode = GET_MODE (op1);
22747 enum machine_mode imode = GET_MODE (op0);
22748 rtx ireg, freg, label, tmp;
22749
22750 /* reg = (long)op1 */
22751 ireg = gen_reg_rtx (imode);
22752 expand_fix (ireg, op1, 0);
22753
22754 /* freg = (double)reg */
22755 freg = gen_reg_rtx (fmode);
22756 expand_float (freg, ireg, 0);
22757
22758 /* ireg = (freg > op1) ? ireg - 1 : ireg (mirrored with +1 for ceil) */
22759 label = ix86_expand_sse_compare_and_jump (UNLE,
22760 freg, op1, !do_floor);
22761 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
22762 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
22763 emit_move_insn (ireg, tmp);
22764
22765 emit_label (label);
22766 LABEL_NUSES (label) = 1;
22767
22768 emit_move_insn (op0, ireg);
22769 }
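/* For completeness, the do_floor == false (lceil) variant corresponds to:

     xi = (long) op1;
     xi += (double) xi < op1 ? 1 : 0;
     return xi;

   obtained above by swapping the comparison operands and adding rather than
   subtracting 1.  Sketch only.  */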
22770
22771 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
22772 result in OPERAND0. */
22773 void
22774 ix86_expand_rint (rtx operand0, rtx operand1)
22775 {
22776 /* C code for the stuff we're doing below:
22777 xa = fabs (operand1);
22778 if (!isless (xa, 2**52))
22779 return operand1;
22780 xa = xa + 2**52 - 2**52;
22781 return copysign (xa, operand1);
22782 */
22783 enum machine_mode mode = GET_MODE (operand0);
22784 rtx res, xa, label, TWO52, mask;
22785
22786 res = gen_reg_rtx (mode);
22787 emit_move_insn (res, operand1);
22788
22789 /* xa = abs (operand1) */
22790 xa = ix86_expand_sse_fabs (res, &mask);
22791
22792 /* if (!isless (xa, TWO52)) goto label; */
22793 TWO52 = ix86_gen_TWO52 (mode);
22794 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
22795
22796 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
22797 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
22798
22799 ix86_sse_copysign_to_positive (res, xa, res, mask);
22800
22801 emit_label (label);
22802 LABEL_NUSES (label) = 1;
22803
22804 emit_move_insn (operand0, res);
22805 }
22806
22807 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
22808 into OPERAND0, without relying on DImode truncation (usable on 32-bit targets).  */
22809 void
22810 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
22811 {
22812 /* C code for the stuff we expand below.
22813 double xa = fabs (x), x2;
22814 if (!isless (xa, TWO52))
22815 return x;
22816 xa = xa + TWO52 - TWO52;
22817 x2 = copysign (xa, x);
22818 Compensate. Floor:
22819 if (x2 > x)
22820 x2 -= 1;
22821 Compensate. Ceil:
22822 if (x2 < x)
22823 x2 -= -1;
22824 return x2;
22825 */
22826 enum machine_mode mode = GET_MODE (operand0);
22827 rtx xa, TWO52, tmp, label, one, res, mask;
22828
22829 TWO52 = ix86_gen_TWO52 (mode);
22830
22831 /* Temporary for holding the result, initialized to the input
22832 operand to ease control flow. */
22833 res = gen_reg_rtx (mode);
22834 emit_move_insn (res, operand1);
22835
22836 /* xa = abs (operand1) */
22837 xa = ix86_expand_sse_fabs (res, &mask);
22838
22839 /* if (!isless (xa, TWO52)) goto label; */
22840 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
22841
22842 /* xa = xa + TWO52 - TWO52; */
22843 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
22844 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
22845
22846 /* xa = copysign (xa, operand1) */
22847 ix86_sse_copysign_to_positive (xa, xa, res, mask);
22848
22849 /* generate 1.0 or -1.0 */
22850 one = force_reg (mode,
22851 const_double_from_real_value (do_floor
22852 ? dconst1 : dconstm1, mode));
22853
22854 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
22855 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
22856 emit_insn (gen_rtx_SET (VOIDmode, tmp,
22857 gen_rtx_AND (mode, one, tmp)));
22858 /* We always need to subtract here to preserve signed zero. */
22859 tmp = expand_simple_binop (mode, MINUS,
22860 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
22861 emit_move_insn (res, tmp);
22862
22863 emit_label (label);
22864 LABEL_NUSES (label) = 1;
22865
22866 emit_move_insn (operand0, res);
22867 }
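/* A note on the compensation step: the SSE compare above produces an
   all-ones bit pattern when the condition holds and all-zeros otherwise, so
   ANDing that mask with 1.0 (or -1.0 for the ceil case) yields exactly the
   correction term, and the adjustment

     xa = xa - ((xa > x) ? one : 0.0);

   is applied branch-free.  Illustrative description only.  */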
22868
22869 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
22870 into OPERAND0. */
22871 void
22872 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
22873 {
22874 /* C code for the stuff we expand below.
22875 double xa = fabs (x), x2;
22876 if (!isless (xa, TWO52))
22877 return x;
22878 x2 = (double)(long)x;
22879 Compensate. Floor:
22880 if (x2 > x)
22881 x2 -= 1;
22882 Compensate. Ceil:
22883 if (x2 < x)
22884 x2 += 1;
22885 if (HONOR_SIGNED_ZEROS (mode))
22886 return copysign (x2, x);
22887 return x2;
22888 */
22889 enum machine_mode mode = GET_MODE (operand0);
22890 rtx xa, xi, TWO52, tmp, label, one, res, mask;
22891
22892 TWO52 = ix86_gen_TWO52 (mode);
22893
22894 /* Temporary for holding the result, initialized to the input
22895 operand to ease control flow. */
22896 res = gen_reg_rtx (mode);
22897 emit_move_insn (res, operand1);
22898
22899 /* xa = abs (operand1) */
22900 xa = ix86_expand_sse_fabs (res, &mask);
22901
22902 /* if (!isless (xa, TWO52)) goto label; */
22903 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
22904
22905 /* xa = (double)(long)x */
22906 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
22907 expand_fix (xi, res, 0);
22908 expand_float (xa, xi, 0);
22909
22910 /* generate 1.0 */
22911 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
22912
22913 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
22914 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
22915 emit_insn (gen_rtx_SET (VOIDmode, tmp,
22916 gen_rtx_AND (mode, one, tmp)));
22917 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
22918 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
22919 emit_move_insn (res, tmp);
22920
22921 if (HONOR_SIGNED_ZEROS (mode))
22922 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
22923
22924 emit_label (label);
22925 LABEL_NUSES (label) = 1;
22926
22927 emit_move_insn (operand0, res);
22928 }
22929
22930 /* Expand SSE sequence for computing round from OPERAND1 storing
22931 into OPERAND0.  The sequence works without relying on DImode truncation
22932 via cvttsd2siq, which is only available on 64-bit targets.  */
22933 void
22934 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
22935 {
22936 /* C code for the stuff we expand below.
22937 double xa = fabs (x), xa2, x2;
22938 if (!isless (xa, TWO52))
22939 return x;
22940 Using the absolute value and copying back sign makes
22941 -0.0 -> -0.0 correct.
22942 xa2 = xa + TWO52 - TWO52;
22943 Compensate.
22944 dxa = xa2 - xa;
22945 if (dxa <= -0.5)
22946 xa2 += 1;
22947 else if (dxa > 0.5)
22948 xa2 -= 1;
22949 x2 = copysign (xa2, x);
22950 return x2;
22951 */
22952 enum machine_mode mode = GET_MODE (operand0);
22953 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
22954
22955 TWO52 = ix86_gen_TWO52 (mode);
22956
22957 /* Temporary for holding the result, initialized to the input
22958 operand to ease control flow. */
22959 res = gen_reg_rtx (mode);
22960 emit_move_insn (res, operand1);
22961
22962 /* xa = abs (operand1) */
22963 xa = ix86_expand_sse_fabs (res, &mask);
22964
22965 /* if (!isless (xa, TWO52)) goto label; */
22966 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
22967
22968 /* xa2 = xa + TWO52 - TWO52; */
22969 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
22970 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
22971
22972 /* dxa = xa2 - xa; */
22973 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
22974
22975 /* generate 0.5, 1.0 and -0.5 */
22976 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
22977 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
22978 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
22979 0, OPTAB_DIRECT);
22980
22981 /* Compensate. */
22982 tmp = gen_reg_rtx (mode);
22983 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
22984 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
22985 emit_insn (gen_rtx_SET (VOIDmode, tmp,
22986 gen_rtx_AND (mode, one, tmp)));
22987 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
22988 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
22989 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
22990 emit_insn (gen_rtx_SET (VOIDmode, tmp,
22991 gen_rtx_AND (mode, one, tmp)));
22992 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
22993
22994 /* res = copysign (xa2, operand1) */
22995 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
22996
22997 emit_label (label);
22998 LABEL_NUSES (label) = 1;
22999
23000 emit_move_insn (operand0, res);
23001 }
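/* Worked example (illustrative), assuming round-to-nearest-even:
   for x = 2.5: xa = 2.5; xa2 = (2.5 + 2**52) - 2**52 = 2.0 (ties to even);
   dxa = -0.5; the dxa <= -0.5 test fires, so xa2 becomes 3.0, and the
   copysign step returns 3.0, matching round(), which rounds halfway cases
   away from zero.  */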
23002
23003 /* Expand SSE sequence for computing trunc from OPERAND1 storing
23004 into OPERAND0. */
23005 void
23006 ix86_expand_trunc (rtx operand0, rtx operand1)
23007 {
23008 /* C code for SSE variant we expand below.
23009 double xa = fabs (x), x2;
23010 if (!isless (xa, TWO52))
23011 return x;
23012 x2 = (double)(long)x;
23013 if (HONOR_SIGNED_ZEROS (mode))
23014 return copysign (x2, x);
23015 return x2;
23016 */
23017 enum machine_mode mode = GET_MODE (operand0);
23018 rtx xa, xi, TWO52, label, res, mask;
23019
23020 TWO52 = ix86_gen_TWO52 (mode);
23021
23022 /* Temporary for holding the result, initialized to the input
23023 operand to ease control flow. */
23024 res = gen_reg_rtx (mode);
23025 emit_move_insn (res, operand1);
23026
23027 /* xa = abs (operand1) */
23028 xa = ix86_expand_sse_fabs (res, &mask);
23029
23030 /* if (!isless (xa, TWO52)) goto label; */
23031 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
23032
23033 /* x = (double)(long)x */
23034 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
23035 expand_fix (xi, res, 0);
23036 expand_float (res, xi, 0);
23037
23038 if (HONOR_SIGNED_ZEROS (mode))
23039 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
23040
23041 emit_label (label);
23042 LABEL_NUSES (label) = 1;
23043
23044 emit_move_insn (operand0, res);
23045 }
23046
23047 /* Expand SSE sequence for computing trunc from OPERAND1 storing
23048 into OPERAND0, without relying on DImode truncation (usable on 32-bit targets).  */
23049 void
23050 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
23051 {
23052 enum machine_mode mode = GET_MODE (operand0);
23053 rtx xa, mask, TWO52, label, one, res, smask, tmp;
23054
23055 /* C code for SSE variant we expand below.
23056 double xa = fabs (x), x2;
23057 if (!isless (xa, TWO52))
23058 return x;
23059 xa2 = xa + TWO52 - TWO52;
23060 Compensate:
23061 if (xa2 > xa)
23062 xa2 -= 1.0;
23063 x2 = copysign (xa2, x);
23064 return x2;
23065 */
23066
23067 TWO52 = ix86_gen_TWO52 (mode);
23068
23069 /* Temporary for holding the result, initialized to the input
23070 operand to ease control flow. */
23071 res = gen_reg_rtx (mode);
23072 emit_move_insn (res, operand1);
23073
23074 /* xa = abs (operand1) */
23075 xa = ix86_expand_sse_fabs (res, &smask);
23076
23077 /* if (!isless (xa, TWO52)) goto label; */
23078 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
23079
23080 /* res = xa + TWO52 - TWO52; */
23081 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
23082 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
23083 emit_move_insn (res, tmp);
23084
23085 /* generate 1.0 */
23086 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
23087
23088 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
23089 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
23090 emit_insn (gen_rtx_SET (VOIDmode, mask,
23091 gen_rtx_AND (mode, mask, one)));
23092 tmp = expand_simple_binop (mode, MINUS,
23093 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
23094 emit_move_insn (res, tmp);
23095
23096 /* res = copysign (res, operand1) */
23097 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
23098
23099 emit_label (label);
23100 LABEL_NUSES (label) = 1;
23101
23102 emit_move_insn (operand0, res);
23103 }
23104
23105 /* Expand SSE sequence for computing round from OPERAND1 storing
23106 into OPERAND0. */
23107 void
23108 ix86_expand_round (rtx operand0, rtx operand1)
23109 {
23110 /* C code for the stuff we're doing below:
23111 double xa = fabs (x);
23112 if (!isless (xa, TWO52))
23113 return x;
23114 xa = (double)(long)(xa + nextafter (0.5, 0.0));
23115 return copysign (xa, x);
23116 */
23117 enum machine_mode mode = GET_MODE (operand0);
23118 rtx res, TWO52, xa, label, xi, half, mask;
23119 const struct real_format *fmt;
23120 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
23121
23122 /* Temporary for holding the result, initialized to the input
23123 operand to ease control flow. */
23124 res = gen_reg_rtx (mode);
23125 emit_move_insn (res, operand1);
23126
23127 TWO52 = ix86_gen_TWO52 (mode);
23128 xa = ix86_expand_sse_fabs (res, &mask);
23129 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
23130
23131 /* load nextafter (0.5, 0.0) */
23132 fmt = REAL_MODE_FORMAT (mode);
23133 real_2expN (&half_minus_pred_half, -(fmt->p) - 1);
23134 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
23135
23136 /* xa = xa + 0.5 */
23137 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
23138 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
23139
23140 /* xa = (double)(int64_t)xa */
23141 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
23142 expand_fix (xi, xa, 0);
23143 expand_float (xa, xi, 0);
23144
23145 /* res = copysign (xa, operand1) */
23146 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
23147
23148 emit_label (label);
23149 LABEL_NUSES (label) = 1;
23150
23151 emit_move_insn (operand0, res);
23152 }
23153
23154 \f
23155 /* Table of valid machine attributes. */
23156 static const struct attribute_spec ix86_attribute_table[] =
23157 {
23158 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
23159 /* Stdcall attribute says the callee is responsible for popping its arguments
23160 unless the function takes a variable number of arguments.  */
23161 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
23162 /* Fastcall attribute says the callee is responsible for popping its arguments
23163 unless the function takes a variable number of arguments.  */
23164 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
23165 /* Cdecl attribute says the callee is a normal C declaration.  */
23166 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
23167 /* Regparm attribute specifies how many integer arguments are to be
23168 passed in registers. */
23169 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
23170 /* Sseregparm attribute says we are using x86_64 calling conventions
23171 for FP arguments. */
23172 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
23173 /* force_align_arg_pointer says this function realigns the stack at entry. */
23174 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
23175 false, true, true, ix86_handle_cconv_attribute },
23176 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
23177 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
23178 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
23179 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
23180 #endif
23181 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
23182 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
23183 #ifdef SUBTARGET_ATTRIBUTE_TABLE
23184 SUBTARGET_ATTRIBUTE_TABLE,
23185 #endif
23186 { NULL, 0, 0, false, false, false, NULL }
23187 };
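/* Usage examples for some of these attributes (illustrative only):

     int __attribute__((fastcall)) f (int a, int b);
     int __attribute__((regparm(3))) g (int a, int b, int c);
     struct __attribute__((ms_struct)) s { char c; int i; };

   fastcall and regparm affect the calling convention; ms_struct and
   gcc_struct select the structure layout rules handled by the callbacks
   listed above.  */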
23188
23189 /* Initialize the GCC target structure. */
23190 #undef TARGET_ATTRIBUTE_TABLE
23191 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
23192 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
23193 # undef TARGET_MERGE_DECL_ATTRIBUTES
23194 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
23195 #endif
23196
23197 #undef TARGET_COMP_TYPE_ATTRIBUTES
23198 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
23199
23200 #undef TARGET_INIT_BUILTINS
23201 #define TARGET_INIT_BUILTINS ix86_init_builtins
23202 #undef TARGET_EXPAND_BUILTIN
23203 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
23204
23205 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
23206 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION ix86_builtin_vectorized_function
23207 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
23208 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_builtin_conversion
23209
23210 #undef TARGET_ASM_FUNCTION_EPILOGUE
23211 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
23212
23213 #undef TARGET_ENCODE_SECTION_INFO
23214 #ifndef SUBTARGET_ENCODE_SECTION_INFO
23215 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
23216 #else
23217 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
23218 #endif
23219
23220 #undef TARGET_ASM_OPEN_PAREN
23221 #define TARGET_ASM_OPEN_PAREN ""
23222 #undef TARGET_ASM_CLOSE_PAREN
23223 #define TARGET_ASM_CLOSE_PAREN ""
23224
23225 #undef TARGET_ASM_ALIGNED_HI_OP
23226 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
23227 #undef TARGET_ASM_ALIGNED_SI_OP
23228 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
23229 #ifdef ASM_QUAD
23230 #undef TARGET_ASM_ALIGNED_DI_OP
23231 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
23232 #endif
23233
23234 #undef TARGET_ASM_UNALIGNED_HI_OP
23235 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
23236 #undef TARGET_ASM_UNALIGNED_SI_OP
23237 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
23238 #undef TARGET_ASM_UNALIGNED_DI_OP
23239 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
23240
23241 #undef TARGET_SCHED_ADJUST_COST
23242 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
23243 #undef TARGET_SCHED_ISSUE_RATE
23244 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
23245 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
23246 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
23247 ia32_multipass_dfa_lookahead
23248
23249 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
23250 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
23251
23252 #ifdef HAVE_AS_TLS
23253 #undef TARGET_HAVE_TLS
23254 #define TARGET_HAVE_TLS true
23255 #endif
23256 #undef TARGET_CANNOT_FORCE_CONST_MEM
23257 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
23258 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
23259 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_rtx_true
23260
23261 #undef TARGET_DELEGITIMIZE_ADDRESS
23262 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
23263
23264 #undef TARGET_MS_BITFIELD_LAYOUT_P
23265 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
23266
23267 #if TARGET_MACHO
23268 #undef TARGET_BINDS_LOCAL_P
23269 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
23270 #endif
23271 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
23272 #undef TARGET_BINDS_LOCAL_P
23273 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
23274 #endif
23275
23276 #undef TARGET_ASM_OUTPUT_MI_THUNK
23277 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
23278 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
23279 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
23280
23281 #undef TARGET_ASM_FILE_START
23282 #define TARGET_ASM_FILE_START x86_file_start
23283
23284 #undef TARGET_DEFAULT_TARGET_FLAGS
23285 #define TARGET_DEFAULT_TARGET_FLAGS \
23286 (TARGET_DEFAULT \
23287 | TARGET_SUBTARGET_DEFAULT \
23288 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
23289
23290 #undef TARGET_HANDLE_OPTION
23291 #define TARGET_HANDLE_OPTION ix86_handle_option
23292
23293 #undef TARGET_RTX_COSTS
23294 #define TARGET_RTX_COSTS ix86_rtx_costs
23295 #undef TARGET_ADDRESS_COST
23296 #define TARGET_ADDRESS_COST ix86_address_cost
23297
23298 #undef TARGET_FIXED_CONDITION_CODE_REGS
23299 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
23300 #undef TARGET_CC_MODES_COMPATIBLE
23301 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
23302
23303 #undef TARGET_MACHINE_DEPENDENT_REORG
23304 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
23305
23306 #undef TARGET_BUILD_BUILTIN_VA_LIST
23307 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
23308
23309 #undef TARGET_MD_ASM_CLOBBERS
23310 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
23311
23312 #undef TARGET_PROMOTE_PROTOTYPES
23313 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
23314 #undef TARGET_STRUCT_VALUE_RTX
23315 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
23316 #undef TARGET_SETUP_INCOMING_VARARGS
23317 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
23318 #undef TARGET_MUST_PASS_IN_STACK
23319 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
23320 #undef TARGET_PASS_BY_REFERENCE
23321 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
23322 #undef TARGET_INTERNAL_ARG_POINTER
23323 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
23324 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
23325 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
23326 #undef TARGET_STRICT_ARGUMENT_NAMING
23327 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
23328
23329 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
23330 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
23331
23332 #undef TARGET_SCALAR_MODE_SUPPORTED_P
23333 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
23334
23335 #undef TARGET_VECTOR_MODE_SUPPORTED_P
23336 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
23337
23338 #ifdef HAVE_AS_TLS
23339 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
23340 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
23341 #endif
23342
23343 #ifdef SUBTARGET_INSERT_ATTRIBUTES
23344 #undef TARGET_INSERT_ATTRIBUTES
23345 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
23346 #endif
23347
23348 #undef TARGET_MANGLE_FUNDAMENTAL_TYPE
23349 #define TARGET_MANGLE_FUNDAMENTAL_TYPE ix86_mangle_fundamental_type
23350
23351 #undef TARGET_STACK_PROTECT_FAIL
23352 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
23353
23354 #undef TARGET_FUNCTION_VALUE
23355 #define TARGET_FUNCTION_VALUE ix86_function_value
23356
23357 struct gcc_target targetm = TARGET_INITIALIZER;
23358 \f
23359 #include "gt-i386.h"