gcc/config/i386/i386.c
1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
3 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
4 Free Software Foundation, Inc.
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
11 any later version.
12
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "tm_p.h"
29 #include "regs.h"
30 #include "hard-reg-set.h"
31 #include "insn-config.h"
32 #include "conditions.h"
33 #include "output.h"
34 #include "insn-codes.h"
35 #include "insn-attr.h"
36 #include "flags.h"
37 #include "except.h"
38 #include "function.h"
39 #include "recog.h"
40 #include "expr.h"
41 #include "optabs.h"
42 #include "diagnostic-core.h"
43 #include "toplev.h"
44 #include "basic-block.h"
45 #include "ggc.h"
46 #include "target.h"
47 #include "target-def.h"
48 #include "langhooks.h"
49 #include "cgraph.h"
50 #include "gimple.h"
51 #include "dwarf2.h"
52 #include "df.h"
53 #include "tm-constrs.h"
54 #include "params.h"
55 #include "cselib.h"
56 #include "debug.h"
57 #include "dwarf2out.h"
58
59 static rtx legitimize_dllimport_symbol (rtx, bool);
60
61 #ifndef CHECK_STACK_LIMIT
62 #define CHECK_STACK_LIMIT (-1)
63 #endif
64
65 /* Return index of given mode in mult and division cost tables. */
66 #define MODE_INDEX(mode) \
67 ((mode) == QImode ? 0 \
68 : (mode) == HImode ? 1 \
69 : (mode) == SImode ? 2 \
70 : (mode) == DImode ? 3 \
71 : 4)
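/* Editor's note: a minimal, hedged sketch (not part of the original file)
   of how MODE_INDEX is meant to be used to pick the per-mode entry out of
   the cost arrays below.  The mult_init[] field name is assumed from the
   struct processor_costs declaration in i386.h.  */
#if 0
static int
example_mult_init_cost (const struct processor_costs *cost,
			enum machine_mode mode)
{
  /* QImode/HImode/SImode/DImode map to slots 0..3; any other mode
     (e.g. TImode) falls into the "other" slot 4.  */
  return cost->mult_init[MODE_INDEX (mode)];
}
#endif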
72
73 /* Processor costs (relative to an add) */
74 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
75 #define COSTS_N_BYTES(N) ((N) * 2)
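/* Editor's note (worked example, not part of the original file): with
   COSTS_N_INSNS (N) defined as (N) * 4 and a typical addition being 2 bytes,
   COSTS_N_BYTES (2) == 4 == COSTS_N_INSNS (1), so the size-tuned table below
   stays on the same scale as the speed-tuned tables that use COSTS_N_INSNS.  */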
76
77 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
78
79 const
80 struct processor_costs ix86_size_cost = {/* costs for tuning for size */
81 COSTS_N_BYTES (2), /* cost of an add instruction */
82 COSTS_N_BYTES (3), /* cost of a lea instruction */
83 COSTS_N_BYTES (2), /* variable shift costs */
84 COSTS_N_BYTES (3), /* constant shift costs */
85 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
86 COSTS_N_BYTES (3), /* HI */
87 COSTS_N_BYTES (3), /* SI */
88 COSTS_N_BYTES (3), /* DI */
89 COSTS_N_BYTES (5)}, /* other */
90 0, /* cost of multiply per each bit set */
91 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
92 COSTS_N_BYTES (3), /* HI */
93 COSTS_N_BYTES (3), /* SI */
94 COSTS_N_BYTES (3), /* DI */
95 COSTS_N_BYTES (5)}, /* other */
96 COSTS_N_BYTES (3), /* cost of movsx */
97 COSTS_N_BYTES (3), /* cost of movzx */
98 0, /* "large" insn */
99 2, /* MOVE_RATIO */
100 2, /* cost for loading QImode using movzbl */
101 {2, 2, 2}, /* cost of loading integer registers
102 in QImode, HImode and SImode.
103 Relative to reg-reg move (2). */
104 {2, 2, 2}, /* cost of storing integer registers */
105 2, /* cost of reg,reg fld/fst */
106 {2, 2, 2}, /* cost of loading fp registers
107 in SFmode, DFmode and XFmode */
108 {2, 2, 2}, /* cost of storing fp registers
109 in SFmode, DFmode and XFmode */
110 3, /* cost of moving MMX register */
111 {3, 3}, /* cost of loading MMX registers
112 in SImode and DImode */
113 {3, 3}, /* cost of storing MMX registers
114 in SImode and DImode */
115 3, /* cost of moving SSE register */
116 {3, 3, 3}, /* cost of loading SSE registers
117 in SImode, DImode and TImode */
118 {3, 3, 3}, /* cost of storing SSE registers
119 in SImode, DImode and TImode */
120 3, /* MMX or SSE register to integer */
121 0, /* size of l1 cache */
122 0, /* size of l2 cache */
123 0, /* size of prefetch block */
124 0, /* number of parallel prefetches */
125 2, /* Branch cost */
126 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
127 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
128 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
129 COSTS_N_BYTES (2), /* cost of FABS instruction. */
130 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
131 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
132 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
133 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
134 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
135 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
136 1, /* scalar_stmt_cost. */
137 1, /* scalar load_cost. */
138 1, /* scalar_store_cost. */
139 1, /* vec_stmt_cost. */
140 1, /* vec_to_scalar_cost. */
141 1, /* scalar_to_vec_cost. */
142 1, /* vec_align_load_cost. */
143 1, /* vec_unalign_load_cost. */
144 1, /* vec_store_cost. */
145 1, /* cond_taken_branch_cost. */
146 1, /* cond_not_taken_branch_cost. */
147 };
148
149 /* Processor costs (relative to an add) */
150 static const
151 struct processor_costs i386_cost = { /* 386 specific costs */
152 COSTS_N_INSNS (1), /* cost of an add instruction */
153 COSTS_N_INSNS (1), /* cost of a lea instruction */
154 COSTS_N_INSNS (3), /* variable shift costs */
155 COSTS_N_INSNS (2), /* constant shift costs */
156 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
157 COSTS_N_INSNS (6), /* HI */
158 COSTS_N_INSNS (6), /* SI */
159 COSTS_N_INSNS (6), /* DI */
160 COSTS_N_INSNS (6)}, /* other */
161 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
162 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
163 COSTS_N_INSNS (23), /* HI */
164 COSTS_N_INSNS (23), /* SI */
165 COSTS_N_INSNS (23), /* DI */
166 COSTS_N_INSNS (23)}, /* other */
167 COSTS_N_INSNS (3), /* cost of movsx */
168 COSTS_N_INSNS (2), /* cost of movzx */
169 15, /* "large" insn */
170 3, /* MOVE_RATIO */
171 4, /* cost for loading QImode using movzbl */
172 {2, 4, 2}, /* cost of loading integer registers
173 in QImode, HImode and SImode.
174 Relative to reg-reg move (2). */
175 {2, 4, 2}, /* cost of storing integer registers */
176 2, /* cost of reg,reg fld/fst */
177 {8, 8, 8}, /* cost of loading fp registers
178 in SFmode, DFmode and XFmode */
179 {8, 8, 8}, /* cost of storing fp registers
180 in SFmode, DFmode and XFmode */
181 2, /* cost of moving MMX register */
182 {4, 8}, /* cost of loading MMX registers
183 in SImode and DImode */
184 {4, 8}, /* cost of storing MMX registers
185 in SImode and DImode */
186 2, /* cost of moving SSE register */
187 {4, 8, 16}, /* cost of loading SSE registers
188 in SImode, DImode and TImode */
189 {4, 8, 16}, /* cost of storing SSE registers
190 in SImode, DImode and TImode */
191 3, /* MMX or SSE register to integer */
192 0, /* size of l1 cache */
193 0, /* size of l2 cache */
194 0, /* size of prefetch block */
195 0, /* number of parallel prefetches */
196 1, /* Branch cost */
197 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
198 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
199 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
200 COSTS_N_INSNS (22), /* cost of FABS instruction. */
201 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
202 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
203 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
204 DUMMY_STRINGOP_ALGS},
205 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
206 DUMMY_STRINGOP_ALGS},
207 1, /* scalar_stmt_cost. */
208 1, /* scalar load_cost. */
209 1, /* scalar_store_cost. */
210 1, /* vec_stmt_cost. */
211 1, /* vec_to_scalar_cost. */
212 1, /* scalar_to_vec_cost. */
213 1, /* vec_align_load_cost. */
214 2, /* vec_unalign_load_cost. */
215 1, /* vec_store_cost. */
216 3, /* cond_taken_branch_cost. */
217 1, /* cond_not_taken_branch_cost. */
218 };
219
220 static const
221 struct processor_costs i486_cost = { /* 486 specific costs */
222 COSTS_N_INSNS (1), /* cost of an add instruction */
223 COSTS_N_INSNS (1), /* cost of a lea instruction */
224 COSTS_N_INSNS (3), /* variable shift costs */
225 COSTS_N_INSNS (2), /* constant shift costs */
226 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
227 COSTS_N_INSNS (12), /* HI */
228 COSTS_N_INSNS (12), /* SI */
229 COSTS_N_INSNS (12), /* DI */
230 COSTS_N_INSNS (12)}, /* other */
231 1, /* cost of multiply per each bit set */
232 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
233 COSTS_N_INSNS (40), /* HI */
234 COSTS_N_INSNS (40), /* SI */
235 COSTS_N_INSNS (40), /* DI */
236 COSTS_N_INSNS (40)}, /* other */
237 COSTS_N_INSNS (3), /* cost of movsx */
238 COSTS_N_INSNS (2), /* cost of movzx */
239 15, /* "large" insn */
240 3, /* MOVE_RATIO */
241 4, /* cost for loading QImode using movzbl */
242 {2, 4, 2}, /* cost of loading integer registers
243 in QImode, HImode and SImode.
244 Relative to reg-reg move (2). */
245 {2, 4, 2}, /* cost of storing integer registers */
246 2, /* cost of reg,reg fld/fst */
247 {8, 8, 8}, /* cost of loading fp registers
248 in SFmode, DFmode and XFmode */
249 {8, 8, 8}, /* cost of storing fp registers
250 in SFmode, DFmode and XFmode */
251 2, /* cost of moving MMX register */
252 {4, 8}, /* cost of loading MMX registers
253 in SImode and DImode */
254 {4, 8}, /* cost of storing MMX registers
255 in SImode and DImode */
256 2, /* cost of moving SSE register */
257 {4, 8, 16}, /* cost of loading SSE registers
258 in SImode, DImode and TImode */
259 {4, 8, 16}, /* cost of storing SSE registers
260 in SImode, DImode and TImode */
261 3, /* MMX or SSE register to integer */
262 4, /* size of l1 cache. 486 has 8kB cache
263 shared for code and data, so 4kB is
264 not really precise. */
265 4, /* size of l2 cache */
266 0, /* size of prefetch block */
267 0, /* number of parallel prefetches */
268 1, /* Branch cost */
269 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
270 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
271 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
272 COSTS_N_INSNS (3), /* cost of FABS instruction. */
273 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
274 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
275 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
276 DUMMY_STRINGOP_ALGS},
277 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
278 DUMMY_STRINGOP_ALGS},
279 1, /* scalar_stmt_cost. */
280 1, /* scalar load_cost. */
281 1, /* scalar_store_cost. */
282 1, /* vec_stmt_cost. */
283 1, /* vec_to_scalar_cost. */
284 1, /* scalar_to_vec_cost. */
285 1, /* vec_align_load_cost. */
286 2, /* vec_unalign_load_cost. */
287 1, /* vec_store_cost. */
288 3, /* cond_taken_branch_cost. */
289 1, /* cond_not_taken_branch_cost. */
290 };
291
292 static const
293 struct processor_costs pentium_cost = {
294 COSTS_N_INSNS (1), /* cost of an add instruction */
295 COSTS_N_INSNS (1), /* cost of a lea instruction */
296 COSTS_N_INSNS (4), /* variable shift costs */
297 COSTS_N_INSNS (1), /* constant shift costs */
298 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
299 COSTS_N_INSNS (11), /* HI */
300 COSTS_N_INSNS (11), /* SI */
301 COSTS_N_INSNS (11), /* DI */
302 COSTS_N_INSNS (11)}, /* other */
303 0, /* cost of multiply per each bit set */
304 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
305 COSTS_N_INSNS (25), /* HI */
306 COSTS_N_INSNS (25), /* SI */
307 COSTS_N_INSNS (25), /* DI */
308 COSTS_N_INSNS (25)}, /* other */
309 COSTS_N_INSNS (3), /* cost of movsx */
310 COSTS_N_INSNS (2), /* cost of movzx */
311 8, /* "large" insn */
312 6, /* MOVE_RATIO */
313 6, /* cost for loading QImode using movzbl */
314 {2, 4, 2}, /* cost of loading integer registers
315 in QImode, HImode and SImode.
316 Relative to reg-reg move (2). */
317 {2, 4, 2}, /* cost of storing integer registers */
318 2, /* cost of reg,reg fld/fst */
319 {2, 2, 6}, /* cost of loading fp registers
320 in SFmode, DFmode and XFmode */
321 {4, 4, 6}, /* cost of storing fp registers
322 in SFmode, DFmode and XFmode */
323 8, /* cost of moving MMX register */
324 {8, 8}, /* cost of loading MMX registers
325 in SImode and DImode */
326 {8, 8}, /* cost of storing MMX registers
327 in SImode and DImode */
328 2, /* cost of moving SSE register */
329 {4, 8, 16}, /* cost of loading SSE registers
330 in SImode, DImode and TImode */
331 {4, 8, 16}, /* cost of storing SSE registers
332 in SImode, DImode and TImode */
333 3, /* MMX or SSE register to integer */
334 8, /* size of l1 cache. */
335 8, /* size of l2 cache */
336 0, /* size of prefetch block */
337 0, /* number of parallel prefetches */
338 2, /* Branch cost */
339 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
340 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
341 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
342 COSTS_N_INSNS (1), /* cost of FABS instruction. */
343 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
344 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
345 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
346 DUMMY_STRINGOP_ALGS},
347 {{libcall, {{-1, rep_prefix_4_byte}}},
348 DUMMY_STRINGOP_ALGS},
349 1, /* scalar_stmt_cost. */
350 1, /* scalar load_cost. */
351 1, /* scalar_store_cost. */
352 1, /* vec_stmt_cost. */
353 1, /* vec_to_scalar_cost. */
354 1, /* scalar_to_vec_cost. */
355 1, /* vec_align_load_cost. */
356 2, /* vec_unalign_load_cost. */
357 1, /* vec_store_cost. */
358 3, /* cond_taken_branch_cost. */
359 1, /* cond_not_taken_branch_cost. */
360 };
361
362 static const
363 struct processor_costs pentiumpro_cost = {
364 COSTS_N_INSNS (1), /* cost of an add instruction */
365 COSTS_N_INSNS (1), /* cost of a lea instruction */
366 COSTS_N_INSNS (1), /* variable shift costs */
367 COSTS_N_INSNS (1), /* constant shift costs */
368 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
369 COSTS_N_INSNS (4), /* HI */
370 COSTS_N_INSNS (4), /* SI */
371 COSTS_N_INSNS (4), /* DI */
372 COSTS_N_INSNS (4)}, /* other */
373 0, /* cost of multiply per each bit set */
374 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
375 COSTS_N_INSNS (17), /* HI */
376 COSTS_N_INSNS (17), /* SI */
377 COSTS_N_INSNS (17), /* DI */
378 COSTS_N_INSNS (17)}, /* other */
379 COSTS_N_INSNS (1), /* cost of movsx */
380 COSTS_N_INSNS (1), /* cost of movzx */
381 8, /* "large" insn */
382 6, /* MOVE_RATIO */
383 2, /* cost for loading QImode using movzbl */
384 {4, 4, 4}, /* cost of loading integer registers
385 in QImode, HImode and SImode.
386 Relative to reg-reg move (2). */
387 {2, 2, 2}, /* cost of storing integer registers */
388 2, /* cost of reg,reg fld/fst */
389 {2, 2, 6}, /* cost of loading fp registers
390 in SFmode, DFmode and XFmode */
391 {4, 4, 6}, /* cost of storing fp registers
392 in SFmode, DFmode and XFmode */
393 2, /* cost of moving MMX register */
394 {2, 2}, /* cost of loading MMX registers
395 in SImode and DImode */
396 {2, 2}, /* cost of storing MMX registers
397 in SImode and DImode */
398 2, /* cost of moving SSE register */
399 {2, 2, 8}, /* cost of loading SSE registers
400 in SImode, DImode and TImode */
401 {2, 2, 8}, /* cost of storing SSE registers
402 in SImode, DImode and TImode */
403 3, /* MMX or SSE register to integer */
404 8, /* size of l1 cache. */
405 256, /* size of l2 cache */
406 32, /* size of prefetch block */
407 6, /* number of parallel prefetches */
408 2, /* Branch cost */
409 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
410 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
411 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
412 COSTS_N_INSNS (2), /* cost of FABS instruction. */
413 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
414 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
415 /* PentiumPro has optimized rep instructions for blocks aligned to 8 bytes (we
416 ensure the alignment). For small blocks an inline loop is still a noticeable win;
417 for bigger blocks either rep movsl or rep movsb is the way to go. Rep movsb
418 apparently has a more expensive startup time in the CPU, but after 4K the
419 difference is down in the noise. See also the lookup sketch after this table. */
420 {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
421 {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
422 DUMMY_STRINGOP_ALGS},
423 {{rep_prefix_4_byte, {{1024, unrolled_loop},
424 {8192, rep_prefix_4_byte}, {-1, libcall}}},
425 DUMMY_STRINGOP_ALGS},
426 1, /* scalar_stmt_cost. */
427 1, /* scalar load_cost. */
428 1, /* scalar_store_cost. */
429 1, /* vec_stmt_cost. */
430 1, /* vec_to_scalar_cost. */
431 1, /* scalar_to_vec_cost. */
432 1, /* vec_align_load_cost. */
433 2, /* vec_unalign_load_cost. */
434 1, /* vec_store_cost. */
435 3, /* cond_taken_branch_cost. */
436 1, /* cond_not_taken_branch_cost. */
437 };
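/* Editor's note: a hedged sketch (not part of the original file) of how the
   {max_size, algorithm} tables in these cost structures are consumed.  The
   real selection logic lives in decide_alg () further down in this file and
   also considers alignment and the -minline-*-stringops options; the
   unknown_size, size[], max and alg names are assumed from the stringop_algs
   declaration in i386.h.  */
#if 0
static enum stringop_alg
example_pick_stringop_alg (const struct stringop_algs *algs,
			   HOST_WIDE_INT count)
{
  int i;

  /* A block of unknown size uses the dedicated fallback entry.  */
  if (count < 0)
    return algs->unknown_size;

  /* Otherwise walk the table in order; an entry with max == -1 terminates
     the table and catches every remaining size.  */
  for (i = 0; i < MAX_STRINGOP_ALGS; i++)
    if (algs->size[i].max == -1 || count <= algs->size[i].max)
      return algs->size[i].alg;

  return libcall;
}
#endif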
438
439 static const
440 struct processor_costs geode_cost = {
441 COSTS_N_INSNS (1), /* cost of an add instruction */
442 COSTS_N_INSNS (1), /* cost of a lea instruction */
443 COSTS_N_INSNS (2), /* variable shift costs */
444 COSTS_N_INSNS (1), /* constant shift costs */
445 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
446 COSTS_N_INSNS (4), /* HI */
447 COSTS_N_INSNS (7), /* SI */
448 COSTS_N_INSNS (7), /* DI */
449 COSTS_N_INSNS (7)}, /* other */
450 0, /* cost of multiply per each bit set */
451 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
452 COSTS_N_INSNS (23), /* HI */
453 COSTS_N_INSNS (39), /* SI */
454 COSTS_N_INSNS (39), /* DI */
455 COSTS_N_INSNS (39)}, /* other */
456 COSTS_N_INSNS (1), /* cost of movsx */
457 COSTS_N_INSNS (1), /* cost of movzx */
458 8, /* "large" insn */
459 4, /* MOVE_RATIO */
460 1, /* cost for loading QImode using movzbl */
461 {1, 1, 1}, /* cost of loading integer registers
462 in QImode, HImode and SImode.
463 Relative to reg-reg move (2). */
464 {1, 1, 1}, /* cost of storing integer registers */
465 1, /* cost of reg,reg fld/fst */
466 {1, 1, 1}, /* cost of loading fp registers
467 in SFmode, DFmode and XFmode */
468 {4, 6, 6}, /* cost of storing fp registers
469 in SFmode, DFmode and XFmode */
470
471 1, /* cost of moving MMX register */
472 {1, 1}, /* cost of loading MMX registers
473 in SImode and DImode */
474 {1, 1}, /* cost of storing MMX registers
475 in SImode and DImode */
476 1, /* cost of moving SSE register */
477 {1, 1, 1}, /* cost of loading SSE registers
478 in SImode, DImode and TImode */
479 {1, 1, 1}, /* cost of storing SSE registers
480 in SImode, DImode and TImode */
481 1, /* MMX or SSE register to integer */
482 64, /* size of l1 cache. */
483 128, /* size of l2 cache. */
484 32, /* size of prefetch block */
485 1, /* number of parallel prefetches */
486 1, /* Branch cost */
487 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
488 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
489 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
490 COSTS_N_INSNS (1), /* cost of FABS instruction. */
491 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
492 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
493 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
494 DUMMY_STRINGOP_ALGS},
495 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
496 DUMMY_STRINGOP_ALGS},
497 1, /* scalar_stmt_cost. */
498 1, /* scalar load_cost. */
499 1, /* scalar_store_cost. */
500 1, /* vec_stmt_cost. */
501 1, /* vec_to_scalar_cost. */
502 1, /* scalar_to_vec_cost. */
503 1, /* vec_align_load_cost. */
504 2, /* vec_unalign_load_cost. */
505 1, /* vec_store_cost. */
506 3, /* cond_taken_branch_cost. */
507 1, /* cond_not_taken_branch_cost. */
508 };
509
510 static const
511 struct processor_costs k6_cost = {
512 COSTS_N_INSNS (1), /* cost of an add instruction */
513 COSTS_N_INSNS (2), /* cost of a lea instruction */
514 COSTS_N_INSNS (1), /* variable shift costs */
515 COSTS_N_INSNS (1), /* constant shift costs */
516 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
517 COSTS_N_INSNS (3), /* HI */
518 COSTS_N_INSNS (3), /* SI */
519 COSTS_N_INSNS (3), /* DI */
520 COSTS_N_INSNS (3)}, /* other */
521 0, /* cost of multiply per each bit set */
522 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
523 COSTS_N_INSNS (18), /* HI */
524 COSTS_N_INSNS (18), /* SI */
525 COSTS_N_INSNS (18), /* DI */
526 COSTS_N_INSNS (18)}, /* other */
527 COSTS_N_INSNS (2), /* cost of movsx */
528 COSTS_N_INSNS (2), /* cost of movzx */
529 8, /* "large" insn */
530 4, /* MOVE_RATIO */
531 3, /* cost for loading QImode using movzbl */
532 {4, 5, 4}, /* cost of loading integer registers
533 in QImode, HImode and SImode.
534 Relative to reg-reg move (2). */
535 {2, 3, 2}, /* cost of storing integer registers */
536 4, /* cost of reg,reg fld/fst */
537 {6, 6, 6}, /* cost of loading fp registers
538 in SFmode, DFmode and XFmode */
539 {4, 4, 4}, /* cost of storing fp registers
540 in SFmode, DFmode and XFmode */
541 2, /* cost of moving MMX register */
542 {2, 2}, /* cost of loading MMX registers
543 in SImode and DImode */
544 {2, 2}, /* cost of storing MMX registers
545 in SImode and DImode */
546 2, /* cost of moving SSE register */
547 {2, 2, 8}, /* cost of loading SSE registers
548 in SImode, DImode and TImode */
549 {2, 2, 8}, /* cost of storing SSE registers
550 in SImode, DImode and TImode */
551 6, /* MMX or SSE register to integer */
552 32, /* size of l1 cache. */
553 32, /* size of l2 cache. Some models
554 have integrated l2 cache, but
555 optimizing for k6 is not important
556 enough to worry about that. */
557 32, /* size of prefetch block */
558 1, /* number of parallel prefetches */
559 1, /* Branch cost */
560 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
561 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
562 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
563 COSTS_N_INSNS (2), /* cost of FABS instruction. */
564 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
565 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
566 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
567 DUMMY_STRINGOP_ALGS},
568 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
569 DUMMY_STRINGOP_ALGS},
570 1, /* scalar_stmt_cost. */
571 1, /* scalar load_cost. */
572 1, /* scalar_store_cost. */
573 1, /* vec_stmt_cost. */
574 1, /* vec_to_scalar_cost. */
575 1, /* scalar_to_vec_cost. */
576 1, /* vec_align_load_cost. */
577 2, /* vec_unalign_load_cost. */
578 1, /* vec_store_cost. */
579 3, /* cond_taken_branch_cost. */
580 1, /* cond_not_taken_branch_cost. */
581 };
582
583 static const
584 struct processor_costs athlon_cost = {
585 COSTS_N_INSNS (1), /* cost of an add instruction */
586 COSTS_N_INSNS (2), /* cost of a lea instruction */
587 COSTS_N_INSNS (1), /* variable shift costs */
588 COSTS_N_INSNS (1), /* constant shift costs */
589 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
590 COSTS_N_INSNS (5), /* HI */
591 COSTS_N_INSNS (5), /* SI */
592 COSTS_N_INSNS (5), /* DI */
593 COSTS_N_INSNS (5)}, /* other */
594 0, /* cost of multiply per each bit set */
595 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
596 COSTS_N_INSNS (26), /* HI */
597 COSTS_N_INSNS (42), /* SI */
598 COSTS_N_INSNS (74), /* DI */
599 COSTS_N_INSNS (74)}, /* other */
600 COSTS_N_INSNS (1), /* cost of movsx */
601 COSTS_N_INSNS (1), /* cost of movzx */
602 8, /* "large" insn */
603 9, /* MOVE_RATIO */
604 4, /* cost for loading QImode using movzbl */
605 {3, 4, 3}, /* cost of loading integer registers
606 in QImode, HImode and SImode.
607 Relative to reg-reg move (2). */
608 {3, 4, 3}, /* cost of storing integer registers */
609 4, /* cost of reg,reg fld/fst */
610 {4, 4, 12}, /* cost of loading fp registers
611 in SFmode, DFmode and XFmode */
612 {6, 6, 8}, /* cost of storing fp registers
613 in SFmode, DFmode and XFmode */
614 2, /* cost of moving MMX register */
615 {4, 4}, /* cost of loading MMX registers
616 in SImode and DImode */
617 {4, 4}, /* cost of storing MMX registers
618 in SImode and DImode */
619 2, /* cost of moving SSE register */
620 {4, 4, 6}, /* cost of loading SSE registers
621 in SImode, DImode and TImode */
622 {4, 4, 5}, /* cost of storing SSE registers
623 in SImode, DImode and TImode */
624 5, /* MMX or SSE register to integer */
625 64, /* size of l1 cache. */
626 256, /* size of l2 cache. */
627 64, /* size of prefetch block */
628 6, /* number of parallel prefetches */
629 5, /* Branch cost */
630 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
631 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
632 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
633 COSTS_N_INSNS (2), /* cost of FABS instruction. */
634 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
635 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
636 /* For some reason, Athlon deals better with the REP prefix (relative to
637 loops) than K8 does. Alignment becomes important after 8 bytes for memcpy
638 and 128 bytes for memset. */
639 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
640 DUMMY_STRINGOP_ALGS},
641 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
642 DUMMY_STRINGOP_ALGS},
643 1, /* scalar_stmt_cost. */
644 1, /* scalar load_cost. */
645 1, /* scalar_store_cost. */
646 1, /* vec_stmt_cost. */
647 1, /* vec_to_scalar_cost. */
648 1, /* scalar_to_vec_cost. */
649 1, /* vec_align_load_cost. */
650 2, /* vec_unalign_load_cost. */
651 1, /* vec_store_cost. */
652 3, /* cond_taken_branch_cost. */
653 1, /* cond_not_taken_branch_cost. */
654 };
655
656 static const
657 struct processor_costs k8_cost = {
658 COSTS_N_INSNS (1), /* cost of an add instruction */
659 COSTS_N_INSNS (2), /* cost of a lea instruction */
660 COSTS_N_INSNS (1), /* variable shift costs */
661 COSTS_N_INSNS (1), /* constant shift costs */
662 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
663 COSTS_N_INSNS (4), /* HI */
664 COSTS_N_INSNS (3), /* SI */
665 COSTS_N_INSNS (4), /* DI */
666 COSTS_N_INSNS (5)}, /* other */
667 0, /* cost of multiply per each bit set */
668 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
669 COSTS_N_INSNS (26), /* HI */
670 COSTS_N_INSNS (42), /* SI */
671 COSTS_N_INSNS (74), /* DI */
672 COSTS_N_INSNS (74)}, /* other */
673 COSTS_N_INSNS (1), /* cost of movsx */
674 COSTS_N_INSNS (1), /* cost of movzx */
675 8, /* "large" insn */
676 9, /* MOVE_RATIO */
677 4, /* cost for loading QImode using movzbl */
678 {3, 4, 3}, /* cost of loading integer registers
679 in QImode, HImode and SImode.
680 Relative to reg-reg move (2). */
681 {3, 4, 3}, /* cost of storing integer registers */
682 4, /* cost of reg,reg fld/fst */
683 {4, 4, 12}, /* cost of loading fp registers
684 in SFmode, DFmode and XFmode */
685 {6, 6, 8}, /* cost of storing fp registers
686 in SFmode, DFmode and XFmode */
687 2, /* cost of moving MMX register */
688 {3, 3}, /* cost of loading MMX registers
689 in SImode and DImode */
690 {4, 4}, /* cost of storing MMX registers
691 in SImode and DImode */
692 2, /* cost of moving SSE register */
693 {4, 3, 6}, /* cost of loading SSE registers
694 in SImode, DImode and TImode */
695 {4, 4, 5}, /* cost of storing SSE registers
696 in SImode, DImode and TImode */
697 5, /* MMX or SSE register to integer */
698 64, /* size of l1 cache. */
699 512, /* size of l2 cache. */
700 64, /* size of prefetch block */
701 /* New AMD processors never drop prefetches; if they cannot be performed
702 immediately, they are queued. We set the number of simultaneous prefetches
703 to a large constant to reflect this (it is probably not a good idea to leave
704 the number of prefetches completely unlimited, as their execution also takes
705 some time). */
706 100, /* number of parallel prefetches */
707 3, /* Branch cost */
708 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
709 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
710 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
711 COSTS_N_INSNS (2), /* cost of FABS instruction. */
712 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
713 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
714 /* K8 has an optimized REP instruction for medium-sized blocks, but for very
715 small blocks it is better to use a loop. For large blocks, a libcall can do
716 nontemporal accesses and beat the inline code considerably. */
717 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
718 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
719 {{libcall, {{8, loop}, {24, unrolled_loop},
720 {2048, rep_prefix_4_byte}, {-1, libcall}}},
721 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
722 4, /* scalar_stmt_cost. */
723 2, /* scalar load_cost. */
724 2, /* scalar_store_cost. */
725 5, /* vec_stmt_cost. */
726 0, /* vec_to_scalar_cost. */
727 2, /* scalar_to_vec_cost. */
728 2, /* vec_align_load_cost. */
729 3, /* vec_unalign_load_cost. */
730 3, /* vec_store_cost. */
731 3, /* cond_taken_branch_cost. */
732 2, /* cond_not_taken_branch_cost. */
733 };
734
735 struct processor_costs amdfam10_cost = {
736 COSTS_N_INSNS (1), /* cost of an add instruction */
737 COSTS_N_INSNS (2), /* cost of a lea instruction */
738 COSTS_N_INSNS (1), /* variable shift costs */
739 COSTS_N_INSNS (1), /* constant shift costs */
740 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
741 COSTS_N_INSNS (4), /* HI */
742 COSTS_N_INSNS (3), /* SI */
743 COSTS_N_INSNS (4), /* DI */
744 COSTS_N_INSNS (5)}, /* other */
745 0, /* cost of multiply per each bit set */
746 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
747 COSTS_N_INSNS (35), /* HI */
748 COSTS_N_INSNS (51), /* SI */
749 COSTS_N_INSNS (83), /* DI */
750 COSTS_N_INSNS (83)}, /* other */
751 COSTS_N_INSNS (1), /* cost of movsx */
752 COSTS_N_INSNS (1), /* cost of movzx */
753 8, /* "large" insn */
754 9, /* MOVE_RATIO */
755 4, /* cost for loading QImode using movzbl */
756 {3, 4, 3}, /* cost of loading integer registers
757 in QImode, HImode and SImode.
758 Relative to reg-reg move (2). */
759 {3, 4, 3}, /* cost of storing integer registers */
760 4, /* cost of reg,reg fld/fst */
761 {4, 4, 12}, /* cost of loading fp registers
762 in SFmode, DFmode and XFmode */
763 {6, 6, 8}, /* cost of storing fp registers
764 in SFmode, DFmode and XFmode */
765 2, /* cost of moving MMX register */
766 {3, 3}, /* cost of loading MMX registers
767 in SImode and DImode */
768 {4, 4}, /* cost of storing MMX registers
769 in SImode and DImode */
770 2, /* cost of moving SSE register */
771 {4, 4, 3}, /* cost of loading SSE registers
772 in SImode, DImode and TImode */
773 {4, 4, 5}, /* cost of storing SSE registers
774 in SImode, DImode and TImode */
775 3, /* MMX or SSE register to integer */
776 /* On K8
777 MOVD reg64, xmmreg Double FSTORE 4
778 MOVD reg32, xmmreg Double FSTORE 4
779 On AMDFAM10
780 MOVD reg64, xmmreg Double FADD 3
781 1/1 1/1
782 MOVD reg32, xmmreg Double FADD 3
783 1/1 1/1 */
784 64, /* size of l1 cache. */
785 512, /* size of l2 cache. */
786 64, /* size of prefetch block */
787 /* New AMD processors never drop prefetches; if they cannot be performed
788 immediately, they are queued. We set the number of simultaneous prefetches
789 to a large constant to reflect this (it is probably not a good idea to leave
790 the number of prefetches completely unlimited, as their execution also takes
791 some time). */
792 100, /* number of parallel prefetches */
793 2, /* Branch cost */
794 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
795 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
796 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
797 COSTS_N_INSNS (2), /* cost of FABS instruction. */
798 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
799 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
800
801 /* AMDFAM10 has an optimized REP instruction for medium-sized blocks, but for
802 very small blocks it is better to use a loop. For large blocks, a libcall
803 can do nontemporal accesses and beat the inline code considerably. */
804 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
805 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
806 {{libcall, {{8, loop}, {24, unrolled_loop},
807 {2048, rep_prefix_4_byte}, {-1, libcall}}},
808 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
809 4, /* scalar_stmt_cost. */
810 2, /* scalar load_cost. */
811 2, /* scalar_store_cost. */
812 6, /* vec_stmt_cost. */
813 0, /* vec_to_scalar_cost. */
814 2, /* scalar_to_vec_cost. */
815 2, /* vec_align_load_cost. */
816 2, /* vec_unalign_load_cost. */
817 2, /* vec_store_cost. */
818 2, /* cond_taken_branch_cost. */
819 1, /* cond_not_taken_branch_cost. */
820 };
821
822 struct processor_costs bdver1_cost = {
823 COSTS_N_INSNS (1), /* cost of an add instruction */
824 COSTS_N_INSNS (2), /* cost of a lea instruction */
825 COSTS_N_INSNS (1), /* variable shift costs */
826 COSTS_N_INSNS (1), /* constant shift costs */
827 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
828 COSTS_N_INSNS (4), /* HI */
829 COSTS_N_INSNS (3), /* SI */
830 COSTS_N_INSNS (4), /* DI */
831 COSTS_N_INSNS (5)}, /* other */
832 0, /* cost of multiply per each bit set */
833 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
834 COSTS_N_INSNS (35), /* HI */
835 COSTS_N_INSNS (51), /* SI */
836 COSTS_N_INSNS (83), /* DI */
837 COSTS_N_INSNS (83)}, /* other */
838 COSTS_N_INSNS (1), /* cost of movsx */
839 COSTS_N_INSNS (1), /* cost of movzx */
840 8, /* "large" insn */
841 9, /* MOVE_RATIO */
842 4, /* cost for loading QImode using movzbl */
843 {3, 4, 3}, /* cost of loading integer registers
844 in QImode, HImode and SImode.
845 Relative to reg-reg move (2). */
846 {3, 4, 3}, /* cost of storing integer registers */
847 4, /* cost of reg,reg fld/fst */
848 {4, 4, 12}, /* cost of loading fp registers
849 in SFmode, DFmode and XFmode */
850 {6, 6, 8}, /* cost of storing fp registers
851 in SFmode, DFmode and XFmode */
852 2, /* cost of moving MMX register */
853 {3, 3}, /* cost of loading MMX registers
854 in SImode and DImode */
855 {4, 4}, /* cost of storing MMX registers
856 in SImode and DImode */
857 2, /* cost of moving SSE register */
858 {4, 4, 3}, /* cost of loading SSE registers
859 in SImode, DImode and TImode */
860 {4, 4, 5}, /* cost of storing SSE registers
861 in SImode, DImode and TImode */
862 3, /* MMX or SSE register to integer */
863 /* On K8
864 MOVD reg64, xmmreg Double FSTORE 4
865 MOVD reg32, xmmreg Double FSTORE 4
866 On AMDFAM10
867 MOVD reg64, xmmreg Double FADD 3
868 1/1 1/1
869 MOVD reg32, xmmreg Double FADD 3
870 1/1 1/1 */
871 64, /* size of l1 cache. */
872 1024, /* size of l2 cache. */
873 64, /* size of prefetch block */
874 /* New AMD processors never drop prefetches; if they cannot be performed
875 immediately, they are queued. We set the number of simultaneous prefetches
876 to a large constant to reflect this (it is probably not a good idea to leave
877 the number of prefetches completely unlimited, as their execution also takes
878 some time). */
879 100, /* number of parallel prefetches */
880 2, /* Branch cost */
881 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
882 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
883 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
884 COSTS_N_INSNS (2), /* cost of FABS instruction. */
885 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
886 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
887
888 /* BDVER1 has an optimized REP instruction for medium-sized blocks, but for
889 very small blocks it is better to use a loop. For large blocks, a libcall
890 can do nontemporal accesses and beat the inline code considerably. */
891 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
892 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
893 {{libcall, {{8, loop}, {24, unrolled_loop},
894 {2048, rep_prefix_4_byte}, {-1, libcall}}},
895 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
896 4, /* scalar_stmt_cost. */
897 2, /* scalar load_cost. */
898 2, /* scalar_store_cost. */
899 6, /* vec_stmt_cost. */
900 0, /* vec_to_scalar_cost. */
901 2, /* scalar_to_vec_cost. */
902 2, /* vec_align_load_cost. */
903 2, /* vec_unalign_load_cost. */
904 2, /* vec_store_cost. */
905 2, /* cond_taken_branch_cost. */
906 1, /* cond_not_taken_branch_cost. */
907 };
908
909 static const
910 struct processor_costs pentium4_cost = {
911 COSTS_N_INSNS (1), /* cost of an add instruction */
912 COSTS_N_INSNS (3), /* cost of a lea instruction */
913 COSTS_N_INSNS (4), /* variable shift costs */
914 COSTS_N_INSNS (4), /* constant shift costs */
915 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
916 COSTS_N_INSNS (15), /* HI */
917 COSTS_N_INSNS (15), /* SI */
918 COSTS_N_INSNS (15), /* DI */
919 COSTS_N_INSNS (15)}, /* other */
920 0, /* cost of multiply per each bit set */
921 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
922 COSTS_N_INSNS (56), /* HI */
923 COSTS_N_INSNS (56), /* SI */
924 COSTS_N_INSNS (56), /* DI */
925 COSTS_N_INSNS (56)}, /* other */
926 COSTS_N_INSNS (1), /* cost of movsx */
927 COSTS_N_INSNS (1), /* cost of movzx */
928 16, /* "large" insn */
929 6, /* MOVE_RATIO */
930 2, /* cost for loading QImode using movzbl */
931 {4, 5, 4}, /* cost of loading integer registers
932 in QImode, HImode and SImode.
933 Relative to reg-reg move (2). */
934 {2, 3, 2}, /* cost of storing integer registers */
935 2, /* cost of reg,reg fld/fst */
936 {2, 2, 6}, /* cost of loading fp registers
937 in SFmode, DFmode and XFmode */
938 {4, 4, 6}, /* cost of storing fp registers
939 in SFmode, DFmode and XFmode */
940 2, /* cost of moving MMX register */
941 {2, 2}, /* cost of loading MMX registers
942 in SImode and DImode */
943 {2, 2}, /* cost of storing MMX registers
944 in SImode and DImode */
945 12, /* cost of moving SSE register */
946 {12, 12, 12}, /* cost of loading SSE registers
947 in SImode, DImode and TImode */
948 {2, 2, 8}, /* cost of storing SSE registers
949 in SImode, DImode and TImode */
950 10, /* MMX or SSE register to integer */
951 8, /* size of l1 cache. */
952 256, /* size of l2 cache. */
953 64, /* size of prefetch block */
954 6, /* number of parallel prefetches */
955 2, /* Branch cost */
956 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
957 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
958 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
959 COSTS_N_INSNS (2), /* cost of FABS instruction. */
960 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
961 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
962 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
963 DUMMY_STRINGOP_ALGS},
964 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
965 {-1, libcall}}},
966 DUMMY_STRINGOP_ALGS},
967 1, /* scalar_stmt_cost. */
968 1, /* scalar load_cost. */
969 1, /* scalar_store_cost. */
970 1, /* vec_stmt_cost. */
971 1, /* vec_to_scalar_cost. */
972 1, /* scalar_to_vec_cost. */
973 1, /* vec_align_load_cost. */
974 2, /* vec_unalign_load_cost. */
975 1, /* vec_store_cost. */
976 3, /* cond_taken_branch_cost. */
977 1, /* cond_not_taken_branch_cost. */
978 };
979
980 static const
981 struct processor_costs nocona_cost = {
982 COSTS_N_INSNS (1), /* cost of an add instruction */
983 COSTS_N_INSNS (1), /* cost of a lea instruction */
984 COSTS_N_INSNS (1), /* variable shift costs */
985 COSTS_N_INSNS (1), /* constant shift costs */
986 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
987 COSTS_N_INSNS (10), /* HI */
988 COSTS_N_INSNS (10), /* SI */
989 COSTS_N_INSNS (10), /* DI */
990 COSTS_N_INSNS (10)}, /* other */
991 0, /* cost of multiply per each bit set */
992 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
993 COSTS_N_INSNS (66), /* HI */
994 COSTS_N_INSNS (66), /* SI */
995 COSTS_N_INSNS (66), /* DI */
996 COSTS_N_INSNS (66)}, /* other */
997 COSTS_N_INSNS (1), /* cost of movsx */
998 COSTS_N_INSNS (1), /* cost of movzx */
999 16, /* "large" insn */
1000 17, /* MOVE_RATIO */
1001 4, /* cost for loading QImode using movzbl */
1002 {4, 4, 4}, /* cost of loading integer registers
1003 in QImode, HImode and SImode.
1004 Relative to reg-reg move (2). */
1005 {4, 4, 4}, /* cost of storing integer registers */
1006 3, /* cost of reg,reg fld/fst */
1007 {12, 12, 12}, /* cost of loading fp registers
1008 in SFmode, DFmode and XFmode */
1009 {4, 4, 4}, /* cost of storing fp registers
1010 in SFmode, DFmode and XFmode */
1011 6, /* cost of moving MMX register */
1012 {12, 12}, /* cost of loading MMX registers
1013 in SImode and DImode */
1014 {12, 12}, /* cost of storing MMX registers
1015 in SImode and DImode */
1016 6, /* cost of moving SSE register */
1017 {12, 12, 12}, /* cost of loading SSE registers
1018 in SImode, DImode and TImode */
1019 {12, 12, 12}, /* cost of storing SSE registers
1020 in SImode, DImode and TImode */
1021 8, /* MMX or SSE register to integer */
1022 8, /* size of l1 cache. */
1023 1024, /* size of l2 cache. */
1024 128, /* size of prefetch block */
1025 8, /* number of parallel prefetches */
1026 1, /* Branch cost */
1027 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1028 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1029 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
1030 COSTS_N_INSNS (3), /* cost of FABS instruction. */
1031 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
1032 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
1033 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
1034 {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
1035 {100000, unrolled_loop}, {-1, libcall}}}},
1036 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
1037 {-1, libcall}}},
1038 {libcall, {{24, loop}, {64, unrolled_loop},
1039 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1040 1, /* scalar_stmt_cost. */
1041 1, /* scalar load_cost. */
1042 1, /* scalar_store_cost. */
1043 1, /* vec_stmt_cost. */
1044 1, /* vec_to_scalar_cost. */
1045 1, /* scalar_to_vec_cost. */
1046 1, /* vec_align_load_cost. */
1047 2, /* vec_unalign_load_cost. */
1048 1, /* vec_store_cost. */
1049 3, /* cond_taken_branch_cost. */
1050 1, /* cond_not_taken_branch_cost. */
1051 };
1052
1053 static const
1054 struct processor_costs core2_cost = {
1055 COSTS_N_INSNS (1), /* cost of an add instruction */
1056 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1057 COSTS_N_INSNS (1), /* variable shift costs */
1058 COSTS_N_INSNS (1), /* constant shift costs */
1059 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1060 COSTS_N_INSNS (3), /* HI */
1061 COSTS_N_INSNS (3), /* SI */
1062 COSTS_N_INSNS (3), /* DI */
1063 COSTS_N_INSNS (3)}, /* other */
1064 0, /* cost of multiply per each bit set */
1065 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
1066 COSTS_N_INSNS (22), /* HI */
1067 COSTS_N_INSNS (22), /* SI */
1068 COSTS_N_INSNS (22), /* DI */
1069 COSTS_N_INSNS (22)}, /* other */
1070 COSTS_N_INSNS (1), /* cost of movsx */
1071 COSTS_N_INSNS (1), /* cost of movzx */
1072 8, /* "large" insn */
1073 16, /* MOVE_RATIO */
1074 2, /* cost for loading QImode using movzbl */
1075 {6, 6, 6}, /* cost of loading integer registers
1076 in QImode, HImode and SImode.
1077 Relative to reg-reg move (2). */
1078 {4, 4, 4}, /* cost of storing integer registers */
1079 2, /* cost of reg,reg fld/fst */
1080 {6, 6, 6}, /* cost of loading fp registers
1081 in SFmode, DFmode and XFmode */
1082 {4, 4, 4}, /* cost of storing fp registers
1083 in SFmode, DFmode and XFmode */
1084 2, /* cost of moving MMX register */
1085 {6, 6}, /* cost of loading MMX registers
1086 in SImode and DImode */
1087 {4, 4}, /* cost of storing MMX registers
1088 in SImode and DImode */
1089 2, /* cost of moving SSE register */
1090 {6, 6, 6}, /* cost of loading SSE registers
1091 in SImode, DImode and TImode */
1092 {4, 4, 4}, /* cost of storing SSE registers
1093 in SImode, DImode and TImode */
1094 2, /* MMX or SSE register to integer */
1095 32, /* size of l1 cache. */
1096 2048, /* size of l2 cache. */
1097 128, /* size of prefetch block */
1098 8, /* number of parallel prefetches */
1099 3, /* Branch cost */
1100 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
1101 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
1102 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
1103 COSTS_N_INSNS (1), /* cost of FABS instruction. */
1104 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
1105 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
1106 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
1107 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
1108 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1109 {{libcall, {{8, loop}, {15, unrolled_loop},
1110 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1111 {libcall, {{24, loop}, {32, unrolled_loop},
1112 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1113 1, /* scalar_stmt_cost. */
1114 1, /* scalar load_cost. */
1115 1, /* scalar_store_cost. */
1116 1, /* vec_stmt_cost. */
1117 1, /* vec_to_scalar_cost. */
1118 1, /* scalar_to_vec_cost. */
1119 1, /* vec_align_load_cost. */
1120 2, /* vec_unalign_load_cost. */
1121 1, /* vec_store_cost. */
1122 3, /* cond_taken_branch_cost. */
1123 1, /* cond_not_taken_branch_cost. */
1124 };
1125
1126 static const
1127 struct processor_costs atom_cost = {
1128 COSTS_N_INSNS (1), /* cost of an add instruction */
1129 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1130 COSTS_N_INSNS (1), /* variable shift costs */
1131 COSTS_N_INSNS (1), /* constant shift costs */
1132 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1133 COSTS_N_INSNS (4), /* HI */
1134 COSTS_N_INSNS (3), /* SI */
1135 COSTS_N_INSNS (4), /* DI */
1136 COSTS_N_INSNS (2)}, /* other */
1137 0, /* cost of multiply per each bit set */
1138 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1139 COSTS_N_INSNS (26), /* HI */
1140 COSTS_N_INSNS (42), /* SI */
1141 COSTS_N_INSNS (74), /* DI */
1142 COSTS_N_INSNS (74)}, /* other */
1143 COSTS_N_INSNS (1), /* cost of movsx */
1144 COSTS_N_INSNS (1), /* cost of movzx */
1145 8, /* "large" insn */
1146 17, /* MOVE_RATIO */
1147 2, /* cost for loading QImode using movzbl */
1148 {4, 4, 4}, /* cost of loading integer registers
1149 in QImode, HImode and SImode.
1150 Relative to reg-reg move (2). */
1151 {4, 4, 4}, /* cost of storing integer registers */
1152 4, /* cost of reg,reg fld/fst */
1153 {12, 12, 12}, /* cost of loading fp registers
1154 in SFmode, DFmode and XFmode */
1155 {6, 6, 8}, /* cost of storing fp registers
1156 in SFmode, DFmode and XFmode */
1157 2, /* cost of moving MMX register */
1158 {8, 8}, /* cost of loading MMX registers
1159 in SImode and DImode */
1160 {8, 8}, /* cost of storing MMX registers
1161 in SImode and DImode */
1162 2, /* cost of moving SSE register */
1163 {8, 8, 8}, /* cost of loading SSE registers
1164 in SImode, DImode and TImode */
1165 {8, 8, 8}, /* cost of storing SSE registers
1166 in SImode, DImode and TImode */
1167 5, /* MMX or SSE register to integer */
1168 32, /* size of l1 cache. */
1169 256, /* size of l2 cache. */
1170 64, /* size of prefetch block */
1171 6, /* number of parallel prefetches */
1172 3, /* Branch cost */
1173 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1174 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1175 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1176 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1177 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1178 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1179 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
1180 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
1181 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1182 {{libcall, {{8, loop}, {15, unrolled_loop},
1183 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1184 {libcall, {{24, loop}, {32, unrolled_loop},
1185 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1186 1, /* scalar_stmt_cost. */
1187 1, /* scalar load_cost. */
1188 1, /* scalar_store_cost. */
1189 1, /* vec_stmt_cost. */
1190 1, /* vec_to_scalar_cost. */
1191 1, /* scalar_to_vec_cost. */
1192 1, /* vec_align_load_cost. */
1193 2, /* vec_unalign_load_cost. */
1194 1, /* vec_store_cost. */
1195 3, /* cond_taken_branch_cost. */
1196 1, /* cond_not_taken_branch_cost. */
1197 };
1198
1199 /* Generic64 should produce code tuned for Nocona and K8. */
1200 static const
1201 struct processor_costs generic64_cost = {
1202 COSTS_N_INSNS (1), /* cost of an add instruction */
1203 /* On all chips taken into consideration, lea takes 2 cycles or more. With
1204 that cost, however, our current implementation of synth_mult results in
1205 the use of unnecessary temporary registers, causing regressions on several
1206 SPECfp benchmarks. */
1207 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1208 COSTS_N_INSNS (1), /* variable shift costs */
1209 COSTS_N_INSNS (1), /* constant shift costs */
1210 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1211 COSTS_N_INSNS (4), /* HI */
1212 COSTS_N_INSNS (3), /* SI */
1213 COSTS_N_INSNS (4), /* DI */
1214 COSTS_N_INSNS (2)}, /* other */
1215 0, /* cost of multiply per each bit set */
1216 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1217 COSTS_N_INSNS (26), /* HI */
1218 COSTS_N_INSNS (42), /* SI */
1219 COSTS_N_INSNS (74), /* DI */
1220 COSTS_N_INSNS (74)}, /* other */
1221 COSTS_N_INSNS (1), /* cost of movsx */
1222 COSTS_N_INSNS (1), /* cost of movzx */
1223 8, /* "large" insn */
1224 17, /* MOVE_RATIO */
1225 4, /* cost for loading QImode using movzbl */
1226 {4, 4, 4}, /* cost of loading integer registers
1227 in QImode, HImode and SImode.
1228 Relative to reg-reg move (2). */
1229 {4, 4, 4}, /* cost of storing integer registers */
1230 4, /* cost of reg,reg fld/fst */
1231 {12, 12, 12}, /* cost of loading fp registers
1232 in SFmode, DFmode and XFmode */
1233 {6, 6, 8}, /* cost of storing fp registers
1234 in SFmode, DFmode and XFmode */
1235 2, /* cost of moving MMX register */
1236 {8, 8}, /* cost of loading MMX registers
1237 in SImode and DImode */
1238 {8, 8}, /* cost of storing MMX registers
1239 in SImode and DImode */
1240 2, /* cost of moving SSE register */
1241 {8, 8, 8}, /* cost of loading SSE registers
1242 in SImode, DImode and TImode */
1243 {8, 8, 8}, /* cost of storing SSE registers
1244 in SImode, DImode and TImode */
1245 5, /* MMX or SSE register to integer */
1246 32, /* size of l1 cache. */
1247 512, /* size of l2 cache. */
1248 64, /* size of prefetch block */
1249 6, /* number of parallel prefetches */
1250 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
1251 value is increased to the perhaps more appropriate value of 5. */
1252 3, /* Branch cost */
1253 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1254 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1255 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1256 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1257 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1258 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1259 {DUMMY_STRINGOP_ALGS,
1260 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1261 {DUMMY_STRINGOP_ALGS,
1262 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1263 1, /* scalar_stmt_cost. */
1264 1, /* scalar load_cost. */
1265 1, /* scalar_store_cost. */
1266 1, /* vec_stmt_cost. */
1267 1, /* vec_to_scalar_cost. */
1268 1, /* scalar_to_vec_cost. */
1269 1, /* vec_align_load_cost. */
1270 2, /* vec_unalign_load_cost. */
1271 1, /* vec_store_cost. */
1272 3, /* cond_taken_branch_cost. */
1273 1, /* cond_not_taken_branch_cost. */
1274 };
1275
1276 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
1277 static const
1278 struct processor_costs generic32_cost = {
1279 COSTS_N_INSNS (1), /* cost of an add instruction */
1280 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1281 COSTS_N_INSNS (1), /* variable shift costs */
1282 COSTS_N_INSNS (1), /* constant shift costs */
1283 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1284 COSTS_N_INSNS (4), /* HI */
1285 COSTS_N_INSNS (3), /* SI */
1286 COSTS_N_INSNS (4), /* DI */
1287 COSTS_N_INSNS (2)}, /* other */
1288 0, /* cost of multiply per each bit set */
1289 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1290 COSTS_N_INSNS (26), /* HI */
1291 COSTS_N_INSNS (42), /* SI */
1292 COSTS_N_INSNS (74), /* DI */
1293 COSTS_N_INSNS (74)}, /* other */
1294 COSTS_N_INSNS (1), /* cost of movsx */
1295 COSTS_N_INSNS (1), /* cost of movzx */
1296 8, /* "large" insn */
1297 17, /* MOVE_RATIO */
1298 4, /* cost for loading QImode using movzbl */
1299 {4, 4, 4}, /* cost of loading integer registers
1300 in QImode, HImode and SImode.
1301 Relative to reg-reg move (2). */
1302 {4, 4, 4}, /* cost of storing integer registers */
1303 4, /* cost of reg,reg fld/fst */
1304 {12, 12, 12}, /* cost of loading fp registers
1305 in SFmode, DFmode and XFmode */
1306 {6, 6, 8}, /* cost of storing fp registers
1307 in SFmode, DFmode and XFmode */
1308 2, /* cost of moving MMX register */
1309 {8, 8}, /* cost of loading MMX registers
1310 in SImode and DImode */
1311 {8, 8}, /* cost of storing MMX registers
1312 in SImode and DImode */
1313 2, /* cost of moving SSE register */
1314 {8, 8, 8}, /* cost of loading SSE registers
1315 in SImode, DImode and TImode */
1316 {8, 8, 8}, /* cost of storing SSE registers
1317 in SImode, DImode and TImode */
1318 5, /* MMX or SSE register to integer */
1319 32, /* size of l1 cache. */
1320 256, /* size of l2 cache. */
1321 64, /* size of prefetch block */
1322 6, /* number of parallel prefetches */
1323 3, /* Branch cost */
1324 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1325 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1326 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1327 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1328 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1329 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1330 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1331 DUMMY_STRINGOP_ALGS},
1332 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1333 DUMMY_STRINGOP_ALGS},
1334 1, /* scalar_stmt_cost. */
1335 1, /* scalar load_cost. */
1336 1, /* scalar_store_cost. */
1337 1, /* vec_stmt_cost. */
1338 1, /* vec_to_scalar_cost. */
1339 1, /* scalar_to_vec_cost. */
1340 1, /* vec_align_load_cost. */
1341 2, /* vec_unalign_load_cost. */
1342 1, /* vec_store_cost. */
1343 3, /* cond_taken_branch_cost. */
1344 1, /* cond_not_taken_branch_cost. */
1345 };
1346
1347 const struct processor_costs *ix86_cost = &pentium_cost;
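/* Editor's note: a hedged sketch (not part of the original file) of how this
   default pointer is expected to be redirected once the command line has been
   processed; override_options later in this file does essentially this, where
   processor_target_table is the per-CPU table defined there.  */
#if 0
static void
example_select_cost_table (void)
{
  /* When optimizing for size use the byte-oriented table above, otherwise
     pick the cost table that matches the CPU selected by -mtune.  */
  if (optimize_size)
    ix86_cost = &ix86_size_cost;
  else
    ix86_cost = processor_target_table[ix86_tune].cost;
}
#endif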
1348
1349 /* Processor feature/optimization bitmasks. */
1350 #define m_386 (1<<PROCESSOR_I386)
1351 #define m_486 (1<<PROCESSOR_I486)
1352 #define m_PENT (1<<PROCESSOR_PENTIUM)
1353 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
1354 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
1355 #define m_NOCONA (1<<PROCESSOR_NOCONA)
1356 #define m_CORE2 (1<<PROCESSOR_CORE2)
1357 #define m_ATOM (1<<PROCESSOR_ATOM)
1358
1359 #define m_GEODE (1<<PROCESSOR_GEODE)
1360 #define m_K6 (1<<PROCESSOR_K6)
1361 #define m_K6_GEODE (m_K6 | m_GEODE)
1362 #define m_K8 (1<<PROCESSOR_K8)
1363 #define m_ATHLON (1<<PROCESSOR_ATHLON)
1364 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
1365 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
1366 #define m_BDVER1 (1<<PROCESSOR_BDVER1)
1367 #define m_AMD_MULTIPLE (m_K8 | m_ATHLON | m_AMDFAM10 | m_BDVER1)
1368
1369 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1370 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1371
1372 /* Generic instruction choice should be a common subset of the supported CPUs
1373 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1374 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
1375
1376 /* Feature tests against the various tunings. */
1377 unsigned char ix86_tune_features[X86_TUNE_LAST];
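/* Editor's note: a hedged sketch (not part of the original file) of how the
   m_* masks above and the initial_ix86_tune_features[] table below combine;
   override_options later in this file fills ix86_tune_features in essentially
   this way, with ix86_tune_mask being 1 << ix86_tune for the selected CPU.  */
#if 0
static void
example_fill_tune_features (unsigned int ix86_tune_mask)
{
  int i;

  /* A tuning flag is enabled when its mask contains the bit of the CPU
     being tuned for.  */
  for (i = 0; i < X86_TUNE_LAST; i++)
    ix86_tune_features[i]
      = (initial_ix86_tune_features[i] & ix86_tune_mask) != 0;
}
#endif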
1378
1379 /* Feature tests against the various tunings used to create ix86_tune_features
1380 based on the processor mask. */
1381 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
1382 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1383 negatively, so enabling it for Generic64 seems like a good code size
1384 tradeoff. We can't enable it for 32-bit generic because it does not
1385 work well with PPro based chips. */
1386 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_CORE2 | m_GENERIC64,
1387
1388 /* X86_TUNE_PUSH_MEMORY */
1389 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
1390 | m_NOCONA | m_CORE2 | m_GENERIC,
1391
1392 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1393 m_486 | m_PENT,
1394
1395 /* X86_TUNE_UNROLL_STRLEN */
1396 m_486 | m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_K6
1397 | m_CORE2 | m_GENERIC,
1398
1399 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1400 m_ATOM | m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4 | m_GENERIC,
1401
1402 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put into the P4 based
1403 on simulation results, but after the P4 shipped no performance benefit
1404 was observed from branch hints, and they also increase code size.
1405 As a result, icc never generates branch hints. */
1406 0,
1407
1408 /* X86_TUNE_DOUBLE_WITH_ADD */
1409 ~m_386,
1410
1411 /* X86_TUNE_USE_SAHF */
1412 m_ATOM | m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_BDVER1 | m_PENT4
1413 | m_NOCONA | m_CORE2 | m_GENERIC,
1414
1415 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1416 partial dependencies. */
1417 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA
1418 | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1419
1420 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1421 register stalls on the Generic32 compilation setting as well. However,
1422 in the current implementation partial register stalls are not eliminated
1423 very well - they can be introduced via subregs synthesized by combine
1424 and can occur in caller/callee saving sequences. Because this option
1425 pays back little on PPro based chips and conflicts with the partial reg
1426 dependencies used by Athlon/P4 based chips, it is better to leave it off
1427 for generic32 for now. */
1428 m_PPRO,
1429
1430 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1431 m_CORE2 | m_GENERIC,
1432
1433 /* X86_TUNE_USE_HIMODE_FIOP */
1434 m_386 | m_486 | m_K6_GEODE,
1435
1436 /* X86_TUNE_USE_SIMODE_FIOP */
1437 ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_ATOM | m_CORE2 | m_GENERIC),
1438
1439 /* X86_TUNE_USE_MOV0 */
1440 m_K6,
1441
1442 /* X86_TUNE_USE_CLTD */
1443 ~(m_PENT | m_ATOM | m_K6 | m_CORE2 | m_GENERIC),
1444
1445 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1446 m_PENT4,
1447
1448 /* X86_TUNE_SPLIT_LONG_MOVES */
1449 m_PPRO,
1450
1451 /* X86_TUNE_READ_MODIFY_WRITE */
1452 ~m_PENT,
1453
1454 /* X86_TUNE_READ_MODIFY */
1455 ~(m_PENT | m_PPRO),
1456
1457 /* X86_TUNE_PROMOTE_QIMODE */
1458 m_K6_GEODE | m_PENT | m_ATOM | m_386 | m_486 | m_AMD_MULTIPLE
1459 | m_CORE2 | m_GENERIC /* | m_PENT4 ? */,
1460
1461 /* X86_TUNE_FAST_PREFIX */
1462 ~(m_PENT | m_486 | m_386),
1463
1464 /* X86_TUNE_SINGLE_STRINGOP */
1465 m_386 | m_PENT4 | m_NOCONA,
1466
1467 /* X86_TUNE_QIMODE_MATH */
1468 ~0,
1469
1470 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1471 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
1472 might be considered for Generic32 if our scheme for avoiding partial
1473 stalls was more effective. */
1474 ~m_PPRO,
1475
1476 /* X86_TUNE_PROMOTE_QI_REGS */
1477 0,
1478
1479 /* X86_TUNE_PROMOTE_HI_REGS */
1480 m_PPRO,
1481
1482 /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop. */
1483 m_ATOM | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT4 | m_NOCONA
1484 | m_CORE2 | m_GENERIC,
1485
1486 /* X86_TUNE_ADD_ESP_8 */
1487 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_K6_GEODE | m_386
1488 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1489
1490 /* X86_TUNE_SUB_ESP_4 */
1491 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2
1492 | m_GENERIC,
1493
1494 /* X86_TUNE_SUB_ESP_8 */
1495 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_386 | m_486
1496 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1497
1498 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1499 for DFmode copies */
1500 ~(m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1501 | m_GENERIC | m_GEODE),
1502
1503 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1504 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1505
1506 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1507 conflict here between PPro/Pentium4 based chips that treat 128bit
1508 SSE registers as single units versus K8 based chips that divide SSE
1509 registers into two 64bit halves. This knob promotes all store destinations
1510 to be 128bit to allow register renaming on 128bit SSE units, but usually
1511 results in one extra microop on 64bit SSE units. Experimental results
1512 show that disabling this option on P4 brings over a 20% SPECfp regression,
1513 while enabling it on K8 brings roughly a 2.4% regression that can be partly
1514 masked by careful scheduling of moves. */
1515 m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC
1516 | m_AMDFAM10 | m_BDVER1,
1517
1518 /* X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL */
1519 m_AMDFAM10 | m_BDVER1,
1520
1521 /* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL */
1522 m_BDVER1,
1523
1524 /* X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL */
1525 m_BDVER1,
1526
1527 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where types and dependencies
1528 are resolved on SSE register parts instead of whole registers, so we may
1529 maintain just the lower part of scalar values in the proper format, leaving
1530 the upper part undefined. */
1531 m_ATHLON_K8,
1532
1533 /* X86_TUNE_SSE_TYPELESS_STORES */
1534 m_AMD_MULTIPLE,
1535
1536 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1537 m_PPRO | m_PENT4 | m_NOCONA,
1538
1539 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1540 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1541
1542 /* X86_TUNE_PROLOGUE_USING_MOVE */
1543 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2 | m_GENERIC,
1544
1545 /* X86_TUNE_EPILOGUE_USING_MOVE */
1546 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2 | m_GENERIC,
1547
1548 /* X86_TUNE_SHIFT1 */
1549 ~m_486,
1550
1551 /* X86_TUNE_USE_FFREEP */
1552 m_AMD_MULTIPLE,
1553
1554 /* X86_TUNE_INTER_UNIT_MOVES */
1555 ~(m_AMD_MULTIPLE | m_GENERIC),
1556
1557 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
1558 ~(m_AMDFAM10 | m_BDVER1),
1559
1560 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1561 than 4 branch instructions in the 16 byte window. */
1562 m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2
1563 | m_GENERIC,
1564
1565 /* X86_TUNE_SCHEDULE */
1566 m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_ATOM | m_CORE2
1567 | m_GENERIC,
1568
1569 /* X86_TUNE_USE_BT */
1570 m_AMD_MULTIPLE | m_ATOM | m_CORE2 | m_GENERIC,
1571
1572 /* X86_TUNE_USE_INCDEC */
1573 ~(m_PENT4 | m_NOCONA | m_GENERIC | m_ATOM),
1574
1575 /* X86_TUNE_PAD_RETURNS */
1576 m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
1577
1578 /* X86_TUNE_EXT_80387_CONSTANTS */
1579 m_K6_GEODE | m_ATHLON_K8 | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO
1580 | m_CORE2 | m_GENERIC,
1581
1582 /* X86_TUNE_SHORTEN_X87_SSE */
1583 ~m_K8,
1584
1585 /* X86_TUNE_AVOID_VECTOR_DECODE */
1586 m_K8 | m_GENERIC64,
1587
1588 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have the same latency for
1589 HImode and SImode multiplies, but the 386 and 486 do HImode multiplies faster. */
1590 ~(m_386 | m_486),
1591
1592 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
1593 vector path on AMD machines. */
1594 m_K8 | m_GENERIC64 | m_AMDFAM10 | m_BDVER1,
1595
1596 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
1597 machines. */
1598 m_K8 | m_GENERIC64 | m_AMDFAM10 | m_BDVER1,
1599
1600 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
1601 than a MOV. */
1602 m_PENT,
1603
1604 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1605 but one byte longer. */
1606 m_PENT,
1607
1608 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with a memory
1609 operand that cannot be represented using a modRM byte. The XOR
1610 replacement is long decoded, so this split helps here as well. */
1611 m_K6,
1612
1613 /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
1614 from FP to FP. */
1615 m_AMDFAM10 | m_GENERIC,
1616
1617 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
1618 from integer to FP. */
1619 m_AMDFAM10,
1620
1621 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
1622 with a subsequent conditional jump instruction into a single
1623 compare-and-branch uop. */
1624 m_CORE2 | m_BDVER1,
1625
1626 /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
1627 will impact LEA instruction selection. */
1628 m_ATOM,
1629 };
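
/* A minimal sketch (illustration only; override_options below does the
   real work) of how the masks above become the per-feature booleans in
   ix86_tune_features once ix86_tune has been chosen:  */
#if 0
{
  unsigned int tune_mask = 1u << ix86_tune;
  unsigned int i;

  for (i = 0; i < X86_TUNE_LAST; i++)
    ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & tune_mask);
}
#endif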
1630
1631 /* Feature tests against the various architecture variations. */
1632 unsigned char ix86_arch_features[X86_ARCH_LAST];
1633
1634 /* Feature tests against the various architecture variations, used to create
1635 ix86_arch_features based on the processor mask. */
1636 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
1637 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
1638 ~(m_386 | m_486 | m_PENT | m_K6),
1639
1640 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1641 ~m_386,
1642
1643 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1644 ~(m_386 | m_486),
1645
1646 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1647 ~m_386,
1648
1649 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
1650 ~m_386,
1651 };
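
/* Illustration only: the architecture masks above are expanded the same
   way, keyed on ix86_arch rather than ix86_tune, and convenience macros
   in i386.h (e.g. TARGET_CMOVE is assumed to read the corresponding
   ix86_arch_features[] entry) test the result.  A sketch:  */
#if 0
{
  unsigned int arch_mask = 1u << ix86_arch;
  unsigned int i;

  for (i = 0; i < X86_ARCH_LAST; i++)
    ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & arch_mask);
}
#endif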
1652
1653 static const unsigned int x86_accumulate_outgoing_args
1654 = m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1655 | m_GENERIC;
1656
1657 static const unsigned int x86_arch_always_fancy_math_387
1658 = m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4
1659 | m_NOCONA | m_CORE2 | m_GENERIC;
1660
1661 static enum stringop_alg stringop_alg = no_stringop;
1662
1663 /* If the average insn count for a single function invocation is
1664 lower than this constant, emit fast (but longer) prologue and
1665 epilogue code. */
1666 #define FAST_PROLOGUE_INSN_COUNT 20
1667
1668 /* Names for the 8-bit low, 8-bit high, and 16-bit registers, respectively. */
1669 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1670 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1671 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1672
1673 /* Array of the smallest class containing reg number REGNO, indexed by
1674 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1675
1676 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1677 {
1678 /* ax, dx, cx, bx */
1679 AREG, DREG, CREG, BREG,
1680 /* si, di, bp, sp */
1681 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
1682 /* FP registers */
1683 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1684 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1685 /* arg pointer */
1686 NON_Q_REGS,
1687 /* flags, fpsr, fpcr, frame */
1688 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
1689 /* SSE registers */
1690 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1691 SSE_REGS, SSE_REGS,
1692 /* MMX registers */
1693 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
1694 MMX_REGS, MMX_REGS,
1695 /* REX registers */
1696 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1697 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1698 /* SSE REX registers */
1699 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1700 SSE_REGS, SSE_REGS,
1701 };
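
/* Usage sketch (an assumption about the macro in i386.h, shown here only
   for illustration): REGNO_REG_CLASS is expected to be a direct lookup in
   the table above, e.g. yielding AREG for AX_REG and NON_Q_REGS for the
   stack pointer.  */
#if 0
#define REGNO_REG_CLASS(REGNO) (regclass_map[(REGNO)])
#endif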
1702
1703 /* The "default" register map used in 32bit mode. */
1704
1705 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1706 {
1707 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1708 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1709 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1710 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1711 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1712 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1713 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1714 };
1715
1716 /* The "default" register map used in 64bit mode. */
1717
1718 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1719 {
1720 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1721 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1722 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1723 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1724 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1725 8, 9, 10, 11, 12, 13, 14, 15,		/* extended integer registers */
1726 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1727 };
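
/* Illustration only (an assumption about the corresponding i386.h macro):
   debug output is expected to choose between the two maps above based on
   the target word size, roughly:  */
#if 0
#define DBX_REGISTER_NUMBER(N) \
  (TARGET_64BIT ? dbx64_register_map[(N)] : dbx_register_map[(N)])
#endif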
1728
1729 /* Define the register numbers to be used in Dwarf debugging information.
1730 The SVR4 reference port C compiler uses the following register numbers
1731 in its Dwarf output code:
1732 0 for %eax (gcc regno = 0)
1733 1 for %ecx (gcc regno = 2)
1734 2 for %edx (gcc regno = 1)
1735 3 for %ebx (gcc regno = 3)
1736 4 for %esp (gcc regno = 7)
1737 5 for %ebp (gcc regno = 6)
1738 6 for %esi (gcc regno = 4)
1739 7 for %edi (gcc regno = 5)
1740 The following three DWARF register numbers are never generated by
1741 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1742 believes these numbers have these meanings.
1743 8 for %eip (no gcc equivalent)
1744 9 for %eflags (gcc regno = 17)
1745 10 for %trapno (no gcc equivalent)
1746 It is not at all clear how we should number the FP stack registers
1747 for the x86 architecture. If the version of SDB on x86/svr4 were
1748 a bit less brain dead with respect to floating-point then we would
1749 have a precedent to follow with respect to DWARF register numbers
1750 for x86 FP registers, but the SDB on x86/svr4 is so completely
1751 broken with respect to FP registers that it is hardly worth thinking
1752 of it as something to strive for compatibility with.
1753 The version of x86/svr4 SDB I have at the moment does (partially)
1754 seem to believe that DWARF register number 11 is associated with
1755 the x86 register %st(0), but that's about all. Higher DWARF
1756 register numbers don't seem to be associated with anything in
1757 particular, and even for DWARF regno 11, SDB only seems to under-
1758 stand that it should say that a variable lives in %st(0) (when
1759 asked via an `=' command) if we said it was in DWARF regno 11,
1760 but SDB still prints garbage when asked for the value of the
1761 variable in question (via a `/' command).
1762 (Also note that the labels SDB prints for various FP stack regs
1763 when doing an `x' command are all wrong.)
1764 Note that these problems generally don't affect the native SVR4
1765 C compiler because it doesn't allow the use of -O with -g and
1766 because when it is *not* optimizing, it allocates a memory
1767 location for each floating-point variable, and the memory
1768 location is what gets described in the DWARF AT_location
1769 attribute for the variable in question.
1770 Regardless of the severe mental illness of the x86/svr4 SDB, we
1771 do something sensible here and we use the following DWARF
1772 register numbers. Note that these are all stack-top-relative
1773 numbers.
1774 11 for %st(0) (gcc regno = 8)
1775 12 for %st(1) (gcc regno = 9)
1776 13 for %st(2) (gcc regno = 10)
1777 14 for %st(3) (gcc regno = 11)
1778 15 for %st(4) (gcc regno = 12)
1779 16 for %st(5) (gcc regno = 13)
1780 17 for %st(6) (gcc regno = 14)
1781 18 for %st(7) (gcc regno = 15)
1782 */
1783 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1784 {
1785 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1786 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1787 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1788 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1789 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1790 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1791 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1792 };
1793
1794 /* Test and compare insns in i386.md store the information needed to
1795 generate branch and scc insns here. */
1796
1797 rtx ix86_compare_op0 = NULL_RTX;
1798 rtx ix86_compare_op1 = NULL_RTX;
1799
1800 /* Define parameter passing and return registers. */
1801
1802 static int const x86_64_int_parameter_registers[6] =
1803 {
1804 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
1805 };
1806
1807 static int const x86_64_ms_abi_int_parameter_registers[4] =
1808 {
1809 CX_REG, DX_REG, R8_REG, R9_REG
1810 };
1811
1812 static int const x86_64_int_return_registers[4] =
1813 {
1814 AX_REG, DX_REG, DI_REG, SI_REG
1815 };
1816
1817 /* Define the structure for the machine field in struct function. */
1818
1819 struct GTY(()) stack_local_entry {
1820 unsigned short mode;
1821 unsigned short n;
1822 rtx rtl;
1823 struct stack_local_entry *next;
1824 };
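
/* A minimal sketch (hypothetical helper, not part of the build) of how the
   list above is meant to be used: scratch stack slots are cached per
   (mode, slot number) pair so that repeated requests reuse the same RTL.
   This assumes the machine_function field holding the list is named
   stack_locals.  */
#if 0
static rtx
example_lookup_stack_local (enum machine_mode mode, unsigned short n)
{
  struct stack_local_entry *s;

  for (s = cfun->machine->stack_locals; s; s = s->next)
    if (s->mode == mode && s->n == n)
      return s->rtl;
  return NULL_RTX;	/* not cached yet; a fresh slot would be assigned */
}
#endif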
1825
1826 /* Structure describing stack frame layout.
1827 Stack grows downward:
1828
1829 [arguments]
1830 <- ARG_POINTER
1831 saved pc
1832
1833 saved frame pointer if frame_pointer_needed
1834 <- HARD_FRAME_POINTER
1835 [saved regs]
1836
1837 [padding0]
1838
1839 [saved SSE regs]
1840
1841 [padding1] \
1842 )
1843 [va_arg registers] (
1844 > to_allocate <- FRAME_POINTER
1845 [frame] (
1846 )
1847 [padding2] /
1848 */
1849 struct ix86_frame
1850 {
1851 int padding0;
1852 int nsseregs;
1853 int nregs;
1854 int padding1;
1855 int va_arg_size;
1856 int red_zone_size;
1857 HOST_WIDE_INT frame;
1858 int padding2;
1859 int outgoing_arguments_size;
1860
1861 HOST_WIDE_INT to_allocate;
1862 /* The offsets relative to ARG_POINTER. */
1863 HOST_WIDE_INT frame_pointer_offset;
1864 HOST_WIDE_INT hard_frame_pointer_offset;
1865 HOST_WIDE_INT stack_pointer_offset;
1866
1867 /* When save_regs_using_mov is set, emit prologue using
1868 move instead of push instructions. */
1869 bool save_regs_using_mov;
1870 };
1871
1872 /* Code model option. */
1873 enum cmodel ix86_cmodel;
1874 /* Asm dialect. */
1875 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1876 /* TLS dialects. */
1877 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1878
1879 /* Which unit we are generating floating point math for. */
1880 enum fpmath_unit ix86_fpmath;
1881
1882 /* Which CPU we are scheduling for. */
1883 enum attr_cpu ix86_schedule;
1884
1885 /* Which CPU we are optimizing for. */
1886 enum processor_type ix86_tune;
1887
1888 /* Which instruction set architecture to use. */
1889 enum processor_type ix86_arch;
1890
1891 /* True if the SSE prefetch instruction is not a NOOP. */
1892 int x86_prefetch_sse;
1893
1894 /* ix86_regparm_string as a number */
1895 static int ix86_regparm;
1896
1897 /* -mstackrealign option */
1898 extern int ix86_force_align_arg_pointer;
1899 static const char ix86_force_align_arg_pointer_string[]
1900 = "force_align_arg_pointer";
1901
1902 static rtx (*ix86_gen_leave) (void);
1903 static rtx (*ix86_gen_pop1) (rtx);
1904 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
1905 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
1906 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
1907 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
1908 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
1909 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
1910 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
1911 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
1912 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
1913
1914 /* Preferred alignment for stack boundary in bits. */
1915 unsigned int ix86_preferred_stack_boundary;
1916
1917 /* Alignment for incoming stack boundary in bits specified at
1918 command line. */
1919 static unsigned int ix86_user_incoming_stack_boundary;
1920
1921 /* Default alignment for incoming stack boundary in bits. */
1922 static unsigned int ix86_default_incoming_stack_boundary;
1923
1924 /* Alignment for incoming stack boundary in bits. */
1925 unsigned int ix86_incoming_stack_boundary;
1926
1927 /* The ABI used by the target. */
1928 enum calling_abi ix86_abi;
1929
1930 /* Values 1-5: see jump.c */
1931 int ix86_branch_cost;
1932
1933 /* Calling abi specific va_list type nodes. */
1934 static GTY(()) tree sysv_va_list_type_node;
1935 static GTY(()) tree ms_va_list_type_node;
1936
1937 /* Variables which are this size or smaller are put in the data/bss
1938 or ldata/lbss sections. */
1939
1940 int ix86_section_threshold = 65536;
1941
1942 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1943 char internal_label_prefix[16];
1944 int internal_label_prefix_len;
1945
1946 /* Fence to use after loop using movnt. */
1947 tree x86_mfence;
1948
1949 /* Register classes used for passing a given 64bit part of the argument.
1950 These represent the classes documented by the psABI, with the exception
1951 of the SSESF and SSEDF classes, which are basically the SSE class: gcc just
1952 uses an SFmode or DFmode move instead of DImode to avoid reformatting penalties.
1953
1954 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1955 whenever possible (when the upper half is just padding). */
1956 enum x86_64_reg_class
1957 {
1958 X86_64_NO_CLASS,
1959 X86_64_INTEGER_CLASS,
1960 X86_64_INTEGERSI_CLASS,
1961 X86_64_SSE_CLASS,
1962 X86_64_SSESF_CLASS,
1963 X86_64_SSEDF_CLASS,
1964 X86_64_SSEUP_CLASS,
1965 X86_64_X87_CLASS,
1966 X86_64_X87UP_CLASS,
1967 X86_64_COMPLEX_X87_CLASS,
1968 X86_64_MEMORY_CLASS
1969 };
1970
1971 #define MAX_CLASSES 4
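
/* Worked example (illustration only; the real classification is computed
   by classify_argument later in this file): a structure such as

     struct s { double d; int i; };

   occupies two eightbytes.  Under the scheme above the first eightbyte
   would be X86_64_SSEDF_CLASS (a lone double) and the second
   X86_64_INTEGERSI_CLASS (an int plus padding), so the value is passed
   in one SSE register and one integer register.  */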
1972
1973 /* Table of constants used by fldpi, fldln2, etc.... */
1974 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1975 static bool ext_80387_constants_init = 0;
1976
1977 \f
1978 static struct machine_function * ix86_init_machine_status (void);
1979 static rtx ix86_function_value (const_tree, const_tree, bool);
1980 static bool ix86_function_value_regno_p (const unsigned int);
1981 static rtx ix86_static_chain (const_tree, bool);
1982 static int ix86_function_regparm (const_tree, const_tree);
1983 static void ix86_compute_frame_layout (struct ix86_frame *);
1984 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
1985 rtx, rtx, int);
1986 static void ix86_add_new_builtins (int);
1987 static rtx ix86_expand_vec_perm_builtin (tree);
1988 static tree ix86_canonical_va_list_type (tree);
1989
1990 enum ix86_function_specific_strings
1991 {
1992 IX86_FUNCTION_SPECIFIC_ARCH,
1993 IX86_FUNCTION_SPECIFIC_TUNE,
1994 IX86_FUNCTION_SPECIFIC_FPMATH,
1995 IX86_FUNCTION_SPECIFIC_MAX
1996 };
1997
1998 static char *ix86_target_string (int, int, const char *, const char *,
1999 const char *, bool);
2000 static void ix86_debug_options (void) ATTRIBUTE_UNUSED;
2001 static void ix86_function_specific_save (struct cl_target_option *);
2002 static void ix86_function_specific_restore (struct cl_target_option *);
2003 static void ix86_function_specific_print (FILE *, int,
2004 struct cl_target_option *);
2005 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2006 static bool ix86_valid_target_attribute_inner_p (tree, char *[]);
2007 static bool ix86_can_inline_p (tree, tree);
2008 static void ix86_set_current_function (tree);
2009 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2010
2011 static enum calling_abi ix86_function_abi (const_tree);
2012
2013 \f
2014 #ifndef SUBTARGET32_DEFAULT_CPU
2015 #define SUBTARGET32_DEFAULT_CPU "i386"
2016 #endif
2017
2018 /* The svr4 ABI for the i386 says that records and unions are returned
2019 in memory. */
2020 #ifndef DEFAULT_PCC_STRUCT_RETURN
2021 #define DEFAULT_PCC_STRUCT_RETURN 1
2022 #endif
2023
2024 /* Whether -mtune= or -march= were specified */
2025 static int ix86_tune_defaulted;
2026 static int ix86_arch_specified;
2027
2028 /* Bit flags that specify the ISA we are compiling for. */
2029 int ix86_isa_flags = TARGET_64BIT_DEFAULT | TARGET_SUBTARGET_ISA_DEFAULT;
2030
2031 /* A mask of ix86_isa_flags that includes bit X if X
2032 was set or cleared on the command line. */
2033 static int ix86_isa_flags_explicit;
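
/* Illustration only: further below, override_options uses this mask so
   that an ISA implied by the selected -march entry is enabled only when
   the user did not explicitly disable it.  A sketch for
   "-march=core2 -mno-ssse3" (march_flags is a hypothetical stand-in for
   the PTA_* flags of the chosen processor_alias_table entry):  */
#if 0
if ((march_flags & PTA_SSSE3)
    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
  ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;	/* skipped: the bit is set in
						   ix86_isa_flags_explicit */
#endif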
2034
2035 /* Define a set of ISAs which are available when a given ISA is
2036 enabled. MMX and SSE ISAs are handled separately. */
2037
2038 #define OPTION_MASK_ISA_MMX_SET OPTION_MASK_ISA_MMX
2039 #define OPTION_MASK_ISA_3DNOW_SET \
2040 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_MMX_SET)
2041
2042 #define OPTION_MASK_ISA_SSE_SET OPTION_MASK_ISA_SSE
2043 #define OPTION_MASK_ISA_SSE2_SET \
2044 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE_SET)
2045 #define OPTION_MASK_ISA_SSE3_SET \
2046 (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE2_SET)
2047 #define OPTION_MASK_ISA_SSSE3_SET \
2048 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE3_SET)
2049 #define OPTION_MASK_ISA_SSE4_1_SET \
2050 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSSE3_SET)
2051 #define OPTION_MASK_ISA_SSE4_2_SET \
2052 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_1_SET)
2053 #define OPTION_MASK_ISA_AVX_SET \
2054 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_SSE4_2_SET)
2055 #define OPTION_MASK_ISA_FMA_SET \
2056 (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_AVX_SET)
2057
2058 /* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
2059 as -msse4.2. */
2060 #define OPTION_MASK_ISA_SSE4_SET OPTION_MASK_ISA_SSE4_2_SET
2061
2062 #define OPTION_MASK_ISA_SSE4A_SET \
2063 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE3_SET)
2064 #define OPTION_MASK_ISA_FMA4_SET \
2065 (OPTION_MASK_ISA_FMA4 | OPTION_MASK_ISA_SSE4A_SET \
2066 | OPTION_MASK_ISA_AVX_SET)
2067 #define OPTION_MASK_ISA_XOP_SET \
2068 (OPTION_MASK_ISA_XOP | OPTION_MASK_ISA_FMA4_SET)
2069 #define OPTION_MASK_ISA_LWP_SET \
2070 OPTION_MASK_ISA_LWP
2071
2072 /* AES and PCLMUL need SSE2 because they use xmm registers */
2073 #define OPTION_MASK_ISA_AES_SET \
2074 (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2_SET)
2075 #define OPTION_MASK_ISA_PCLMUL_SET \
2076 (OPTION_MASK_ISA_PCLMUL | OPTION_MASK_ISA_SSE2_SET)
2077
2078 #define OPTION_MASK_ISA_ABM_SET \
2079 (OPTION_MASK_ISA_ABM | OPTION_MASK_ISA_POPCNT)
2080
2081 #define OPTION_MASK_ISA_POPCNT_SET OPTION_MASK_ISA_POPCNT
2082 #define OPTION_MASK_ISA_CX16_SET OPTION_MASK_ISA_CX16
2083 #define OPTION_MASK_ISA_SAHF_SET OPTION_MASK_ISA_SAHF
2084 #define OPTION_MASK_ISA_MOVBE_SET OPTION_MASK_ISA_MOVBE
2085 #define OPTION_MASK_ISA_CRC32_SET OPTION_MASK_ISA_CRC32
2086
2087 #define OPTION_MASK_ISA_FSGSBASE_SET OPTION_MASK_ISA_FSGSBASE
2088 #define OPTION_MASK_ISA_RDRND_SET OPTION_MASK_ISA_RDRND
2089 #define OPTION_MASK_ISA_F16C_SET \
2090 (OPTION_MASK_ISA_F16C | OPTION_MASK_ISA_AVX_SET)
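
/* Worked example (illustration only): because the *_SET macros chain,
   enabling one ISA drags in everything it depends on.  Expanding the
   definitions above, -msse4.1 also turns on SSSE3, SSE3, SSE2 and SSE:  */
#if 0
gcc_assert (OPTION_MASK_ISA_SSE4_1_SET
	    == (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSSE3
		| OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE2
		| OPTION_MASK_ISA_SSE));
#endif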
2091
2092 /* Define a set of ISAs which aren't available when a given ISA is
2093 disabled. MMX and SSE ISAs are handled separately. */
2094
2095 #define OPTION_MASK_ISA_MMX_UNSET \
2096 (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_3DNOW_UNSET)
2097 #define OPTION_MASK_ISA_3DNOW_UNSET \
2098 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_A_UNSET)
2099 #define OPTION_MASK_ISA_3DNOW_A_UNSET OPTION_MASK_ISA_3DNOW_A
2100
2101 #define OPTION_MASK_ISA_SSE_UNSET \
2102 (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_SSE2_UNSET)
2103 #define OPTION_MASK_ISA_SSE2_UNSET \
2104 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE3_UNSET)
2105 #define OPTION_MASK_ISA_SSE3_UNSET \
2106 (OPTION_MASK_ISA_SSE3 \
2107 | OPTION_MASK_ISA_SSSE3_UNSET \
2108 | OPTION_MASK_ISA_SSE4A_UNSET )
2109 #define OPTION_MASK_ISA_SSSE3_UNSET \
2110 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1_UNSET)
2111 #define OPTION_MASK_ISA_SSE4_1_UNSET \
2112 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2_UNSET)
2113 #define OPTION_MASK_ISA_SSE4_2_UNSET \
2114 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_AVX_UNSET )
2115 #define OPTION_MASK_ISA_AVX_UNSET \
2116 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_FMA_UNSET \
2117 | OPTION_MASK_ISA_FMA4_UNSET | OPTION_MASK_ISA_F16C_UNSET)
2118 #define OPTION_MASK_ISA_FMA_UNSET OPTION_MASK_ISA_FMA
2119
2120 /* SSE4 includes both SSE4.1 and SSE4.2. -mno-sse4 should be the same
2121 as -mno-sse4.1. */
2122 #define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
2123
2124 #define OPTION_MASK_ISA_SSE4A_UNSET \
2125 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_FMA4_UNSET)
2126
2127 #define OPTION_MASK_ISA_FMA4_UNSET \
2128 (OPTION_MASK_ISA_FMA4 | OPTION_MASK_ISA_XOP_UNSET)
2129 #define OPTION_MASK_ISA_XOP_UNSET OPTION_MASK_ISA_XOP
2130 #define OPTION_MASK_ISA_LWP_UNSET OPTION_MASK_ISA_LWP
2131
2132 #define OPTION_MASK_ISA_AES_UNSET OPTION_MASK_ISA_AES
2133 #define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL
2134 #define OPTION_MASK_ISA_ABM_UNSET OPTION_MASK_ISA_ABM
2135 #define OPTION_MASK_ISA_POPCNT_UNSET OPTION_MASK_ISA_POPCNT
2136 #define OPTION_MASK_ISA_CX16_UNSET OPTION_MASK_ISA_CX16
2137 #define OPTION_MASK_ISA_SAHF_UNSET OPTION_MASK_ISA_SAHF
2138 #define OPTION_MASK_ISA_MOVBE_UNSET OPTION_MASK_ISA_MOVBE
2139 #define OPTION_MASK_ISA_CRC32_UNSET OPTION_MASK_ISA_CRC32
2140
2141 #define OPTION_MASK_ISA_FSGSBASE_UNSET OPTION_MASK_ISA_FSGSBASE
2142 #define OPTION_MASK_ISA_RDRND_UNSET OPTION_MASK_ISA_RDRND
2143 #define OPTION_MASK_ISA_F16C_UNSET OPTION_MASK_ISA_F16C
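
/* Worked example (illustration only): the *_UNSET macros chain in the
   other direction, so -mno-sse2 also has to clear every ISA that needs
   SSE2 (SSE3, SSSE3, SSE4A, SSE4.1, SSE4.2, AVX, FMA, FMA4, XOP, F16C);
   for instance AVX is among the bits it clears:  */
#if 0
gcc_assert ((OPTION_MASK_ISA_SSE2_UNSET & OPTION_MASK_ISA_AVX) != 0);
#endif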
2144
2145 /* Vectorization library interface and handlers. */
2146 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2147
2148 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2149 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2150
2151 /* Processor target table, indexed by processor number */
2152 struct ptt
2153 {
2154 const struct processor_costs *cost; /* Processor costs */
2155 const int align_loop; /* Default alignments. */
2156 const int align_loop_max_skip;
2157 const int align_jump;
2158 const int align_jump_max_skip;
2159 const int align_func;
2160 };
2161
2162 static const struct ptt processor_target_table[PROCESSOR_max] =
2163 {
2164 {&i386_cost, 4, 3, 4, 3, 4},
2165 {&i486_cost, 16, 15, 16, 15, 16},
2166 {&pentium_cost, 16, 7, 16, 7, 16},
2167 {&pentiumpro_cost, 16, 15, 16, 10, 16},
2168 {&geode_cost, 0, 0, 0, 0, 0},
2169 {&k6_cost, 32, 7, 32, 7, 32},
2170 {&athlon_cost, 16, 7, 16, 7, 16},
2171 {&pentium4_cost, 0, 0, 0, 0, 0},
2172 {&k8_cost, 16, 7, 16, 7, 16},
2173 {&nocona_cost, 0, 0, 0, 0, 0},
2174 {&core2_cost, 16, 10, 16, 10, 16},
2175 {&generic32_cost, 16, 7, 16, 7, 16},
2176 {&generic64_cost, 16, 10, 16, 10, 16},
2177 {&amdfam10_cost, 32, 24, 32, 7, 32},
2178 {&bdver1_cost, 32, 24, 32, 7, 32},
2179 {&atom_cost, 16, 7, 16, 7, 16}
2180 };
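
/* A rough sketch (illustration only; override_options below contains the
   real logic) of how this table is consumed once ix86_tune is known:
   costs come from the table unless we optimize for size, and the listed
   alignments are used as defaults when the user gave none.  */
#if 0
{
  ix86_cost = optimize_size ? &ix86_size_cost
			    : processor_target_table[ix86_tune].cost;
  if (align_loops == 0)
    align_loops = processor_target_table[ix86_tune].align_loop;
  if (align_jumps == 0)
    align_jumps = processor_target_table[ix86_tune].align_jump;
  if (align_functions == 0)
    align_functions = processor_target_table[ix86_tune].align_func;
}
#endif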
2181
2182 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
2183 {
2184 "generic",
2185 "i386",
2186 "i486",
2187 "pentium",
2188 "pentium-mmx",
2189 "pentiumpro",
2190 "pentium2",
2191 "pentium3",
2192 "pentium4",
2193 "pentium-m",
2194 "prescott",
2195 "nocona",
2196 "core2",
2197 "atom",
2198 "geode",
2199 "k6",
2200 "k6-2",
2201 "k6-3",
2202 "athlon",
2203 "athlon-4",
2204 "k8",
2205 "amdfam10",
2206 "bdver1"
2207 };
2208 \f
2209 /* Implement TARGET_HANDLE_OPTION. */
2210
2211 static bool
2212 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
2213 {
2214 switch (code)
2215 {
2216 case OPT_mmmx:
2217 if (value)
2218 {
2219 ix86_isa_flags |= OPTION_MASK_ISA_MMX_SET;
2220 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_SET;
2221 }
2222 else
2223 {
2224 ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
2225 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
2226 }
2227 return true;
2228
2229 case OPT_m3dnow:
2230 if (value)
2231 {
2232 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_SET;
2233 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_SET;
2234 }
2235 else
2236 {
2237 ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
2238 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
2239 }
2240 return true;
2241
2242 case OPT_m3dnowa:
2243 return false;
2244
2245 case OPT_msse:
2246 if (value)
2247 {
2248 ix86_isa_flags |= OPTION_MASK_ISA_SSE_SET;
2249 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_SET;
2250 }
2251 else
2252 {
2253 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
2254 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
2255 }
2256 return true;
2257
2258 case OPT_msse2:
2259 if (value)
2260 {
2261 ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET;
2262 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET;
2263 }
2264 else
2265 {
2266 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
2267 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
2268 }
2269 return true;
2270
2271 case OPT_msse3:
2272 if (value)
2273 {
2274 ix86_isa_flags |= OPTION_MASK_ISA_SSE3_SET;
2275 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_SET;
2276 }
2277 else
2278 {
2279 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
2280 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
2281 }
2282 return true;
2283
2284 case OPT_mssse3:
2285 if (value)
2286 {
2287 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3_SET;
2288 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_SET;
2289 }
2290 else
2291 {
2292 ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
2293 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
2294 }
2295 return true;
2296
2297 case OPT_msse4_1:
2298 if (value)
2299 {
2300 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1_SET;
2301 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_SET;
2302 }
2303 else
2304 {
2305 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
2306 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
2307 }
2308 return true;
2309
2310 case OPT_msse4_2:
2311 if (value)
2312 {
2313 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2_SET;
2314 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_SET;
2315 }
2316 else
2317 {
2318 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
2319 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
2320 }
2321 return true;
2322
2323 case OPT_mavx:
2324 if (value)
2325 {
2326 ix86_isa_flags |= OPTION_MASK_ISA_AVX_SET;
2327 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_SET;
2328 }
2329 else
2330 {
2331 ix86_isa_flags &= ~OPTION_MASK_ISA_AVX_UNSET;
2332 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_UNSET;
2333 }
2334 return true;
2335
2336 case OPT_mfma:
2337 if (value)
2338 {
2339 ix86_isa_flags |= OPTION_MASK_ISA_FMA_SET;
2340 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_SET;
2341 }
2342 else
2343 {
2344 ix86_isa_flags &= ~OPTION_MASK_ISA_FMA_UNSET;
2345 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_UNSET;
2346 }
2347 return true;
2348
2349 case OPT_msse4:
2350 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_SET;
2351 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_SET;
2352 return true;
2353
2354 case OPT_mno_sse4:
2355 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
2356 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
2357 return true;
2358
2359 case OPT_msse4a:
2360 if (value)
2361 {
2362 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A_SET;
2363 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_SET;
2364 }
2365 else
2366 {
2367 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
2368 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
2369 }
2370 return true;
2371
2372 case OPT_mfma4:
2373 if (value)
2374 {
2375 ix86_isa_flags |= OPTION_MASK_ISA_FMA4_SET;
2376 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA4_SET;
2377 }
2378 else
2379 {
2380 ix86_isa_flags &= ~OPTION_MASK_ISA_FMA4_UNSET;
2381 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA4_UNSET;
2382 }
2383 return true;
2384
2385 case OPT_mxop:
2386 if (value)
2387 {
2388 ix86_isa_flags |= OPTION_MASK_ISA_XOP_SET;
2389 ix86_isa_flags_explicit |= OPTION_MASK_ISA_XOP_SET;
2390 }
2391 else
2392 {
2393 ix86_isa_flags &= ~OPTION_MASK_ISA_XOP_UNSET;
2394 ix86_isa_flags_explicit |= OPTION_MASK_ISA_XOP_UNSET;
2395 }
2396 return true;
2397
2398 case OPT_mlwp:
2399 if (value)
2400 {
2401 ix86_isa_flags |= OPTION_MASK_ISA_LWP_SET;
2402 ix86_isa_flags_explicit |= OPTION_MASK_ISA_LWP_SET;
2403 }
2404 else
2405 {
2406 ix86_isa_flags &= ~OPTION_MASK_ISA_LWP_UNSET;
2407 ix86_isa_flags_explicit |= OPTION_MASK_ISA_LWP_UNSET;
2408 }
2409 return true;
2410
2411 case OPT_mabm:
2412 if (value)
2413 {
2414 ix86_isa_flags |= OPTION_MASK_ISA_ABM_SET;
2415 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_SET;
2416 }
2417 else
2418 {
2419 ix86_isa_flags &= ~OPTION_MASK_ISA_ABM_UNSET;
2420 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_UNSET;
2421 }
2422 return true;
2423
2424 case OPT_mpopcnt:
2425 if (value)
2426 {
2427 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT_SET;
2428 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_SET;
2429 }
2430 else
2431 {
2432 ix86_isa_flags &= ~OPTION_MASK_ISA_POPCNT_UNSET;
2433 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_UNSET;
2434 }
2435 return true;
2436
2437 case OPT_msahf:
2438 if (value)
2439 {
2440 ix86_isa_flags |= OPTION_MASK_ISA_SAHF_SET;
2441 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_SET;
2442 }
2443 else
2444 {
2445 ix86_isa_flags &= ~OPTION_MASK_ISA_SAHF_UNSET;
2446 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_UNSET;
2447 }
2448 return true;
2449
2450 case OPT_mcx16:
2451 if (value)
2452 {
2453 ix86_isa_flags |= OPTION_MASK_ISA_CX16_SET;
2454 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_SET;
2455 }
2456 else
2457 {
2458 ix86_isa_flags &= ~OPTION_MASK_ISA_CX16_UNSET;
2459 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_UNSET;
2460 }
2461 return true;
2462
2463 case OPT_mmovbe:
2464 if (value)
2465 {
2466 ix86_isa_flags |= OPTION_MASK_ISA_MOVBE_SET;
2467 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MOVBE_SET;
2468 }
2469 else
2470 {
2471 ix86_isa_flags &= ~OPTION_MASK_ISA_MOVBE_UNSET;
2472 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MOVBE_UNSET;
2473 }
2474 return true;
2475
2476 case OPT_mcrc32:
2477 if (value)
2478 {
2479 ix86_isa_flags |= OPTION_MASK_ISA_CRC32_SET;
2480 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CRC32_SET;
2481 }
2482 else
2483 {
2484 ix86_isa_flags &= ~OPTION_MASK_ISA_CRC32_UNSET;
2485 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CRC32_UNSET;
2486 }
2487 return true;
2488
2489 case OPT_maes:
2490 if (value)
2491 {
2492 ix86_isa_flags |= OPTION_MASK_ISA_AES_SET;
2493 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_SET;
2494 }
2495 else
2496 {
2497 ix86_isa_flags &= ~OPTION_MASK_ISA_AES_UNSET;
2498 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_UNSET;
2499 }
2500 return true;
2501
2502 case OPT_mpclmul:
2503 if (value)
2504 {
2505 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL_SET;
2506 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_SET;
2507 }
2508 else
2509 {
2510 ix86_isa_flags &= ~OPTION_MASK_ISA_PCLMUL_UNSET;
2511 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_UNSET;
2512 }
2513 return true;
2514
2515 case OPT_mfsgsbase:
2516 if (value)
2517 {
2518 ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE_SET;
2519 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FSGSBASE_SET;
2520 }
2521 else
2522 {
2523 ix86_isa_flags &= ~OPTION_MASK_ISA_FSGSBASE_UNSET;
2524 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FSGSBASE_UNSET;
2525 }
2526 return true;
2527
2528 case OPT_mrdrnd:
2529 if (value)
2530 {
2531 ix86_isa_flags |= OPTION_MASK_ISA_RDRND_SET;
2532 ix86_isa_flags_explicit |= OPTION_MASK_ISA_RDRND_SET;
2533 }
2534 else
2535 {
2536 ix86_isa_flags &= ~OPTION_MASK_ISA_RDRND_UNSET;
2537 ix86_isa_flags_explicit |= OPTION_MASK_ISA_RDRND_UNSET;
2538 }
2539 return true;
2540
2541 case OPT_mf16c:
2542 if (value)
2543 {
2544 ix86_isa_flags |= OPTION_MASK_ISA_F16C_SET;
2545 ix86_isa_flags_explicit |= OPTION_MASK_ISA_F16C_SET;
2546 }
2547 else
2548 {
2549 ix86_isa_flags &= ~OPTION_MASK_ISA_F16C_UNSET;
2550 ix86_isa_flags_explicit |= OPTION_MASK_ISA_F16C_UNSET;
2551 }
2552 return true;
2553
2554 default:
2555 return true;
2556 }
2557 }
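
/* Worked example (illustration only): with the SET/UNSET chains above,
   "-msse4.1 -mno-ssse3" first ORs in SSE4.1 plus everything it implies
   (SSSE3, SSE3, SSE2, SSE); the later -mno-ssse3 then clears SSSE3 and
   every ISA that requires it (SSE4.1, SSE4.2, AVX, ...), leaving only
   SSE, SSE2 and SSE3 of that group enabled.  The two updates performed
   by ix86_handle_option for that pair are:  */
#if 0
ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1_SET;	/* -msse4.1 */
ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;	/* -mno-ssse3 */
#endif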
2558 \f
2559 /* Return a string that documents the current -m options. The caller is
2560 responsible for freeing the string. */
2561
2562 static char *
2563 ix86_target_string (int isa, int flags, const char *arch, const char *tune,
2564 const char *fpmath, bool add_nl_p)
2565 {
2566 struct ix86_target_opts
2567 {
2568 const char *option; /* option string */
2569 int mask; /* isa mask options */
2570 };
2571
2572 /* This table is ordered so that options like -msse4.2 that imply
2573 preceding options are matched first. */
2574 static struct ix86_target_opts isa_opts[] =
2575 {
2576 { "-m64", OPTION_MASK_ISA_64BIT },
2577 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2578 { "-mfma", OPTION_MASK_ISA_FMA },
2579 { "-mxop", OPTION_MASK_ISA_XOP },
2580 { "-mlwp", OPTION_MASK_ISA_LWP },
2581 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2582 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2583 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2584 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2585 { "-msse3", OPTION_MASK_ISA_SSE3 },
2586 { "-msse2", OPTION_MASK_ISA_SSE2 },
2587 { "-msse", OPTION_MASK_ISA_SSE },
2588 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2589 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2590 { "-mmmx", OPTION_MASK_ISA_MMX },
2591 { "-mabm", OPTION_MASK_ISA_ABM },
2592 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2593 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2594 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2595 { "-maes", OPTION_MASK_ISA_AES },
2596 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2597 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
2598 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
2599 { "-mf16c", OPTION_MASK_ISA_F16C },
2600 };
2601
2602 /* Flag options. */
2603 static struct ix86_target_opts flag_opts[] =
2604 {
2605 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2606 { "-m80387", MASK_80387 },
2607 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2608 { "-malign-double", MASK_ALIGN_DOUBLE },
2609 { "-mcld", MASK_CLD },
2610 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2611 { "-mieee-fp", MASK_IEEE_FP },
2612 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2613 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2614 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2615 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2616 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2617 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2618 { "-mno-red-zone", MASK_NO_RED_ZONE },
2619 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2620 { "-mrecip", MASK_RECIP },
2621 { "-mrtd", MASK_RTD },
2622 { "-msseregparm", MASK_SSEREGPARM },
2623 { "-mstack-arg-probe", MASK_STACK_PROBE },
2624 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2625 };
2626
2627 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2628
2629 char isa_other[40];
2630 char target_other[40];
2631 unsigned num = 0;
2632 unsigned i, j;
2633 char *ret;
2634 char *ptr;
2635 size_t len;
2636 size_t line_len;
2637 size_t sep_len;
2638
2639 memset (opts, '\0', sizeof (opts));
2640
2641 /* Add -march= option. */
2642 if (arch)
2643 {
2644 opts[num][0] = "-march=";
2645 opts[num++][1] = arch;
2646 }
2647
2648 /* Add -mtune= option. */
2649 if (tune)
2650 {
2651 opts[num][0] = "-mtune=";
2652 opts[num++][1] = tune;
2653 }
2654
2655 /* Pick out the ISA options. */
2656 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2657 {
2658 if ((isa & isa_opts[i].mask) != 0)
2659 {
2660 opts[num++][0] = isa_opts[i].option;
2661 isa &= ~ isa_opts[i].mask;
2662 }
2663 }
2664
2665 if (isa && add_nl_p)
2666 {
2667 opts[num++][0] = isa_other;
2668 sprintf (isa_other, "(other isa: %#x)", isa);
2669 }
2670
2671 /* Add flag options. */
2672 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2673 {
2674 if ((flags & flag_opts[i].mask) != 0)
2675 {
2676 opts[num++][0] = flag_opts[i].option;
2677 flags &= ~ flag_opts[i].mask;
2678 }
2679 }
2680
2681 if (flags && add_nl_p)
2682 {
2683 opts[num++][0] = target_other;
2684 sprintf (target_other, "(other flags: %#x)", flags);
2685 }
2686
2687 /* Add -fpmath= option. */
2688 if (fpmath)
2689 {
2690 opts[num][0] = "-mfpmath=";
2691 opts[num++][1] = fpmath;
2692 }
2693
2694 /* Any options? */
2695 if (num == 0)
2696 return NULL;
2697
2698 gcc_assert (num < ARRAY_SIZE (opts));
2699
2700 /* Size the string. */
2701 len = 0;
2702 sep_len = (add_nl_p) ? 3 : 1;
2703 for (i = 0; i < num; i++)
2704 {
2705 len += sep_len;
2706 for (j = 0; j < 2; j++)
2707 if (opts[i][j])
2708 len += strlen (opts[i][j]);
2709 }
2710
2711 /* Build the string. */
2712 ret = ptr = (char *) xmalloc (len);
2713 line_len = 0;
2714
2715 for (i = 0; i < num; i++)
2716 {
2717 size_t len2[2];
2718
2719 for (j = 0; j < 2; j++)
2720 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2721
2722 if (i != 0)
2723 {
2724 *ptr++ = ' ';
2725 line_len++;
2726
2727 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2728 {
2729 *ptr++ = '\\';
2730 *ptr++ = '\n';
2731 line_len = 0;
2732 }
2733 }
2734
2735 for (j = 0; j < 2; j++)
2736 if (opts[i][j])
2737 {
2738 memcpy (ptr, opts[i][j], len2[j]);
2739 ptr += len2[j];
2740 line_len += len2[j];
2741 }
2742 }
2743
2744 *ptr = '\0';
2745 gcc_assert (ret + len >= ptr);
2746
2747 return ret;
2748 }
2749
2750 /* Return TRUE if software prefetching is beneficial for the
2751 given CPU. */
2752
2753 static bool
2754 software_prefetching_beneficial_p (void)
2755 {
2756 switch (ix86_tune)
2757 {
2758 case PROCESSOR_GEODE:
2759 case PROCESSOR_K6:
2760 case PROCESSOR_ATHLON:
2761 case PROCESSOR_K8:
2762 case PROCESSOR_AMDFAM10:
2763 return true;
2764
2765 default:
2766 return false;
2767 }
2768 }
2769
2770 /* Function that is callable from the debugger to print the current
2771 options. */
2772 void
2773 ix86_debug_options (void)
2774 {
2775 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2776 ix86_arch_string, ix86_tune_string,
2777 ix86_fpmath_string, true);
2778
2779 if (opts)
2780 {
2781 fprintf (stderr, "%s\n\n", opts);
2782 free (opts);
2783 }
2784 else
2785 fputs ("<no options>\n\n", stderr);
2786
2787 return;
2788 }
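
/* Usage note (illustration only): since the function above exists for the
   debugger, a typical way to reach it is something like
   "(gdb) call ix86_debug_options ()" while stopped inside cc1.  */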
2789 \f
2790 /* Sometimes certain combinations of command options do not make
2791 sense on a particular target machine. You can define a macro
2792 `OVERRIDE_OPTIONS' to take account of this. This macro, if
2793 defined, is executed once just after all the command options have
2794 been parsed.
2795
2796 Don't use this macro to turn on various extra optimizations for
2797 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
2798
2799 void
2800 override_options (bool main_args_p)
2801 {
2802 int i;
2803 unsigned int ix86_arch_mask, ix86_tune_mask;
2804 const bool ix86_tune_specified = (ix86_tune_string != NULL);
2805 const char *prefix;
2806 const char *suffix;
2807 const char *sw;
2808
2809 /* Comes from final.c -- no real reason to change it. */
2810 #define MAX_CODE_ALIGN 16
2811
2812 enum pta_flags
2813 {
2814 PTA_SSE = 1 << 0,
2815 PTA_SSE2 = 1 << 1,
2816 PTA_SSE3 = 1 << 2,
2817 PTA_MMX = 1 << 3,
2818 PTA_PREFETCH_SSE = 1 << 4,
2819 PTA_3DNOW = 1 << 5,
2820 PTA_3DNOW_A = 1 << 6,
2821 PTA_64BIT = 1 << 7,
2822 PTA_SSSE3 = 1 << 8,
2823 PTA_CX16 = 1 << 9,
2824 PTA_POPCNT = 1 << 10,
2825 PTA_ABM = 1 << 11,
2826 PTA_SSE4A = 1 << 12,
2827 PTA_NO_SAHF = 1 << 13,
2828 PTA_SSE4_1 = 1 << 14,
2829 PTA_SSE4_2 = 1 << 15,
2830 PTA_AES = 1 << 16,
2831 PTA_PCLMUL = 1 << 17,
2832 PTA_AVX = 1 << 18,
2833 PTA_FMA = 1 << 19,
2834 PTA_MOVBE = 1 << 20,
2835 PTA_FMA4 = 1 << 21,
2836 PTA_XOP = 1 << 22,
2837 PTA_LWP = 1 << 23,
2838 PTA_FSGSBASE = 1 << 24,
2839 PTA_RDRND = 1 << 25,
2840 PTA_F16C = 1 << 26
2841 };
2842
2843 static struct pta
2844 {
2845 const char *const name; /* processor name or nickname. */
2846 const enum processor_type processor;
2847 const enum attr_cpu schedule;
2848 const unsigned /*enum pta_flags*/ flags;
2849 }
2850 const processor_alias_table[] =
2851 {
2852 {"i386", PROCESSOR_I386, CPU_NONE, 0},
2853 {"i486", PROCESSOR_I486, CPU_NONE, 0},
2854 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2855 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2856 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
2857 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
2858 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2859 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2860 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_SSE},
2861 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2862 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2863 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX},
2864 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2865 PTA_MMX | PTA_SSE},
2866 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2867 PTA_MMX | PTA_SSE},
2868 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2869 PTA_MMX | PTA_SSE | PTA_SSE2},
2870 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
2871 PTA_MMX |PTA_SSE | PTA_SSE2},
2872 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
2873 PTA_MMX | PTA_SSE | PTA_SSE2},
2874 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
2875 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
2876 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
2877 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2878 | PTA_CX16 | PTA_NO_SAHF},
2879 {"core2", PROCESSOR_CORE2, CPU_CORE2,
2880 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2881 | PTA_SSSE3 | PTA_CX16},
2882 {"atom", PROCESSOR_ATOM, CPU_ATOM,
2883 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2884 | PTA_SSSE3 | PTA_CX16 | PTA_MOVBE},
2885 {"geode", PROCESSOR_GEODE, CPU_GEODE,
2886 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A |PTA_PREFETCH_SSE},
2887 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
2888 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2889 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2890 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
2891 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2892 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
2893 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2894 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
2895 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2896 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
2897 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2898 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
2899 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2900 {"x86-64", PROCESSOR_K8, CPU_K8,
2901 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF},
2902 {"k8", PROCESSOR_K8, CPU_K8,
2903 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2904 | PTA_SSE2 | PTA_NO_SAHF},
2905 {"k8-sse3", PROCESSOR_K8, CPU_K8,
2906 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2907 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2908 {"opteron", PROCESSOR_K8, CPU_K8,
2909 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2910 | PTA_SSE2 | PTA_NO_SAHF},
2911 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
2912 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2913 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2914 {"athlon64", PROCESSOR_K8, CPU_K8,
2915 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2916 | PTA_SSE2 | PTA_NO_SAHF},
2917 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
2918 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2919 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2920 {"athlon-fx", PROCESSOR_K8, CPU_K8,
2921 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2922 | PTA_SSE2 | PTA_NO_SAHF},
2923 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
2924 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2925 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
2926 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
2927 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2928 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
2929 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
2930 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2931 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM
2932 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AES
2933 | PTA_PCLMUL | PTA_AVX | PTA_FMA4 | PTA_XOP | PTA_LWP},
2934 {"generic32", PROCESSOR_GENERIC32, CPU_PENTIUMPRO,
2935 0 /* flags are only used for -march switch. */ },
2936 {"generic64", PROCESSOR_GENERIC64, CPU_GENERIC64,
2937 PTA_64BIT /* flags are only used for -march switch. */ },
2938 };
2939
2940 int const pta_size = ARRAY_SIZE (processor_alias_table);
2941
2942 /* Set up prefix/suffix so the error messages refer to either the command
2943 line argument, or the attribute(target). */
2944 if (main_args_p)
2945 {
2946 prefix = "-m";
2947 suffix = "";
2948 sw = "switch";
2949 }
2950 else
2951 {
2952 prefix = "option(\"";
2953 suffix = "\")";
2954 sw = "attribute";
2955 }
2956
2957 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2958 SUBTARGET_OVERRIDE_OPTIONS;
2959 #endif
2960
2961 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
2962 SUBSUBTARGET_OVERRIDE_OPTIONS;
2963 #endif
2964
2965 /* -fPIC is the default for 64-bit Mach-O (Darwin x86_64). */
2966 if (TARGET_MACHO && TARGET_64BIT)
2967 flag_pic = 2;
2968
2969 /* Set the default values for switches whose default depends on TARGET_64BIT
2970 in case they weren't overwritten by command line options. */
2971 if (TARGET_64BIT)
2972 {
2973 if (flag_zee == 2)
2974 flag_zee = 1;
2975 /* Mach-O doesn't support omitting the frame pointer for now. */
2976 if (flag_omit_frame_pointer == 2)
2977 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
2978 if (flag_asynchronous_unwind_tables == 2)
2979 flag_asynchronous_unwind_tables = 1;
2980 if (flag_pcc_struct_return == 2)
2981 flag_pcc_struct_return = 0;
2982 }
2983 else
2984 {
2985 if (flag_zee == 2)
2986 flag_zee = 0;
2987 if (flag_omit_frame_pointer == 2)
2988 flag_omit_frame_pointer = 0;
2989 if (flag_asynchronous_unwind_tables == 2)
2990 flag_asynchronous_unwind_tables = 0;
2991 if (flag_pcc_struct_return == 2)
2992 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
2993 }
2994
2995 /* Need to check -mtune=generic first. */
2996 if (ix86_tune_string)
2997 {
2998 if (!strcmp (ix86_tune_string, "generic")
2999 || !strcmp (ix86_tune_string, "i686")
3000 /* As special support for cross compilers we read -mtune=native
3001 as -mtune=generic. With native compilers we won't see the
3002 -mtune=native, as it was changed by the driver. */
3003 || !strcmp (ix86_tune_string, "native"))
3004 {
3005 if (TARGET_64BIT)
3006 ix86_tune_string = "generic64";
3007 else
3008 ix86_tune_string = "generic32";
3009 }
3010 /* If this call is for setting the option attribute, allow the
3011 generic32/generic64 that was previously set. */
3012 else if (!main_args_p
3013 && (!strcmp (ix86_tune_string, "generic32")
3014 || !strcmp (ix86_tune_string, "generic64")))
3015 ;
3016 else if (!strncmp (ix86_tune_string, "generic", 7))
3017 error ("bad value (%s) for %stune=%s %s",
3018 ix86_tune_string, prefix, suffix, sw);
3019 else if (!strcmp (ix86_tune_string, "x86-64"))
3020 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated. Use "
3021 "%stune=k8%s or %stune=generic%s instead as appropriate.",
3022 prefix, suffix, prefix, suffix, prefix, suffix);
3023 }
3024 else
3025 {
3026 if (ix86_arch_string)
3027 ix86_tune_string = ix86_arch_string;
3028 if (!ix86_tune_string)
3029 {
3030 ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
3031 ix86_tune_defaulted = 1;
3032 }
3033
3034 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
3035 need to use a sensible tune option. */
3036 if (!strcmp (ix86_tune_string, "generic")
3037 || !strcmp (ix86_tune_string, "x86-64")
3038 || !strcmp (ix86_tune_string, "i686"))
3039 {
3040 if (TARGET_64BIT)
3041 ix86_tune_string = "generic64";
3042 else
3043 ix86_tune_string = "generic32";
3044 }
3045 }
3046
3047 if (ix86_stringop_string)
3048 {
3049 if (!strcmp (ix86_stringop_string, "rep_byte"))
3050 stringop_alg = rep_prefix_1_byte;
3051 else if (!strcmp (ix86_stringop_string, "libcall"))
3052 stringop_alg = libcall;
3053 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
3054 stringop_alg = rep_prefix_4_byte;
3055 else if (!strcmp (ix86_stringop_string, "rep_8byte")
3056 && TARGET_64BIT)
3057 /* rep; movq isn't available in 32-bit code. */
3058 stringop_alg = rep_prefix_8_byte;
3059 else if (!strcmp (ix86_stringop_string, "byte_loop"))
3060 stringop_alg = loop_1_byte;
3061 else if (!strcmp (ix86_stringop_string, "loop"))
3062 stringop_alg = loop;
3063 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
3064 stringop_alg = unrolled_loop;
3065 else
3066 error ("bad value (%s) for %sstringop-strategy=%s %s",
3067 ix86_stringop_string, prefix, suffix, sw);
3068 }
3069
3070 if (!ix86_arch_string)
3071 ix86_arch_string = TARGET_64BIT ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3072 else
3073 ix86_arch_specified = 1;
3074
3075 /* Validate -mabi= value. */
3076 if (ix86_abi_string)
3077 {
3078 if (strcmp (ix86_abi_string, "sysv") == 0)
3079 ix86_abi = SYSV_ABI;
3080 else if (strcmp (ix86_abi_string, "ms") == 0)
3081 ix86_abi = MS_ABI;
3082 else
3083 error ("unknown ABI (%s) for %sabi=%s %s",
3084 ix86_abi_string, prefix, suffix, sw);
3085 }
3086 else
3087 ix86_abi = DEFAULT_ABI;
3088
3089 if (ix86_cmodel_string != 0)
3090 {
3091 if (!strcmp (ix86_cmodel_string, "small"))
3092 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
3093 else if (!strcmp (ix86_cmodel_string, "medium"))
3094 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
3095 else if (!strcmp (ix86_cmodel_string, "large"))
3096 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
3097 else if (flag_pic)
3098 error ("code model %s does not support PIC mode", ix86_cmodel_string);
3099 else if (!strcmp (ix86_cmodel_string, "32"))
3100 ix86_cmodel = CM_32;
3101 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
3102 ix86_cmodel = CM_KERNEL;
3103 else
3104 error ("bad value (%s) for %scmodel=%s %s",
3105 ix86_cmodel_string, prefix, suffix, sw);
3106 }
3107 else
3108 {
3109 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3110 use of rip-relative addressing. This eliminates fixups that
3111 would otherwise be needed if this object is to be placed in a
3112 DLL, and is essentially just as efficient as direct addressing. */
3113 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
3114 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
3115 else if (TARGET_64BIT)
3116 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
3117 else
3118 ix86_cmodel = CM_32;
3119 }
3120 if (ix86_asm_string != 0)
3121 {
3122 if (! TARGET_MACHO
3123 && !strcmp (ix86_asm_string, "intel"))
3124 ix86_asm_dialect = ASM_INTEL;
3125 else if (!strcmp (ix86_asm_string, "att"))
3126 ix86_asm_dialect = ASM_ATT;
3127 else
3128 error ("bad value (%s) for %sasm=%s %s",
3129 ix86_asm_string, prefix, suffix, sw);
3130 }
3131 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
3132 error ("code model %qs not supported in the %s bit mode",
3133 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
3134 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3135 sorry ("%i-bit mode not compiled in",
3136 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3137
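/* Find the CPU given by -march= (or its default) in the alias table;
   this selects the architecture, the default tuning, and the ISA
   extensions implied by that CPU.  */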
3138 for (i = 0; i < pta_size; i++)
3139 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
3140 {
3141 ix86_schedule = processor_alias_table[i].schedule;
3142 ix86_arch = processor_alias_table[i].processor;
3143 /* Default cpu tuning to the architecture. */
3144 ix86_tune = ix86_arch;
3145
3146 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
3147 error ("CPU you selected does not support x86-64 "
3148 "instruction set");
3149
3150 if (processor_alias_table[i].flags & PTA_MMX
3151 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3152 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3153 if (processor_alias_table[i].flags & PTA_3DNOW
3154 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3155 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3156 if (processor_alias_table[i].flags & PTA_3DNOW_A
3157 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3158 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3159 if (processor_alias_table[i].flags & PTA_SSE
3160 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3161 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3162 if (processor_alias_table[i].flags & PTA_SSE2
3163 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3164 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3165 if (processor_alias_table[i].flags & PTA_SSE3
3166 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3167 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3168 if (processor_alias_table[i].flags & PTA_SSSE3
3169 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3170 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3171 if (processor_alias_table[i].flags & PTA_SSE4_1
3172 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3173 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3174 if (processor_alias_table[i].flags & PTA_SSE4_2
3175 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3176 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3177 if (processor_alias_table[i].flags & PTA_AVX
3178 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3179 ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3180 if (processor_alias_table[i].flags & PTA_FMA
3181 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3182 ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3183 if (processor_alias_table[i].flags & PTA_SSE4A
3184 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3185 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3186 if (processor_alias_table[i].flags & PTA_FMA4
3187 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3188 ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3189 if (processor_alias_table[i].flags & PTA_XOP
3190 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3191 ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3192 if (processor_alias_table[i].flags & PTA_LWP
3193 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3194 ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3195 if (processor_alias_table[i].flags & PTA_ABM
3196 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3197 ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3198 if (processor_alias_table[i].flags & PTA_CX16
3199 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3200 ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3201 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3202 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3203 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3204 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
3205 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3206 ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3207 if (processor_alias_table[i].flags & PTA_MOVBE
3208 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3209 ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3210 if (processor_alias_table[i].flags & PTA_AES
3211 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3212 ix86_isa_flags |= OPTION_MASK_ISA_AES;
3213 if (processor_alias_table[i].flags & PTA_PCLMUL
3214 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3215 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3216 if (processor_alias_table[i].flags & PTA_FSGSBASE
3217 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3218 ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3219 if (processor_alias_table[i].flags & PTA_RDRND
3220 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3221 ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3222 if (processor_alias_table[i].flags & PTA_F16C
3223 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3224 ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3225 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3226 x86_prefetch_sse = true;
3227
3228 break;
3229 }
3230
3231 if (!strcmp (ix86_arch_string, "generic"))
3232 error ("generic CPU can be used only for %stune=%s %s",
3233 prefix, suffix, sw);
3234 else if (!strncmp (ix86_arch_string, "generic", 7) || i == pta_size)
3235 error ("bad value (%s) for %sarch=%s %s",
3236 ix86_arch_string, prefix, suffix, sw);
3237
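/* Derive the boolean per-architecture feature flags from the selected
   architecture.  */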
3238 ix86_arch_mask = 1u << ix86_arch;
3239 for (i = 0; i < X86_ARCH_LAST; ++i)
3240 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3241
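/* Find the CPU given by -mtune= (or its default) in the alias table to
   select the scheduling model and tuning target.  */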
3242 for (i = 0; i < pta_size; i++)
3243 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
3244 {
3245 ix86_schedule = processor_alias_table[i].schedule;
3246 ix86_tune = processor_alias_table[i].processor;
3247 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
3248 {
3249 if (ix86_tune_defaulted)
3250 {
3251 ix86_tune_string = "x86-64";
3252 for (i = 0; i < pta_size; i++)
3253 if (! strcmp (ix86_tune_string,
3254 processor_alias_table[i].name))
3255 break;
3256 ix86_schedule = processor_alias_table[i].schedule;
3257 ix86_tune = processor_alias_table[i].processor;
3258 }
3259 else
3260 error ("CPU you selected does not support x86-64 "
3261 "instruction set");
3262 }
3263 /* Intel CPUs have always interpreted SSE prefetch instructions as
3264 NOPs; so, we can enable SSE prefetch instructions even when
3265 -mtune (rather than -march) points us to a processor that has them.
3266 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3267 higher processors. */
3268 if (TARGET_CMOVE
3269 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3270 x86_prefetch_sse = true;
3271 break;
3272 }
3273
3274 if (ix86_tune_specified && i == pta_size)
3275 error ("bad value (%s) for %stune=%s %s",
3276 ix86_tune_string, prefix, suffix, sw);
3277
3278 ix86_tune_mask = 1u << ix86_tune;
3279 for (i = 0; i < X86_TUNE_LAST; ++i)
3280 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3281
3282 if (optimize_size)
3283 ix86_cost = &ix86_size_cost;
3284 else
3285 ix86_cost = processor_target_table[ix86_tune].cost;
3286
3287 /* Arrange to set up i386_stack_locals for all functions. */
3288 init_machine_status = ix86_init_machine_status;
3289
3290 /* Validate -mregparm= value. */
3291 if (ix86_regparm_string)
3292 {
3293 if (TARGET_64BIT)
3294 warning (0, "%sregparm%s is ignored in 64-bit mode", prefix, suffix);
3295 i = atoi (ix86_regparm_string);
3296 if (i < 0 || i > REGPARM_MAX)
3297 error ("%sregparm=%d%s is not between 0 and %d",
3298 prefix, i, suffix, REGPARM_MAX);
3299 else
3300 ix86_regparm = i;
3301 }
3302 if (TARGET_64BIT)
3303 ix86_regparm = REGPARM_MAX;
3304
3305 /* If the user has provided any of the -malign-* options,
3306 warn and use that value only if -falign-* is not set.
3307 Remove this code in GCC 3.2 or later. */
3308 if (ix86_align_loops_string)
3309 {
3310 warning (0, "%salign-loops%s is obsolete, use -falign-loops%s",
3311 prefix, suffix, suffix);
3312 if (align_loops == 0)
3313 {
3314 i = atoi (ix86_align_loops_string);
3315 if (i < 0 || i > MAX_CODE_ALIGN)
3316 error ("%salign-loops=%d%s is not between 0 and %d",
3317 prefix, i, suffix, MAX_CODE_ALIGN);
3318 else
3319 align_loops = 1 << i;
3320 }
3321 }
3322
3323 if (ix86_align_jumps_string)
3324 {
3325 warning (0, "%salign-jumps%s is obsolete, use -falign-jumps%s",
3326 prefix, suffix, suffix);
3327 if (align_jumps == 0)
3328 {
3329 i = atoi (ix86_align_jumps_string);
3330 if (i < 0 || i > MAX_CODE_ALIGN)
3331 error ("%salign-jumps=%d%s is not between 0 and %d",
3332 prefix, i, suffix, MAX_CODE_ALIGN);
3333 else
3334 align_jumps = 1 << i;
3335 }
3336 }
3337
3338 if (ix86_align_funcs_string)
3339 {
3340 warning (0, "%salign-functions%s is obsolete, use -falign-functions%s",
3341 prefix, suffix, suffix);
3342 if (align_functions == 0)
3343 {
3344 i = atoi (ix86_align_funcs_string);
3345 if (i < 0 || i > MAX_CODE_ALIGN)
3346 error ("%salign-functions=%d%s is not between 0 and %d",
3347 prefix, i, suffix, MAX_CODE_ALIGN);
3348 else
3349 align_functions = 1 << i;
3350 }
3351 }
3352
3353 /* Default align_* from the processor table. */
3354 if (align_loops == 0)
3355 {
3356 align_loops = processor_target_table[ix86_tune].align_loop;
3357 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3358 }
3359 if (align_jumps == 0)
3360 {
3361 align_jumps = processor_target_table[ix86_tune].align_jump;
3362 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3363 }
3364 if (align_functions == 0)
3365 {
3366 align_functions = processor_target_table[ix86_tune].align_func;
3367 }
3368
3369 /* Validate -mbranch-cost= value, or provide default. */
3370 ix86_branch_cost = ix86_cost->branch_cost;
3371 if (ix86_branch_cost_string)
3372 {
3373 i = atoi (ix86_branch_cost_string);
3374 if (i < 0 || i > 5)
3375 error ("%sbranch-cost=%d%s is not between 0 and 5", prefix, i, suffix);
3376 else
3377 ix86_branch_cost = i;
3378 }
3379 if (ix86_section_threshold_string)
3380 {
3381 i = atoi (ix86_section_threshold_string);
3382 if (i < 0)
3383 error ("%slarge-data-threshold=%d%s is negative", prefix, i, suffix);
3384 else
3385 ix86_section_threshold = i;
3386 }
3387
3388 if (ix86_tls_dialect_string)
3389 {
3390 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
3391 ix86_tls_dialect = TLS_DIALECT_GNU;
3392 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
3393 ix86_tls_dialect = TLS_DIALECT_GNU2;
3394 else
3395 error ("bad value (%s) for %stls-dialect=%s %s",
3396 ix86_tls_dialect_string, prefix, suffix, sw);
3397 }
3398
3399 if (ix87_precision_string)
3400 {
3401 i = atoi (ix87_precision_string);
3402 if (i != 32 && i != 64 && i != 80)
3403 error ("pc%d is not a valid precision setting (32, 64 or 80)", i);
3404 }
3405
3406 if (TARGET_64BIT)
3407 {
3408 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
3409
3410 /* Enable by default the SSE and MMX builtins. Do allow the user to
3411 explicitly disable any of these. In particular, disabling SSE and
3412 MMX for kernel code is extremely useful. */
3413 if (!ix86_arch_specified)
3414 ix86_isa_flags
3415 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3416 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
3417
3418 if (TARGET_RTD)
3419 warning (0, "%srtd%s is ignored in 64-bit mode", prefix, suffix);
3420 }
3421 else
3422 {
3423 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
3424
3425 if (!ix86_arch_specified)
3426 ix86_isa_flags
3427 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
3428
3429 /* The i386 ABI does not specify a red zone.  It still makes sense to use
3430 it when the programmer takes care to keep the stack from being destroyed. */
3431 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
3432 target_flags |= MASK_NO_RED_ZONE;
3433 }
3434
3435 /* Keep nonleaf frame pointers. */
3436 if (flag_omit_frame_pointer)
3437 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3438 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
3439 flag_omit_frame_pointer = 1;
3440
3441 /* If we're doing fast math, we don't care about comparison order
3442 wrt NaNs. This lets us use a shorter comparison sequence. */
3443 if (flag_finite_math_only)
3444 target_flags &= ~MASK_IEEE_FP;
3445
3446 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3447 since the insns won't need emulation. */
3448 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
3449 target_flags &= ~MASK_NO_FANCY_MATH_387;
3450
3451 /* Likewise, if the target doesn't have a 387, or we've specified
3452 software floating point, don't use 387 inline intrinsics. */
3453 if (!TARGET_80387)
3454 target_flags |= MASK_NO_FANCY_MATH_387;
3455
3456 /* Turn on MMX builtins for -msse. */
3457 if (TARGET_SSE)
3458 {
3459 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
3460 x86_prefetch_sse = true;
3461 }
3462
3463 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
3464 if (TARGET_SSE4_2 || TARGET_ABM)
3465 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;
3466
3467 /* Validate -mpreferred-stack-boundary= value or default it to
3468 PREFERRED_STACK_BOUNDARY_DEFAULT. */
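/* The option value is a power-of-two exponent in bytes; e.g. a value of 4
   yields (1 << 4) * BITS_PER_UNIT = 128 bits, i.e. 16-byte alignment.  */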
3469 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3470 if (ix86_preferred_stack_boundary_string)
3471 {
3472 i = atoi (ix86_preferred_stack_boundary_string);
3473 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3474 error ("%spreferred-stack-boundary=%d%s is not between %d and 12",
3475 prefix, i, suffix, TARGET_64BIT ? 4 : 2);
3476 else
3477 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
3478 }
3479
3480 /* Set the default value for -mstackrealign. */
3481 if (ix86_force_align_arg_pointer == -1)
3482 ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3483
3484 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3485
3486 /* Validate -mincoming-stack-boundary= value or default it to
3487 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3488 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3489 if (ix86_incoming_stack_boundary_string)
3490 {
3491 i = atoi (ix86_incoming_stack_boundary_string);
3492 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3493 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3494 i, TARGET_64BIT ? 4 : 2);
3495 else
3496 {
3497 ix86_user_incoming_stack_boundary = (1 << i) * BITS_PER_UNIT;
3498 ix86_incoming_stack_boundary
3499 = ix86_user_incoming_stack_boundary;
3500 }
3501 }
3502
3503 /* Accept -msseregparm only if at least SSE support is enabled. */
3504 if (TARGET_SSEREGPARM
3505 && ! TARGET_SSE)
3506 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
3507
3508 ix86_fpmath = TARGET_FPMATH_DEFAULT;
3509 if (ix86_fpmath_string != 0)
3510 {
3511 if (! strcmp (ix86_fpmath_string, "387"))
3512 ix86_fpmath = FPMATH_387;
3513 else if (! strcmp (ix86_fpmath_string, "sse"))
3514 {
3515 if (!TARGET_SSE)
3516 {
3517 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3518 ix86_fpmath = FPMATH_387;
3519 }
3520 else
3521 ix86_fpmath = FPMATH_SSE;
3522 }
3523 else if (! strcmp (ix86_fpmath_string, "387,sse")
3524 || ! strcmp (ix86_fpmath_string, "387+sse")
3525 || ! strcmp (ix86_fpmath_string, "sse,387")
3526 || ! strcmp (ix86_fpmath_string, "sse+387")
3527 || ! strcmp (ix86_fpmath_string, "both"))
3528 {
3529 if (!TARGET_SSE)
3530 {
3531 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3532 ix86_fpmath = FPMATH_387;
3533 }
3534 else if (!TARGET_80387)
3535 {
3536 warning (0, "387 instruction set disabled, using SSE arithmetics");
3537 ix86_fpmath = FPMATH_SSE;
3538 }
3539 else
3540 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
3541 }
3542 else
3543 error ("bad value (%s) for %sfpmath=%s %s",
3544 ix86_fpmath_string, prefix, suffix, sw);
3545 }
3546
3547 /* If the i387 is disabled, then do not return values in it. */
3548 if (!TARGET_80387)
3549 target_flags &= ~MASK_FLOAT_RETURNS;
3550
3551 /* Use external vectorized library in vectorizing intrinsics. */
3552 if (ix86_veclibabi_string)
3553 {
3554 if (strcmp (ix86_veclibabi_string, "svml") == 0)
3555 ix86_veclib_handler = ix86_veclibabi_svml;
3556 else if (strcmp (ix86_veclibabi_string, "acml") == 0)
3557 ix86_veclib_handler = ix86_veclibabi_acml;
3558 else
3559 error ("unknown vectorization library ABI type (%s) for "
3560 "%sveclibabi=%s %s", ix86_veclibabi_string,
3561 prefix, suffix, sw);
3562 }
3563
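/* Enable -maccumulate-outgoing-args by default when the selected tuning
   prefers it, unless the user set it explicitly or we optimize for size.  */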
3564 if ((x86_accumulate_outgoing_args & ix86_tune_mask)
3565 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3566 && !optimize_size)
3567 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3568
3569 /* ??? Unwind info is not correct around the CFG unless either a frame
3570 pointer is present or M_A_O_A is set. Fixing this requires rewriting
3571 unwind info generation to be aware of the CFG and propagating states
3572 around edges. */
3573 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
3574 || flag_exceptions || flag_non_call_exceptions)
3575 && flag_omit_frame_pointer
3576 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3577 {
3578 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3579 warning (0, "unwind tables currently require either a frame pointer "
3580 "or %saccumulate-outgoing-args%s for correctness",
3581 prefix, suffix);
3582 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3583 }
3584
3585 /* If stack probes are required, the space used for large function
3586 arguments on the stack must also be probed, so enable
3587 -maccumulate-outgoing-args so this happens in the prologue. */
3588 if (TARGET_STACK_PROBE
3589 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3590 {
3591 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3592 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
3593 "for correctness", prefix, suffix);
3594 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3595 }
3596
3597 /* For sane SSE instruction set generation we need the fcomi instruction.
3598 It is safe to enable all CMOVE instructions. */
3599 if (TARGET_SSE)
3600 TARGET_CMOVE = 1;
3601
3602 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
3603 {
3604 char *p;
3605 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
3606 p = strchr (internal_label_prefix, 'X');
3607 internal_label_prefix_len = p - internal_label_prefix;
3608 *p = '\0';
3609 }
3610
3611 /* When the scheduling description is not available, disable the scheduler
3612 pass so it won't slow down the compilation and make x87 code slower. */
3613 if (!TARGET_SCHEDULE)
3614 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
3615
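/* Seed the prefetch and cache-size --param defaults from the active cost
   table unless the user set them explicitly.  */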
3616 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
3617 set_param_value ("simultaneous-prefetches",
3618 ix86_cost->simultaneous_prefetches);
3619 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
3620 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
3621 if (!PARAM_SET_P (PARAM_L1_CACHE_SIZE))
3622 set_param_value ("l1-cache-size", ix86_cost->l1_cache_size);
3623 if (!PARAM_SET_P (PARAM_L2_CACHE_SIZE))
3624 set_param_value ("l2-cache-size", ix86_cost->l2_cache_size);
3625
3626 /* Enable software prefetching at -O3 for CPUs where prefetching is helpful. */
3627 if (flag_prefetch_loop_arrays < 0
3628 && HAVE_prefetch
3629 && optimize >= 3
3630 && software_prefetching_beneficial_p ())
3631 flag_prefetch_loop_arrays = 1;
3632
3633 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
3634 can be optimized to ap = __builtin_next_arg (0). */
3635 if (!TARGET_64BIT)
3636 targetm.expand_builtin_va_start = NULL;
3637
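/* Select the word-size-specific RTL generator functions (leave, pop, add,
   sub, monitor, stack allocation and probing) used by the rest of the
   back end.  */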
3638 if (TARGET_64BIT)
3639 {
3640 ix86_gen_leave = gen_leave_rex64;
3641 ix86_gen_pop1 = gen_popdi1;
3642 ix86_gen_add3 = gen_adddi3;
3643 ix86_gen_sub3 = gen_subdi3;
3644 ix86_gen_sub3_carry = gen_subdi3_carry;
3645 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
3646 ix86_gen_monitor = gen_sse3_monitor64;
3647 ix86_gen_andsp = gen_anddi3;
3648 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_64;
3649 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
3650 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
3651 }
3652 else
3653 {
3654 ix86_gen_leave = gen_leave;
3655 ix86_gen_pop1 = gen_popsi1;
3656 ix86_gen_add3 = gen_addsi3;
3657 ix86_gen_sub3 = gen_subsi3;
3658 ix86_gen_sub3_carry = gen_subsi3_carry;
3659 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
3660 ix86_gen_monitor = gen_sse3_monitor;
3661 ix86_gen_andsp = gen_andsi3;
3662 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_32;
3663 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
3664 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
3665 }
3666
3667 #ifdef USE_IX86_CLD
3668 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
3669 if (!TARGET_64BIT)
3670 target_flags |= MASK_CLD & ~target_flags_explicit;
3671 #endif
3672
3673 /* Save the initial options in case the user uses function-specific options. */
3674 if (main_args_p)
3675 target_option_default_node = target_option_current_node
3676 = build_target_option_node ();
3677 }
3678
3679 /* Update register usage after having seen the compiler flags. */
3680
3681 void
3682 ix86_conditional_register_usage (void)
3683 {
3684 int i;
3685 unsigned int j;
3686
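/* Entries greater than 1 in fixed_regs/call_used_regs are conditional:
   2 means the register is fixed (or call-used) only in 32-bit mode, 3 only
   in 64-bit mode.  Resolve them to plain 0/1 now that TARGET_64BIT is known.  */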
3687 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3688 {
3689 if (fixed_regs[i] > 1)
3690 fixed_regs[i] = (fixed_regs[i] == (TARGET_64BIT ? 3 : 2));
3691 if (call_used_regs[i] > 1)
3692 call_used_regs[i] = (call_used_regs[i] == (TARGET_64BIT ? 3 : 2));
3693 }
3694
3695 /* The PIC register, if it exists, is fixed. */
3696 j = PIC_OFFSET_TABLE_REGNUM;
3697 if (j != INVALID_REGNUM)
3698 fixed_regs[j] = call_used_regs[j] = 1;
3699
3700 /* The MS_ABI changes the set of call-used registers. */
3701 if (TARGET_64BIT && ix86_cfun_abi () == MS_ABI)
3702 {
3703 call_used_regs[SI_REG] = 0;
3704 call_used_regs[DI_REG] = 0;
3705 call_used_regs[XMM6_REG] = 0;
3706 call_used_regs[XMM7_REG] = 0;
3707 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
3708 call_used_regs[i] = 0;
3709 }
3710
3711 /* The default setting of CLOBBERED_REGS is for 32-bit; add in the
3712 other call-clobbered regs for 64-bit. */
3713 if (TARGET_64BIT)
3714 {
3715 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
3716
3717 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3718 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
3719 && call_used_regs[i])
3720 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
3721 }
3722
3723 /* If MMX is disabled, squash the registers. */
3724 if (! TARGET_MMX)
3725 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3726 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
3727 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3728
3729 /* If SSE is disabled, squash the registers. */
3730 if (! TARGET_SSE)
3731 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3732 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
3733 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3734
3735 /* If the FPU is disabled, squash the registers. */
3736 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
3737 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3738 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
3739 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3740
3741 /* If 32-bit, squash the 64-bit registers. */
3742 if (! TARGET_64BIT)
3743 {
3744 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
3745 reg_names[i] = "";
3746 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
3747 reg_names[i] = "";
3748 }
3749 }
3750
3751 \f
3752 /* Save the current options */
3753
3754 static void
3755 ix86_function_specific_save (struct cl_target_option *ptr)
3756 {
3757 ptr->arch = ix86_arch;
3758 ptr->schedule = ix86_schedule;
3759 ptr->tune = ix86_tune;
3760 ptr->fpmath = ix86_fpmath;
3761 ptr->branch_cost = ix86_branch_cost;
3762 ptr->tune_defaulted = ix86_tune_defaulted;
3763 ptr->arch_specified = ix86_arch_specified;
3764 ptr->ix86_isa_flags_explicit = ix86_isa_flags_explicit;
3765 ptr->target_flags_explicit = target_flags_explicit;
3766
3767 /* The fields are char but the variables are not; make sure the
3768 values fit in the fields. */
3769 gcc_assert (ptr->arch == ix86_arch);
3770 gcc_assert (ptr->schedule == ix86_schedule);
3771 gcc_assert (ptr->tune == ix86_tune);
3772 gcc_assert (ptr->fpmath == ix86_fpmath);
3773 gcc_assert (ptr->branch_cost == ix86_branch_cost);
3774 }
3775
3776 /* Restore the current options */
3777
3778 static void
3779 ix86_function_specific_restore (struct cl_target_option *ptr)
3780 {
3781 enum processor_type old_tune = ix86_tune;
3782 enum processor_type old_arch = ix86_arch;
3783 unsigned int ix86_arch_mask, ix86_tune_mask;
3784 int i;
3785
3786 ix86_arch = (enum processor_type) ptr->arch;
3787 ix86_schedule = (enum attr_cpu) ptr->schedule;
3788 ix86_tune = (enum processor_type) ptr->tune;
3789 ix86_fpmath = (enum fpmath_unit) ptr->fpmath;
3790 ix86_branch_cost = ptr->branch_cost;
3791 ix86_tune_defaulted = ptr->tune_defaulted;
3792 ix86_arch_specified = ptr->arch_specified;
3793 ix86_isa_flags_explicit = ptr->ix86_isa_flags_explicit;
3794 target_flags_explicit = ptr->target_flags_explicit;
3795
3796 /* Recreate the arch feature tests if the arch changed */
3797 if (old_arch != ix86_arch)
3798 {
3799 ix86_arch_mask = 1u << ix86_arch;
3800 for (i = 0; i < X86_ARCH_LAST; ++i)
3801 ix86_arch_features[i]
3802 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3803 }
3804
3805 /* Recreate the tune optimization tests */
3806 if (old_tune != ix86_tune)
3807 {
3808 ix86_tune_mask = 1u << ix86_tune;
3809 for (i = 0; i < X86_TUNE_LAST; ++i)
3810 ix86_tune_features[i]
3811 = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3812 }
3813 }
3814
3815 /* Print the current options */
3816
3817 static void
3818 ix86_function_specific_print (FILE *file, int indent,
3819 struct cl_target_option *ptr)
3820 {
3821 char *target_string
3822 = ix86_target_string (ptr->ix86_isa_flags, ptr->target_flags,
3823 NULL, NULL, NULL, false);
3824
3825 fprintf (file, "%*sarch = %d (%s)\n",
3826 indent, "",
3827 ptr->arch,
3828 ((ptr->arch < TARGET_CPU_DEFAULT_max)
3829 ? cpu_names[ptr->arch]
3830 : "<unknown>"));
3831
3832 fprintf (file, "%*stune = %d (%s)\n",
3833 indent, "",
3834 ptr->tune,
3835 ((ptr->tune < TARGET_CPU_DEFAULT_max)
3836 ? cpu_names[ptr->tune]
3837 : "<unknown>"));
3838
3839 fprintf (file, "%*sfpmath = %d%s%s\n", indent, "", ptr->fpmath,
3840 (ptr->fpmath & FPMATH_387) ? ", 387" : "",
3841 (ptr->fpmath & FPMATH_SSE) ? ", sse" : "");
3842 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
3843
3844 if (target_string)
3845 {
3846 fprintf (file, "%*s%s\n", indent, "", target_string);
3847 free (target_string);
3848 }
3849 }
3850
3851 \f
3852 /* Inner function to process the attribute((target(...))), take an argument and
3853 set the current options from the argument. If we have a list, recursively go
3854 over the list. */
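/* For example, attribute((target("sse4.2,arch=core2,no-fancy-math-387")))
   is processed one comma-separated token at a time by this function.  */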
3855
3856 static bool
3857 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[])
3858 {
3859 char *next_optstr;
3860 bool ret = true;
3861
3862 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
3863 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
3864 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
3865 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
3866
3867 enum ix86_opt_type
3868 {
3869 ix86_opt_unknown,
3870 ix86_opt_yes,
3871 ix86_opt_no,
3872 ix86_opt_str,
3873 ix86_opt_isa
3874 };
3875
3876 static const struct
3877 {
3878 const char *string;
3879 size_t len;
3880 enum ix86_opt_type type;
3881 int opt;
3882 int mask;
3883 } attrs[] = {
3884 /* isa options */
3885 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
3886 IX86_ATTR_ISA ("abm", OPT_mabm),
3887 IX86_ATTR_ISA ("aes", OPT_maes),
3888 IX86_ATTR_ISA ("avx", OPT_mavx),
3889 IX86_ATTR_ISA ("mmx", OPT_mmmx),
3890 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
3891 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
3892 IX86_ATTR_ISA ("sse", OPT_msse),
3893 IX86_ATTR_ISA ("sse2", OPT_msse2),
3894 IX86_ATTR_ISA ("sse3", OPT_msse3),
3895 IX86_ATTR_ISA ("sse4", OPT_msse4),
3896 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
3897 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
3898 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
3899 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
3900 IX86_ATTR_ISA ("fma4", OPT_mfma4),
3901 IX86_ATTR_ISA ("xop", OPT_mxop),
3902 IX86_ATTR_ISA ("lwp", OPT_mlwp),
3903 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
3904 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
3905 IX86_ATTR_ISA ("f16c", OPT_mf16c),
3906
3907 /* string options */
3908 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
3909 IX86_ATTR_STR ("fpmath=", IX86_FUNCTION_SPECIFIC_FPMATH),
3910 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
3911
3912 /* flag options */
3913 IX86_ATTR_YES ("cld",
3914 OPT_mcld,
3915 MASK_CLD),
3916
3917 IX86_ATTR_NO ("fancy-math-387",
3918 OPT_mfancy_math_387,
3919 MASK_NO_FANCY_MATH_387),
3920
3921 IX86_ATTR_YES ("ieee-fp",
3922 OPT_mieee_fp,
3923 MASK_IEEE_FP),
3924
3925 IX86_ATTR_YES ("inline-all-stringops",
3926 OPT_minline_all_stringops,
3927 MASK_INLINE_ALL_STRINGOPS),
3928
3929 IX86_ATTR_YES ("inline-stringops-dynamically",
3930 OPT_minline_stringops_dynamically,
3931 MASK_INLINE_STRINGOPS_DYNAMICALLY),
3932
3933 IX86_ATTR_NO ("align-stringops",
3934 OPT_mno_align_stringops,
3935 MASK_NO_ALIGN_STRINGOPS),
3936
3937 IX86_ATTR_YES ("recip",
3938 OPT_mrecip,
3939 MASK_RECIP),
3940
3941 };
3942
3943 /* If this is a list, recurse to get the options. */
3944 if (TREE_CODE (args) == TREE_LIST)
3945 {
3946 bool ret = true;
3947
3948 for (; args; args = TREE_CHAIN (args))
3949 if (TREE_VALUE (args)
3950 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args), p_strings))
3951 ret = false;
3952
3953 return ret;
3954 }
3955
3956 else if (TREE_CODE (args) != STRING_CST)
3957 gcc_unreachable ();
3958
3959 /* Handle multiple arguments separated by commas. */
3960 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
3961
3962 while (next_optstr && *next_optstr != '\0')
3963 {
3964 char *p = next_optstr;
3965 char *orig_p = p;
3966 char *comma = strchr (next_optstr, ',');
3967 const char *opt_string;
3968 size_t len, opt_len;
3969 int opt;
3970 bool opt_set_p;
3971 char ch;
3972 unsigned i;
3973 enum ix86_opt_type type = ix86_opt_unknown;
3974 int mask = 0;
3975
3976 if (comma)
3977 {
3978 *comma = '\0';
3979 len = comma - next_optstr;
3980 next_optstr = comma + 1;
3981 }
3982 else
3983 {
3984 len = strlen (p);
3985 next_optstr = NULL;
3986 }
3987
3988 /* Recognize no-xxx. */
3989 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
3990 {
3991 opt_set_p = false;
3992 p += 3;
3993 len -= 3;
3994 }
3995 else
3996 opt_set_p = true;
3997
3998 /* Find the option. */
3999 ch = *p;
4000 opt = N_OPTS;
4001 for (i = 0; i < ARRAY_SIZE (attrs); i++)
4002 {
4003 type = attrs[i].type;
4004 opt_len = attrs[i].len;
4005 if (ch == attrs[i].string[0]
4006 && ((type != ix86_opt_str) ? len == opt_len : len > opt_len)
4007 && memcmp (p, attrs[i].string, opt_len) == 0)
4008 {
4009 opt = attrs[i].opt;
4010 mask = attrs[i].mask;
4011 opt_string = attrs[i].string;
4012 break;
4013 }
4014 }
4015
4016 /* Process the option. */
4017 if (opt == N_OPTS)
4018 {
4019 error ("attribute(target(\"%s\")) is unknown", orig_p);
4020 ret = false;
4021 }
4022
4023 else if (type == ix86_opt_isa)
4024 ix86_handle_option (opt, p, opt_set_p);
4025
4026 else if (type == ix86_opt_yes || type == ix86_opt_no)
4027 {
4028 if (type == ix86_opt_no)
4029 opt_set_p = !opt_set_p;
4030
4031 if (opt_set_p)
4032 target_flags |= mask;
4033 else
4034 target_flags &= ~mask;
4035 }
4036
4037 else if (type == ix86_opt_str)
4038 {
4039 if (p_strings[opt])
4040 {
4041 error ("option(\"%s\") was already specified", opt_string);
4042 ret = false;
4043 }
4044 else
4045 p_strings[opt] = xstrdup (p + opt_len);
4046 }
4047
4048 else
4049 gcc_unreachable ();
4050 }
4051
4052 return ret;
4053 }
4054
4055 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
4056
4057 tree
4058 ix86_valid_target_attribute_tree (tree args)
4059 {
4060 const char *orig_arch_string = ix86_arch_string;
4061 const char *orig_tune_string = ix86_tune_string;
4062 const char *orig_fpmath_string = ix86_fpmath_string;
4063 int orig_tune_defaulted = ix86_tune_defaulted;
4064 int orig_arch_specified = ix86_arch_specified;
4065 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL, NULL };
4066 tree t = NULL_TREE;
4067 int i;
4068 struct cl_target_option *def
4069 = TREE_TARGET_OPTION (target_option_default_node);
4070
4071 /* Process each of the options on the chain. */
4072 if (! ix86_valid_target_attribute_inner_p (args, option_strings))
4073 return NULL_TREE;
4074
4075 /* If the changed options are different from the default, rerun override_options,
4076 and then save the options away. The string options are attribute options,
4077 and will be undone when we copy the save structure. */
4078 if (ix86_isa_flags != def->ix86_isa_flags
4079 || target_flags != def->target_flags
4080 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
4081 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
4082 || option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
4083 {
4084 /* If we are using the default tune= or arch=, undo the string assigned,
4085 and use the default. */
4086 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
4087 ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
4088 else if (!orig_arch_specified)
4089 ix86_arch_string = NULL;
4090
4091 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
4092 ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
4093 else if (orig_tune_defaulted)
4094 ix86_tune_string = NULL;
4095
4096 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4097 if (option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
4098 ix86_fpmath_string = option_strings[IX86_FUNCTION_SPECIFIC_FPMATH];
4099 else if (!TARGET_64BIT && TARGET_SSE)
4100 ix86_fpmath_string = "sse,387";
4101
4102 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
4103 override_options (false);
4104
4105 /* Add any builtin functions with the new isa if any. */
4106 ix86_add_new_builtins (ix86_isa_flags);
4107
4108 /* Save the current options unless we are validating options for
4109 #pragma. */
4110 t = build_target_option_node ();
4111
4112 ix86_arch_string = orig_arch_string;
4113 ix86_tune_string = orig_tune_string;
4114 ix86_fpmath_string = orig_fpmath_string;
4115
4116 /* Free up memory allocated to hold the strings */
4117 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
4118 if (option_strings[i])
4119 free (option_strings[i]);
4120 }
4121
4122 return t;
4123 }
4124
4125 /* Hook to validate attribute((target("string"))). */
4126
4127 static bool
4128 ix86_valid_target_attribute_p (tree fndecl,
4129 tree ARG_UNUSED (name),
4130 tree args,
4131 int ARG_UNUSED (flags))
4132 {
4133 struct cl_target_option cur_target;
4134 bool ret = true;
4135 tree old_optimize = build_optimization_node ();
4136 tree new_target, new_optimize;
4137 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
4138
4139 /* If the function changed the optimization levels as well as setting target
4140 options, start with the optimizations specified. */
4141 if (func_optimize && func_optimize != old_optimize)
4142 cl_optimization_restore (TREE_OPTIMIZATION (func_optimize));
4143
4144 /* The target attributes may also change some optimization flags, so update
4145 the optimization options if necessary. */
4146 cl_target_option_save (&cur_target);
4147 new_target = ix86_valid_target_attribute_tree (args);
4148 new_optimize = build_optimization_node ();
4149
4150 if (!new_target)
4151 ret = false;
4152
4153 else if (fndecl)
4154 {
4155 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
4156
4157 if (old_optimize != new_optimize)
4158 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
4159 }
4160
4161 cl_target_option_restore (&cur_target);
4162
4163 if (old_optimize != new_optimize)
4164 cl_optimization_restore (TREE_OPTIMIZATION (old_optimize));
4165
4166 return ret;
4167 }
4168
4169 \f
4170 /* Hook to determine if one function can safely inline another. */
4171
4172 static bool
4173 ix86_can_inline_p (tree caller, tree callee)
4174 {
4175 bool ret = false;
4176 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
4177 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
4178
4179 /* If callee has no option attributes, then it is ok to inline. */
4180 if (!callee_tree)
4181 ret = true;
4182
4183 /* If caller has no option attributes, but callee does, then it is not ok
4184 to inline. */
4185 else if (!caller_tree)
4186 ret = false;
4187
4188 else
4189 {
4190 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
4191 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
4192
4193 /* Callee's isa options should be a subset of the caller's, i.e. an SSE4
4194 function can inline an SSE2 function but an SSE2 function can't inline
4195 an SSE4 function. */
4196 if ((caller_opts->ix86_isa_flags & callee_opts->ix86_isa_flags)
4197 != callee_opts->ix86_isa_flags)
4198 ret = false;
4199
4200 /* See if we have the same non-isa options. */
4201 else if (caller_opts->target_flags != callee_opts->target_flags)
4202 ret = false;
4203
4204 /* See if arch, tune, etc. are the same. */
4205 else if (caller_opts->arch != callee_opts->arch)
4206 ret = false;
4207
4208 else if (caller_opts->tune != callee_opts->tune)
4209 ret = false;
4210
4211 else if (caller_opts->fpmath != callee_opts->fpmath)
4212 ret = false;
4213
4214 else if (caller_opts->branch_cost != callee_opts->branch_cost)
4215 ret = false;
4216
4217 else
4218 ret = true;
4219 }
4220
4221 return ret;
4222 }
4223
4224 \f
4225 /* Remember the last target of ix86_set_current_function. */
4226 static GTY(()) tree ix86_previous_fndecl;
4227
4228 /* Establish appropriate back-end context for processing the function
4229 FNDECL. The argument might be NULL to indicate processing at top
4230 level, outside of any function scope. */
4231 static void
4232 ix86_set_current_function (tree fndecl)
4233 {
4234 /* Only change the context if the function changes. This hook is called
4235 several times in the course of compiling a function, and we don't want to
4236 slow things down too much or call target_reinit when it isn't safe. */
4237 if (fndecl && fndecl != ix86_previous_fndecl)
4238 {
4239 tree old_tree = (ix86_previous_fndecl
4240 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
4241 : NULL_TREE);
4242
4243 tree new_tree = (fndecl
4244 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
4245 : NULL_TREE);
4246
4247 ix86_previous_fndecl = fndecl;
4248 if (old_tree == new_tree)
4249 ;
4250
4251 else if (new_tree)
4252 {
4253 cl_target_option_restore (TREE_TARGET_OPTION (new_tree));
4254 target_reinit ();
4255 }
4256
4257 else if (old_tree)
4258 {
4259 struct cl_target_option *def
4260 = TREE_TARGET_OPTION (target_option_current_node);
4261
4262 cl_target_option_restore (def);
4263 target_reinit ();
4264 }
4265 }
4266 }
4267
4268 \f
4269 /* Return true if this goes in large data/bss. */
4270
4271 static bool
4272 ix86_in_large_data_p (tree exp)
4273 {
4274 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
4275 return false;
4276
4277 /* Functions are never large data. */
4278 if (TREE_CODE (exp) == FUNCTION_DECL)
4279 return false;
4280
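/* Variables explicitly placed in .ldata or .lbss are large data regardless
   of their size.  */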
4281 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
4282 {
4283 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
4284 if (strcmp (section, ".ldata") == 0
4285 || strcmp (section, ".lbss") == 0)
4286 return true;
4287 return false;
4288 }
4289 else
4290 {
4291 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
4292
4293 /* If this is an incomplete type with size 0, then we can't put it
4294 in data because it might be too big when completed. */
4295 if (!size || size > ix86_section_threshold)
4296 return true;
4297 }
4298
4299 return false;
4300 }
4301
4302 /* Switch to the appropriate section for output of DECL.
4303 DECL is either a `VAR_DECL' node or a constant of some sort.
4304 RELOC indicates whether forming the initial value of DECL requires
4305 link-time relocations. */
4306
4307 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
4308 ATTRIBUTE_UNUSED;
4309
4310 static section *
4311 x86_64_elf_select_section (tree decl, int reloc,
4312 unsigned HOST_WIDE_INT align)
4313 {
4314 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4315 && ix86_in_large_data_p (decl))
4316 {
4317 const char *sname = NULL;
4318 unsigned int flags = SECTION_WRITE;
4319 switch (categorize_decl_for_section (decl, reloc))
4320 {
4321 case SECCAT_DATA:
4322 sname = ".ldata";
4323 break;
4324 case SECCAT_DATA_REL:
4325 sname = ".ldata.rel";
4326 break;
4327 case SECCAT_DATA_REL_LOCAL:
4328 sname = ".ldata.rel.local";
4329 break;
4330 case SECCAT_DATA_REL_RO:
4331 sname = ".ldata.rel.ro";
4332 break;
4333 case SECCAT_DATA_REL_RO_LOCAL:
4334 sname = ".ldata.rel.ro.local";
4335 break;
4336 case SECCAT_BSS:
4337 sname = ".lbss";
4338 flags |= SECTION_BSS;
4339 break;
4340 case SECCAT_RODATA:
4341 case SECCAT_RODATA_MERGE_STR:
4342 case SECCAT_RODATA_MERGE_STR_INIT:
4343 case SECCAT_RODATA_MERGE_CONST:
4344 sname = ".lrodata";
4345 flags = 0;
4346 break;
4347 case SECCAT_SRODATA:
4348 case SECCAT_SDATA:
4349 case SECCAT_SBSS:
4350 gcc_unreachable ();
4351 case SECCAT_TEXT:
4352 case SECCAT_TDATA:
4353 case SECCAT_TBSS:
4354 /* We don't split these for the medium model. Place them into
4355 default sections and hope for the best. */
4356 break;
4357 case SECCAT_EMUTLS_VAR:
4358 case SECCAT_EMUTLS_TMPL:
4359 gcc_unreachable ();
4360 }
4361 if (sname)
4362 {
4363 /* We might get called with string constants, but get_named_section
4364 doesn't like them as they are not DECLs. Also, we need to set
4365 flags in that case. */
4366 if (!DECL_P (decl))
4367 return get_section (sname, flags, NULL);
4368 return get_named_section (decl, sname, reloc);
4369 }
4370 }
4371 return default_elf_select_section (decl, reloc, align);
4372 }
4373
4374 /* Build up a unique section name, expressed as a
4375 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
4376 RELOC indicates whether the initial value of EXP requires
4377 link-time relocations. */
4378
4379 static void ATTRIBUTE_UNUSED
4380 x86_64_elf_unique_section (tree decl, int reloc)
4381 {
4382 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4383 && ix86_in_large_data_p (decl))
4384 {
4385 const char *prefix = NULL;
4386 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
4387 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
4388
4389 switch (categorize_decl_for_section (decl, reloc))
4390 {
4391 case SECCAT_DATA:
4392 case SECCAT_DATA_REL:
4393 case SECCAT_DATA_REL_LOCAL:
4394 case SECCAT_DATA_REL_RO:
4395 case SECCAT_DATA_REL_RO_LOCAL:
4396 prefix = one_only ? ".ld" : ".ldata";
4397 break;
4398 case SECCAT_BSS:
4399 prefix = one_only ? ".lb" : ".lbss";
4400 break;
4401 case SECCAT_RODATA:
4402 case SECCAT_RODATA_MERGE_STR:
4403 case SECCAT_RODATA_MERGE_STR_INIT:
4404 case SECCAT_RODATA_MERGE_CONST:
4405 prefix = one_only ? ".lr" : ".lrodata";
4406 break;
4407 case SECCAT_SRODATA:
4408 case SECCAT_SDATA:
4409 case SECCAT_SBSS:
4410 gcc_unreachable ();
4411 case SECCAT_TEXT:
4412 case SECCAT_TDATA:
4413 case SECCAT_TBSS:
4414 /* We don't split these for the medium model. Place them into
4415 default sections and hope for the best. */
4416 break;
4417 case SECCAT_EMUTLS_VAR:
4418 prefix = targetm.emutls.var_section;
4419 break;
4420 case SECCAT_EMUTLS_TMPL:
4421 prefix = targetm.emutls.tmpl_section;
4422 break;
4423 }
4424 if (prefix)
4425 {
4426 const char *name, *linkonce;
4427 char *string;
4428
4429 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
4430 name = targetm.strip_name_encoding (name);
4431
4432 /* If we're using one_only, then there needs to be a .gnu.linkonce
4433 prefix to the section name. */
4434 linkonce = one_only ? ".gnu.linkonce" : "";
4435
4436 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
4437
4438 DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
4439 return;
4440 }
4441 }
4442 default_unique_section (decl, reloc);
4443 }
4444
4445 #ifdef COMMON_ASM_OP
4446 /* This says how to output assembler code to declare an
4447 uninitialized external linkage data object.
4448
4449 For medium-model x86-64 we need to use the .largecomm directive for
4450 large objects. */
4451 void
4452 x86_elf_aligned_common (FILE *file,
4453 const char *name, unsigned HOST_WIDE_INT size,
4454 int align)
4455 {
4456 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4457 && size > (unsigned int)ix86_section_threshold)
4458 fputs (".largecomm\t", file);
4459 else
4460 fputs (COMMON_ASM_OP, file);
4461 assemble_name (file, name);
4462 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
4463 size, align / BITS_PER_UNIT);
4464 }
4465 #endif
4466
4467 /* Utility function for targets to use in implementing
4468 ASM_OUTPUT_ALIGNED_BSS. */
4469
4470 void
4471 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
4472 const char *name, unsigned HOST_WIDE_INT size,
4473 int align)
4474 {
4475 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4476 && size > (unsigned int)ix86_section_threshold)
4477 switch_to_section (get_named_section (decl, ".lbss", 0));
4478 else
4479 switch_to_section (bss_section);
4480 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
4481 #ifdef ASM_DECLARE_OBJECT_NAME
4482 last_assemble_variable_decl = decl;
4483 ASM_DECLARE_OBJECT_NAME (file, name, decl);
4484 #else
4485 /* Standard thing is just output label for the object. */
4486 ASM_OUTPUT_LABEL (file, name);
4487 #endif /* ASM_DECLARE_OBJECT_NAME */
4488 ASM_OUTPUT_SKIP (file, size ? size : 1);
4489 }
4490 \f
4491 void
4492 optimization_options (int level, int size ATTRIBUTE_UNUSED)
4493 {
4494 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
4495 make the problem with not enough registers even worse. */
4496 #ifdef INSN_SCHEDULING
4497 if (level > 1)
4498 flag_schedule_insns = 0;
4499 #endif
4500
4501 if (TARGET_MACHO)
4502 /* The Darwin libraries never set errno, so we might as well
4503 avoid calling them when that's the only reason we would. */
4504 flag_errno_math = 0;
4505
4506 /* The default values of these switches depend on TARGET_64BIT,
4507 which is not known at this moment. Mark these values with 2 and
4508 let the user override them. In case there is no command line option
4509 specifying them, we will set the defaults in override_options. */
4510 if (optimize >= 1)
4511 flag_omit_frame_pointer = 2;
4512
4513 /* For -O2 and beyond, turn on -fzee for x86_64 target. */
4514 if (level > 1)
4515 flag_zee = 2;
4516
4517 flag_pcc_struct_return = 2;
4518 flag_asynchronous_unwind_tables = 2;
4519 flag_vect_cost_model = 1;
4520 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
4521 SUBTARGET_OPTIMIZATION_OPTIONS;
4522 #endif
4523 }
4524
4525 /* Decide whether we must probe the stack before any space allocation
4526 on this target. It's essentially TARGET_STACK_PROBE except when
4527 -fstack-check causes the stack to be already probed differently. */
4528
4529 bool
4530 ix86_target_stack_probe (void)
4531 {
4532 /* Do not probe the stack twice if static stack checking is enabled. */
4533 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
4534 return false;
4535
4536 return TARGET_STACK_PROBE;
4537 }
4538 \f
4539 /* Decide whether we can make a sibling call to a function. DECL is the
4540 declaration of the function being targeted by the call and EXP is the
4541 CALL_EXPR representing the call. */
4542
4543 static bool
4544 ix86_function_ok_for_sibcall (tree decl, tree exp)
4545 {
4546 tree type, decl_or_type;
4547 rtx a, b;
4548
4549 /* If we are generating position-independent code, we cannot sibcall
4550 optimize any indirect call, or a direct call to a global function,
4551 as the PLT requires %ebx be live. */
4552 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
4553 return false;
4554
4555 /* If we need to align the outgoing stack, then sibcalling would
4556 unalign the stack, which may break the called function. */
4557 if (ix86_minimum_incoming_stack_boundary (true)
4558 < PREFERRED_STACK_BOUNDARY)
4559 return false;
4560
4561 if (decl)
4562 {
4563 decl_or_type = decl;
4564 type = TREE_TYPE (decl);
4565 }
4566 else
4567 {
4568 /* We're looking at the CALL_EXPR, we need the type of the function. */
4569 type = CALL_EXPR_FN (exp); /* pointer expression */
4570 type = TREE_TYPE (type); /* pointer type */
4571 type = TREE_TYPE (type); /* function type */
4572 decl_or_type = type;
4573 }
4574
4575 /* Check that the return value locations are the same. For example,
4576 if we are returning floats on the 80387 register stack, we cannot
4577 make a sibcall from a function that doesn't return a float to a
4578 function that does or, conversely, from a function that does return
4579 a float to a function that doesn't; the necessary stack adjustment
4580 would not be executed. This is also the place we notice
4581 differences in the return value ABI. Note that it is ok for one
4582 of the functions to have void return type as long as the return
4583 value of the other is passed in a register. */
4584 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
4585 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
4586 cfun->decl, false);
4587 if (STACK_REG_P (a) || STACK_REG_P (b))
4588 {
4589 if (!rtx_equal_p (a, b))
4590 return false;
4591 }
4592 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
4593 ;
4594 else if (!rtx_equal_p (a, b))
4595 return false;
4596
4597 if (TARGET_64BIT)
4598 {
4599 /* The SYSV ABI has more call-clobbered registers;
4600 disallow sibcalls from MS to SYSV. */
4601 if (cfun->machine->call_abi == MS_ABI
4602 && ix86_function_type_abi (type) == SYSV_ABI)
4603 return false;
4604 }
4605 else
4606 {
4607 /* If this call is indirect, we'll need to be able to use a
4608 call-clobbered register for the address of the target function.
4609 Make sure that all such registers are not used for passing
4610 parameters. Note that DLLIMPORT functions are indirect. */
4611 if (!decl
4612 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
4613 {
4614 if (ix86_function_regparm (type, NULL) >= 3)
4615 {
4616 /* ??? Need to count the actual number of registers to be used,
4617 not the possible number of registers. Fix later. */
4618 return false;
4619 }
4620 }
4621 }
4622
4623 /* Otherwise okay. That also includes certain types of indirect calls. */
4624 return true;
4625 }
4626
4627 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
4628 and "sseregparm" calling convention attributes;
4629 arguments as in struct attribute_spec.handler. */
4630
4631 static tree
4632 ix86_handle_cconv_attribute (tree *node, tree name,
4633 tree args,
4634 int flags ATTRIBUTE_UNUSED,
4635 bool *no_add_attrs)
4636 {
4637 if (TREE_CODE (*node) != FUNCTION_TYPE
4638 && TREE_CODE (*node) != METHOD_TYPE
4639 && TREE_CODE (*node) != FIELD_DECL
4640 && TREE_CODE (*node) != TYPE_DECL)
4641 {
4642 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4643 name);
4644 *no_add_attrs = true;
4645 return NULL_TREE;
4646 }
4647
4648 /* Can combine regparm with all attributes but fastcall. */
4649 if (is_attribute_p ("regparm", name))
4650 {
4651 tree cst;
4652
4653 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4654 {
4655 error ("fastcall and regparm attributes are not compatible");
4656 }
4657
4658 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4659 {
4660 error ("regparam and thiscall attributes are not compatible");
4661 }
4662
4663 cst = TREE_VALUE (args);
4664 if (TREE_CODE (cst) != INTEGER_CST)
4665 {
4666 warning (OPT_Wattributes,
4667 "%qE attribute requires an integer constant argument",
4668 name);
4669 *no_add_attrs = true;
4670 }
4671 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
4672 {
4673 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
4674 name, REGPARM_MAX);
4675 *no_add_attrs = true;
4676 }
4677
4678 return NULL_TREE;
4679 }
4680
4681 if (TARGET_64BIT)
4682 {
4683 /* Do not warn when emulating the MS ABI. */
4684 if ((TREE_CODE (*node) != FUNCTION_TYPE
4685 && TREE_CODE (*node) != METHOD_TYPE)
4686 || ix86_function_type_abi (*node) != MS_ABI)
4687 warning (OPT_Wattributes, "%qE attribute ignored",
4688 name);
4689 *no_add_attrs = true;
4690 return NULL_TREE;
4691 }
4692
4693 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
4694 if (is_attribute_p ("fastcall", name))
4695 {
4696 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4697 {
4698 error ("fastcall and cdecl attributes are not compatible");
4699 }
4700 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4701 {
4702 error ("fastcall and stdcall attributes are not compatible");
4703 }
4704 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
4705 {
4706 error ("fastcall and regparm attributes are not compatible");
4707 }
4708 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4709 {
4710 error ("fastcall and thiscall attributes are not compatible");
4711 }
4712 }
4713
4714 /* Can combine stdcall with fastcall (redundant), regparm and
4715 sseregparm. */
4716 else if (is_attribute_p ("stdcall", name))
4717 {
4718 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4719 {
4720 error ("stdcall and cdecl attributes are not compatible");
4721 }
4722 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4723 {
4724 error ("stdcall and fastcall attributes are not compatible");
4725 }
4726 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4727 {
4728 error ("stdcall and thiscall attributes are not compatible");
4729 }
4730 }
4731
4732 /* Can combine cdecl with regparm and sseregparm. */
4733 else if (is_attribute_p ("cdecl", name))
4734 {
4735 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4736 {
4737 error ("stdcall and cdecl attributes are not compatible");
4738 }
4739 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4740 {
4741 error ("fastcall and cdecl attributes are not compatible");
4742 }
4743 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4744 {
4745 error ("cdecl and thiscall attributes are not compatible");
4746 }
4747 }
4748 else if (is_attribute_p ("thiscall", name))
4749 {
4750 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
4751 warning (OPT_Wattributes, "%qE attribute is used for non-class method",
4752 name);
4753 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4754 {
4755 error ("stdcall and thiscall attributes are not compatible");
4756 }
4757 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4758 {
4759 error ("fastcall and thiscall attributes are not compatible");
4760 }
4761 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4762 {
4763 error ("cdecl and thiscall attributes are not compatible");
4764 }
4765 }
4766
4767 /* Can combine sseregparm with all attributes. */
4768
4769 return NULL_TREE;
4770 }
4771
4772 /* Return 0 if the attributes for two types are incompatible, 1 if they
4773 are compatible, and 2 if they are nearly compatible (which causes a
4774 warning to be generated). */
4775
4776 static int
4777 ix86_comp_type_attributes (const_tree type1, const_tree type2)
4778 {
4779 /* Check for mismatch of non-default calling convention. */
4780 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
4781
4782 if (TREE_CODE (type1) != FUNCTION_TYPE
4783 && TREE_CODE (type1) != METHOD_TYPE)
4784 return 1;
4785
4786 /* Check for mismatched fastcall/regparm types. */
4787 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
4788 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
4789 || (ix86_function_regparm (type1, NULL)
4790 != ix86_function_regparm (type2, NULL)))
4791 return 0;
4792
4793 /* Check for mismatched sseregparm types. */
4794 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
4795 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
4796 return 0;
4797
4798 /* Check for mismatched thiscall types. */
4799 if (!lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type1))
4800 != !lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type2)))
4801 return 0;
4802
4803 /* Check for mismatched return types (cdecl vs stdcall). */
4804 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
4805 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
4806 return 0;
4807
4808 return 1;
4809 }
4810 \f
4811 /* Return the regparm value for a function with the indicated TYPE and DECL.
4812 DECL may be NULL when calling function indirectly
4813 or considering a libcall. */
4814
4815 static int
4816 ix86_function_regparm (const_tree type, const_tree decl)
4817 {
4818 tree attr;
4819 int regparm;
4820
4821 if (TARGET_64BIT)
4822 return (ix86_function_type_abi (type) == SYSV_ABI
4823 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
4824
4825 regparm = ix86_regparm;
4826 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
4827 if (attr)
4828 {
4829 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
4830 return regparm;
4831 }
4832
4833 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
4834 return 2;
4835
4836 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type)))
4837 return 1;
4838
4839 /* Use register calling convention for local functions when possible. */
4840 if (decl
4841 && TREE_CODE (decl) == FUNCTION_DECL
4842 && optimize
4843 && !profile_flag)
4844 {
4845 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
4846 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
4847 if (i && i->local)
4848 {
4849 int local_regparm, globals = 0, regno;
4850
4851 /* Make sure no regparm register is taken by a
4852 fixed register variable. */
4853 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
4854 if (fixed_regs[local_regparm])
4855 break;
4856
4857 /* We don't want to use regparm(3) for nested functions as
4858 these use a static chain pointer in the third argument. */
4859 if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
4860 local_regparm = 2;
4861
4862 /* Each fixed register usage increases register pressure,
4863 so fewer registers should be used for argument passing.
4864 This functionality can be overridden by an explicit
4865 regparm value. */
4866 for (regno = 0; regno <= DI_REG; regno++)
4867 if (fixed_regs[regno])
4868 globals++;
4869
4870 local_regparm
4871 = globals < local_regparm ? local_regparm - globals : 0;
4872
4873 if (local_regparm > regparm)
4874 regparm = local_regparm;
4875 }
4876 }
4877
4878 return regparm;
4879 }
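/* Informal example (an illustration, not taken from the code above): given

     int __attribute__((regparm(3))) f (int a, int b, int c);

   ix86_function_regparm returns 3, so the first three integer arguments
   are expected in %eax, %edx and %ecx.  A "fastcall" type yields 2
   (%ecx, %edx) and a "thiscall" type yields 1 (the `this' pointer
   in %ecx).  */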
4880
4881 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
4882 DFmode (2) arguments in SSE registers for a function with the
4883 indicated TYPE and DECL. DECL may be NULL when calling the function
4884 indirectly or when considering a libcall. Otherwise return 0. */
4885
4886 static int
4887 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
4888 {
4889 gcc_assert (!TARGET_64BIT);
4890
4891 /* Use SSE registers to pass SFmode and DFmode arguments if requested
4892 by the sseregparm attribute. */
4893 if (TARGET_SSEREGPARM
4894 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
4895 {
4896 if (!TARGET_SSE)
4897 {
4898 if (warn)
4899 {
4900 if (decl)
4901 error ("Calling %qD with attribute sseregparm without "
4902 "SSE/SSE2 enabled", decl);
4903 else
4904 error ("Calling %qT with attribute sseregparm without "
4905 "SSE/SSE2 enabled", type);
4906 }
4907 return 0;
4908 }
4909
4910 return 2;
4911 }
4912
4913 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
4914 (and DFmode for SSE2) arguments in SSE registers. */
4915 if (decl && TARGET_SSE_MATH && optimize && !profile_flag)
4916 {
4917 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
4918 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
4919 if (i && i->local)
4920 return TARGET_SSE2 ? 2 : 1;
4921 }
4922
4923 return 0;
4924 }
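/* Illustrative sketch (assumes SSE2 is enabled): for

     double __attribute__((sseregparm)) f (double x);

   the function above returns 2, so SFmode and DFmode arguments are passed
   in %xmm registers; with SSE disabled the attribute is diagnosed and 0
   is returned.  */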
4925
4926 /* Return true if EAX is live at the start of the function. Used by
4927 ix86_expand_prologue to determine if we need special help before
4928 calling allocate_stack_worker. */
4929
4930 static bool
4931 ix86_eax_live_at_start_p (void)
4932 {
4933 /* Cheat. Don't bother working forward from ix86_function_regparm
4934 to the function type to whether an actual argument is located in
4935 eax. Instead just look at cfg info, which is still close enough
4936 to correct at this point. This gives false positives for broken
4937 functions that might use uninitialized data that happens to be
4938 allocated in eax, but who cares? */
4939 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
4940 }
4941
4942 /* Value is the number of bytes of arguments automatically
4943 popped when returning from a subroutine call.
4944 FUNDECL is the declaration node of the function (as a tree),
4945 FUNTYPE is the data type of the function (as a tree),
4946 or for a library call it is an identifier node for the subroutine name.
4947 SIZE is the number of bytes of arguments passed on the stack.
4948
4949 On the 80386, the RTD insn may be used to pop them if the number
4950 of args is fixed, but if the number is variable then the caller
4951 must pop them all. RTD can't be used for library calls now
4952 because the library is compiled with the Unix compiler.
4953 Use of RTD is a selectable option, since it is incompatible with
4954 standard Unix calling sequences. If the option is not selected,
4955 the caller must always pop the args.
4956
4957 The attribute stdcall is equivalent to RTD on a per module basis. */
4958
4959 static int
4960 ix86_return_pops_args (tree fundecl, tree funtype, int size)
4961 {
4962 int rtd;
4963
4964 /* None of the 64-bit ABIs pop arguments. */
4965 if (TARGET_64BIT)
4966 return 0;
4967
4968 rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
4969
4970 /* Cdecl functions override -mrtd, and never pop the stack. */
4971 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
4972 {
4973 /* Stdcall and fastcall functions will pop the stack if not
4974 variable args. */
4975 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
4976 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype))
4977 || lookup_attribute ("thiscall", TYPE_ATTRIBUTES (funtype)))
4978 rtd = 1;
4979
4980 if (rtd && ! stdarg_p (funtype))
4981 return size;
4982 }
4983
4984 /* Lose any fake structure return argument if it is passed on the stack. */
4985 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
4986 && !KEEP_AGGREGATE_RETURN_POINTER)
4987 {
4988 int nregs = ix86_function_regparm (funtype, fundecl);
4989 if (nregs == 0)
4990 return GET_MODE_SIZE (Pmode);
4991 }
4992
4993 return 0;
4994 }
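/* Informal example: for a 32-bit function declared as

     int __attribute__((stdcall)) f (int a, int b);

   ix86_return_pops_args returns 8, so the callee pops its own arguments
   ("ret $8"); a plain cdecl function returns 0 here and leaves the
   cleanup to the caller.  */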
4995 \f
4996 /* Argument support functions. */
4997
4998 /* Return true when register may be used to pass function parameters. */
4999 bool
5000 ix86_function_arg_regno_p (int regno)
5001 {
5002 int i;
5003 const int *parm_regs;
5004
5005 if (!TARGET_64BIT)
5006 {
5007 if (TARGET_MACHO)
5008 return (regno < REGPARM_MAX
5009 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
5010 else
5011 return (regno < REGPARM_MAX
5012 || (TARGET_MMX && MMX_REGNO_P (regno)
5013 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
5014 || (TARGET_SSE && SSE_REGNO_P (regno)
5015 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
5016 }
5017
5018 if (TARGET_MACHO)
5019 {
5020 if (SSE_REGNO_P (regno) && TARGET_SSE)
5021 return true;
5022 }
5023 else
5024 {
5025 if (TARGET_SSE && SSE_REGNO_P (regno)
5026 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
5027 return true;
5028 }
5029
5030 /* TODO: The function should depend on current function ABI but
5031 builtins.c would need updating then. Therefore we use the
5032 default ABI. */
5033
5034 /* RAX is used as hidden argument to va_arg functions. */
5035 if (ix86_abi == SYSV_ABI && regno == AX_REG)
5036 return true;
5037
5038 if (ix86_abi == MS_ABI)
5039 parm_regs = x86_64_ms_abi_int_parameter_registers;
5040 else
5041 parm_regs = x86_64_int_parameter_registers;
5042 for (i = 0; i < (ix86_abi == MS_ABI
5043 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
5044 if (regno == parm_regs[i])
5045 return true;
5046 return false;
5047 }
5048
5049 /* Return true if we do not know how to pass TYPE solely in registers. */
5050
5051 static bool
5052 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
5053 {
5054 if (must_pass_in_stack_var_size_or_pad (mode, type))
5055 return true;
5056
5057 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
5058 The layout_type routine is crafty and tries to trick us into passing
5059 currently unsupported vector types on the stack by using TImode. */
5060 return (!TARGET_64BIT && mode == TImode
5061 && type && TREE_CODE (type) != VECTOR_TYPE);
5062 }
5063
5064 /* Return the size, in bytes, of the area reserved for arguments passed
5065 in registers for the function represented by FNDECL, depending on the
5066 ABI format used. */
5067 int
5068 ix86_reg_parm_stack_space (const_tree fndecl)
5069 {
5070 enum calling_abi call_abi = SYSV_ABI;
5071 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
5072 call_abi = ix86_function_abi (fndecl);
5073 else
5074 call_abi = ix86_function_type_abi (fndecl);
5075 if (call_abi == MS_ABI)
5076 return 32;
5077 return 0;
5078 }
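/* Informal note: the 32 bytes for the MS ABI correspond to the Win64
   "home area" that the caller always reserves below the return address
   for the four register parameters, even for a call such as f (1, 2)
   that uses only two of them.  */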
5079
5080 /* Returns value SYSV_ABI, MS_ABI dependent on fntype, specifying the
5081 call abi used. */
5082 enum calling_abi
5083 ix86_function_type_abi (const_tree fntype)
5084 {
5085 if (TARGET_64BIT && fntype != NULL)
5086 {
5087 enum calling_abi abi = ix86_abi;
5088 if (abi == SYSV_ABI)
5089 {
5090 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
5091 abi = MS_ABI;
5092 }
5093 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
5094 abi = SYSV_ABI;
5095 return abi;
5096 }
5097 return ix86_abi;
5098 }
5099
5100 static bool
5101 ix86_function_ms_hook_prologue (const_tree fndecl)
5102 {
5103 if (fndecl && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fndecl)))
5104 {
5105 if (decl_function_context (fndecl) != NULL_TREE)
5106 {
5107 error_at (DECL_SOURCE_LOCATION (fndecl),
5108 "ms_hook_prologue is not compatible with nested function");
5109 }
5110
5111 return true;
5112 }
5113 return false;
5114 }
5115
5116 static enum calling_abi
5117 ix86_function_abi (const_tree fndecl)
5118 {
5119 if (! fndecl)
5120 return ix86_abi;
5121 return ix86_function_type_abi (TREE_TYPE (fndecl));
5122 }
5123
5124 /* Returns value SYSV_ABI, MS_ABI dependent on cfun, specifying the
5125 call abi used. */
5126 enum calling_abi
5127 ix86_cfun_abi (void)
5128 {
5129 if (! cfun || ! TARGET_64BIT)
5130 return ix86_abi;
5131 return cfun->machine->call_abi;
5132 }
5133
5134 /* Write the extra assembler code needed to declare a function properly. */
5135
5136 void
5137 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
5138 tree decl)
5139 {
5140 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
5141
5142 if (is_ms_hook)
5143 {
5144 int i, filler_count = (TARGET_64BIT ? 32 : 16);
5145 unsigned int filler_cc = 0xcccccccc;
5146
5147 for (i = 0; i < filler_count; i += 4)
5148 fprintf (asm_out_file, ASM_LONG " 0x%x\n", filler_cc);
5149 }
5150
5151 ASM_OUTPUT_LABEL (asm_out_file, fname);
5152
5153 /* Output the magic byte marker, if the hot-patch attribute is set.
5154 For the 32-bit case the frame-pointer prologue will be emitted in
5155 expand_prologue. */
5156 if (is_ms_hook)
5157 {
5158 if (TARGET_64BIT)
5159 /* leaq [%rsp + 0], %rsp */
5160 asm_fprintf (asm_out_file, ASM_BYTE
5161 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
5162 else
5163 /* movl.s %edi, %edi. */
5164 asm_fprintf (asm_out_file, ASM_BYTE "0x8b, 0xff\n");
5165 }
5166 }
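/* Sketch of the output for a 32-bit function carrying the ms_hook_prologue
   attribute (illustrative only; the exact spelling depends on the assembler
   dialect):

       .long 0xcccccccc        # 16 bytes of 0xcc filler before the label
       ...
     f:
       .byte 0x8b, 0xff        # movl.s %edi, %edi  (2-byte hot-patch nop)

   The frame-pointer prologue that makes the entry point patchable is added
   later, in expand_prologue.  */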
5167
5168 /* regclass.c */
5169 extern void init_regs (void);
5170
5171 /* Implementation of the call ABI switching target hook. The call register
5172 sets specific to FNDECL are put in effect. See also
5173 CONDITIONAL_REGISTER_USAGE for more details. */
5174 void
5175 ix86_call_abi_override (const_tree fndecl)
5176 {
5177 if (fndecl == NULL_TREE)
5178 cfun->machine->call_abi = ix86_abi;
5179 else
5180 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
5181 }
5182
5183 /* The MS and SYSV ABIs have different sets of call-used registers. Avoid
5184 expensive re-initialization of init_regs each time we switch function
5185 context, since this is needed only during RTL expansion. */
5186 static void
5187 ix86_maybe_switch_abi (void)
5188 {
5189 if (TARGET_64BIT &&
5190 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
5191 reinit_regs ();
5192 }
5193
5194 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5195 for a call to a function whose data type is FNTYPE.
5196 For a library call, FNTYPE is 0. */
5197
5198 void
5199 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
5200 tree fntype, /* tree ptr for function decl */
5201 rtx libname, /* SYMBOL_REF of library name or 0 */
5202 tree fndecl)
5203 {
5204 struct cgraph_local_info *i = fndecl ? cgraph_local_info (fndecl) : NULL;
5205 memset (cum, 0, sizeof (*cum));
5206
5207 if (fndecl)
5208 cum->call_abi = ix86_function_abi (fndecl);
5209 else
5210 cum->call_abi = ix86_function_type_abi (fntype);
5211 /* Set up the number of registers to use for passing arguments. */
5212
5213 if (cum->call_abi == MS_ABI && !ACCUMULATE_OUTGOING_ARGS)
5214 sorry ("ms_abi attribute requires -maccumulate-outgoing-args "
5215 "or subtarget optimization implying it");
5216 cum->nregs = ix86_regparm;
5217 if (TARGET_64BIT)
5218 {
5219 cum->nregs = (cum->call_abi == SYSV_ABI
5220 ? X86_64_REGPARM_MAX
5221 : X86_64_MS_REGPARM_MAX);
5222 }
5223 if (TARGET_SSE)
5224 {
5225 cum->sse_nregs = SSE_REGPARM_MAX;
5226 if (TARGET_64BIT)
5227 {
5228 cum->sse_nregs = (cum->call_abi == SYSV_ABI
5229 ? X86_64_SSE_REGPARM_MAX
5230 : X86_64_MS_SSE_REGPARM_MAX);
5231 }
5232 }
5233 if (TARGET_MMX)
5234 cum->mmx_nregs = MMX_REGPARM_MAX;
5235 cum->warn_avx = true;
5236 cum->warn_sse = true;
5237 cum->warn_mmx = true;
5238
5239 /* Because types might mismatch between caller and callee, we need to
5240 use the actual type of the function for local calls.
5241 FIXME: cgraph_analyze can be told to actually record if a function uses
5242 va_start, so for local functions maybe_vaarg can be made aggressive,
5243 helping K&R code.
5244 FIXME: once the typesystem is fixed, we won't need this code anymore. */
5245 if (i && i->local)
5246 fntype = TREE_TYPE (fndecl);
5247 cum->maybe_vaarg = (fntype
5248 ? (!prototype_p (fntype) || stdarg_p (fntype))
5249 : !libname);
5250
5251 if (!TARGET_64BIT)
5252 {
5253 /* If there are variable arguments, then we won't pass anything
5254 in registers in 32-bit mode. */
5255 if (stdarg_p (fntype))
5256 {
5257 cum->nregs = 0;
5258 cum->sse_nregs = 0;
5259 cum->mmx_nregs = 0;
5260 cum->warn_avx = 0;
5261 cum->warn_sse = 0;
5262 cum->warn_mmx = 0;
5263 return;
5264 }
5265
5266 /* Use ecx and edx registers if function has fastcall attribute,
5267 else look for regparm information. */
5268 if (fntype)
5269 {
5270 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)))
5271 {
5272 cum->nregs = 1;
5273 cum->fastcall = 1; /* Same first register as in fastcall. */
5274 }
5275 else if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
5276 {
5277 cum->nregs = 2;
5278 cum->fastcall = 1;
5279 }
5280 else
5281 cum->nregs = ix86_function_regparm (fntype, fndecl);
5282 }
5283
5284 /* Set up the number of SSE registers used for passing SFmode
5285 and DFmode arguments. Warn for mismatching ABI. */
5286 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
5287 }
5288 }
5289
5290 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
5291 But in the case of vector types, it is some vector mode.
5292
5293 When we have only some of our vector isa extensions enabled, then there
5294 are some modes for which vector_mode_supported_p is false. For these
5295 modes, the generic vector support in gcc will choose some non-vector mode
5296 in order to implement the type. By computing the natural mode, we'll
5297 select the proper ABI location for the operand and not depend on whatever
5298 the middle-end decides to do with these vector types.
5299
5300 The middle-end can't deal with vector types > 16 bytes. In this
5301 case, we return the original mode and warn about the ABI change if
5302 CUM isn't NULL. */
5303
5304 static enum machine_mode
5305 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum)
5306 {
5307 enum machine_mode mode = TYPE_MODE (type);
5308
5309 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
5310 {
5311 HOST_WIDE_INT size = int_size_in_bytes (type);
5312 if ((size == 8 || size == 16 || size == 32)
5313 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
5314 && TYPE_VECTOR_SUBPARTS (type) > 1)
5315 {
5316 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
5317
5318 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
5319 mode = MIN_MODE_VECTOR_FLOAT;
5320 else
5321 mode = MIN_MODE_VECTOR_INT;
5322
5323 /* Get the mode which has this inner mode and number of units. */
5324 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
5325 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
5326 && GET_MODE_INNER (mode) == innermode)
5327 {
5328 if (size == 32 && !TARGET_AVX)
5329 {
5330 static bool warnedavx;
5331
5332 if (cum
5333 && !warnedavx
5334 && cum->warn_avx)
5335 {
5336 warnedavx = true;
5337 warning (0, "AVX vector argument without AVX "
5338 "enabled changes the ABI");
5339 }
5340 return TYPE_MODE (type);
5341 }
5342 else
5343 return mode;
5344 }
5345
5346 gcc_unreachable ();
5347 }
5348 }
5349
5350 return mode;
5351 }
5352
5353 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
5354 this may not agree with the mode that the type system has chosen for the
5355 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
5356 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
5357
5358 static rtx
5359 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
5360 unsigned int regno)
5361 {
5362 rtx tmp;
5363
5364 if (orig_mode != BLKmode)
5365 tmp = gen_rtx_REG (orig_mode, regno);
5366 else
5367 {
5368 tmp = gen_rtx_REG (mode, regno);
5369 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
5370 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
5371 }
5372
5373 return tmp;
5374 }
5375
5376 /* x86-64 register passing implementation. See the x86-64 ABI for details.
5377 The goal of this code is to classify each eightbyte of an incoming argument
5378 by register class and assign registers accordingly. */
5379
5380 /* Return the union class of CLASS1 and CLASS2.
5381 See the x86-64 PS ABI for details. */
5382
5383 static enum x86_64_reg_class
5384 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
5385 {
5386 /* Rule #1: If both classes are equal, this is the resulting class. */
5387 if (class1 == class2)
5388 return class1;
5389
5390 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
5391 the other class. */
5392 if (class1 == X86_64_NO_CLASS)
5393 return class2;
5394 if (class2 == X86_64_NO_CLASS)
5395 return class1;
5396
5397 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
5398 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
5399 return X86_64_MEMORY_CLASS;
5400
5401 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
5402 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
5403 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
5404 return X86_64_INTEGERSI_CLASS;
5405 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
5406 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
5407 return X86_64_INTEGER_CLASS;
5408
5409 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
5410 MEMORY is used. */
5411 if (class1 == X86_64_X87_CLASS
5412 || class1 == X86_64_X87UP_CLASS
5413 || class1 == X86_64_COMPLEX_X87_CLASS
5414 || class2 == X86_64_X87_CLASS
5415 || class2 == X86_64_X87UP_CLASS
5416 || class2 == X86_64_COMPLEX_X87_CLASS)
5417 return X86_64_MEMORY_CLASS;
5418
5419 /* Rule #6: Otherwise class SSE is used. */
5420 return X86_64_SSE_CLASS;
5421 }
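/* Worked example (informal): for struct { int i; float f; } both fields
   share one eightbyte, so INTEGERSI (from i) is merged with SSESF (from f);
   rule #4 above makes the result INTEGERSI, and the whole struct travels
   in a single integer register.  */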
5422
5423 /* Classify the argument of type TYPE and mode MODE.
5424 CLASSES will be filled by the register class used to pass each word
5425 of the operand. The number of words is returned. In case the parameter
5426 should be passed in memory, 0 is returned. As a special case for zero
5427 sized containers, classes[0] will be NO_CLASS and 1 is returned.
5428
5429 BIT_OFFSET is used internally for handling records and specifies the
5430 offset in bits modulo 256 to avoid overflow cases.
5431
5432 See the x86-64 PS ABI for details.
5433 */
5434
5435 static int
5436 classify_argument (enum machine_mode mode, const_tree type,
5437 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
5438 {
5439 HOST_WIDE_INT bytes =
5440 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
5441 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5442
5443 /* Variable sized entities are always passed/returned in memory. */
5444 if (bytes < 0)
5445 return 0;
5446
5447 if (mode != VOIDmode
5448 && targetm.calls.must_pass_in_stack (mode, type))
5449 return 0;
5450
5451 if (type && AGGREGATE_TYPE_P (type))
5452 {
5453 int i;
5454 tree field;
5455 enum x86_64_reg_class subclasses[MAX_CLASSES];
5456
5457 /* On x86-64 we pass structures larger than 32 bytes on the stack. */
5458 if (bytes > 32)
5459 return 0;
5460
5461 for (i = 0; i < words; i++)
5462 classes[i] = X86_64_NO_CLASS;
5463
5464 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
5465 signal the memory class, so handle this as a special case. */
5466 if (!words)
5467 {
5468 classes[0] = X86_64_NO_CLASS;
5469 return 1;
5470 }
5471
5472 /* Classify each field of record and merge classes. */
5473 switch (TREE_CODE (type))
5474 {
5475 case RECORD_TYPE:
5476 /* And now merge the fields of structure. */
5477 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
5478 {
5479 if (TREE_CODE (field) == FIELD_DECL)
5480 {
5481 int num;
5482
5483 if (TREE_TYPE (field) == error_mark_node)
5484 continue;
5485
5486 /* Bitfields are always classified as integer. Handle them
5487 early, since later code would consider them to be
5488 misaligned integers. */
5489 if (DECL_BIT_FIELD (field))
5490 {
5491 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5492 i < ((int_bit_position (field) + (bit_offset % 64))
5493 + tree_low_cst (DECL_SIZE (field), 0)
5494 + 63) / 8 / 8; i++)
5495 classes[i] =
5496 merge_classes (X86_64_INTEGER_CLASS,
5497 classes[i]);
5498 }
5499 else
5500 {
5501 int pos;
5502
5503 type = TREE_TYPE (field);
5504
5505 /* Flexible array member is ignored. */
5506 if (TYPE_MODE (type) == BLKmode
5507 && TREE_CODE (type) == ARRAY_TYPE
5508 && TYPE_SIZE (type) == NULL_TREE
5509 && TYPE_DOMAIN (type) != NULL_TREE
5510 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
5511 == NULL_TREE))
5512 {
5513 static bool warned;
5514
5515 if (!warned && warn_psabi)
5516 {
5517 warned = true;
5518 inform (input_location,
5519 "The ABI of passing struct with"
5520 " a flexible array member has"
5521 " changed in GCC 4.4");
5522 }
5523 continue;
5524 }
5525 num = classify_argument (TYPE_MODE (type), type,
5526 subclasses,
5527 (int_bit_position (field)
5528 + bit_offset) % 256);
5529 if (!num)
5530 return 0;
5531 pos = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5532 for (i = 0; i < num && (i + pos) < words; i++)
5533 classes[i + pos] =
5534 merge_classes (subclasses[i], classes[i + pos]);
5535 }
5536 }
5537 }
5538 break;
5539
5540 case ARRAY_TYPE:
5541 /* Arrays are handled as small records. */
5542 {
5543 int num;
5544 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
5545 TREE_TYPE (type), subclasses, bit_offset);
5546 if (!num)
5547 return 0;
5548
5549 /* The partial classes are now full classes. */
5550 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
5551 subclasses[0] = X86_64_SSE_CLASS;
5552 if (subclasses[0] == X86_64_INTEGERSI_CLASS
5553 && !((bit_offset % 64) == 0 && bytes == 4))
5554 subclasses[0] = X86_64_INTEGER_CLASS;
5555
5556 for (i = 0; i < words; i++)
5557 classes[i] = subclasses[i % num];
5558
5559 break;
5560 }
5561 case UNION_TYPE:
5562 case QUAL_UNION_TYPE:
5563 /* Unions are similar to RECORD_TYPE but the offset is always 0. */
5565 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
5566 {
5567 if (TREE_CODE (field) == FIELD_DECL)
5568 {
5569 int num;
5570
5571 if (TREE_TYPE (field) == error_mark_node)
5572 continue;
5573
5574 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
5575 TREE_TYPE (field), subclasses,
5576 bit_offset);
5577 if (!num)
5578 return 0;
5579 for (i = 0; i < num; i++)
5580 classes[i] = merge_classes (subclasses[i], classes[i]);
5581 }
5582 }
5583 break;
5584
5585 default:
5586 gcc_unreachable ();
5587 }
5588
5589 if (words > 2)
5590 {
5591 /* When size > 16 bytes, if the first one isn't
5592 X86_64_SSE_CLASS or any other ones aren't
5593 X86_64_SSEUP_CLASS, everything should be passed in
5594 memory. */
5595 if (classes[0] != X86_64_SSE_CLASS)
5596 return 0;
5597
5598 for (i = 1; i < words; i++)
5599 if (classes[i] != X86_64_SSEUP_CLASS)
5600 return 0;
5601 }
5602
5603 /* Final merger cleanup. */
5604 for (i = 0; i < words; i++)
5605 {
5606 /* If one class is MEMORY, everything should be passed in
5607 memory. */
5608 if (classes[i] == X86_64_MEMORY_CLASS)
5609 return 0;
5610
5611 /* The X86_64_SSEUP_CLASS should be always preceded by
5612 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
5613 if (classes[i] == X86_64_SSEUP_CLASS
5614 && classes[i - 1] != X86_64_SSE_CLASS
5615 && classes[i - 1] != X86_64_SSEUP_CLASS)
5616 {
5617 /* The first one should never be X86_64_SSEUP_CLASS. */
5618 gcc_assert (i != 0);
5619 classes[i] = X86_64_SSE_CLASS;
5620 }
5621
5622 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
5623 everything should be passed in memory. */
5624 if (classes[i] == X86_64_X87UP_CLASS
5625 && (classes[i - 1] != X86_64_X87_CLASS))
5626 {
5627 static bool warned;
5628
5629 /* The first one should never be X86_64_X87UP_CLASS. */
5630 gcc_assert (i != 0);
5631 if (!warned && warn_psabi)
5632 {
5633 warned = true;
5634 inform (input_location,
5635 "The ABI of passing union with long double"
5636 " has changed in GCC 4.4");
5637 }
5638 return 0;
5639 }
5640 }
5641 return words;
5642 }
5643
5644 /* Compute the alignment needed. We align all types to their natural
5645 boundaries, with the exception of XFmode, which is aligned to 64 bits. */
5646 if (mode != VOIDmode && mode != BLKmode)
5647 {
5648 int mode_alignment = GET_MODE_BITSIZE (mode);
5649
5650 if (mode == XFmode)
5651 mode_alignment = 128;
5652 else if (mode == XCmode)
5653 mode_alignment = 256;
5654 if (COMPLEX_MODE_P (mode))
5655 mode_alignment /= 2;
5656 /* Misaligned fields are always returned in memory. */
5657 if (bit_offset % mode_alignment)
5658 return 0;
5659 }
5660
5661 /* For V1xx modes, just use the base mode. */
5662 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
5663 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
5664 mode = GET_MODE_INNER (mode);
5665
5666 /* Classification of atomic types. */
5667 switch (mode)
5668 {
5669 case SDmode:
5670 case DDmode:
5671 classes[0] = X86_64_SSE_CLASS;
5672 return 1;
5673 case TDmode:
5674 classes[0] = X86_64_SSE_CLASS;
5675 classes[1] = X86_64_SSEUP_CLASS;
5676 return 2;
5677 case DImode:
5678 case SImode:
5679 case HImode:
5680 case QImode:
5681 case CSImode:
5682 case CHImode:
5683 case CQImode:
5684 {
5685 int size = (bit_offset % 64) + (int) GET_MODE_BITSIZE (mode);
5686
5687 if (size <= 32)
5688 {
5689 classes[0] = X86_64_INTEGERSI_CLASS;
5690 return 1;
5691 }
5692 else if (size <= 64)
5693 {
5694 classes[0] = X86_64_INTEGER_CLASS;
5695 return 1;
5696 }
5697 else if (size <= 64+32)
5698 {
5699 classes[0] = X86_64_INTEGER_CLASS;
5700 classes[1] = X86_64_INTEGERSI_CLASS;
5701 return 2;
5702 }
5703 else if (size <= 64+64)
5704 {
5705 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5706 return 2;
5707 }
5708 else
5709 gcc_unreachable ();
5710 }
5711 case CDImode:
5712 case TImode:
5713 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5714 return 2;
5715 case COImode:
5716 case OImode:
5717 /* OImode shouldn't be used directly. */
5718 gcc_unreachable ();
5719 case CTImode:
5720 return 0;
5721 case SFmode:
5722 if (!(bit_offset % 64))
5723 classes[0] = X86_64_SSESF_CLASS;
5724 else
5725 classes[0] = X86_64_SSE_CLASS;
5726 return 1;
5727 case DFmode:
5728 classes[0] = X86_64_SSEDF_CLASS;
5729 return 1;
5730 case XFmode:
5731 classes[0] = X86_64_X87_CLASS;
5732 classes[1] = X86_64_X87UP_CLASS;
5733 return 2;
5734 case TFmode:
5735 classes[0] = X86_64_SSE_CLASS;
5736 classes[1] = X86_64_SSEUP_CLASS;
5737 return 2;
5738 case SCmode:
5739 classes[0] = X86_64_SSE_CLASS;
5740 if (!(bit_offset % 64))
5741 return 1;
5742 else
5743 {
5744 static bool warned;
5745
5746 if (!warned && warn_psabi)
5747 {
5748 warned = true;
5749 inform (input_location,
5750 "The ABI of passing structure with complex float"
5751 " member has changed in GCC 4.4");
5752 }
5753 classes[1] = X86_64_SSESF_CLASS;
5754 return 2;
5755 }
5756 case DCmode:
5757 classes[0] = X86_64_SSEDF_CLASS;
5758 classes[1] = X86_64_SSEDF_CLASS;
5759 return 2;
5760 case XCmode:
5761 classes[0] = X86_64_COMPLEX_X87_CLASS;
5762 return 1;
5763 case TCmode:
5764 /* This mode is larger than 16 bytes. */
5765 return 0;
5766 case V8SFmode:
5767 case V8SImode:
5768 case V32QImode:
5769 case V16HImode:
5770 case V4DFmode:
5771 case V4DImode:
5772 classes[0] = X86_64_SSE_CLASS;
5773 classes[1] = X86_64_SSEUP_CLASS;
5774 classes[2] = X86_64_SSEUP_CLASS;
5775 classes[3] = X86_64_SSEUP_CLASS;
5776 return 4;
5777 case V4SFmode:
5778 case V4SImode:
5779 case V16QImode:
5780 case V8HImode:
5781 case V2DFmode:
5782 case V2DImode:
5783 classes[0] = X86_64_SSE_CLASS;
5784 classes[1] = X86_64_SSEUP_CLASS;
5785 return 2;
5786 case V1TImode:
5787 case V1DImode:
5788 case V2SFmode:
5789 case V2SImode:
5790 case V4HImode:
5791 case V8QImode:
5792 classes[0] = X86_64_SSE_CLASS;
5793 return 1;
5794 case BLKmode:
5795 case VOIDmode:
5796 return 0;
5797 default:
5798 gcc_assert (VECTOR_MODE_P (mode));
5799
5800 if (bytes > 16)
5801 return 0;
5802
5803 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
5804
5805 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
5806 classes[0] = X86_64_INTEGERSI_CLASS;
5807 else
5808 classes[0] = X86_64_INTEGER_CLASS;
5809 classes[1] = X86_64_INTEGER_CLASS;
5810 return 1 + (bytes > 8);
5811 }
5812 }
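/* Informal example of the classification above: on x86-64,

     struct { double d; long l; }

   occupies two eightbytes; classify_argument returns 2 with
   classes[0] = X86_64_SSEDF_CLASS (from d) and
   classes[1] = X86_64_INTEGER_CLASS (from l), so the struct is split
   between an SSE register and an integer register.  */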
5813
5814 /* Examine the argument and set the number of registers required in each
5815 class. Return 0 iff the parameter should be passed in memory. */
5816 static int
5817 examine_argument (enum machine_mode mode, const_tree type, int in_return,
5818 int *int_nregs, int *sse_nregs)
5819 {
5820 enum x86_64_reg_class regclass[MAX_CLASSES];
5821 int n = classify_argument (mode, type, regclass, 0);
5822
5823 *int_nregs = 0;
5824 *sse_nregs = 0;
5825 if (!n)
5826 return 0;
5827 for (n--; n >= 0; n--)
5828 switch (regclass[n])
5829 {
5830 case X86_64_INTEGER_CLASS:
5831 case X86_64_INTEGERSI_CLASS:
5832 (*int_nregs)++;
5833 break;
5834 case X86_64_SSE_CLASS:
5835 case X86_64_SSESF_CLASS:
5836 case X86_64_SSEDF_CLASS:
5837 (*sse_nregs)++;
5838 break;
5839 case X86_64_NO_CLASS:
5840 case X86_64_SSEUP_CLASS:
5841 break;
5842 case X86_64_X87_CLASS:
5843 case X86_64_X87UP_CLASS:
5844 if (!in_return)
5845 return 0;
5846 break;
5847 case X86_64_COMPLEX_X87_CLASS:
5848 return in_return ? 2 : 0;
5849 case X86_64_MEMORY_CLASS:
5850 gcc_unreachable ();
5851 }
5852 return 1;
5853 }
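/* Continuing the struct { double d; long l; } example (informal):
   examine_argument sets *int_nregs = 1 and *sse_nregs = 1 and returns
   nonzero, so the caller knows one integer and one SSE register are
   needed before committing to a register assignment.  */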
5854
5855 /* Construct a container for the argument used by the GCC interface. See
5856 FUNCTION_ARG for the detailed description. */
5857
5858 static rtx
5859 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
5860 const_tree type, int in_return, int nintregs, int nsseregs,
5861 const int *intreg, int sse_regno)
5862 {
5863 /* The following variables hold the static issued_error state. */
5864 static bool issued_sse_arg_error;
5865 static bool issued_sse_ret_error;
5866 static bool issued_x87_ret_error;
5867
5868 enum machine_mode tmpmode;
5869 int bytes =
5870 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
5871 enum x86_64_reg_class regclass[MAX_CLASSES];
5872 int n;
5873 int i;
5874 int nexps = 0;
5875 int needed_sseregs, needed_intregs;
5876 rtx exp[MAX_CLASSES];
5877 rtx ret;
5878
5879 n = classify_argument (mode, type, regclass, 0);
5880 if (!n)
5881 return NULL;
5882 if (!examine_argument (mode, type, in_return, &needed_intregs,
5883 &needed_sseregs))
5884 return NULL;
5885 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
5886 return NULL;
5887
5888 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
5889 some less clueful developer tries to use floating-point anyway. */
5890 if (needed_sseregs && !TARGET_SSE)
5891 {
5892 if (in_return)
5893 {
5894 if (!issued_sse_ret_error)
5895 {
5896 error ("SSE register return with SSE disabled");
5897 issued_sse_ret_error = true;
5898 }
5899 }
5900 else if (!issued_sse_arg_error)
5901 {
5902 error ("SSE register argument with SSE disabled");
5903 issued_sse_arg_error = true;
5904 }
5905 return NULL;
5906 }
5907
5908 /* Likewise, error if the ABI requires us to return values in the
5909 x87 registers and the user specified -mno-80387. */
5910 if (!TARGET_80387 && in_return)
5911 for (i = 0; i < n; i++)
5912 if (regclass[i] == X86_64_X87_CLASS
5913 || regclass[i] == X86_64_X87UP_CLASS
5914 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
5915 {
5916 if (!issued_x87_ret_error)
5917 {
5918 error ("x87 register return with x87 disabled");
5919 issued_x87_ret_error = true;
5920 }
5921 return NULL;
5922 }
5923
5924 /* First construct the simple cases. Avoid SCmode, since we want to use
5925 a single register to pass this type. */
5926 if (n == 1 && mode != SCmode)
5927 switch (regclass[0])
5928 {
5929 case X86_64_INTEGER_CLASS:
5930 case X86_64_INTEGERSI_CLASS:
5931 return gen_rtx_REG (mode, intreg[0]);
5932 case X86_64_SSE_CLASS:
5933 case X86_64_SSESF_CLASS:
5934 case X86_64_SSEDF_CLASS:
5935 if (mode != BLKmode)
5936 return gen_reg_or_parallel (mode, orig_mode,
5937 SSE_REGNO (sse_regno));
5938 break;
5939 case X86_64_X87_CLASS:
5940 case X86_64_COMPLEX_X87_CLASS:
5941 return gen_rtx_REG (mode, FIRST_STACK_REG);
5942 case X86_64_NO_CLASS:
5943 /* Zero sized array, struct or class. */
5944 return NULL;
5945 default:
5946 gcc_unreachable ();
5947 }
5948 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
5949 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
5950 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
5951 if (n == 4
5952 && regclass[0] == X86_64_SSE_CLASS
5953 && regclass[1] == X86_64_SSEUP_CLASS
5954 && regclass[2] == X86_64_SSEUP_CLASS
5955 && regclass[3] == X86_64_SSEUP_CLASS
5956 && mode != BLKmode)
5957 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
5958
5959 if (n == 2
5960 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
5961 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
5962 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
5963 && regclass[1] == X86_64_INTEGER_CLASS
5964 && (mode == CDImode || mode == TImode || mode == TFmode)
5965 && intreg[0] + 1 == intreg[1])
5966 return gen_rtx_REG (mode, intreg[0]);
5967
5968 /* Otherwise figure out the entries of the PARALLEL. */
5969 for (i = 0; i < n; i++)
5970 {
5971 int pos;
5972
5973 switch (regclass[i])
5974 {
5975 case X86_64_NO_CLASS:
5976 break;
5977 case X86_64_INTEGER_CLASS:
5978 case X86_64_INTEGERSI_CLASS:
5979 /* Merge TImodes on aligned occasions here too. */
5980 if (i * 8 + 8 > bytes)
5981 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
5982 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
5983 tmpmode = SImode;
5984 else
5985 tmpmode = DImode;
5986 /* We've requested 24 bytes for which we don't have a mode. Use DImode. */
5987 if (tmpmode == BLKmode)
5988 tmpmode = DImode;
5989 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5990 gen_rtx_REG (tmpmode, *intreg),
5991 GEN_INT (i*8));
5992 intreg++;
5993 break;
5994 case X86_64_SSESF_CLASS:
5995 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5996 gen_rtx_REG (SFmode,
5997 SSE_REGNO (sse_regno)),
5998 GEN_INT (i*8));
5999 sse_regno++;
6000 break;
6001 case X86_64_SSEDF_CLASS:
6002 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6003 gen_rtx_REG (DFmode,
6004 SSE_REGNO (sse_regno)),
6005 GEN_INT (i*8));
6006 sse_regno++;
6007 break;
6008 case X86_64_SSE_CLASS:
6009 pos = i;
6010 switch (n)
6011 {
6012 case 1:
6013 tmpmode = DImode;
6014 break;
6015 case 2:
6016 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
6017 {
6018 tmpmode = TImode;
6019 i++;
6020 }
6021 else
6022 tmpmode = DImode;
6023 break;
6024 case 4:
6025 gcc_assert (i == 0
6026 && regclass[1] == X86_64_SSEUP_CLASS
6027 && regclass[2] == X86_64_SSEUP_CLASS
6028 && regclass[3] == X86_64_SSEUP_CLASS);
6029 tmpmode = OImode;
6030 i += 3;
6031 break;
6032 default:
6033 gcc_unreachable ();
6034 }
6035 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6036 gen_rtx_REG (tmpmode,
6037 SSE_REGNO (sse_regno)),
6038 GEN_INT (pos*8));
6039 sse_regno++;
6040 break;
6041 default:
6042 gcc_unreachable ();
6043 }
6044 }
6045
6046 /* Empty aligned struct, union or class. */
6047 if (nexps == 0)
6048 return NULL;
6049
6050 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
6051 for (i = 0; i < nexps; i++)
6052 XVECEXP (ret, 0, i) = exp [i];
6053 return ret;
6054 }
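/* Illustration (simplified, assuming this is the first argument and enough
   registers are free): for struct { double d; long l; } construct_container
   builds roughly

     (parallel [(expr_list (reg:DF xmm0) (const_int 0))
                (expr_list (reg:DI di)   (const_int 8))])

   i.e. the first eightbyte is passed in %xmm0 and the second in %rdi.  */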
6055
6056 /* Update the data in CUM to advance over an argument of mode MODE
6057 and data type TYPE. (TYPE is null for libcalls where that information
6058 may not be available.) */
6059
6060 static void
6061 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6062 const_tree type, HOST_WIDE_INT bytes,
6063 HOST_WIDE_INT words)
6064 {
6065 switch (mode)
6066 {
6067 default:
6068 break;
6069
6070 case BLKmode:
6071 if (bytes < 0)
6072 break;
6073 /* FALLTHRU */
6074
6075 case DImode:
6076 case SImode:
6077 case HImode:
6078 case QImode:
6079 cum->words += words;
6080 cum->nregs -= words;
6081 cum->regno += words;
6082
6083 if (cum->nregs <= 0)
6084 {
6085 cum->nregs = 0;
6086 cum->regno = 0;
6087 }
6088 break;
6089
6090 case OImode:
6091 /* OImode shouldn't be used directly. */
6092 gcc_unreachable ();
6093
6094 case DFmode:
6095 if (cum->float_in_sse < 2)
6096 break;
6097 case SFmode:
6098 if (cum->float_in_sse < 1)
6099 break;
6100 /* FALLTHRU */
6101
6102 case V8SFmode:
6103 case V8SImode:
6104 case V32QImode:
6105 case V16HImode:
6106 case V4DFmode:
6107 case V4DImode:
6108 case TImode:
6109 case V16QImode:
6110 case V8HImode:
6111 case V4SImode:
6112 case V2DImode:
6113 case V4SFmode:
6114 case V2DFmode:
6115 if (!type || !AGGREGATE_TYPE_P (type))
6116 {
6117 cum->sse_words += words;
6118 cum->sse_nregs -= 1;
6119 cum->sse_regno += 1;
6120 if (cum->sse_nregs <= 0)
6121 {
6122 cum->sse_nregs = 0;
6123 cum->sse_regno = 0;
6124 }
6125 }
6126 break;
6127
6128 case V8QImode:
6129 case V4HImode:
6130 case V2SImode:
6131 case V2SFmode:
6132 case V1TImode:
6133 case V1DImode:
6134 if (!type || !AGGREGATE_TYPE_P (type))
6135 {
6136 cum->mmx_words += words;
6137 cum->mmx_nregs -= 1;
6138 cum->mmx_regno += 1;
6139 if (cum->mmx_nregs <= 0)
6140 {
6141 cum->mmx_nregs = 0;
6142 cum->mmx_regno = 0;
6143 }
6144 }
6145 break;
6146 }
6147 }
6148
6149 static void
6150 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6151 const_tree type, HOST_WIDE_INT words, bool named)
6152 {
6153 int int_nregs, sse_nregs;
6154
6155 /* Unnamed 256bit vector mode parameters are passed on stack. */
6156 if (!named && VALID_AVX256_REG_MODE (mode))
6157 return;
6158
6159 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
6160 cum->words += words;
6161 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
6162 {
6163 cum->nregs -= int_nregs;
6164 cum->sse_nregs -= sse_nregs;
6165 cum->regno += int_nregs;
6166 cum->sse_regno += sse_nregs;
6167 }
6168 else
6169 cum->words += words;
6170 }
6171
6172 static void
6173 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
6174 HOST_WIDE_INT words)
6175 {
6176 /* Otherwise, this should be passed indirectly. */
6177 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
6178
6179 cum->words += words;
6180 if (cum->nregs > 0)
6181 {
6182 cum->nregs -= 1;
6183 cum->regno += 1;
6184 }
6185 }
6186
6187 /* Update the data in CUM to advance over an argument of mode MODE and
6188 data type TYPE. (TYPE is null for libcalls where that information
6189 may not be available.) */
6190
6191 static void
6192 ix86_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6193 const_tree type, bool named)
6194 {
6195 HOST_WIDE_INT bytes, words;
6196
6197 if (mode == BLKmode)
6198 bytes = int_size_in_bytes (type);
6199 else
6200 bytes = GET_MODE_SIZE (mode);
6201 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6202
6203 if (type)
6204 mode = type_natural_mode (type, NULL);
6205
6206 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6207 function_arg_advance_ms_64 (cum, bytes, words);
6208 else if (TARGET_64BIT)
6209 function_arg_advance_64 (cum, mode, type, words, named);
6210 else
6211 function_arg_advance_32 (cum, mode, type, bytes, words);
6212 }
6213
6214 /* Define where to put the arguments to a function.
6215 Value is zero to push the argument on the stack,
6216 or a hard register in which to store the argument.
6217
6218 MODE is the argument's machine mode.
6219 TYPE is the data type of the argument (as a tree).
6220 This is null for libcalls where that information may
6221 not be available.
6222 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6223 the preceding args and about the function being called.
6224 NAMED is nonzero if this argument is a named parameter
6225 (otherwise it is an extra parameter matching an ellipsis). */
6226
6227 static rtx
6228 function_arg_32 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6229 enum machine_mode orig_mode, const_tree type,
6230 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
6231 {
6232 static bool warnedsse, warnedmmx;
6233
6234 /* Avoid the AL settings for the Unix64 ABI. */
6235 if (mode == VOIDmode)
6236 return constm1_rtx;
6237
6238 switch (mode)
6239 {
6240 default:
6241 break;
6242
6243 case BLKmode:
6244 if (bytes < 0)
6245 break;
6246 /* FALLTHRU */
6247 case DImode:
6248 case SImode:
6249 case HImode:
6250 case QImode:
6251 if (words <= cum->nregs)
6252 {
6253 int regno = cum->regno;
6254
6255 /* Fastcall allocates the first two DWORD (SImode) or
6256 smaller arguments to ECX and EDX if they aren't
6257 aggregate types. */
6258 if (cum->fastcall)
6259 {
6260 if (mode == BLKmode
6261 || mode == DImode
6262 || (type && AGGREGATE_TYPE_P (type)))
6263 break;
6264
6265 /* ECX not EAX is the first allocated register. */
6266 if (regno == AX_REG)
6267 regno = CX_REG;
6268 }
6269 return gen_rtx_REG (mode, regno);
6270 }
6271 break;
6272
6273 case DFmode:
6274 if (cum->float_in_sse < 2)
6275 break;
6276 case SFmode:
6277 if (cum->float_in_sse < 1)
6278 break;
6279 /* FALLTHRU */
6280 case TImode:
6281 /* In 32bit, we pass TImode in xmm registers. */
6282 case V16QImode:
6283 case V8HImode:
6284 case V4SImode:
6285 case V2DImode:
6286 case V4SFmode:
6287 case V2DFmode:
6288 if (!type || !AGGREGATE_TYPE_P (type))
6289 {
6290 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
6291 {
6292 warnedsse = true;
6293 warning (0, "SSE vector argument without SSE enabled "
6294 "changes the ABI");
6295 }
6296 if (cum->sse_nregs)
6297 return gen_reg_or_parallel (mode, orig_mode,
6298 cum->sse_regno + FIRST_SSE_REG);
6299 }
6300 break;
6301
6302 case OImode:
6303 /* OImode shouldn't be used directly. */
6304 gcc_unreachable ();
6305
6306 case V8SFmode:
6307 case V8SImode:
6308 case V32QImode:
6309 case V16HImode:
6310 case V4DFmode:
6311 case V4DImode:
6312 if (!type || !AGGREGATE_TYPE_P (type))
6313 {
6314 if (cum->sse_nregs)
6315 return gen_reg_or_parallel (mode, orig_mode,
6316 cum->sse_regno + FIRST_SSE_REG);
6317 }
6318 break;
6319
6320 case V8QImode:
6321 case V4HImode:
6322 case V2SImode:
6323 case V2SFmode:
6324 case V1TImode:
6325 case V1DImode:
6326 if (!type || !AGGREGATE_TYPE_P (type))
6327 {
6328 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
6329 {
6330 warnedmmx = true;
6331 warning (0, "MMX vector argument without MMX enabled "
6332 "changes the ABI");
6333 }
6334 if (cum->mmx_nregs)
6335 return gen_reg_or_parallel (mode, orig_mode,
6336 cum->mmx_regno + FIRST_MMX_REG);
6337 }
6338 break;
6339 }
6340
6341 return NULL_RTX;
6342 }
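/* Informal example: for a 32-bit call to

     void __attribute__((fastcall)) f (int a, int b, int c);

   the code above returns (reg:SI cx) for a and (reg:SI dx) for b, while c
   gets NULL_RTX and is therefore pushed on the stack by the caller.  */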
6343
6344 static rtx
6345 function_arg_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6346 enum machine_mode orig_mode, const_tree type, bool named)
6347 {
6348 /* Handle a hidden AL argument containing number of registers
6349 for varargs x86-64 functions. */
6350 if (mode == VOIDmode)
6351 return GEN_INT (cum->maybe_vaarg
6352 ? (cum->sse_nregs < 0
6353 ? X86_64_SSE_REGPARM_MAX
6354 : cum->sse_regno)
6355 : -1);
6356
6357 switch (mode)
6358 {
6359 default:
6360 break;
6361
6362 case V8SFmode:
6363 case V8SImode:
6364 case V32QImode:
6365 case V16HImode:
6366 case V4DFmode:
6367 case V4DImode:
6368 /* Unnamed 256bit vector mode parameters are passed on stack. */
6369 if (!named)
6370 return NULL;
6371 break;
6372 }
6373
6374 return construct_container (mode, orig_mode, type, 0, cum->nregs,
6375 cum->sse_nregs,
6376 &x86_64_int_parameter_registers [cum->regno],
6377 cum->sse_regno);
6378 }
6379
6380 static rtx
6381 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6382 enum machine_mode orig_mode, bool named,
6383 HOST_WIDE_INT bytes)
6384 {
6385 unsigned int regno;
6386
6387 /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
6388 We use the value -2 to specify that the current function call is MS ABI. */
6389 if (mode == VOIDmode)
6390 return GEN_INT (-2);
6391
6392 /* If we've run out of registers, it goes on the stack. */
6393 if (cum->nregs == 0)
6394 return NULL_RTX;
6395
6396 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
6397
6398 /* Only floating point modes are passed in anything but integer regs. */
6399 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
6400 {
6401 if (named)
6402 regno = cum->regno + FIRST_SSE_REG;
6403 else
6404 {
6405 rtx t1, t2;
6406
6407 /* Unnamed floating parameters are passed in both the
6408 SSE and integer registers. */
6409 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
6410 t2 = gen_rtx_REG (mode, regno);
6411 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
6412 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
6413 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
6414 }
6415 }
6416 /* Handle aggregate types passed in registers. */
6417 if (orig_mode == BLKmode)
6418 {
6419 if (bytes > 0 && bytes <= 8)
6420 mode = (bytes > 4 ? DImode : SImode);
6421 if (mode == BLKmode)
6422 mode = DImode;
6423 }
6424
6425 return gen_reg_or_parallel (mode, orig_mode, regno);
6426 }
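/* Illustration (informal): for a varargs call under the MS ABI such as
   printf ("%f", 1.0), the unnamed double comes back as a PARALLEL of the
   SSE register and the matching integer register (e.g. %xmm1 and %rdx for
   the second argument slot), so the value is available in both places.  */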
6427
6428 /* Return where to put the arguments to a function.
6429 Return zero to push the argument on the stack, or a hard register in which to store the argument.
6430
6431 MODE is the argument's machine mode. TYPE is the data type of the
6432 argument. It is null for libcalls where that information may not be
6433 available. CUM gives information about the preceding args and about
6434 the function being called. NAMED is nonzero if this argument is a
6435 named parameter (otherwise it is an extra parameter matching an
6436 ellipsis). */
6437
6438 static rtx
6439 ix86_function_arg (const CUMULATIVE_ARGS *cum, enum machine_mode omode,
6440 const_tree type, bool named)
6441 {
6442 enum machine_mode mode = omode;
6443 HOST_WIDE_INT bytes, words;
6444
6445 if (mode == BLKmode)
6446 bytes = int_size_in_bytes (type);
6447 else
6448 bytes = GET_MODE_SIZE (mode);
6449 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6450
6451 /* To simplify the code below, represent vector types with a vector mode
6452 even if MMX/SSE are not active. */
6453 if (type && TREE_CODE (type) == VECTOR_TYPE)
6454 mode = type_natural_mode (type, cum);
6455
6456 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6457 return function_arg_ms_64 (cum, mode, omode, named, bytes);
6458 else if (TARGET_64BIT)
6459 return function_arg_64 (cum, mode, omode, type, named);
6460 else
6461 return function_arg_32 (cum, mode, omode, type, bytes, words);
6462 }
6463
6464 /* A C expression that indicates when an argument must be passed by
6465 reference. If nonzero for an argument, a copy of that argument is
6466 made in memory and a pointer to the argument is passed instead of
6467 the argument itself. The pointer is passed in whatever way is
6468 appropriate for passing a pointer to that type. */
6469
6470 static bool
6471 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
6472 enum machine_mode mode ATTRIBUTE_UNUSED,
6473 const_tree type, bool named ATTRIBUTE_UNUSED)
6474 {
6475 /* See Windows x64 Software Convention. */
6476 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6477 {
6478 int msize = (int) GET_MODE_SIZE (mode);
6479 if (type)
6480 {
6481 /* Arrays are passed by reference. */
6482 if (TREE_CODE (type) == ARRAY_TYPE)
6483 return true;
6484
6485 if (AGGREGATE_TYPE_P (type))
6486 {
6487 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
6488 are passed by reference. */
6489 msize = int_size_in_bytes (type);
6490 }
6491 }
6492
6493 /* __m128 is passed by reference. */
6494 switch (msize) {
6495 case 1: case 2: case 4: case 8:
6496 break;
6497 default:
6498 return true;
6499 }
6500 }
6501 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
6502 return 1;
6503
6504 return 0;
6505 }
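/* Informal example: under the 64-bit MS ABI a 16-byte __m128 argument
   fails the 1/2/4/8 size check above and is therefore passed by reference;
   the caller builds a suitably aligned temporary and passes its address
   in the corresponding integer register.  */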
6506
6507 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
6508 ABI. */
6509 static bool
6510 contains_aligned_value_p (tree type)
6511 {
6512 enum machine_mode mode = TYPE_MODE (type);
6513 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
6514 || mode == TDmode
6515 || mode == TFmode
6516 || mode == TCmode)
6517 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
6518 return true;
6519 if (TYPE_ALIGN (type) < 128)
6520 return false;
6521
6522 if (AGGREGATE_TYPE_P (type))
6523 {
6524 /* Walk the aggregates recursively. */
6525 switch (TREE_CODE (type))
6526 {
6527 case RECORD_TYPE:
6528 case UNION_TYPE:
6529 case QUAL_UNION_TYPE:
6530 {
6531 tree field;
6532
6533 /* Walk all the structure fields. */
6534 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6535 {
6536 if (TREE_CODE (field) == FIELD_DECL
6537 && contains_aligned_value_p (TREE_TYPE (field)))
6538 return true;
6539 }
6540 break;
6541 }
6542
6543 case ARRAY_TYPE:
6544 /* Just for use if some languages pass arrays by value. */
6545 if (contains_aligned_value_p (TREE_TYPE (type)))
6546 return true;
6547 break;
6548
6549 default:
6550 gcc_unreachable ();
6551 }
6552 }
6553 return false;
6554 }
6555
6556 /* Gives the alignment boundary, in bits, of an argument with the
6557 specified mode and type. */
6558
6559 int
6560 ix86_function_arg_boundary (enum machine_mode mode, tree type)
6561 {
6562 int align;
6563 if (type)
6564 {
6565 /* Since the main variant type is used for the call, convert the
6566 type to its main variant. */
6567 type = TYPE_MAIN_VARIANT (type);
6568 align = TYPE_ALIGN (type);
6569 }
6570 else
6571 align = GET_MODE_ALIGNMENT (mode);
6572 if (align < PARM_BOUNDARY)
6573 align = PARM_BOUNDARY;
6574 /* In 32bit, only _Decimal128 and __float128 are aligned to their
6575 natural boundaries. */
6576 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
6577 {
6578 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
6579 make an exception for SSE modes since these require 128bit
6580 alignment.
6581
6582 The handling here differs from field_alignment. ICC aligns MMX
6583 arguments to 4 byte boundaries, while structure fields are aligned
6584 to 8 byte boundaries. */
6585 if (!type)
6586 {
6587 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
6588 align = PARM_BOUNDARY;
6589 }
6590 else
6591 {
6592 if (!contains_aligned_value_p (type))
6593 align = PARM_BOUNDARY;
6594 }
6595 }
6596 if (align > BIGGEST_ALIGNMENT)
6597 align = BIGGEST_ALIGNMENT;
6598 return align;
6599 }
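/* Informal example: in 32-bit mode a plain int argument keeps the 4-byte
   PARM_BOUNDARY, while a __m128 argument (or a struct that contains one)
   is reported as 128-bit aligned, so its stack slot is kept 16-byte
   aligned.  */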
6600
6601 /* Return true if N is a possible register number of function value. */
6602
6603 static bool
6604 ix86_function_value_regno_p (const unsigned int regno)
6605 {
6606 switch (regno)
6607 {
6608 case 0:
6609 return true;
6610
6611 case FIRST_FLOAT_REG:
6612 /* TODO: The function should depend on current function ABI but
6613 builtins.c would need updating then. Therefore we use the
6614 default ABI. */
6615 if (TARGET_64BIT && ix86_abi == MS_ABI)
6616 return false;
6617 return TARGET_FLOAT_RETURNS_IN_80387;
6618
6619 case FIRST_SSE_REG:
6620 return TARGET_SSE;
6621
6622 case FIRST_MMX_REG:
6623 if (TARGET_MACHO || TARGET_64BIT)
6624 return false;
6625 return TARGET_MMX;
6626 }
6627
6628 return false;
6629 }
6630
6631 /* Define how to find the value returned by a function.
6632 VALTYPE is the data type of the value (as a tree).
6633 If the precise function being called is known, FUNC is its FUNCTION_DECL;
6634 otherwise, FUNC is 0. */
6635
6636 static rtx
6637 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
6638 const_tree fntype, const_tree fn)
6639 {
6640 unsigned int regno;
6641
6642 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
6643 we normally prevent this case when mmx is not available. However
6644 some ABIs may require the result to be returned like DImode. */
6645 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
6646 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
6647
6648 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
6649 we prevent this case when sse is not available. However some ABIs
6650 may require the result to be returned like integer TImode. */
6651 else if (mode == TImode
6652 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
6653 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
6654
6655 /* 32-byte vector modes in %ymm0. */
6656 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
6657 regno = TARGET_AVX ? FIRST_SSE_REG : 0;
6658
6659 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
6660 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
6661 regno = FIRST_FLOAT_REG;
6662 else
6663 /* Most things go in %eax. */
6664 regno = AX_REG;
6665
6666 /* Override FP return register with %xmm0 for local functions when
6667 SSE math is enabled or for functions with sseregparm attribute. */
6668 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
6669 {
6670 int sse_level = ix86_function_sseregparm (fntype, fn, false);
6671 if ((sse_level >= 1 && mode == SFmode)
6672 || (sse_level == 2 && mode == DFmode))
6673 regno = FIRST_SSE_REG;
6674 }
6675
6676 /* OImode shouldn't be used directly. */
6677 gcc_assert (mode != OImode);
6678
6679 return gen_rtx_REG (orig_mode, regno);
6680 }
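/* Informal example: a 32-bit function returning float normally uses
   %st(0); if it is a local function compiled with SSE math (or carries
   the sseregparm attribute), the override above switches the return
   register to %xmm0 instead.  */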
6681
6682 static rtx
6683 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
6684 const_tree valtype)
6685 {
6686 rtx ret;
6687
6688 /* Handle libcalls, which don't provide a type node. */
6689 if (valtype == NULL)
6690 {
6691 switch (mode)
6692 {
6693 case SFmode:
6694 case SCmode:
6695 case DFmode:
6696 case DCmode:
6697 case TFmode:
6698 case SDmode:
6699 case DDmode:
6700 case TDmode:
6701 return gen_rtx_REG (mode, FIRST_SSE_REG);
6702 case XFmode:
6703 case XCmode:
6704 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
6705 case TCmode:
6706 return NULL;
6707 default:
6708 return gen_rtx_REG (mode, AX_REG);
6709 }
6710 }
6711
6712 ret = construct_container (mode, orig_mode, valtype, 1,
6713 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
6714 x86_64_int_return_registers, 0);
6715
6716 /* For zero sized structures, construct_container returns NULL, but we
6717 need to keep the rest of the compiler happy by returning a meaningful value. */
6718 if (!ret)
6719 ret = gen_rtx_REG (orig_mode, AX_REG);
6720
6721 return ret;
6722 }
6723
6724 static rtx
6725 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
6726 {
6727 unsigned int regno = AX_REG;
6728
6729 if (TARGET_SSE)
6730 {
6731 switch (GET_MODE_SIZE (mode))
6732 {
6733 case 16:
6734 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
6735 && !COMPLEX_MODE_P (mode))
6736 regno = FIRST_SSE_REG;
6737 break;
6738 case 8:
6739 case 4:
6740 if (mode == SFmode || mode == DFmode)
6741 regno = FIRST_SSE_REG;
6742 break;
6743 default:
6744 break;
6745 }
6746 }
6747 return gen_rtx_REG (orig_mode, regno);
6748 }
6749
6750 static rtx
6751 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
6752 enum machine_mode orig_mode, enum machine_mode mode)
6753 {
6754 const_tree fn, fntype;
6755
6756 fn = NULL_TREE;
6757 if (fntype_or_decl && DECL_P (fntype_or_decl))
6758 fn = fntype_or_decl;
6759 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
6760
6761 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
6762 return function_value_ms_64 (orig_mode, mode);
6763 else if (TARGET_64BIT)
6764 return function_value_64 (orig_mode, mode, valtype);
6765 else
6766 return function_value_32 (orig_mode, mode, fntype, fn);
6767 }
6768
6769 static rtx
6770 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
6771 bool outgoing ATTRIBUTE_UNUSED)
6772 {
6773 enum machine_mode mode, orig_mode;
6774
6775 orig_mode = TYPE_MODE (valtype);
6776 mode = type_natural_mode (valtype, NULL);
6777 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
6778 }
6779
6780 rtx
6781 ix86_libcall_value (enum machine_mode mode)
6782 {
6783 return ix86_function_value_1 (NULL, NULL, mode, mode);
6784 }
6785
6786 /* Return true iff type is returned in memory. */
6787
6788 static int ATTRIBUTE_UNUSED
6789 return_in_memory_32 (const_tree type, enum machine_mode mode)
6790 {
6791 HOST_WIDE_INT size;
6792
6793 if (mode == BLKmode)
6794 return 1;
6795
6796 size = int_size_in_bytes (type);
6797
6798 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
6799 return 0;
6800
6801 if (VECTOR_MODE_P (mode) || mode == TImode)
6802 {
6803 /* User-created vectors small enough to fit in EAX. */
6804 if (size < 8)
6805 return 0;
6806
6807 /* MMX/3dNow values are returned in MM0,
6808 except when it doesn't exist. */
6809 if (size == 8)
6810 return (TARGET_MMX ? 0 : 1);
6811
6812 /* SSE values are returned in XMM0, except when it doesn't exist. */
6813 if (size == 16)
6814 return (TARGET_SSE ? 0 : 1);
6815
6816 /* AVX values are returned in YMM0, except when it doesn't exist. */
6817 if (size == 32)
6818 return TARGET_AVX ? 0 : 1;
6819 }
6820
6821 if (mode == XFmode)
6822 return 0;
6823
6824 if (size > 12)
6825 return 1;
6826
6827 /* OImode shouldn't be used directly. */
6828 gcc_assert (mode != OImode);
6829
6830 return 0;
6831 }
6832
6833 static int ATTRIBUTE_UNUSED
6834 return_in_memory_64 (const_tree type, enum machine_mode mode)
6835 {
6836 int needed_intregs, needed_sseregs;
6837 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
6838 }
6839
6840 static int ATTRIBUTE_UNUSED
6841 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
6842 {
6843 HOST_WIDE_INT size = int_size_in_bytes (type);
6844
6845 /* __m128 is returned in xmm0. */
6846 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
6847 && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
6848 return 0;
6849
6850 /* Otherwise, the size must be exactly in [1248]. */
6851 return (size != 1 && size != 2 && size != 4 && size != 8);
6852 }
6853
6854 static bool
6855 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
6856 {
6857 #ifdef SUBTARGET_RETURN_IN_MEMORY
6858 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
6859 #else
6860 const enum machine_mode mode = type_natural_mode (type, NULL);
6861
6862 if (TARGET_64BIT)
6863 {
6864 if (ix86_function_type_abi (fntype) == MS_ABI)
6865 return return_in_memory_ms_64 (type, mode);
6866 else
6867 return return_in_memory_64 (type, mode);
6868 }
6869 else
6870 return return_in_memory_32 (type, mode);
6871 #endif
6872 }
6873
6874 /* Return true iff TYPE is returned in memory. This version is used
6875 on Solaris 10. It is similar to the generic ix86_return_in_memory,
6876 but differs notably in that when MMX is available, 8-byte vectors
6877 are returned in memory, rather than in MMX registers. */
6878
6879 bool
6880 ix86_sol10_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
6881 {
6882 int size;
6883 enum machine_mode mode = type_natural_mode (type, NULL);
6884
6885 if (TARGET_64BIT)
6886 return return_in_memory_64 (type, mode);
6887
6888 if (mode == BLKmode)
6889 return 1;
6890
6891 size = int_size_in_bytes (type);
6892
6893 if (VECTOR_MODE_P (mode))
6894 {
6895 /* Return in memory only if MMX registers *are* available. This
6896 seems backwards, but it is consistent with the existing
6897 Solaris x86 ABI. */
6898 if (size == 8)
6899 return TARGET_MMX;
6900 if (size == 16)
6901 return !TARGET_SSE;
6902 }
6903 else if (mode == TImode)
6904 return !TARGET_SSE;
6905 else if (mode == XFmode)
6906 return 0;
6907
6908 return size > 12;
6909 }
6910
6911 /* When returning SSE vector types, we have a choice of either
6912 (1) being ABI incompatible with a -march switch, or
6913 (2) generating an error.
6914 Given no good solution, I think the safest thing is one warning.
6915 The user won't be able to use -Werror, but....
6916
6917 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
6918 called in response to actually generating a caller or callee that
6919 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
6920 via aggregate_value_p for general type probing from tree-ssa. */
6921
6922 static rtx
6923 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
6924 {
6925 static bool warnedsse, warnedmmx;
6926
6927 if (!TARGET_64BIT && type)
6928 {
6929 /* Look at the return type of the function, not the function type. */
6930 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
6931
6932 if (!TARGET_SSE && !warnedsse)
6933 {
6934 if (mode == TImode
6935 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
6936 {
6937 warnedsse = true;
6938 warning (0, "SSE vector return without SSE enabled "
6939 "changes the ABI");
6940 }
6941 }
6942
6943 if (!TARGET_MMX && !warnedmmx)
6944 {
6945 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
6946 {
6947 warnedmmx = true;
6948 warning (0, "MMX vector return without MMX enabled "
6949 "changes the ABI");
6950 }
6951 }
6952 }
6953
6954 return NULL;
6955 }
6956
6957 \f
6958 /* Create the va_list data type. */
6959
6960 /* Returns the calling-convention-specific va_list data type.
6961 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
6962
6963 static tree
6964 ix86_build_builtin_va_list_abi (enum calling_abi abi)
6965 {
6966 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
6967
6968 /* For i386 we use plain pointer to argument area. */
6969 if (!TARGET_64BIT || abi == MS_ABI)
6970 return build_pointer_type (char_type_node);
6971
6972 record = lang_hooks.types.make_type (RECORD_TYPE);
6973 type_decl = build_decl (BUILTINS_LOCATION,
6974 TYPE_DECL, get_identifier ("__va_list_tag"), record);
6975
6976 f_gpr = build_decl (BUILTINS_LOCATION,
6977 FIELD_DECL, get_identifier ("gp_offset"),
6978 unsigned_type_node);
6979 f_fpr = build_decl (BUILTINS_LOCATION,
6980 FIELD_DECL, get_identifier ("fp_offset"),
6981 unsigned_type_node);
6982 f_ovf = build_decl (BUILTINS_LOCATION,
6983 FIELD_DECL, get_identifier ("overflow_arg_area"),
6984 ptr_type_node);
6985 f_sav = build_decl (BUILTINS_LOCATION,
6986 FIELD_DECL, get_identifier ("reg_save_area"),
6987 ptr_type_node);
6988
6989 va_list_gpr_counter_field = f_gpr;
6990 va_list_fpr_counter_field = f_fpr;
6991
6992 DECL_FIELD_CONTEXT (f_gpr) = record;
6993 DECL_FIELD_CONTEXT (f_fpr) = record;
6994 DECL_FIELD_CONTEXT (f_ovf) = record;
6995 DECL_FIELD_CONTEXT (f_sav) = record;
6996
6997 TREE_CHAIN (record) = type_decl;
6998 TYPE_NAME (record) = type_decl;
6999 TYPE_FIELDS (record) = f_gpr;
7000 TREE_CHAIN (f_gpr) = f_fpr;
7001 TREE_CHAIN (f_fpr) = f_ovf;
7002 TREE_CHAIN (f_ovf) = f_sav;
7003
7004 layout_type (record);
7005
7006 /* The correct type is an array type of one element. */
7007 return build_array_type (record, build_index_type (size_zero_node));
7008 }
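/* For reference, the record built above corresponds to the familiar
   SysV va_list declaration; a rough sketch rather than a verbatim quote
   from the psABI:

     typedef struct __va_list_tag {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } __builtin_va_list[1];  */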
7009
7010 /* Set up the builtin va_list data type and, for 64-bit, the additional
7011 calling-convention-specific va_list data types. */
7012
7013 static tree
7014 ix86_build_builtin_va_list (void)
7015 {
7016 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
7017
7018 /* Initialize ABI-specific va_list builtin types. */
7019 if (TARGET_64BIT)
7020 {
7021 tree t;
7022 if (ix86_abi == MS_ABI)
7023 {
7024 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
7025 if (TREE_CODE (t) != RECORD_TYPE)
7026 t = build_variant_type_copy (t);
7027 sysv_va_list_type_node = t;
7028 }
7029 else
7030 {
7031 t = ret;
7032 if (TREE_CODE (t) != RECORD_TYPE)
7033 t = build_variant_type_copy (t);
7034 sysv_va_list_type_node = t;
7035 }
7036 if (ix86_abi != MS_ABI)
7037 {
7038 t = ix86_build_builtin_va_list_abi (MS_ABI);
7039 if (TREE_CODE (t) != RECORD_TYPE)
7040 t = build_variant_type_copy (t);
7041 ms_va_list_type_node = t;
7042 }
7043 else
7044 {
7045 t = ret;
7046 if (TREE_CODE (t) != RECORD_TYPE)
7047 t = build_variant_type_copy (t);
7048 ms_va_list_type_node = t;
7049 }
7050 }
7051
7052 return ret;
7053 }
7054
7055 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
7056
7057 static void
7058 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
7059 {
7060 rtx save_area, mem;
7061 rtx label;
7062 rtx tmp_reg;
7063 rtx nsse_reg;
7064 alias_set_type set;
7065 int i;
7066
7067 /* GPR size of varargs save area. */
7068 if (cfun->va_list_gpr_size)
7069 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
7070 else
7071 ix86_varargs_gpr_size = 0;
7072
7073 /* FPR size of varargs save area. We don't need it if we don't pass
7074 anything in SSE registers. */
7075 if (cum->sse_nregs && cfun->va_list_fpr_size)
7076 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
7077 else
7078 ix86_varargs_fpr_size = 0;
7079
7080 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
7081 return;
7082
7083 save_area = frame_pointer_rtx;
7084 set = get_varargs_alias_set ();
7085
7086 for (i = cum->regno;
7087 i < X86_64_REGPARM_MAX
7088 && i < cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
7089 i++)
7090 {
7091 mem = gen_rtx_MEM (Pmode,
7092 plus_constant (save_area, i * UNITS_PER_WORD));
7093 MEM_NOTRAP_P (mem) = 1;
7094 set_mem_alias_set (mem, set);
7095 emit_move_insn (mem, gen_rtx_REG (Pmode,
7096 x86_64_int_parameter_registers[i]));
7097 }
7098
7099 if (ix86_varargs_fpr_size)
7100 {
7101 /* Now emit code to save SSE registers. The AX parameter contains the
7102 number of SSE parameter registers used to call this function. We use
7103 the sse_prologue_save insn template, which produces a computed jump
7104 across the SSE saves. We need some preparation work to get this working. */
7105
7106 label = gen_label_rtx ();
7107
7108 nsse_reg = gen_reg_rtx (Pmode);
7109 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, AX_REG)));
7110
7111 /* Compute the address of the memory block we save into. We always use a
7112 pointer pointing 127 bytes past the first byte to store - this keeps
7113 each save instruction limited to 4 bytes (5 bytes for AVX) by using a
7114 one-byte displacement. */
7115 tmp_reg = gen_reg_rtx (Pmode);
7116 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
7117 plus_constant (save_area,
7118 ix86_varargs_gpr_size + 127)));
7119 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
7120 MEM_NOTRAP_P (mem) = 1;
7121 set_mem_alias_set (mem, set);
7122 set_mem_align (mem, 64);
7123
7124 /* And finally do the dirty job! */
7125 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
7126 GEN_INT (cum->sse_regno), label,
7127 gen_reg_rtx (Pmode)));
7128 }
7129 }
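/* Sketch of the register save area laid out by the code above, assuming
   the usual SysV values X86_64_REGPARM_MAX == 6 and
   X86_64_SSE_REGPARM_MAX == 8 (an assumption for illustration only):

     bytes   0 ..  47   %rdi, %rsi, %rdx, %rcx, %r8, %r9  (8 bytes each)
     bytes  48 .. 175   %xmm0 .. %xmm7                    (16 bytes each)

   The gp_offset and fp_offset fields of the va_list index into this
   block.  */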
7130
7131 static void
7132 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
7133 {
7134 alias_set_type set = get_varargs_alias_set ();
7135 int i;
7136
7137 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
7138 {
7139 rtx reg, mem;
7140
7141 mem = gen_rtx_MEM (Pmode,
7142 plus_constant (virtual_incoming_args_rtx,
7143 i * UNITS_PER_WORD));
7144 MEM_NOTRAP_P (mem) = 1;
7145 set_mem_alias_set (mem, set);
7146
7147 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
7148 emit_move_insn (mem, reg);
7149 }
7150 }
7151
7152 static void
7153 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7154 tree type, int *pretend_size ATTRIBUTE_UNUSED,
7155 int no_rtl)
7156 {
7157 CUMULATIVE_ARGS next_cum;
7158 tree fntype;
7159
7160 /* This argument doesn't appear to be used anymore, which is good,
7161 because the old code here didn't suppress rtl generation. */
7162 gcc_assert (!no_rtl);
7163
7164 if (!TARGET_64BIT)
7165 return;
7166
7167 fntype = TREE_TYPE (current_function_decl);
7168
7169 /* For varargs, we do not want to skip the dummy va_dcl argument.
7170 For stdargs, we do want to skip the last named argument. */
7171 next_cum = *cum;
7172 if (stdarg_p (fntype))
7173 ix86_function_arg_advance (&next_cum, mode, type, true);
7174
7175 if (cum->call_abi == MS_ABI)
7176 setup_incoming_varargs_ms_64 (&next_cum);
7177 else
7178 setup_incoming_varargs_64 (&next_cum);
7179 }
7180
7181 /* Check whether TYPE is a char * style va_list. */
7182
7183 static bool
7184 is_va_list_char_pointer (tree type)
7185 {
7186 tree canonic;
7187
7188 /* For 32-bit it is always true. */
7189 if (!TARGET_64BIT)
7190 return true;
7191 canonic = ix86_canonical_va_list_type (type);
7192 return (canonic == ms_va_list_type_node
7193 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
7194 }
7195
7196 /* Implement va_start. */
7197
7198 static void
7199 ix86_va_start (tree valist, rtx nextarg)
7200 {
7201 HOST_WIDE_INT words, n_gpr, n_fpr;
7202 tree f_gpr, f_fpr, f_ovf, f_sav;
7203 tree gpr, fpr, ovf, sav, t;
7204 tree type;
7205
7206 /* Only the 64-bit target needs something special. */
7207 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
7208 {
7209 std_expand_builtin_va_start (valist, nextarg);
7210 return;
7211 }
7212
7213 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
7214 f_fpr = TREE_CHAIN (f_gpr);
7215 f_ovf = TREE_CHAIN (f_fpr);
7216 f_sav = TREE_CHAIN (f_ovf);
7217
7218 valist = build_simple_mem_ref (valist);
7219 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
7220 /* The following should be folded into the MEM_REF offset. */
7221 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
7222 f_gpr, NULL_TREE);
7223 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
7224 f_fpr, NULL_TREE);
7225 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
7226 f_ovf, NULL_TREE);
7227 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
7228 f_sav, NULL_TREE);
7229
7230 /* Count number of gp and fp argument registers used. */
7231 words = crtl->args.info.words;
7232 n_gpr = crtl->args.info.regno;
7233 n_fpr = crtl->args.info.sse_regno;
7234
7235 if (cfun->va_list_gpr_size)
7236 {
7237 type = TREE_TYPE (gpr);
7238 t = build2 (MODIFY_EXPR, type,
7239 gpr, build_int_cst (type, n_gpr * 8));
7240 TREE_SIDE_EFFECTS (t) = 1;
7241 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7242 }
7243
7244 if (TARGET_SSE && cfun->va_list_fpr_size)
7245 {
7246 type = TREE_TYPE (fpr);
7247 t = build2 (MODIFY_EXPR, type, fpr,
7248 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
7249 TREE_SIDE_EFFECTS (t) = 1;
7250 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7251 }
7252
7253 /* Find the overflow area. */
7254 type = TREE_TYPE (ovf);
7255 t = make_tree (type, crtl->args.internal_arg_pointer);
7256 if (words != 0)
7257 t = build2 (POINTER_PLUS_EXPR, type, t,
7258 size_int (words * UNITS_PER_WORD));
7259 t = build2 (MODIFY_EXPR, type, ovf, t);
7260 TREE_SIDE_EFFECTS (t) = 1;
7261 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7262
7263 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
7264 {
7265 /* Find the register save area.
7266 The function prologue saves it right above the stack frame. */
7267 type = TREE_TYPE (sav);
7268 t = make_tree (type, frame_pointer_rtx);
7269 if (!ix86_varargs_gpr_size)
7270 t = build2 (POINTER_PLUS_EXPR, type, t,
7271 size_int (-8 * X86_64_REGPARM_MAX));
7272 t = build2 (MODIFY_EXPR, type, sav, t);
7273 TREE_SIDE_EFFECTS (t) = 1;
7274 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7275 }
7276 }
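/* Worked example for the offsets set up above (illustrative only,
   assuming X86_64_REGPARM_MAX == 6): for

     void f (int a, double b, ...);

   one GPR and one SSE register are consumed by the named arguments, so
   va_start leaves gp_offset = 1 * 8 = 8 and
   fp_offset = 1 * 16 + 8 * 6 = 64, i.e. both point at the first unused
   slot of the register save area.  */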
7277
7278 /* Implement va_arg. */
7279
7280 static tree
7281 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
7282 gimple_seq *post_p)
7283 {
7284 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
7285 tree f_gpr, f_fpr, f_ovf, f_sav;
7286 tree gpr, fpr, ovf, sav, t;
7287 int size, rsize;
7288 tree lab_false, lab_over = NULL_TREE;
7289 tree addr, t2;
7290 rtx container;
7291 int indirect_p = 0;
7292 tree ptrtype;
7293 enum machine_mode nat_mode;
7294 unsigned int arg_boundary;
7295
7296 /* Only the 64-bit target needs something special. */
7297 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
7298 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
7299
7300 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
7301 f_fpr = TREE_CHAIN (f_gpr);
7302 f_ovf = TREE_CHAIN (f_fpr);
7303 f_sav = TREE_CHAIN (f_ovf);
7304
7305 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
7306 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
7307 valist = build_va_arg_indirect_ref (valist);
7308 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
7309 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
7310 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
7311
7312 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
7313 if (indirect_p)
7314 type = build_pointer_type (type);
7315 size = int_size_in_bytes (type);
7316 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7317
7318 nat_mode = type_natural_mode (type, NULL);
7319 switch (nat_mode)
7320 {
7321 case V8SFmode:
7322 case V8SImode:
7323 case V32QImode:
7324 case V16HImode:
7325 case V4DFmode:
7326 case V4DImode:
7327 /* Unnamed 256-bit vector mode parameters are passed on the stack. */
7328 if (ix86_cfun_abi () == SYSV_ABI)
7329 {
7330 container = NULL;
7331 break;
7332 }
7333
7334 default:
7335 container = construct_container (nat_mode, TYPE_MODE (type),
7336 type, 0, X86_64_REGPARM_MAX,
7337 X86_64_SSE_REGPARM_MAX, intreg,
7338 0);
7339 break;
7340 }
7341
7342 /* Pull the value out of the saved registers. */
7343
7344 addr = create_tmp_var (ptr_type_node, "addr");
7345
7346 if (container)
7347 {
7348 int needed_intregs, needed_sseregs;
7349 bool need_temp;
7350 tree int_addr, sse_addr;
7351
7352 lab_false = create_artificial_label (UNKNOWN_LOCATION);
7353 lab_over = create_artificial_label (UNKNOWN_LOCATION);
7354
7355 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
7356
7357 need_temp = (!REG_P (container)
7358 && ((needed_intregs && TYPE_ALIGN (type) > 64)
7359 || TYPE_ALIGN (type) > 128));
7360
7361 /* In case we are passing a structure, verify that it is a consecutive
7362 block in the register save area. If not, we need to do moves. */
7363 if (!need_temp && !REG_P (container))
7364 {
7365 /* Verify that all registers are strictly consecutive */
7366 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
7367 {
7368 int i;
7369
7370 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
7371 {
7372 rtx slot = XVECEXP (container, 0, i);
7373 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
7374 || INTVAL (XEXP (slot, 1)) != i * 16)
7375 need_temp = 1;
7376 }
7377 }
7378 else
7379 {
7380 int i;
7381
7382 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
7383 {
7384 rtx slot = XVECEXP (container, 0, i);
7385 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
7386 || INTVAL (XEXP (slot, 1)) != i * 8)
7387 need_temp = 1;
7388 }
7389 }
7390 }
7391 if (!need_temp)
7392 {
7393 int_addr = addr;
7394 sse_addr = addr;
7395 }
7396 else
7397 {
7398 int_addr = create_tmp_var (ptr_type_node, "int_addr");
7399 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
7400 }
7401
7402 /* First ensure that we fit completely in registers. */
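/* (Editor's note: gpr and fpr hold the byte offsets already consumed in
   the save area, so the tests below mean "not enough free slots left".
   For example, with needed_intregs == 2 and X86_64_REGPARM_MAX == 6,
   an illustrative assumption, the overflow path is taken once
   gp_offset >= (6 - 2 + 1) * 8 == 40, i.e. when fewer than two 8-byte
   slots remain.) */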
7403 if (needed_intregs)
7404 {
7405 t = build_int_cst (TREE_TYPE (gpr),
7406 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
7407 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
7408 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
7409 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
7410 gimplify_and_add (t, pre_p);
7411 }
7412 if (needed_sseregs)
7413 {
7414 t = build_int_cst (TREE_TYPE (fpr),
7415 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
7416 + X86_64_REGPARM_MAX * 8);
7417 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
7418 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
7419 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
7420 gimplify_and_add (t, pre_p);
7421 }
7422
7423 /* Compute index to start of area used for integer regs. */
7424 if (needed_intregs)
7425 {
7426 /* int_addr = gpr + sav; */
7427 t = fold_convert (sizetype, gpr);
7428 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
7429 gimplify_assign (int_addr, t, pre_p);
7430 }
7431 if (needed_sseregs)
7432 {
7433 /* sse_addr = fpr + sav; */
7434 t = fold_convert (sizetype, fpr);
7435 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
7436 gimplify_assign (sse_addr, t, pre_p);
7437 }
7438 if (need_temp)
7439 {
7440 int i, prev_size = 0;
7441 tree temp = create_tmp_var (type, "va_arg_tmp");
7442
7443 /* addr = &temp; */
7444 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
7445 gimplify_assign (addr, t, pre_p);
7446
7447 for (i = 0; i < XVECLEN (container, 0); i++)
7448 {
7449 rtx slot = XVECEXP (container, 0, i);
7450 rtx reg = XEXP (slot, 0);
7451 enum machine_mode mode = GET_MODE (reg);
7452 tree piece_type;
7453 tree addr_type;
7454 tree daddr_type;
7455 tree src_addr, src;
7456 int src_offset;
7457 tree dest_addr, dest;
7458 int cur_size = GET_MODE_SIZE (mode);
7459
7460 if (prev_size + cur_size > size)
7461 {
7462 cur_size = size - prev_size;
7463 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
7464 if (mode == BLKmode)
7465 mode = QImode;
7466 }
7467 piece_type = lang_hooks.types.type_for_mode (mode, 1);
7468 if (mode == GET_MODE (reg))
7469 addr_type = build_pointer_type (piece_type);
7470 else
7471 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
7472 true);
7473 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
7474 true);
7475
7476 if (SSE_REGNO_P (REGNO (reg)))
7477 {
7478 src_addr = sse_addr;
7479 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
7480 }
7481 else
7482 {
7483 src_addr = int_addr;
7484 src_offset = REGNO (reg) * 8;
7485 }
7486 src_addr = fold_convert (addr_type, src_addr);
7487 src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr,
7488 size_int (src_offset));
7489
7490 dest_addr = fold_convert (daddr_type, addr);
7491 dest_addr = fold_build2 (POINTER_PLUS_EXPR, daddr_type, dest_addr,
7492 size_int (INTVAL (XEXP (slot, 1))));
7493 if (cur_size == GET_MODE_SIZE (mode))
7494 {
7495 src = build_va_arg_indirect_ref (src_addr);
7496 dest = build_va_arg_indirect_ref (dest_addr);
7497
7498 gimplify_assign (dest, src, pre_p);
7499 }
7500 else
7501 {
7502 tree copy
7503 = build_call_expr (implicit_built_in_decls[BUILT_IN_MEMCPY],
7504 3, dest_addr, src_addr,
7505 size_int (cur_size));
7506 gimplify_and_add (copy, pre_p);
7507 }
7508 prev_size += cur_size;
7509 }
7510 }
7511
7512 if (needed_intregs)
7513 {
7514 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
7515 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
7516 gimplify_assign (gpr, t, pre_p);
7517 }
7518
7519 if (needed_sseregs)
7520 {
7521 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
7522 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
7523 gimplify_assign (fpr, t, pre_p);
7524 }
7525
7526 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
7527
7528 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
7529 }
7530
7531 /* ... otherwise out of the overflow area. */
7532
7533 /* When the caller aligns a parameter on the stack, a parameter whose
7534 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT will only be
7535 aligned at MAX_SUPPORTED_STACK_ALIGNMENT. Match the callee with the
7536 caller here. */
7537 arg_boundary = FUNCTION_ARG_BOUNDARY (VOIDmode, type);
7538 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
7539 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
7540
7541 /* Care for on-stack alignment if needed. */
7542 if (arg_boundary <= 64
7543 || integer_zerop (TYPE_SIZE (type)))
7544 t = ovf;
7545 else
7546 {
7547 HOST_WIDE_INT align = arg_boundary / 8;
7548 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), ovf,
7549 size_int (align - 1));
7550 t = fold_convert (sizetype, t);
7551 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
7552 size_int (-align));
7553 t = fold_convert (TREE_TYPE (ovf), t);
7554 if (crtl->stack_alignment_needed < arg_boundary)
7555 crtl->stack_alignment_needed = arg_boundary;
7556 }
7557 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
7558 gimplify_assign (addr, t, pre_p);
7559
7560 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t,
7561 size_int (rsize * UNITS_PER_WORD));
7562 gimplify_assign (unshare_expr (ovf), t, pre_p);
7563
7564 if (container)
7565 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
7566
7567 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
7568 addr = fold_convert (ptrtype, addr);
7569
7570 if (indirect_p)
7571 addr = build_va_arg_indirect_ref (addr);
7572 return build_va_arg_indirect_ref (addr);
7573 }
7574 \f
7575 /* Return nonzero if OPNUM's MEM should be matched
7576 in movabs* patterns. */
7577
7578 int
7579 ix86_check_movabs (rtx insn, int opnum)
7580 {
7581 rtx set, mem;
7582
7583 set = PATTERN (insn);
7584 if (GET_CODE (set) == PARALLEL)
7585 set = XVECEXP (set, 0, 0);
7586 gcc_assert (GET_CODE (set) == SET);
7587 mem = XEXP (set, opnum);
7588 while (GET_CODE (mem) == SUBREG)
7589 mem = SUBREG_REG (mem);
7590 gcc_assert (MEM_P (mem));
7591 return (volatile_ok || !MEM_VOLATILE_P (mem));
7592 }
7593 \f
7594 /* Initialize the table of extra 80387 mathematical constants. */
7595
7596 static void
7597 init_ext_80387_constants (void)
7598 {
7599 static const char * cst[5] =
7600 {
7601 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
7602 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
7603 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
7604 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
7605 "3.1415926535897932385128089594061862044", /* 4: fldpi */
7606 };
7607 int i;
7608
7609 for (i = 0; i < 5; i++)
7610 {
7611 real_from_string (&ext_80387_constants_table[i], cst[i]);
7612 /* Ensure each constant is rounded to XFmode precision. */
7613 real_convert (&ext_80387_constants_table[i],
7614 XFmode, &ext_80387_constants_table[i]);
7615 }
7616
7617 ext_80387_constants_init = 1;
7618 }
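/* The five strings above are, in order, log10(2), ln(2), log2(e),
   log2(10) and pi: the values loaded by the fldlg2, fldln2, fldl2e,
   fldl2t and fldpi instructions named in the comments. */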
7619
7620 /* Return nonzero if the constant is something that can be loaded with
7621 a special instruction; the value selects which instruction (see below). */
7622
7623 int
7624 standard_80387_constant_p (rtx x)
7625 {
7626 enum machine_mode mode = GET_MODE (x);
7627
7628 REAL_VALUE_TYPE r;
7629
7630 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
7631 return -1;
7632
7633 if (x == CONST0_RTX (mode))
7634 return 1;
7635 if (x == CONST1_RTX (mode))
7636 return 2;
7637
7638 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7639
7640 /* For XFmode constants, try to find a special 80387 instruction when
7641 optimizing for size or on those CPUs that benefit from them. */
7642 if (mode == XFmode
7643 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
7644 {
7645 int i;
7646
7647 if (! ext_80387_constants_init)
7648 init_ext_80387_constants ();
7649
7650 for (i = 0; i < 5; i++)
7651 if (real_identical (&r, &ext_80387_constants_table[i]))
7652 return i + 3;
7653 }
7654
7655 /* A load of the constant -0.0 or -1.0 will be split into an
7656 fldz;fchs or fld1;fchs sequence. */
7657 if (real_isnegzero (&r))
7658 return 8;
7659 if (real_identical (&r, &dconstm1))
7660 return 9;
7661
7662 return 0;
7663 }
7664
7665 /* Return the opcode of the special instruction to be used to load
7666 the constant X. */
7667
7668 const char *
7669 standard_80387_constant_opcode (rtx x)
7670 {
7671 switch (standard_80387_constant_p (x))
7672 {
7673 case 1:
7674 return "fldz";
7675 case 2:
7676 return "fld1";
7677 case 3:
7678 return "fldlg2";
7679 case 4:
7680 return "fldln2";
7681 case 5:
7682 return "fldl2e";
7683 case 6:
7684 return "fldl2t";
7685 case 7:
7686 return "fldpi";
7687 case 8:
7688 case 9:
7689 return "#";
7690 default:
7691 gcc_unreachable ();
7692 }
7693 }
7694
7695 /* Return the CONST_DOUBLE representing the 80387 constant that is
7696 loaded by the specified special instruction. The argument IDX
7697 matches the return value from standard_80387_constant_p. */
7698
7699 rtx
7700 standard_80387_constant_rtx (int idx)
7701 {
7702 int i;
7703
7704 if (! ext_80387_constants_init)
7705 init_ext_80387_constants ();
7706
7707 switch (idx)
7708 {
7709 case 3:
7710 case 4:
7711 case 5:
7712 case 6:
7713 case 7:
7714 i = idx - 3;
7715 break;
7716
7717 default:
7718 gcc_unreachable ();
7719 }
7720
7721 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
7722 XFmode);
7723 }
7724
7725 /* Return 1 if X is all 0s, 2 if X is all 1s in a supported SSE
7726 vector mode, and 0 otherwise. */
7727
7728 int
7729 standard_sse_constant_p (rtx x)
7730 {
7731 enum machine_mode mode = GET_MODE (x);
7732
7733 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
7734 return 1;
7735 if (vector_all_ones_operand (x, mode))
7736 switch (mode)
7737 {
7738 case V16QImode:
7739 case V8HImode:
7740 case V4SImode:
7741 case V2DImode:
7742 if (TARGET_SSE2)
7743 return 2;
7744 default:
7745 break;
7746 }
7747
7748 return 0;
7749 }
7750
7751 /* Return the opcode of the special instruction to be used to load
7752 the constant X. */
7753
7754 const char *
7755 standard_sse_constant_opcode (rtx insn, rtx x)
7756 {
7757 switch (standard_sse_constant_p (x))
7758 {
7759 case 1:
7760 switch (get_attr_mode (insn))
7761 {
7762 case MODE_V4SF:
7763 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
7764 case MODE_V2DF:
7765 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
7766 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
7767 else
7768 return TARGET_AVX ? "vxorpd\t%0, %0, %0" : "xorpd\t%0, %0";
7769 case MODE_TI:
7770 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
7771 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
7772 else
7773 return TARGET_AVX ? "vpxor\t%0, %0, %0" : "pxor\t%0, %0";
7774 case MODE_V8SF:
7775 return "vxorps\t%x0, %x0, %x0";
7776 case MODE_V4DF:
7777 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
7778 return "vxorps\t%x0, %x0, %x0";
7779 else
7780 return "vxorpd\t%x0, %x0, %x0";
7781 case MODE_OI:
7782 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
7783 return "vxorps\t%x0, %x0, %x0";
7784 else
7785 return "vpxor\t%x0, %x0, %x0";
7786 default:
7787 break;
7788 }
7789 case 2:
7790 return TARGET_AVX ? "vpcmpeqd\t%0, %0, %0" : "pcmpeqd\t%0, %0";
7791 default:
7792 break;
7793 }
7794 gcc_unreachable ();
7795 }
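/* For illustration, assuming the constant is loaded into %xmm0: an
   all-zeros V4SFmode constant is emitted as "xorps %xmm0, %xmm0"
   ("vxorps %xmm0, %xmm0, %xmm0" with AVX), while an all-ones vector
   integer constant becomes "pcmpeqd %xmm0, %xmm0". */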
7796
7797 /* Returns 1 if OP contains a symbol reference */
7798
7799 int
7800 symbolic_reference_mentioned_p (rtx op)
7801 {
7802 const char *fmt;
7803 int i;
7804
7805 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
7806 return 1;
7807
7808 fmt = GET_RTX_FORMAT (GET_CODE (op));
7809 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
7810 {
7811 if (fmt[i] == 'E')
7812 {
7813 int j;
7814
7815 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
7816 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
7817 return 1;
7818 }
7819
7820 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
7821 return 1;
7822 }
7823
7824 return 0;
7825 }
7826
7827 /* Return 1 if it is appropriate to emit `ret' instructions in the
7828 body of a function. Do this only if the epilogue is simple, needing a
7829 couple of insns. Prior to reloading, we can't tell how many registers
7830 must be saved, so return 0 then. Return 0 if there is no frame
7831 marker to de-allocate. */
7832
7833 int
7834 ix86_can_use_return_insn_p (void)
7835 {
7836 struct ix86_frame frame;
7837
7838 if (! reload_completed || frame_pointer_needed)
7839 return 0;
7840
7841 /* Don't allow more than 32k of popped arguments, since that's all we
7842 can do with one instruction. */
7843 if (crtl->args.pops_args
7844 && crtl->args.size >= 32768)
7845 return 0;
7846
7847 ix86_compute_frame_layout (&frame);
7848 return frame.to_allocate == 0 && frame.padding0 == 0
7849 && (frame.nregs + frame.nsseregs) == 0;
7850 }
7851 \f
7852 /* Value should be nonzero if functions must have frame pointers.
7853 Zero means the frame pointer need not be set up (and parms may
7854 be accessed via the stack pointer) in functions that seem suitable. */
7855
7856 static bool
7857 ix86_frame_pointer_required (void)
7858 {
7859 /* If we accessed previous frames, then the generated code expects
7860 to be able to access the saved ebp value in our frame. */
7861 if (cfun->machine->accesses_prev_frame)
7862 return true;
7863
7864 /* Several x86 OSes need a frame pointer for other reasons,
7865 usually pertaining to setjmp. */
7866 if (SUBTARGET_FRAME_POINTER_REQUIRED)
7867 return true;
7868
7869 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
7870 the frame pointer by default. Turn it back on now if we've not
7871 got a leaf function. */
7872 if (TARGET_OMIT_LEAF_FRAME_POINTER
7873 && (!current_function_is_leaf
7874 || ix86_current_function_calls_tls_descriptor))
7875 return true;
7876
7877 if (crtl->profile)
7878 return true;
7879
7880 return false;
7881 }
7882
7883 /* Record that the current function accesses previous call frames. */
7884
7885 void
7886 ix86_setup_frame_addresses (void)
7887 {
7888 cfun->machine->accesses_prev_frame = 1;
7889 }
7890 \f
7891 #ifndef USE_HIDDEN_LINKONCE
7892 # if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
7893 # define USE_HIDDEN_LINKONCE 1
7894 # else
7895 # define USE_HIDDEN_LINKONCE 0
7896 # endif
7897 #endif
7898
7899 static int pic_labels_used;
7900
7901 /* Fills in the label name that should be used for a pc thunk for
7902 the given register. */
7903
7904 static void
7905 get_pc_thunk_name (char name[32], unsigned int regno)
7906 {
7907 gcc_assert (!TARGET_64BIT);
7908
7909 if (USE_HIDDEN_LINKONCE)
7910 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
7911 else
7912 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
7913 }
7914
7915
7916 /* This function generates the pc thunks used for -fpic; each thunk loads
7917 its register with the return address of the caller and then returns. */
7918
7919 static void
7920 ix86_code_end (void)
7921 {
7922 rtx xops[2];
7923 int regno;
7924
7925 for (regno = 0; regno < 8; ++regno)
7926 {
7927 char name[32];
7928 tree decl;
7929
7930 if (! ((pic_labels_used >> regno) & 1))
7931 continue;
7932
7933 get_pc_thunk_name (name, regno);
7934
7935 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
7936 get_identifier (name),
7937 build_function_type (void_type_node, void_list_node));
7938 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
7939 NULL_TREE, void_type_node);
7940 TREE_PUBLIC (decl) = 1;
7941 TREE_STATIC (decl) = 1;
7942
7943 #if TARGET_MACHO
7944 if (TARGET_MACHO)
7945 {
7946 switch_to_section (darwin_sections[text_coal_section]);
7947 fputs ("\t.weak_definition\t", asm_out_file);
7948 assemble_name (asm_out_file, name);
7949 fputs ("\n\t.private_extern\t", asm_out_file);
7950 assemble_name (asm_out_file, name);
7951 fputs ("\n", asm_out_file);
7952 ASM_OUTPUT_LABEL (asm_out_file, name);
7953 DECL_WEAK (decl) = 1;
7954 }
7955 else
7956 #endif
7957 if (USE_HIDDEN_LINKONCE)
7958 {
7959 DECL_COMDAT_GROUP (decl) = DECL_ASSEMBLER_NAME (decl);
7960
7961 targetm.asm_out.unique_section (decl, 0);
7962 switch_to_section (get_named_section (decl, NULL, 0));
7963
7964 targetm.asm_out.globalize_label (asm_out_file, name);
7965 fputs ("\t.hidden\t", asm_out_file);
7966 assemble_name (asm_out_file, name);
7967 putc ('\n', asm_out_file);
7968 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
7969 }
7970 else
7971 {
7972 switch_to_section (text_section);
7973 ASM_OUTPUT_LABEL (asm_out_file, name);
7974 }
7975
7976 DECL_INITIAL (decl) = make_node (BLOCK);
7977 current_function_decl = decl;
7978 init_function_start (decl);
7979 first_function_block_is_cold = false;
7980 /* Make sure unwind info is emitted for the thunk if needed. */
7981 final_start_function (emit_barrier (), asm_out_file, 1);
7982
7983 xops[0] = gen_rtx_REG (Pmode, regno);
7984 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
7985 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
7986 output_asm_insn ("ret", xops);
7987 final_end_function ();
7988 init_insn_lengths ();
7989 free_after_compilation (cfun);
7990 set_cfun (NULL);
7991 current_function_decl = NULL;
7992 }
7993 }
7994
7995 /* Emit code for the SET_GOT patterns. */
7996
7997 const char *
7998 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
7999 {
8000 rtx xops[3];
8001
8002 xops[0] = dest;
8003
8004 if (TARGET_VXWORKS_RTP && flag_pic)
8005 {
8006 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
8007 xops[2] = gen_rtx_MEM (Pmode,
8008 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
8009 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
8010
8011 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
8012 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
8013 an unadorned address. */
8014 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
8015 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
8016 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
8017 return "";
8018 }
8019
8020 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
8021
8022 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
8023 {
8024 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
8025
8026 if (!flag_pic)
8027 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
8028 else
8029 {
8030 output_asm_insn ("call\t%a2", xops);
8031 #ifdef DWARF2_UNWIND_INFO
8032 /* The call to the next label acts as a push. */
8033 if (dwarf2out_do_frame ())
8034 {
8035 rtx insn;
8036 start_sequence ();
8037 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8038 gen_rtx_PLUS (Pmode,
8039 stack_pointer_rtx,
8040 GEN_INT (-4))));
8041 RTX_FRAME_RELATED_P (insn) = 1;
8042 dwarf2out_frame_debug (insn, true);
8043 end_sequence ();
8044 }
8045 #endif
8046 }
8047
8048 #if TARGET_MACHO
8049 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8050 is what will be referenced by the Mach-O PIC subsystem. */
8051 if (!label)
8052 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
8053 #endif
8054
8055 targetm.asm_out.internal_label (asm_out_file, "L",
8056 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
8057
8058 if (flag_pic)
8059 {
8060 output_asm_insn ("pop%z0\t%0", xops);
8061 #ifdef DWARF2_UNWIND_INFO
8062 /* The pop is a real pop and clobbers dest, but doesn't restore it
8063 for unwind info purposes. */
8064 if (dwarf2out_do_frame ())
8065 {
8066 rtx insn;
8067 start_sequence ();
8068 insn = emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx));
8069 dwarf2out_frame_debug (insn, true);
8070 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8071 gen_rtx_PLUS (Pmode,
8072 stack_pointer_rtx,
8073 GEN_INT (4))));
8074 RTX_FRAME_RELATED_P (insn) = 1;
8075 dwarf2out_frame_debug (insn, true);
8076 end_sequence ();
8077 }
8078 #endif
8079 }
8080 }
8081 else
8082 {
8083 char name[32];
8084 get_pc_thunk_name (name, REGNO (dest));
8085 pic_labels_used |= 1 << REGNO (dest);
8086
8087 #ifdef DWARF2_UNWIND_INFO
8088 /* Ensure all queued register saves are flushed before the
8089 call. */
8090 if (dwarf2out_do_frame ())
8091 {
8092 rtx insn;
8093 start_sequence ();
8094 insn = emit_barrier ();
8095 end_sequence ();
8096 dwarf2out_frame_debug (insn, false);
8097 }
8098 #endif
8099 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
8100 xops[2] = gen_rtx_MEM (QImode, xops[2]);
8101 output_asm_insn ("call\t%X2", xops);
8102 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8103 is what will be referenced by the Mach-O PIC subsystem. */
8104 #if TARGET_MACHO
8105 if (!label)
8106 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
8107 else
8108 targetm.asm_out.internal_label (asm_out_file, "L",
8109 CODE_LABEL_NUMBER (label));
8110 #endif
8111 }
8112
8113 if (TARGET_MACHO)
8114 return "";
8115
8116 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
8117 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
8118 else
8119 output_asm_insn ("add%z0\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
8120
8121 return "";
8122 }
8123
8124 /* Generate a "push" pattern for input ARG. */
8125
8126 static rtx
8127 gen_push (rtx arg)
8128 {
8129 if (ix86_cfa_state->reg == stack_pointer_rtx)
8130 ix86_cfa_state->offset += UNITS_PER_WORD;
8131
8132 return gen_rtx_SET (VOIDmode,
8133 gen_rtx_MEM (Pmode,
8134 gen_rtx_PRE_DEC (Pmode,
8135 stack_pointer_rtx)),
8136 arg);
8137 }
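/* A minimal usage sketch: gen_push (hard_frame_pointer_rtx) builds
   (set (mem:P (pre_dec:P (reg sp))) (reg bp)), i.e. the familiar
   "push %ebp" / "push %rbp", and bumps the tracked CFA offset when the
   CFA register is still the stack pointer. */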
8138
8139 /* Return the regno of an unused call-clobbered register available for
8140 the entire function, or INVALID_REGNUM if there is none. */
8141
8142 static unsigned int
8143 ix86_select_alt_pic_regnum (void)
8144 {
8145 if (current_function_is_leaf && !crtl->profile
8146 && !ix86_current_function_calls_tls_descriptor)
8147 {
8148 int i, drap;
8149 /* Can't use the same register for both PIC and DRAP. */
8150 if (crtl->drap_reg)
8151 drap = REGNO (crtl->drap_reg);
8152 else
8153 drap = -1;
8154 for (i = 2; i >= 0; --i)
8155 if (i != drap && !df_regs_ever_live_p (i))
8156 return i;
8157 }
8158
8159 return INVALID_REGNUM;
8160 }
8161
8162 /* Return 1 if we need to save REGNO. */
8163 static int
8164 ix86_save_reg (unsigned int regno, int maybe_eh_return)
8165 {
8166 if (pic_offset_table_rtx
8167 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
8168 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
8169 || crtl->profile
8170 || crtl->calls_eh_return
8171 || crtl->uses_const_pool))
8172 {
8173 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
8174 return 0;
8175 return 1;
8176 }
8177
8178 if (crtl->calls_eh_return && maybe_eh_return)
8179 {
8180 unsigned i;
8181 for (i = 0; ; i++)
8182 {
8183 unsigned test = EH_RETURN_DATA_REGNO (i);
8184 if (test == INVALID_REGNUM)
8185 break;
8186 if (test == regno)
8187 return 1;
8188 }
8189 }
8190
8191 if (crtl->drap_reg && regno == REGNO (crtl->drap_reg))
8192 return 1;
8193
8194 return (df_regs_ever_live_p (regno)
8195 && !call_used_regs[regno]
8196 && !fixed_regs[regno]
8197 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
8198 }
8199
8200 /* Return the number of saved general purpose registers. */
8201
8202 static int
8203 ix86_nsaved_regs (void)
8204 {
8205 int nregs = 0;
8206 int regno;
8207
8208 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8209 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8210 nregs ++;
8211 return nregs;
8212 }
8213
8214 /* Return the number of saved SSE registers. */
8215
8216 static int
8217 ix86_nsaved_sseregs (void)
8218 {
8219 int nregs = 0;
8220 int regno;
8221
8222 if (ix86_cfun_abi () != MS_ABI)
8223 return 0;
8224 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8225 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8226 nregs ++;
8227 return nregs;
8228 }
8229
8230 /* Given FROM and TO register numbers, say whether this elimination is
8231 allowed. If stack alignment is needed, we can only replace argument
8232 pointer with hard frame pointer, or replace frame pointer with stack
8233 pointer. Otherwise, frame pointer elimination is automatically
8234 handled and all other eliminations are valid. */
8235
8236 static bool
8237 ix86_can_eliminate (const int from, const int to)
8238 {
8239 if (stack_realign_fp)
8240 return ((from == ARG_POINTER_REGNUM
8241 && to == HARD_FRAME_POINTER_REGNUM)
8242 || (from == FRAME_POINTER_REGNUM
8243 && to == STACK_POINTER_REGNUM));
8244 else
8245 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
8246 }
8247
8248 /* Return the offset between two registers, one to be eliminated, and the other
8249 its replacement, at the start of a routine. */
8250
8251 HOST_WIDE_INT
8252 ix86_initial_elimination_offset (int from, int to)
8253 {
8254 struct ix86_frame frame;
8255 ix86_compute_frame_layout (&frame);
8256
8257 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8258 return frame.hard_frame_pointer_offset;
8259 else if (from == FRAME_POINTER_REGNUM
8260 && to == HARD_FRAME_POINTER_REGNUM)
8261 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
8262 else
8263 {
8264 gcc_assert (to == STACK_POINTER_REGNUM);
8265
8266 if (from == ARG_POINTER_REGNUM)
8267 return frame.stack_pointer_offset;
8268
8269 gcc_assert (from == FRAME_POINTER_REGNUM);
8270 return frame.stack_pointer_offset - frame.frame_pointer_offset;
8271 }
8272 }
8273
8274 /* In a dynamically-aligned function, we can't know the offset from
8275 stack pointer to frame pointer, so we must ensure that setjmp
8276 eliminates fp against the hard fp (%ebp) rather than trying to
8277 index from %esp up to the top of the frame across a gap that is
8278 of unknown (at compile-time) size. */
8279 static rtx
8280 ix86_builtin_setjmp_frame_value (void)
8281 {
8282 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
8283 }
8284
8285 /* Fill the ix86_frame structure describing the frame of the function being compiled. */
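/* Rough sketch of the layout computed below, from higher to lower
   addresses (an editor's summary of the code; offsets grow downwards
   from the return address):

     return address
     [pushed static chain]
     [saved frame pointer]            <- hard_frame_pointer_offset
     [realignment padding]            (stack_realign_fp only)
     GPR save area                    (nregs * UNITS_PER_WORD)
     padding0 + SSE save area         (nsseregs * 16, 16-byte aligned)
     va_arg register save area
     padding1
     local variables                  <- frame_pointer_offset
     outgoing arguments area
     padding2                         <- stack_pointer_offset
     (the red zone, when usable, is carved off the bottom)  */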
8286
8287 static void
8288 ix86_compute_frame_layout (struct ix86_frame *frame)
8289 {
8290 unsigned int stack_alignment_needed;
8291 HOST_WIDE_INT offset;
8292 unsigned int preferred_alignment;
8293 HOST_WIDE_INT size = get_frame_size ();
8294
8295 frame->nregs = ix86_nsaved_regs ();
8296 frame->nsseregs = ix86_nsaved_sseregs ();
8297
8298 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
8299 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
8300
8301 /* The MS ABI seems to require the stack alignment to always be 16, except
8302 for function prologues and leaf functions. */
8303 if ((ix86_cfun_abi () == MS_ABI && preferred_alignment < 16)
8304 && (!current_function_is_leaf || cfun->calls_alloca != 0
8305 || ix86_current_function_calls_tls_descriptor))
8306 {
8307 preferred_alignment = 16;
8308 stack_alignment_needed = 16;
8309 crtl->preferred_stack_boundary = 128;
8310 crtl->stack_alignment_needed = 128;
8311 }
8312
8313 gcc_assert (!size || stack_alignment_needed);
8314 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
8315 gcc_assert (preferred_alignment <= stack_alignment_needed);
8316
8317 /* During reload iterations the number of registers saved can change.
8318 Recompute the value as needed. Do not recompute when the number of
8319 registers didn't change, as reload does multiple calls to the function
8320 and does not expect the decision to change within a single iteration. */
8321 if (!optimize_function_for_size_p (cfun)
8322 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
8323 {
8324 int count = frame->nregs;
8325 struct cgraph_node *node = cgraph_node (current_function_decl);
8326
8327 cfun->machine->use_fast_prologue_epilogue_nregs = count;
8328 /* The fast prologue uses move instead of push to save registers. This
8329 is significantly longer, but also executes faster as modern hardware
8330 can execute the moves in parallel, but can't do that for push/pop.
8331
8332 Be careful about choosing which prologue to emit: when the function
8333 takes many instructions to execute, we may use the slow version, and
8334 likewise when the function is known to be outside a hot spot (this is
8335 known with feedback only). Weight the size of the function by the number
8336 of registers to save, as it is cheap to use one or two push instructions
8337 but very slow to use many of them. */
8338 if (count)
8339 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
8340 if (node->frequency < NODE_FREQUENCY_NORMAL
8341 || (flag_branch_probabilities
8342 && node->frequency < NODE_FREQUENCY_HOT))
8343 cfun->machine->use_fast_prologue_epilogue = false;
8344 else
8345 cfun->machine->use_fast_prologue_epilogue
8346 = !expensive_function_p (count);
8347 }
8348 if (TARGET_PROLOGUE_USING_MOVE
8349 && cfun->machine->use_fast_prologue_epilogue)
8350 frame->save_regs_using_mov = true;
8351 else
8352 frame->save_regs_using_mov = false;
8353
8354 /* If static stack checking is enabled and done with probes, the registers
8355 need to be saved before allocating the frame. */
8356 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
8357 frame->save_regs_using_mov = false;
8358
8359 /* Skip return address. */
8360 offset = UNITS_PER_WORD;
8361
8362 /* Skip pushed static chain. */
8363 if (ix86_static_chain_on_stack)
8364 offset += UNITS_PER_WORD;
8365
8366 /* Skip saved base pointer. */
8367 if (frame_pointer_needed)
8368 offset += UNITS_PER_WORD;
8369
8370 frame->hard_frame_pointer_offset = offset;
8371
8372 /* Align the offset, because the realigned frame starts from
8373 here. */
8374 if (stack_realign_fp)
8375 offset = (offset + stack_alignment_needed -1) & -stack_alignment_needed;
8376
8377 /* Register save area */
8378 offset += frame->nregs * UNITS_PER_WORD;
8379
8380 /* Align SSE reg save area. */
8381 if (frame->nsseregs)
8382 frame->padding0 = ((offset + 16 - 1) & -16) - offset;
8383 else
8384 frame->padding0 = 0;
8385
8386 /* SSE register save area. */
8387 offset += frame->padding0 + frame->nsseregs * 16;
8388
8389 /* Va-arg area */
8390 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
8391 offset += frame->va_arg_size;
8392
8393 /* Align start of frame for local function. */
8394 frame->padding1 = ((offset + stack_alignment_needed - 1)
8395 & -stack_alignment_needed) - offset;
8396
8397 offset += frame->padding1;
8398
8399 /* Frame pointer points here. */
8400 frame->frame_pointer_offset = offset;
8401
8402 offset += size;
8403
8404 /* Add the outgoing arguments area. It can be skipped if we eliminated
8405 all the function calls as dead code.
8406 Skipping is however impossible when the function calls alloca, as the
8407 alloca expander assumes that the last crtl->outgoing_args_size bytes
8408 of the stack frame are unused. */
8409 if (ACCUMULATE_OUTGOING_ARGS
8410 && (!current_function_is_leaf || cfun->calls_alloca
8411 || ix86_current_function_calls_tls_descriptor))
8412 {
8413 offset += crtl->outgoing_args_size;
8414 frame->outgoing_arguments_size = crtl->outgoing_args_size;
8415 }
8416 else
8417 frame->outgoing_arguments_size = 0;
8418
8419 /* Align stack boundary. Only needed if we're calling another function
8420 or using alloca. */
8421 if (!current_function_is_leaf || cfun->calls_alloca
8422 || ix86_current_function_calls_tls_descriptor)
8423 frame->padding2 = ((offset + preferred_alignment - 1)
8424 & -preferred_alignment) - offset;
8425 else
8426 frame->padding2 = 0;
8427
8428 offset += frame->padding2;
8429
8430 /* We've reached the end of the stack frame. */
8431 frame->stack_pointer_offset = offset;
8432
8433 /* Size the prologue needs to allocate. */
8434 frame->to_allocate =
8435 (size + frame->padding1 + frame->padding2
8436 + frame->outgoing_arguments_size + frame->va_arg_size);
8437
8438 if ((!frame->to_allocate && frame->nregs <= 1)
8439 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
8440 frame->save_regs_using_mov = false;
8441
8442 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE
8443 && current_function_sp_is_unchanging
8444 && current_function_is_leaf
8445 && !ix86_current_function_calls_tls_descriptor)
8446 {
8447 frame->red_zone_size = frame->to_allocate;
8448 if (frame->save_regs_using_mov)
8449 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
8450 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
8451 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
8452 }
8453 else
8454 frame->red_zone_size = 0;
8455 frame->to_allocate -= frame->red_zone_size;
8456 frame->stack_pointer_offset -= frame->red_zone_size;
8457 }
8458
8459 /* Emit code to save registers in the prologue. */
8460
8461 static void
8462 ix86_emit_save_regs (void)
8463 {
8464 unsigned int regno;
8465 rtx insn;
8466
8467 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
8468 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8469 {
8470 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
8471 RTX_FRAME_RELATED_P (insn) = 1;
8472 }
8473 }
8474
8475 /* Emit code to save registers using MOV insns. The first register
8476 is stored at POINTER + OFFSET. */
8477 static void
8478 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
8479 {
8480 unsigned int regno;
8481 rtx insn;
8482
8483 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8484 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8485 {
8486 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
8487 Pmode, offset),
8488 gen_rtx_REG (Pmode, regno));
8489 RTX_FRAME_RELATED_P (insn) = 1;
8490 offset += UNITS_PER_WORD;
8491 }
8492 }
8493
8494 /* Emit code to save SSE registers using MOV insns. The first register
8495 is stored at POINTER + OFFSET. */
8496 static void
8497 ix86_emit_save_sse_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
8498 {
8499 unsigned int regno;
8500 rtx insn;
8501 rtx mem;
8502
8503 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8504 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8505 {
8506 mem = adjust_address (gen_rtx_MEM (TImode, pointer), TImode, offset);
8507 set_mem_align (mem, 128);
8508 insn = emit_move_insn (mem, gen_rtx_REG (TImode, regno));
8509 RTX_FRAME_RELATED_P (insn) = 1;
8510 offset += 16;
8511 }
8512 }
8513
8514 static GTY(()) rtx queued_cfa_restores;
8515
8516 /* Add a REG_CFA_RESTORE REG note to INSN, or queue it until the next stack
8517 manipulation insn. Don't add it if the previously
8518 saved value will be left untouched within the stack red zone until return,
8519 as unwinders can find the same value in the register and
8520 on the stack. */
8521
8522 static void
8523 ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT red_offset)
8524 {
8525 if (TARGET_RED_ZONE
8526 && !TARGET_64BIT_MS_ABI
8527 && red_offset + RED_ZONE_SIZE >= 0
8528 && crtl->args.pops_args < 65536)
8529 return;
8530
8531 if (insn)
8532 {
8533 add_reg_note (insn, REG_CFA_RESTORE, reg);
8534 RTX_FRAME_RELATED_P (insn) = 1;
8535 }
8536 else
8537 queued_cfa_restores
8538 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
8539 }
8540
8541 /* Add queued REG_CFA_RESTORE notes, if any, to INSN. */
8542
8543 static void
8544 ix86_add_queued_cfa_restore_notes (rtx insn)
8545 {
8546 rtx last;
8547 if (!queued_cfa_restores)
8548 return;
8549 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
8550 ;
8551 XEXP (last, 1) = REG_NOTES (insn);
8552 REG_NOTES (insn) = queued_cfa_restores;
8553 queued_cfa_restores = NULL_RTX;
8554 RTX_FRAME_RELATED_P (insn) = 1;
8555 }
8556
8557 /* Expand a prologue or epilogue stack adjustment.
8558 The pattern exists to put a dependency on all ebp-based memory accesses.
8559 STYLE should be negative if instructions should be marked as frame
8560 related, zero if the %r11 register is live and cannot be freely used,
8561 and positive otherwise. */
8562
8563 static void
8564 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
8565 int style, bool set_cfa)
8566 {
8567 rtx insn;
8568
8569 if (! TARGET_64BIT)
8570 insn = emit_insn (gen_pro_epilogue_adjust_stack_si_1 (dest, src, offset));
8571 else if (x86_64_immediate_operand (offset, DImode))
8572 insn = emit_insn (gen_pro_epilogue_adjust_stack_di_1 (dest, src, offset));
8573 else
8574 {
8575 rtx tmp;
8576 /* r11 is used by indirect sibcall return as well, set before the
8577 epilogue and used after the epilogue. */
8578 if (style)
8579 tmp = gen_rtx_REG (DImode, R11_REG);
8580 else
8581 {
8582 gcc_assert (src != hard_frame_pointer_rtx
8583 && dest != hard_frame_pointer_rtx);
8584 tmp = hard_frame_pointer_rtx;
8585 }
8586 insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
8587 if (style < 0)
8588 RTX_FRAME_RELATED_P (insn) = 1;
8589 insn = emit_insn (gen_pro_epilogue_adjust_stack_di_2 (dest, src, tmp,
8590 offset));
8591 }
8592
8593 if (style >= 0)
8594 ix86_add_queued_cfa_restore_notes (insn);
8595
8596 if (set_cfa)
8597 {
8598 rtx r;
8599
8600 gcc_assert (ix86_cfa_state->reg == src);
8601 ix86_cfa_state->offset += INTVAL (offset);
8602 ix86_cfa_state->reg = dest;
8603
8604 r = gen_rtx_PLUS (Pmode, src, offset);
8605 r = gen_rtx_SET (VOIDmode, dest, r);
8606 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
8607 RTX_FRAME_RELATED_P (insn) = 1;
8608 }
8609 else if (style < 0)
8610 RTX_FRAME_RELATED_P (insn) = 1;
8611 }
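/* Typical usage sketch (an illustrative call, not quoted from elsewhere
   in this file): the prologue allocates the frame with something like

     pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
                                GEN_INT (-frame.to_allocate), -1, false);

   which emits a single stack-pointer adjustment, marks it frame related
   (STYLE < 0), and on 64-bit falls back to a scratch register when the
   amount does not fit in a sign-extended 32-bit immediate. */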
8612
8613 /* Find an available register to be used as the dynamic realign argument
8614 pointer register. Such a register will be written in the prologue and
8615 used at the beginning of the body, so it must not be
8616 1. a parameter passing register.
8617 2. the GOT pointer.
8618 We reuse the static-chain register if it is available. Otherwise, we
8619 use DI for i386 and R13 for x86-64. We chose R13 since it has a
8620 shorter encoding.
8621
8622 Return: the regno of chosen register. */
8623
8624 static unsigned int
8625 find_drap_reg (void)
8626 {
8627 tree decl = cfun->decl;
8628
8629 if (TARGET_64BIT)
8630 {
8631 /* Use R13 for a nested function or a function that needs a static chain.
8632 Since a function with a tail call may use any caller-saved
8633 register in the epilogue, DRAP must not use a caller-saved
8634 register in such a case. */
8635 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
8636 return R13_REG;
8637
8638 return R10_REG;
8639 }
8640 else
8641 {
8642 /* Use DI for a nested function or a function that needs a static chain.
8643 Since a function with a tail call may use any caller-saved
8644 register in the epilogue, DRAP must not use a caller-saved
8645 register in such a case. */
8646 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
8647 return DI_REG;
8648
8649 /* Reuse static chain register if it isn't used for parameter
8650 passing. */
8651 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2
8652 && !lookup_attribute ("fastcall",
8653 TYPE_ATTRIBUTES (TREE_TYPE (decl)))
8654 && !lookup_attribute ("thiscall",
8655 TYPE_ATTRIBUTES (TREE_TYPE (decl))))
8656 return CX_REG;
8657 else
8658 return DI_REG;
8659 }
8660 }
8661
8662 /* Return minimum incoming stack alignment. */
8663
8664 static unsigned int
8665 ix86_minimum_incoming_stack_boundary (bool sibcall)
8666 {
8667 unsigned int incoming_stack_boundary;
8668
8669 /* Prefer the one specified at command line. */
8670 if (ix86_user_incoming_stack_boundary)
8671 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
8672 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
8673 if -mstackrealign is used, this isn't a sibcall check, and the
8674 estimated stack alignment is 128 bits. */
8675 else if (!sibcall
8676 && !TARGET_64BIT
8677 && ix86_force_align_arg_pointer
8678 && crtl->stack_alignment_estimated == 128)
8679 incoming_stack_boundary = MIN_STACK_BOUNDARY;
8680 else
8681 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
8682
8683 /* Incoming stack alignment can be changed on individual functions
8684 via force_align_arg_pointer attribute. We use the smallest
8685 incoming stack boundary. */
8686 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
8687 && lookup_attribute (ix86_force_align_arg_pointer_string,
8688 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
8689 incoming_stack_boundary = MIN_STACK_BOUNDARY;
8690
8691 /* The incoming stack frame has to be aligned at least at
8692 parm_stack_boundary. */
8693 if (incoming_stack_boundary < crtl->parm_stack_boundary)
8694 incoming_stack_boundary = crtl->parm_stack_boundary;
8695
8696 /* The stack at the entry of main is aligned by the runtime. We use
8697 the smallest incoming stack boundary. */
8698 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
8699 && DECL_NAME (current_function_decl)
8700 && MAIN_NAME_P (DECL_NAME (current_function_decl))
8701 && DECL_FILE_SCOPE_P (current_function_decl))
8702 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
8703
8704 return incoming_stack_boundary;
8705 }
8706
8707 /* Update incoming stack boundary and estimated stack alignment. */
8708
8709 static void
8710 ix86_update_stack_boundary (void)
8711 {
8712 ix86_incoming_stack_boundary
8713 = ix86_minimum_incoming_stack_boundary (false);
8714
8715 /* x86_64 varargs functions need 16-byte stack alignment for the
8716 register save area. */
8717 if (TARGET_64BIT
8718 && cfun->stdarg
8719 && crtl->stack_alignment_estimated < 128)
8720 crtl->stack_alignment_estimated = 128;
8721 }
8722
8723 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
8724 needed or an rtx for DRAP otherwise. */
8725
8726 static rtx
8727 ix86_get_drap_rtx (void)
8728 {
8729 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
8730 crtl->need_drap = true;
8731
8732 if (stack_realign_drap)
8733 {
8734 /* Assign DRAP to vDRAP and return vDRAP. */
8735 unsigned int regno = find_drap_reg ();
8736 rtx drap_vreg;
8737 rtx arg_ptr;
8738 rtx seq, insn;
8739
8740 arg_ptr = gen_rtx_REG (Pmode, regno);
8741 crtl->drap_reg = arg_ptr;
8742
8743 start_sequence ();
8744 drap_vreg = copy_to_reg (arg_ptr);
8745 seq = get_insns ();
8746 end_sequence ();
8747
8748 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
8749 if (!optimize)
8750 {
8751 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
8752 RTX_FRAME_RELATED_P (insn) = 1;
8753 }
8754 return drap_vreg;
8755 }
8756 else
8757 return NULL;
8758 }
8759
8760 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
8761
8762 static rtx
8763 ix86_internal_arg_pointer (void)
8764 {
8765 return virtual_incoming_args_rtx;
8766 }
8767
8768 struct scratch_reg {
8769 rtx reg;
8770 bool saved;
8771 };
8772
8773 /* Return a short-lived scratch register for use on function entry.
8774 In 32-bit mode, it is valid only after the registers are saved
8775 in the prologue. This register must be released by means of
8776 release_scratch_register_on_entry once it is dead. */
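/* A sketch of the expected usage pattern (see ix86_adjust_stack_and_probe
   and ix86_emit_probe_stack_range below for the real call sites):

     struct scratch_reg sr;
     get_scratch_register_on_entry (&sr);
     ...emit insns that use sr.reg as a temporary...
     release_scratch_register_on_entry (&sr);  */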
8777
8778 static void
8779 get_scratch_register_on_entry (struct scratch_reg *sr)
8780 {
8781 int regno;
8782
8783 sr->saved = false;
8784
8785 if (TARGET_64BIT)
8786 {
8787 /* We always use R11 in 64-bit mode. */
8788 regno = R11_REG;
8789 }
8790 else
8791 {
8792 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
8793 bool fastcall_p
8794 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
8795 bool static_chain_p = DECL_STATIC_CHAIN (decl);
8796 int regparm = ix86_function_regparm (fntype, decl);
8797 int drap_regno
8798 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
8799
8800 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
8801 for the static chain register. */
8802 if ((regparm < 1 || (fastcall_p && !static_chain_p))
8803 && drap_regno != AX_REG)
8804 regno = AX_REG;
8805 else if (regparm < 2 && drap_regno != DX_REG)
8806 regno = DX_REG;
8807 /* ecx is the static chain register. */
8808 else if (regparm < 3 && !fastcall_p && !static_chain_p
8809 && drap_regno != CX_REG)
8810 regno = CX_REG;
8811 else if (ix86_save_reg (BX_REG, true))
8812 regno = BX_REG;
8813 /* esi is the static chain register. */
8814 else if (!(regparm == 3 && static_chain_p)
8815 && ix86_save_reg (SI_REG, true))
8816 regno = SI_REG;
8817 else if (ix86_save_reg (DI_REG, true))
8818 regno = DI_REG;
8819 else
8820 {
8821 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
8822 sr->saved = true;
8823 }
8824 }
8825
8826 sr->reg = gen_rtx_REG (Pmode, regno);
8827 if (sr->saved)
8828 {
8829 rtx insn = emit_insn (gen_push (sr->reg));
8830 RTX_FRAME_RELATED_P (insn) = 1;
8831 }
8832 }
8833
8834 /* Release a scratch register obtained from the preceding function. */
8835
8836 static void
8837 release_scratch_register_on_entry (struct scratch_reg *sr)
8838 {
8839 if (sr->saved)
8840 {
8841 rtx x, insn = emit_insn (ix86_gen_pop1 (sr->reg));
8842
8843 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
8844 RTX_FRAME_RELATED_P (insn) = 1;
8845 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
8846 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
8847 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
8848 }
8849 }
8850
8851 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
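/* With the default STACK_CHECK_PROBE_INTERVAL_EXP of 12 (an assumption;
   targets may override it), PROBE_INTERVAL is 4096 bytes, i.e. one page
   is touched per probe.  */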
8852
8853 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
8854
8855 static void
8856 ix86_adjust_stack_and_probe (HOST_WIDE_INT size)
8857 {
8858 /* We skip the probe for the first interval + a small dope of 4 words and
8859 probe that many bytes past the specified size to maintain a protection
8860 area at the bottom of the stack. */
8861 const int dope = 4 * UNITS_PER_WORD;
8862 rtx size_rtx = GEN_INT (size);
8863
8864 /* See if we have a constant small number of probes to generate. If so,
8865 that's the easy case. The run-time loop is made up of 11 insns in the
8866 generic case while the compile-time loop is made up of 3+2*(n-1) insns
8867 for n # of intervals. */
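/* For example, assuming PROBE_INTERVAL is 4096: a 12288-byte allocation
   spans 3 intervals, so the unrolled sequence below costs 3+2*(3-1) = 7
   insns, whereas anything larger than 5*4096 = 20480 bytes falls through
   to the run-time loop.  */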
8868 if (size <= 5 * PROBE_INTERVAL)
8869 {
8870 HOST_WIDE_INT i, adjust;
8871 bool first_probe = true;
8872
8873 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
8874 values of N from 1 until it exceeds SIZE. If only one probe is
8875 needed, this will not generate any code. Then adjust and probe
8876 to PROBE_INTERVAL + SIZE. */
8877 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
8878 {
8879 if (first_probe)
8880 {
8881 adjust = 2 * PROBE_INTERVAL + dope;
8882 first_probe = false;
8883 }
8884 else
8885 adjust = PROBE_INTERVAL;
8886
8887 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8888 plus_constant (stack_pointer_rtx, -adjust)));
8889 emit_stack_probe (stack_pointer_rtx);
8890 }
8891
8892 if (first_probe)
8893 adjust = size + PROBE_INTERVAL + dope;
8894 else
8895 adjust = size + PROBE_INTERVAL - i;
8896
8897 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8898 plus_constant (stack_pointer_rtx, -adjust)));
8899 emit_stack_probe (stack_pointer_rtx);
8900
8901 /* Adjust back to account for the additional first interval. */
8902 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8903 plus_constant (stack_pointer_rtx,
8904 PROBE_INTERVAL + dope)));
8905 }
8906
8907 /* Otherwise, do the same as above, but in a loop. Note that we must be
8908 extra careful with variables wrapping around because we might be at
8909 the very top (or the very bottom) of the address space and we have
8910 to be able to handle this case properly; in particular, we use an
8911 equality test for the loop condition. */
8912 else
8913 {
8914 HOST_WIDE_INT rounded_size;
8915 struct scratch_reg sr;
8916
8917 get_scratch_register_on_entry (&sr);
8918
8919
8920 /* Step 1: round SIZE to the previous multiple of the interval. */
8921
8922 rounded_size = size & -PROBE_INTERVAL;
8923
8924
8925 /* Step 2: compute initial and final value of the loop counter. */
8926
8927 /* SP = SP_0 + PROBE_INTERVAL. */
8928 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8929 plus_constant (stack_pointer_rtx,
8930 - (PROBE_INTERVAL + dope))));
8931
8932 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
8933 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
8934 emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
8935 gen_rtx_PLUS (Pmode, sr.reg,
8936 stack_pointer_rtx)));
8937
8938
8939 /* Step 3: the loop
8940
8941 while (SP != LAST_ADDR)
8942 {
8943 SP = SP + PROBE_INTERVAL
8944 probe at SP
8945 }
8946
8947 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
8948 values of N from 1 until it is equal to ROUNDED_SIZE. */
8949
8950 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
8951
8952
8953 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
8954 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
8955
8956 if (size != rounded_size)
8957 {
8958 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8959 plus_constant (stack_pointer_rtx,
8960 rounded_size - size)));
8961 emit_stack_probe (stack_pointer_rtx);
8962 }
8963
8964 /* Adjust back to account for the additional first interval. */
8965 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8966 plus_constant (stack_pointer_rtx,
8967 PROBE_INTERVAL + dope)));
8968
8969 release_scratch_register_on_entry (&sr);
8970 }
8971
8972 gcc_assert (ix86_cfa_state->reg != stack_pointer_rtx);
8973
8974 /* Make sure nothing is scheduled before we are done. */
8975 emit_insn (gen_blockage ());
8976 }
8977
8978 /* Adjust the stack pointer up to REG while probing it. */
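/* A sketch of the loop emitted below, in AT&T syntax, assuming 32-bit code,
   a PROBE_INTERVAL of 4096 and %eax as REG (both hypothetical; the label
   names are likewise only illustrative):

	.LPSRL0:
		cmpl	%eax, %esp
		je	.LPSRE0
		subl	$4096, %esp
		orl	$0, (%esp)
		jmp	.LPSRL0
	.LPSRE0:  */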
8979
8980 const char *
8981 output_adjust_stack_and_probe (rtx reg)
8982 {
8983 static int labelno = 0;
8984 char loop_lab[32], end_lab[32];
8985 rtx xops[2];
8986
8987 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
8988 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
8989
8990 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
8991
8992 /* Jump to END_LAB if SP == LAST_ADDR. */
8993 xops[0] = stack_pointer_rtx;
8994 xops[1] = reg;
8995 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
8996 fputs ("\tje\t", asm_out_file);
8997 assemble_name_raw (asm_out_file, end_lab);
8998 fputc ('\n', asm_out_file);
8999
9000 /* SP = SP + PROBE_INTERVAL. */
9001 xops[1] = GEN_INT (PROBE_INTERVAL);
9002 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
9003
9004 /* Probe at SP. */
9005 xops[1] = const0_rtx;
9006 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
9007
9008 fprintf (asm_out_file, "\tjmp\t");
9009 assemble_name_raw (asm_out_file, loop_lab);
9010 fputc ('\n', asm_out_file);
9011
9012 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
9013
9014 return "";
9015 }
9016
9017 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
9018 inclusive. These are offsets from the current stack pointer. */
9019
9020 static void
9021 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
9022 {
9023 /* See if we have a constant small number of probes to generate. If so,
9024 that's the easy case. The run-time loop is made up of 7 insns in the
9025 generic case while the compile-time loop is made up of n insns for n #
9026 of intervals. */
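/* For example, assuming PROBE_INTERVAL is 4096, FIRST = 16384 and
   SIZE = 8192: the loop below emits a probe at sp-20480 and the final
   statement a probe at sp-24576.  */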
9027 if (size <= 7 * PROBE_INTERVAL)
9028 {
9029 HOST_WIDE_INT i;
9030
9031 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
9032 it exceeds SIZE. If only one probe is needed, this will not
9033 generate any code. Then probe at FIRST + SIZE. */
9034 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
9035 emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + i)));
9036
9037 emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + size)));
9038 }
9039
9040 /* Otherwise, do the same as above, but in a loop. Note that we must be
9041 extra careful with variables wrapping around because we might be at
9042 the very top (or the very bottom) of the address space and we have
9043 to be able to handle this case properly; in particular, we use an
9044 equality test for the loop condition. */
9045 else
9046 {
9047 HOST_WIDE_INT rounded_size, last;
9048 struct scratch_reg sr;
9049
9050 get_scratch_register_on_entry (&sr);
9051
9052
9053 /* Step 1: round SIZE to the previous multiple of the interval. */
9054
9055 rounded_size = size & -PROBE_INTERVAL;
9056
9057
9058 /* Step 2: compute initial and final value of the loop counter. */
9059
9060 /* TEST_OFFSET = FIRST. */
9061 emit_move_insn (sr.reg, GEN_INT (-first));
9062
9063 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
9064 last = first + rounded_size;
9065
9066
9067 /* Step 3: the loop
9068
9069 while (TEST_ADDR != LAST_ADDR)
9070 {
9071 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
9072 probe at TEST_ADDR
9073 }
9074
9075 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
9076 until it is equal to ROUNDED_SIZE. */
9077
9078 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
9079
9080
9081 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
9082 that SIZE is equal to ROUNDED_SIZE. */
9083
9084 if (size != rounded_size)
9085 emit_stack_probe (plus_constant (gen_rtx_PLUS (Pmode,
9086 stack_pointer_rtx,
9087 sr.reg),
9088 rounded_size - size));
9089
9090 release_scratch_register_on_entry (&sr);
9091 }
9092
9093 /* Make sure nothing is scheduled before we are done. */
9094 emit_insn (gen_blockage ());
9095 }
9096
9097 /* Probe a range of stack addresses from REG to END, inclusive. These are
9098 offsets from the current stack pointer. */
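/* The emitted loop has the same shape as in output_adjust_stack_and_probe
   above, except that the SUB advances REG rather than %esp, and each probe
   is done at a (%esp,REG) address, e.g. "orl $0, (%esp,%ecx)" in AT&T
   syntax if REG happened to be %ecx (hypothetical).  */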
9099
9100 const char *
9101 output_probe_stack_range (rtx reg, rtx end)
9102 {
9103 static int labelno = 0;
9104 char loop_lab[32], end_lab[32];
9105 rtx xops[3];
9106
9107 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
9108 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
9109
9110 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
9111
9112 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
9113 xops[0] = reg;
9114 xops[1] = end;
9115 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
9116 fputs ("\tje\t", asm_out_file);
9117 assemble_name_raw (asm_out_file, end_lab);
9118 fputc ('\n', asm_out_file);
9119
9120 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
9121 xops[1] = GEN_INT (PROBE_INTERVAL);
9122 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
9123
9124 /* Probe at TEST_ADDR. */
9125 xops[0] = stack_pointer_rtx;
9126 xops[1] = reg;
9127 xops[2] = const0_rtx;
9128 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
9129
9130 fprintf (asm_out_file, "\tjmp\t");
9131 assemble_name_raw (asm_out_file, loop_lab);
9132 fputc ('\n', asm_out_file);
9133
9134 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
9135
9136 return "";
9137 }
9138
9139 /* Finalize the stack_realign_needed flag, which will guide the prologue/epilogue
9140 to be generated in the correct form. */
9141 static void
9142 ix86_finalize_stack_realign_flags (void)
9143 {
9144 /* Check whether stack realignment is really needed after reload, and
9145 store the result in cfun. */
9146 unsigned int incoming_stack_boundary
9147 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
9148 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
9149 unsigned int stack_realign = (incoming_stack_boundary
9150 < (current_function_is_leaf
9151 ? crtl->max_used_stack_slot_alignment
9152 : crtl->stack_alignment_needed));
9153
9154 if (crtl->stack_realign_finalized)
9155 {
9156 /* After stack_realign_needed is finalized, we can no longer
9157 change it. */
9158 gcc_assert (crtl->stack_realign_needed == stack_realign);
9159 }
9160 else
9161 {
9162 crtl->stack_realign_needed = stack_realign;
9163 crtl->stack_realign_finalized = true;
9164 }
9165 }
9166
9167 /* Expand the prologue into a bunch of separate insns. */
9168
9169 void
9170 ix86_expand_prologue (void)
9171 {
9172 rtx insn;
9173 bool pic_reg_used;
9174 struct ix86_frame frame;
9175 HOST_WIDE_INT allocate;
9176 int gen_frame_pointer = frame_pointer_needed;
9177
9178 ix86_finalize_stack_realign_flags ();
9179
9180 /* DRAP should not coexist with stack_realign_fp */
9181 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
9182
9183 /* Initialize CFA state for before the prologue. */
9184 ix86_cfa_state->reg = stack_pointer_rtx;
9185 ix86_cfa_state->offset = INCOMING_FRAME_SP_OFFSET;
9186
9187 ix86_compute_frame_layout (&frame);
9188
9189 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
9190 {
9191 rtx push, mov;
9192
9193 /* Make sure the function starts with
9194 8b ff movl.s %edi,%edi (emitted by ix86_asm_output_function_label)
9195 55 push %ebp
9196 8b ec movl.s %esp,%ebp
9197
9198 This matches the hookable function prologue in Win32 API
9199 functions in Microsoft Windows XP Service Pack 2 and newer.
9200 Wine uses this to enable Windows apps to hook the Win32 API
9201 functions provided by Wine. */
9202 push = emit_insn (gen_push (hard_frame_pointer_rtx));
9203 mov = emit_insn (gen_vswapmov (hard_frame_pointer_rtx,
9204 stack_pointer_rtx));
9205
9206 if (frame_pointer_needed && !(crtl->drap_reg
9207 && crtl->stack_realign_needed))
9208 {
9209 /* The push %ebp and movl.s %esp, %ebp already set up
9210 the frame pointer. No need to do this again. */
9211 gen_frame_pointer = 0;
9212 RTX_FRAME_RELATED_P (push) = 1;
9213 RTX_FRAME_RELATED_P (mov) = 1;
9214 if (ix86_cfa_state->reg == stack_pointer_rtx)
9215 ix86_cfa_state->reg = hard_frame_pointer_rtx;
9216 }
9217 else
9218 /* If the frame pointer is not needed, pop %ebp again. This
9219 could be optimized for cases where ebp needs to be backed up
9220 for some other reason. If stack realignment is needed, pop
9221 the base pointer again, align the stack, and later regenerate
9222 the frame pointer setup. The frame pointer generated by the
9223 hook prologue is not aligned, so it can't be used. */
9224 insn = emit_insn (ix86_gen_pop1 (hard_frame_pointer_rtx));
9225 }
9226
9227 /* The first insn of a function that accepts its static chain on the
9228 stack is to push the register that would be filled in by a direct
9229 call. This insn will be skipped by the trampoline. */
9230 if (ix86_static_chain_on_stack)
9231 {
9232 rtx t;
9233
9234 insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
9235 emit_insn (gen_blockage ());
9236
9237 /* We don't want to interpret this push insn as a register save,
9238 only as a stack adjustment. The real copy of the register as
9239 a save will be done later, if needed. */
9240 t = plus_constant (stack_pointer_rtx, -UNITS_PER_WORD);
9241 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
9242 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
9243 RTX_FRAME_RELATED_P (insn) = 1;
9244 }
9245
9246 /* Emit prologue code to adjust stack alignment and set up DRAP, in case
9247 DRAP is needed and stack realignment is really needed after reload. */
9248 if (crtl->drap_reg && crtl->stack_realign_needed)
9249 {
9250 rtx x, y;
9251 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
9252 int param_ptr_offset = UNITS_PER_WORD;
9253
9254 if (ix86_static_chain_on_stack)
9255 param_ptr_offset += UNITS_PER_WORD;
9256 if (!call_used_regs[REGNO (crtl->drap_reg)])
9257 param_ptr_offset += UNITS_PER_WORD;
9258
9259 gcc_assert (stack_realign_drap);
9260
9261 /* Grab the argument pointer. */
9262 x = plus_constant (stack_pointer_rtx, param_ptr_offset);
9263 y = crtl->drap_reg;
9264
9265 /* Only need to push the parameter pointer reg if it is a
9266 caller-saved reg. */
9267 if (!call_used_regs[REGNO (crtl->drap_reg)])
9268 {
9269 /* Push arg pointer reg */
9270 insn = emit_insn (gen_push (y));
9271 RTX_FRAME_RELATED_P (insn) = 1;
9272 }
9273
9274 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
9275 RTX_FRAME_RELATED_P (insn) = 1;
9276 ix86_cfa_state->reg = crtl->drap_reg;
9277
9278 /* Align the stack. */
9279 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
9280 stack_pointer_rtx,
9281 GEN_INT (-align_bytes)));
9282 RTX_FRAME_RELATED_P (insn) = 1;
9283
9284 /* Replicate the return address on the stack so that the return
9285 address can be reached via the (argp - 1) slot. This is needed
9286 to implement the macro RETURN_ADDR_RTX and the intrinsic function
9287 expand_builtin_return_addr, etc. */
9288 x = crtl->drap_reg;
9289 x = gen_frame_mem (Pmode,
9290 plus_constant (x, -UNITS_PER_WORD));
9291 insn = emit_insn (gen_push (x));
9292 RTX_FRAME_RELATED_P (insn) = 1;
9293 }
9294
9295 /* Note: AT&T enter does NOT have reversed args. Enter is probably
9296 slower on all targets. Also sdb doesn't like it. */
9297
9298 if (gen_frame_pointer)
9299 {
9300 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
9301 RTX_FRAME_RELATED_P (insn) = 1;
9302
9303 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
9304 RTX_FRAME_RELATED_P (insn) = 1;
9305
9306 if (ix86_cfa_state->reg == stack_pointer_rtx)
9307 ix86_cfa_state->reg = hard_frame_pointer_rtx;
9308 }
9309
9310 if (stack_realign_fp)
9311 {
9312 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
9313 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
9314
9315 /* Align the stack. */
9316 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
9317 stack_pointer_rtx,
9318 GEN_INT (-align_bytes)));
9319 RTX_FRAME_RELATED_P (insn) = 1;
9320 }
9321
9322 allocate = frame.to_allocate + frame.nsseregs * 16 + frame.padding0;
9323
9324 if (!frame.save_regs_using_mov)
9325 ix86_emit_save_regs ();
9326 else
9327 allocate += frame.nregs * UNITS_PER_WORD;
9328
9329 /* The stack has already been decremented by the instruction calling us
9330 so we need to probe unconditionally to preserve the protection area. */
9331 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
9332 {
9333 /* We expect the registers to be saved when probes are used. */
9334 gcc_assert (!frame.save_regs_using_mov);
9335
9336 if (STACK_CHECK_MOVING_SP)
9337 {
9338 ix86_adjust_stack_and_probe (allocate);
9339 allocate = 0;
9340 }
9341 else
9342 {
9343 HOST_WIDE_INT size = allocate;
9344
9345 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
9346 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
9347
9348 if (TARGET_STACK_PROBE)
9349 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
9350 else
9351 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
9352 }
9353 }
9354
9355 /* When using the red zone we may start register saving before allocating
9356 the stack frame, saving one cycle of the prologue. However, we avoid
9357 doing this if we are going to have to probe the stack, since at least
9358 on x86_64 the stack probe can turn into a call that clobbers a
9359 red zone location. */
9360 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE && frame.save_regs_using_mov
9361 && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT))
9362 ix86_emit_save_regs_using_mov ((frame_pointer_needed
9363 && !crtl->stack_realign_needed)
9364 ? hard_frame_pointer_rtx
9365 : stack_pointer_rtx,
9366 -frame.nregs * UNITS_PER_WORD);
9367
9368 if (allocate == 0)
9369 ;
9370 else if (!ix86_target_stack_probe () || allocate < CHECK_STACK_LIMIT)
9371 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9372 GEN_INT (-allocate), -1,
9373 ix86_cfa_state->reg == stack_pointer_rtx);
9374 else
9375 {
9376 rtx eax = gen_rtx_REG (Pmode, AX_REG);
9377 bool eax_live;
9378 rtx t;
9379
9380 if (cfun->machine->call_abi == MS_ABI)
9381 eax_live = false;
9382 else
9383 eax_live = ix86_eax_live_at_start_p ();
9384
9385 if (eax_live)
9386 {
9387 emit_insn (gen_push (eax));
9388 allocate -= UNITS_PER_WORD;
9389 }
9390
9391 emit_move_insn (eax, GEN_INT (allocate));
9392
9393 insn = emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
9394
9395 if (ix86_cfa_state->reg == stack_pointer_rtx)
9396 {
9397 ix86_cfa_state->offset += allocate;
9398 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
9399 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
9400 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
9401 RTX_FRAME_RELATED_P (insn) = 1;
9402 }
9403
9404 if (eax_live)
9405 {
9406 if (frame_pointer_needed)
9407 t = plus_constant (hard_frame_pointer_rtx,
9408 allocate
9409 - frame.to_allocate
9410 - frame.nregs * UNITS_PER_WORD);
9411 else
9412 t = plus_constant (stack_pointer_rtx, allocate);
9413 emit_move_insn (eax, gen_rtx_MEM (Pmode, t));
9414 }
9415 }
9416
9417 if (frame.save_regs_using_mov
9418 && !(!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE
9419 && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)))
9420 {
9421 if (!frame_pointer_needed
9422 || !(frame.to_allocate + frame.padding0)
9423 || crtl->stack_realign_needed)
9424 ix86_emit_save_regs_using_mov (stack_pointer_rtx,
9425 frame.to_allocate
9426 + frame.nsseregs * 16 + frame.padding0);
9427 else
9428 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
9429 -frame.nregs * UNITS_PER_WORD);
9430 }
9431 if (!frame_pointer_needed
9432 || !(frame.to_allocate + frame.padding0)
9433 || crtl->stack_realign_needed)
9434 ix86_emit_save_sse_regs_using_mov (stack_pointer_rtx,
9435 frame.to_allocate);
9436 else
9437 ix86_emit_save_sse_regs_using_mov (hard_frame_pointer_rtx,
9438 - frame.nregs * UNITS_PER_WORD
9439 - frame.nsseregs * 16
9440 - frame.padding0);
9441
9442 pic_reg_used = false;
9443 if (pic_offset_table_rtx
9444 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
9445 || crtl->profile))
9446 {
9447 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
9448
9449 if (alt_pic_reg_used != INVALID_REGNUM)
9450 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
9451
9452 pic_reg_used = true;
9453 }
9454
9455 if (pic_reg_used)
9456 {
9457 if (TARGET_64BIT)
9458 {
9459 if (ix86_cmodel == CM_LARGE_PIC)
9460 {
9461 rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
9462 rtx label = gen_label_rtx ();
9463 emit_label (label);
9464 LABEL_PRESERVE_P (label) = 1;
9465 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
9466 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
9467 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
9468 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
9469 pic_offset_table_rtx, tmp_reg));
9470 }
9471 else
9472 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
9473 }
9474 else
9475 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
9476 }
9477
9478 /* In the pic_reg_used case, make sure that the GOT load isn't deleted
9479 when mcount needs it. Blockage to avoid call movement across mcount
9480 call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
9481 note. */
9482 if (crtl->profile && pic_reg_used)
9483 emit_insn (gen_prologue_use (pic_offset_table_rtx));
9484
9485 if (crtl->drap_reg && !crtl->stack_realign_needed)
9486 {
9487 /* vDRAP is set up, but after reload it turns out stack realignment
9488 isn't necessary; here we emit prologue code to set up DRAP
9489 without the stack realignment adjustment. */
9490 rtx x;
9491 int drap_bp_offset = UNITS_PER_WORD * 2;
9492
9493 if (ix86_static_chain_on_stack)
9494 drap_bp_offset += UNITS_PER_WORD;
9495 x = plus_constant (hard_frame_pointer_rtx, drap_bp_offset);
9496 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, x));
9497 }
9498
9499 /* Prevent instructions from being scheduled into register save push
9500 sequence when access to the redzone area is done through frame pointer.
9501 The offset between the frame pointer and the stack pointer is calculated
9502 relative to the value of the stack pointer at the end of the function
9503 prologue, and moving instructions that access redzone area via frame
9504 pointer inside push sequence violates this assumption. */
9505 if (frame_pointer_needed && frame.red_zone_size)
9506 emit_insn (gen_memory_blockage ());
9507
9508 /* Emit cld instruction if stringops are used in the function. */
9509 if (TARGET_CLD && ix86_current_function_needs_cld)
9510 emit_insn (gen_cld ());
9511 }
9512
9513 /* Emit code to restore REG using a POP insn. */
9514
9515 static void
9516 ix86_emit_restore_reg_using_pop (rtx reg, HOST_WIDE_INT red_offset)
9517 {
9518 rtx insn = emit_insn (ix86_gen_pop1 (reg));
9519
9520 if (ix86_cfa_state->reg == crtl->drap_reg
9521 && REGNO (reg) == REGNO (crtl->drap_reg))
9522 {
9523 /* Previously we'd represented the CFA as an expression
9524 like *(%ebp - 8). We've just popped that value from
9525 the stack, which means we need to reset the CFA to
9526 the drap register. This will remain until we restore
9527 the stack pointer. */
9528 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
9529 RTX_FRAME_RELATED_P (insn) = 1;
9530 return;
9531 }
9532
9533 if (ix86_cfa_state->reg == stack_pointer_rtx)
9534 {
9535 ix86_cfa_state->offset -= UNITS_PER_WORD;
9536 add_reg_note (insn, REG_CFA_ADJUST_CFA,
9537 copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
9538 RTX_FRAME_RELATED_P (insn) = 1;
9539 }
9540
9541 /* When the frame pointer is the CFA, and we pop it, we are
9542 swapping back to the stack pointer as the CFA. This happens
9543 for stack frames that don't allocate other data, so we assume
9544 the stack pointer is now pointing at the return address, i.e.
9545 the function entry state, which makes the offset be 1 word. */
9546 else if (ix86_cfa_state->reg == hard_frame_pointer_rtx
9547 && reg == hard_frame_pointer_rtx)
9548 {
9549 ix86_cfa_state->reg = stack_pointer_rtx;
9550 ix86_cfa_state->offset -= UNITS_PER_WORD;
9551
9552 add_reg_note (insn, REG_CFA_DEF_CFA,
9553 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
9554 GEN_INT (ix86_cfa_state->offset)));
9555 RTX_FRAME_RELATED_P (insn) = 1;
9556 }
9557
9558 ix86_add_cfa_restore_note (insn, reg, red_offset);
9559 }
9560
9561 /* Emit code to restore saved registers using POP insns. */
9562
9563 static void
9564 ix86_emit_restore_regs_using_pop (HOST_WIDE_INT red_offset)
9565 {
9566 int regno;
9567
9568 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9569 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
9570 {
9571 ix86_emit_restore_reg_using_pop (gen_rtx_REG (Pmode, regno),
9572 red_offset);
9573 red_offset += UNITS_PER_WORD;
9574 }
9575 }
9576
9577 /* Emit code and notes for the LEAVE instruction. */
9578
9579 static void
9580 ix86_emit_leave (HOST_WIDE_INT red_offset)
9581 {
9582 rtx insn = emit_insn (ix86_gen_leave ());
9583
9584 ix86_add_queued_cfa_restore_notes (insn);
9585
9586 if (ix86_cfa_state->reg == hard_frame_pointer_rtx)
9587 {
9588 ix86_cfa_state->reg = stack_pointer_rtx;
9589 ix86_cfa_state->offset -= UNITS_PER_WORD;
9590
9591 add_reg_note (insn, REG_CFA_ADJUST_CFA,
9592 copy_rtx (XVECEXP (PATTERN (insn), 0, 0)));
9593 RTX_FRAME_RELATED_P (insn) = 1;
9594 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx, red_offset);
9595 }
9596 }
9597
9598 /* Emit code to restore saved registers using MOV insns. First register
9599 is restored from POINTER + OFFSET. */
9600 static void
9601 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
9602 HOST_WIDE_INT red_offset,
9603 int maybe_eh_return)
9604 {
9605 unsigned int regno;
9606 rtx base_address = gen_rtx_MEM (Pmode, pointer);
9607 rtx insn;
9608
9609 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9610 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
9611 {
9612 rtx reg = gen_rtx_REG (Pmode, regno);
9613
9614 /* Ensure that adjust_address won't be forced to produce pointer
9615 out of range allowed by x86-64 instruction set. */
9616 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
9617 {
9618 rtx r11;
9619
9620 r11 = gen_rtx_REG (DImode, R11_REG);
9621 emit_move_insn (r11, GEN_INT (offset));
9622 emit_insn (gen_adddi3 (r11, r11, pointer));
9623 base_address = gen_rtx_MEM (Pmode, r11);
9624 offset = 0;
9625 }
9626 insn = emit_move_insn (reg,
9627 adjust_address (base_address, Pmode, offset));
9628 offset += UNITS_PER_WORD;
9629
9630 if (ix86_cfa_state->reg == crtl->drap_reg
9631 && regno == REGNO (crtl->drap_reg))
9632 {
9633 /* Previously we'd represented the CFA as an expression
9634 like *(%ebp - 8). We've just popped that value from
9635 the stack, which means we need to reset the CFA to
9636 the drap register. This will remain until we restore
9637 the stack pointer. */
9638 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
9639 RTX_FRAME_RELATED_P (insn) = 1;
9640 }
9641 else
9642 ix86_add_cfa_restore_note (NULL_RTX, reg, red_offset);
9643
9644 red_offset += UNITS_PER_WORD;
9645 }
9646 }
9647
9648 /* Emit code to restore saved registers using MOV insns. First register
9649 is restored from POINTER + OFFSET. */
9650 static void
9651 ix86_emit_restore_sse_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
9652 HOST_WIDE_INT red_offset,
9653 int maybe_eh_return)
9654 {
9655 int regno;
9656 rtx base_address = gen_rtx_MEM (TImode, pointer);
9657 rtx mem;
9658
9659 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9660 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
9661 {
9662 rtx reg = gen_rtx_REG (TImode, regno);
9663
9664 /* Ensure that adjust_address won't be forced to produce pointer
9665 out of range allowed by x86-64 instruction set. */
9666 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
9667 {
9668 rtx r11;
9669
9670 r11 = gen_rtx_REG (DImode, R11_REG);
9671 emit_move_insn (r11, GEN_INT (offset));
9672 emit_insn (gen_adddi3 (r11, r11, pointer));
9673 base_address = gen_rtx_MEM (TImode, r11);
9674 offset = 0;
9675 }
9676 mem = adjust_address (base_address, TImode, offset);
9677 set_mem_align (mem, 128);
9678 emit_move_insn (reg, mem);
9679 offset += 16;
9680
9681 ix86_add_cfa_restore_note (NULL_RTX, reg, red_offset);
9682
9683 red_offset += 16;
9684 }
9685 }
9686
9687 /* Restore function stack, frame, and registers. */
9688
9689 void
9690 ix86_expand_epilogue (int style)
9691 {
9692 int sp_valid;
9693 struct ix86_frame frame;
9694 HOST_WIDE_INT offset, red_offset;
9695 struct machine_cfa_state cfa_state_save = *ix86_cfa_state;
9696 bool using_drap;
9697
9698 ix86_finalize_stack_realign_flags ();
9699
9700 /* When stack is realigned, SP must be valid. */
9701 sp_valid = (!frame_pointer_needed
9702 || current_function_sp_is_unchanging
9703 || stack_realign_fp);
9704
9705 ix86_compute_frame_layout (&frame);
9706
9707 /* See the comment about red zone and frame
9708 pointer usage in ix86_expand_prologue. */
9709 if (frame_pointer_needed && frame.red_zone_size)
9710 emit_insn (gen_memory_blockage ());
9711
9712 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
9713 gcc_assert (!using_drap || ix86_cfa_state->reg == crtl->drap_reg);
9714
9715 /* Calculate start of saved registers relative to ebp. Special care
9716 must be taken for the normal return case of a function using
9717 eh_return: the eax and edx registers are marked as saved, but not
9718 restored along this path. */
9719 offset = frame.nregs;
9720 if (crtl->calls_eh_return && style != 2)
9721 offset -= 2;
9722 offset *= -UNITS_PER_WORD;
9723 offset -= frame.nsseregs * 16 + frame.padding0;
9724
9725 /* Calculate start of saved registers relative to esp on entry of the
9726 function. When realigning stack, this needs to be the most negative
9727 value possible at runtime. */
9728 red_offset = offset;
9729 if (using_drap)
9730 red_offset -= crtl->stack_alignment_needed / BITS_PER_UNIT
9731 + UNITS_PER_WORD;
9732 else if (stack_realign_fp)
9733 red_offset -= crtl->stack_alignment_needed / BITS_PER_UNIT
9734 - UNITS_PER_WORD;
9735 if (ix86_static_chain_on_stack)
9736 red_offset -= UNITS_PER_WORD;
9737 if (frame_pointer_needed)
9738 red_offset -= UNITS_PER_WORD;
9739
9740 /* If we're only restoring one register and sp is not valid, then
9741 use a move instruction to restore the register, since it's
9742 less work than reloading sp and popping the register.
9743
9744 The default code results in a stack adjustment using an add/lea instruction,
9745 while this code results in a LEAVE instruction (or its discrete equivalent),
9746 so it is profitable in some other cases as well, especially when there
9747 are no registers to restore. We also use this code when TARGET_USE_LEAVE
9748 and there is exactly one register to pop. This heuristic may need some
9749 tuning in future. */
9750 if ((!sp_valid && (frame.nregs + frame.nsseregs) <= 1)
9751 || (TARGET_EPILOGUE_USING_MOVE
9752 && cfun->machine->use_fast_prologue_epilogue
9753 && ((frame.nregs + frame.nsseregs) > 1
9754 || (frame.to_allocate + frame.padding0) != 0))
9755 || (frame_pointer_needed && !(frame.nregs + frame.nsseregs)
9756 && (frame.to_allocate + frame.padding0) != 0)
9757 || (frame_pointer_needed && TARGET_USE_LEAVE
9758 && cfun->machine->use_fast_prologue_epilogue
9759 && (frame.nregs + frame.nsseregs) == 1)
9760 || crtl->calls_eh_return)
9761 {
9762 /* Restore registers. We can use ebp or esp to address the memory
9763 locations. If both are available, default to ebp, since offsets
9764 are known to be small. The only exception is esp pointing directly
9765 to the end of the block of saved registers, where we may simplify
9766 the addressing mode.
9767
9768 If we are realigning the stack with bp and sp, the register restores
9769 can't be addressed by bp; sp must be used instead. */
9770
9771 if (!frame_pointer_needed
9772 || (sp_valid && !(frame.to_allocate + frame.padding0))
9773 || stack_realign_fp)
9774 {
9775 ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
9776 frame.to_allocate, red_offset,
9777 style == 2);
9778 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
9779 frame.to_allocate
9780 + frame.nsseregs * 16
9781 + frame.padding0,
9782 red_offset
9783 + frame.nsseregs * 16
9784 + frame.padding0, style == 2);
9785 }
9786 else
9787 {
9788 ix86_emit_restore_sse_regs_using_mov (hard_frame_pointer_rtx,
9789 offset, red_offset,
9790 style == 2);
9791 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
9792 offset
9793 + frame.nsseregs * 16
9794 + frame.padding0,
9795 red_offset
9796 + frame.nsseregs * 16
9797 + frame.padding0, style == 2);
9798 }
9799
9800 red_offset -= offset;
9801
9802 /* eh_return epilogues need %ecx added to the stack pointer. */
9803 if (style == 2)
9804 {
9805 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
9806
9807 /* Stack align doesn't work with eh_return. */
9808 gcc_assert (!crtl->stack_realign_needed);
9809 /* Neither do regparm nested functions. */
9810 gcc_assert (!ix86_static_chain_on_stack);
9811
9812 if (frame_pointer_needed)
9813 {
9814 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
9815 tmp = plus_constant (tmp, UNITS_PER_WORD);
9816 tmp = emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
9817
9818 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
9819 tmp = emit_move_insn (hard_frame_pointer_rtx, tmp);
9820
9821 /* Note that we use SA as a temporary CFA, as the return
9822 address is at the proper place relative to it. We
9823 pretend this happens at the FP restore insn because
9824 prior to this insn the FP would be stored at the wrong
9825 offset relative to SA, and after this insn we have no
9826 other reasonable register to use for the CFA. We don't
9827 bother resetting the CFA to the SP for the duration of
9828 the return insn. */
9829 add_reg_note (tmp, REG_CFA_DEF_CFA,
9830 plus_constant (sa, UNITS_PER_WORD));
9831 ix86_add_queued_cfa_restore_notes (tmp);
9832 add_reg_note (tmp, REG_CFA_RESTORE, hard_frame_pointer_rtx);
9833 RTX_FRAME_RELATED_P (tmp) = 1;
9834 ix86_cfa_state->reg = sa;
9835 ix86_cfa_state->offset = UNITS_PER_WORD;
9836
9837 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
9838 const0_rtx, style, false);
9839 }
9840 else
9841 {
9842 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
9843 tmp = plus_constant (tmp, (frame.to_allocate
9844 + frame.nregs * UNITS_PER_WORD
9845 + frame.nsseregs * 16
9846 + frame.padding0));
9847 tmp = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
9848 ix86_add_queued_cfa_restore_notes (tmp);
9849
9850 gcc_assert (ix86_cfa_state->reg == stack_pointer_rtx);
9851 if (ix86_cfa_state->offset != UNITS_PER_WORD)
9852 {
9853 ix86_cfa_state->offset = UNITS_PER_WORD;
9854 add_reg_note (tmp, REG_CFA_DEF_CFA,
9855 plus_constant (stack_pointer_rtx,
9856 UNITS_PER_WORD));
9857 RTX_FRAME_RELATED_P (tmp) = 1;
9858 }
9859 }
9860 }
9861 else if (!frame_pointer_needed)
9862 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9863 GEN_INT (frame.to_allocate
9864 + frame.nregs * UNITS_PER_WORD
9865 + frame.nsseregs * 16
9866 + frame.padding0),
9867 style, !using_drap);
9868 /* If not an i386, mov & pop is faster than "leave". */
9869 else if (TARGET_USE_LEAVE || optimize_function_for_size_p (cfun)
9870 || !cfun->machine->use_fast_prologue_epilogue)
9871 ix86_emit_leave (red_offset);
9872 else
9873 {
9874 pro_epilogue_adjust_stack (stack_pointer_rtx,
9875 hard_frame_pointer_rtx,
9876 const0_rtx, style, !using_drap);
9877
9878 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx, red_offset);
9879 }
9880 }
9881 else
9882 {
9883 /* The first step is to deallocate the stack frame so that we can
9884 pop the registers.
9885
9886 If we realign the stack with the frame pointer, then the stack pointer
9887 can't be recovered via lea $offset(%bp), %sp, because there is a
9888 padding area between bp and sp for the realignment.
9889 "add $to_allocate, %sp" must be used instead. */
9890 if (!sp_valid)
9891 {
9892 gcc_assert (frame_pointer_needed);
9893 gcc_assert (!stack_realign_fp);
9894 pro_epilogue_adjust_stack (stack_pointer_rtx,
9895 hard_frame_pointer_rtx,
9896 GEN_INT (offset), style, false);
9897 ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
9898 0, red_offset,
9899 style == 2);
9900 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9901 GEN_INT (frame.nsseregs * 16
9902 + frame.padding0),
9903 style, false);
9904 }
9905 else if (frame.to_allocate || frame.padding0 || frame.nsseregs)
9906 {
9907 ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
9908 frame.to_allocate, red_offset,
9909 style == 2);
9910 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9911 GEN_INT (frame.to_allocate
9912 + frame.nsseregs * 16
9913 + frame.padding0), style,
9914 !using_drap && !frame_pointer_needed);
9915 }
9916
9917 ix86_emit_restore_regs_using_pop (red_offset + frame.nsseregs * 16
9918 + frame.padding0);
9919 red_offset -= offset;
9920
9921 if (frame_pointer_needed)
9922 {
9923 /* Leave results in shorter dependency chains on CPUs that are
9924 able to grok it fast. */
9925 if (TARGET_USE_LEAVE)
9926 ix86_emit_leave (red_offset);
9927 else
9928 {
9929 /* When stack realignment really happens, recovering the stack
9930 pointer from the hard frame pointer is a must, if not using
9931 leave. */
9932 if (stack_realign_fp)
9933 pro_epilogue_adjust_stack (stack_pointer_rtx,
9934 hard_frame_pointer_rtx,
9935 const0_rtx, style, !using_drap);
9936 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx,
9937 red_offset);
9938 }
9939 }
9940 }
9941
9942 if (using_drap)
9943 {
9944 int param_ptr_offset = UNITS_PER_WORD;
9945 rtx insn;
9946
9947 gcc_assert (stack_realign_drap);
9948
9949 if (ix86_static_chain_on_stack)
9950 param_ptr_offset += UNITS_PER_WORD;
9951 if (!call_used_regs[REGNO (crtl->drap_reg)])
9952 param_ptr_offset += UNITS_PER_WORD;
9953
9954 insn = emit_insn (gen_rtx_SET
9955 (VOIDmode, stack_pointer_rtx,
9956 gen_rtx_PLUS (Pmode,
9957 crtl->drap_reg,
9958 GEN_INT (-param_ptr_offset))));
9959 ix86_cfa_state->reg = stack_pointer_rtx;
9960 ix86_cfa_state->offset = param_ptr_offset;
9961
9962 add_reg_note (insn, REG_CFA_DEF_CFA,
9963 gen_rtx_PLUS (Pmode, ix86_cfa_state->reg,
9964 GEN_INT (ix86_cfa_state->offset)));
9965 RTX_FRAME_RELATED_P (insn) = 1;
9966
9967 if (!call_used_regs[REGNO (crtl->drap_reg)])
9968 ix86_emit_restore_reg_using_pop (crtl->drap_reg, -UNITS_PER_WORD);
9969 }
9970
9971 /* Remove the saved static chain from the stack. The use of ECX is
9972 merely as a scratch register, not as the actual static chain. */
9973 if (ix86_static_chain_on_stack)
9974 {
9975 rtx r, insn;
9976
9977 gcc_assert (ix86_cfa_state->reg == stack_pointer_rtx);
9978 ix86_cfa_state->offset += UNITS_PER_WORD;
9979
9980 r = gen_rtx_REG (Pmode, CX_REG);
9981 insn = emit_insn (ix86_gen_pop1 (r));
9982
9983 r = plus_constant (stack_pointer_rtx, UNITS_PER_WORD);
9984 r = gen_rtx_SET (VOIDmode, stack_pointer_rtx, r);
9985 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
9986 RTX_FRAME_RELATED_P (insn) = 1;
9987 }
9988
9989 /* Sibcall epilogues don't want a return instruction. */
9990 if (style == 0)
9991 {
9992 *ix86_cfa_state = cfa_state_save;
9993 return;
9994 }
9995
9996 if (crtl->args.pops_args && crtl->args.size)
9997 {
9998 rtx popc = GEN_INT (crtl->args.pops_args);
9999
10000 /* i386 can only pop 64K bytes. If asked to pop more, pop the return
10001 address, do an explicit add, and jump indirectly to the caller. */
10002
10003 if (crtl->args.pops_args >= 65536)
10004 {
10005 rtx ecx = gen_rtx_REG (SImode, CX_REG);
10006 rtx insn;
10007
10008 /* There is no "pascal" calling convention in any 64bit ABI. */
10009 gcc_assert (!TARGET_64BIT);
10010
10011 insn = emit_insn (gen_popsi1 (ecx));
10012 ix86_cfa_state->offset -= UNITS_PER_WORD;
10013
10014 add_reg_note (insn, REG_CFA_ADJUST_CFA,
10015 copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
10016 add_reg_note (insn, REG_CFA_REGISTER,
10017 gen_rtx_SET (VOIDmode, ecx, pc_rtx));
10018 RTX_FRAME_RELATED_P (insn) = 1;
10019
10020 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10021 popc, -1, true);
10022 emit_jump_insn (gen_return_indirect_internal (ecx));
10023 }
10024 else
10025 emit_jump_insn (gen_return_pop_internal (popc));
10026 }
10027 else
10028 emit_jump_insn (gen_return_internal ());
10029
10030 /* Restore the state back to the state from the prologue,
10031 so that it's correct for the next epilogue. */
10032 *ix86_cfa_state = cfa_state_save;
10033 }
10034
10035 /* Reset from the function's potential modifications. */
10036
10037 static void
10038 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
10039 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
10040 {
10041 if (pic_offset_table_rtx)
10042 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
10043 #if TARGET_MACHO
10044 /* Mach-O doesn't support labels at the end of objects, so if
10045 it looks like we might want one, insert a NOP. */
10046 {
10047 rtx insn = get_last_insn ();
10048 while (insn
10049 && NOTE_P (insn)
10050 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
10051 insn = PREV_INSN (insn);
10052 if (insn
10053 && (LABEL_P (insn)
10054 || (NOTE_P (insn)
10055 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
10056 fputs ("\tnop\n", file);
10057 }
10058 #endif
10059
10060 }
10061 \f
10062 /* Extract the parts of an RTL expression that is a valid memory address
10063 for an instruction. Return 0 if the structure of the address is
10064 grossly off. Return -1 if the address contains ASHIFT, so it is not
10065 strictly valid, but is still used for computing the length of an lea instruction. */
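/* As an illustrative example (with hard registers chosen purely for
   illustration), the 32-bit address

     (plus:SI (plus:SI (mult:SI (reg:SI %eax) (const_int 4))
		       (reg:SI %ebx))
	      (const_int 8))

   decomposes into base = %ebx, index = %eax, scale = 4, disp = 8,
   i.e. the operand written as 8(%ebx,%eax,4) in AT&T syntax.  */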
10066
10067 int
10068 ix86_decompose_address (rtx addr, struct ix86_address *out)
10069 {
10070 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
10071 rtx base_reg, index_reg;
10072 HOST_WIDE_INT scale = 1;
10073 rtx scale_rtx = NULL_RTX;
10074 rtx tmp;
10075 int retval = 1;
10076 enum ix86_address_seg seg = SEG_DEFAULT;
10077
10078 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
10079 base = addr;
10080 else if (GET_CODE (addr) == PLUS)
10081 {
10082 rtx addends[4], op;
10083 int n = 0, i;
10084
10085 op = addr;
10086 do
10087 {
10088 if (n >= 4)
10089 return 0;
10090 addends[n++] = XEXP (op, 1);
10091 op = XEXP (op, 0);
10092 }
10093 while (GET_CODE (op) == PLUS);
10094 if (n >= 4)
10095 return 0;
10096 addends[n] = op;
10097
10098 for (i = n; i >= 0; --i)
10099 {
10100 op = addends[i];
10101 switch (GET_CODE (op))
10102 {
10103 case MULT:
10104 if (index)
10105 return 0;
10106 index = XEXP (op, 0);
10107 scale_rtx = XEXP (op, 1);
10108 break;
10109
10110 case ASHIFT:
10111 if (index)
10112 return 0;
10113 index = XEXP (op, 0);
10114 tmp = XEXP (op, 1);
10115 if (!CONST_INT_P (tmp))
10116 return 0;
10117 scale = INTVAL (tmp);
10118 if ((unsigned HOST_WIDE_INT) scale > 3)
10119 return 0;
10120 scale = 1 << scale;
10121 break;
10122
10123 case UNSPEC:
10124 if (XINT (op, 1) == UNSPEC_TP
10125 && TARGET_TLS_DIRECT_SEG_REFS
10126 && seg == SEG_DEFAULT)
10127 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
10128 else
10129 return 0;
10130 break;
10131
10132 case REG:
10133 case SUBREG:
10134 if (!base)
10135 base = op;
10136 else if (!index)
10137 index = op;
10138 else
10139 return 0;
10140 break;
10141
10142 case CONST:
10143 case CONST_INT:
10144 case SYMBOL_REF:
10145 case LABEL_REF:
10146 if (disp)
10147 return 0;
10148 disp = op;
10149 break;
10150
10151 default:
10152 return 0;
10153 }
10154 }
10155 }
10156 else if (GET_CODE (addr) == MULT)
10157 {
10158 index = XEXP (addr, 0); /* index*scale */
10159 scale_rtx = XEXP (addr, 1);
10160 }
10161 else if (GET_CODE (addr) == ASHIFT)
10162 {
10163 /* We're called for lea too, which implements ashift on occasion. */
10164 index = XEXP (addr, 0);
10165 tmp = XEXP (addr, 1);
10166 if (!CONST_INT_P (tmp))
10167 return 0;
10168 scale = INTVAL (tmp);
10169 if ((unsigned HOST_WIDE_INT) scale > 3)
10170 return 0;
10171 scale = 1 << scale;
10172 retval = -1;
10173 }
10174 else
10175 disp = addr; /* displacement */
10176
10177 /* Extract the integral value of scale. */
10178 if (scale_rtx)
10179 {
10180 if (!CONST_INT_P (scale_rtx))
10181 return 0;
10182 scale = INTVAL (scale_rtx);
10183 }
10184
10185 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
10186 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
10187
10188 /* Avoid useless 0 displacement. */
10189 if (disp == const0_rtx && (base || index))
10190 disp = NULL_RTX;
10191
10192 /* Allow the arg pointer and stack pointer as an index if there is no scaling. */
10193 if (base_reg && index_reg && scale == 1
10194 && (index_reg == arg_pointer_rtx
10195 || index_reg == frame_pointer_rtx
10196 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
10197 {
10198 rtx tmp;
10199 tmp = base, base = index, index = tmp;
10200 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
10201 }
10202
10203 /* Special case: %ebp cannot be encoded as a base without a displacement.
10204 Similarly %r13. */
10205 if (!disp
10206 && base_reg
10207 && (base_reg == hard_frame_pointer_rtx
10208 || base_reg == frame_pointer_rtx
10209 || base_reg == arg_pointer_rtx
10210 || (REG_P (base_reg)
10211 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
10212 || REGNO (base_reg) == R13_REG))))
10213 disp = const0_rtx;
10214
10215 /* Special case: on K6, [%esi] makes the instruction vector decoded.
10216 Avoid this by transforming to [%esi+0].
10217 Reload calls address legitimization without cfun defined, so we need
10218 to test cfun for being non-NULL. */
10219 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
10220 && base_reg && !index_reg && !disp
10221 && REG_P (base_reg)
10222 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
10223 disp = const0_rtx;
10224
10225 /* Special case: encode reg+reg instead of reg*2. */
10226 if (!base && index && scale == 2)
10227 base = index, base_reg = index_reg, scale = 1;
10228
10229 /* Special case: scaling cannot be encoded without base or displacement. */
10230 if (!base && !disp && index && scale != 1)
10231 disp = const0_rtx;
10232
10233 out->base = base;
10234 out->index = index;
10235 out->disp = disp;
10236 out->scale = scale;
10237 out->seg = seg;
10238
10239 return retval;
10240 }
10241 \f
10242 /* Return cost of the memory address x.
10243 For i386, it is better to use a complex address than let gcc copy
10244 the address into a reg and make a new pseudo. But not if the address
10245 requires two regs - that would mean more pseudos with longer
10246 lifetimes. */
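/* For instance, with the heuristic below an address such as 8(%ebx) costs 1,
   one built from a single pseudo register costs 2, and one that needs two
   distinct pseudo registers (base and index) costs 3.  */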
10247 static int
10248 ix86_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
10249 {
10250 struct ix86_address parts;
10251 int cost = 1;
10252 int ok = ix86_decompose_address (x, &parts);
10253
10254 gcc_assert (ok);
10255
10256 if (parts.base && GET_CODE (parts.base) == SUBREG)
10257 parts.base = SUBREG_REG (parts.base);
10258 if (parts.index && GET_CODE (parts.index) == SUBREG)
10259 parts.index = SUBREG_REG (parts.index);
10260
10261 /* Attempt to minimize number of registers in the address. */
10262 if ((parts.base
10263 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
10264 || (parts.index
10265 && (!REG_P (parts.index)
10266 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
10267 cost++;
10268
10269 if (parts.base
10270 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
10271 && parts.index
10272 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
10273 && parts.base != parts.index)
10274 cost++;
10275
10276 /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
10277 since its predecode logic can't detect the length of instructions
10278 and it degenerates to vector decoding. Increase the cost of such
10279 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
10280 to split such addresses or even refuse such addresses at all.
10281
10282 The following addressing modes are affected:
10283 [base+scale*index]
10284 [scale*index+disp]
10285 [base+index]
10286
10287 The first and last cases may be avoidable by explicitly coding the zero in
10288 the memory address, but I don't have an AMD-K6 machine handy to check this
10289 theory. */
10290
10291 if (TARGET_K6
10292 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
10293 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
10294 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
10295 cost += 10;
10296
10297 return cost;
10298 }
10299 \f
10300 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
10301 this is used to form addresses to local data when -fPIC is in
10302 use. */
10303
10304 static bool
10305 darwin_local_data_pic (rtx disp)
10306 {
10307 return (GET_CODE (disp) == UNSPEC
10308 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
10309 }
10310
10311 /* Determine if a given RTX is a valid constant. We already know this
10312 satisfies CONSTANT_P. */
10313
10314 bool
10315 legitimate_constant_p (rtx x)
10316 {
10317 switch (GET_CODE (x))
10318 {
10319 case CONST:
10320 x = XEXP (x, 0);
10321
10322 if (GET_CODE (x) == PLUS)
10323 {
10324 if (!CONST_INT_P (XEXP (x, 1)))
10325 return false;
10326 x = XEXP (x, 0);
10327 }
10328
10329 if (TARGET_MACHO && darwin_local_data_pic (x))
10330 return true;
10331
10332 /* Only some unspecs are valid as "constants". */
10333 if (GET_CODE (x) == UNSPEC)
10334 switch (XINT (x, 1))
10335 {
10336 case UNSPEC_GOT:
10337 case UNSPEC_GOTOFF:
10338 case UNSPEC_PLTOFF:
10339 return TARGET_64BIT;
10340 case UNSPEC_TPOFF:
10341 case UNSPEC_NTPOFF:
10342 x = XVECEXP (x, 0, 0);
10343 return (GET_CODE (x) == SYMBOL_REF
10344 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
10345 case UNSPEC_DTPOFF:
10346 x = XVECEXP (x, 0, 0);
10347 return (GET_CODE (x) == SYMBOL_REF
10348 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
10349 default:
10350 return false;
10351 }
10352
10353 /* We must have drilled down to a symbol. */
10354 if (GET_CODE (x) == LABEL_REF)
10355 return true;
10356 if (GET_CODE (x) != SYMBOL_REF)
10357 return false;
10358 /* FALLTHRU */
10359
10360 case SYMBOL_REF:
10361 /* TLS symbols are never valid. */
10362 if (SYMBOL_REF_TLS_MODEL (x))
10363 return false;
10364
10365 /* DLLIMPORT symbols are never valid. */
10366 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
10367 && SYMBOL_REF_DLLIMPORT_P (x))
10368 return false;
10369 break;
10370
10371 case CONST_DOUBLE:
10372 if (GET_MODE (x) == TImode
10373 && x != CONST0_RTX (TImode)
10374 && !TARGET_64BIT)
10375 return false;
10376 break;
10377
10378 case CONST_VECTOR:
10379 if (!standard_sse_constant_p (x))
10380 return false;
10381
10382 default:
10383 break;
10384 }
10385
10386 /* Otherwise we handle everything else in the move patterns. */
10387 return true;
10388 }
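/* Illustrative examples (not from the original source): a plain
   (symbol_ref "foo") or (const (plus (symbol_ref "foo") (const_int 8)))
   is accepted above, while a symbol with a TLS model, a dllimport
   symbol, or (when !TARGET_64BIT) a nonzero TImode CONST_DOUBLE is
   rejected. */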
10389
10390 /* Determine if it's legal to put X into the constant pool. This
10391 is not possible for the address of thread-local symbols, which
10392 is checked above. */
10393
10394 static bool
10395 ix86_cannot_force_const_mem (rtx x)
10396 {
10397 /* We can always put integral constants and vectors in memory. */
10398 switch (GET_CODE (x))
10399 {
10400 case CONST_INT:
10401 case CONST_DOUBLE:
10402 case CONST_VECTOR:
10403 return false;
10404
10405 default:
10406 break;
10407 }
10408 return !legitimate_constant_p (x);
10409 }
10410
10411
10412 /* Nonzero if the constant value X is a legitimate general operand
10413 when generating PIC code. It is given that flag_pic is on and
10414 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
10415
10416 bool
10417 legitimate_pic_operand_p (rtx x)
10418 {
10419 rtx inner;
10420
10421 switch (GET_CODE (x))
10422 {
10423 case CONST:
10424 inner = XEXP (x, 0);
10425 if (GET_CODE (inner) == PLUS
10426 && CONST_INT_P (XEXP (inner, 1)))
10427 inner = XEXP (inner, 0);
10428
10429 /* Only some unspecs are valid as "constants". */
10430 if (GET_CODE (inner) == UNSPEC)
10431 switch (XINT (inner, 1))
10432 {
10433 case UNSPEC_GOT:
10434 case UNSPEC_GOTOFF:
10435 case UNSPEC_PLTOFF:
10436 return TARGET_64BIT;
10437 case UNSPEC_TPOFF:
10438 x = XVECEXP (inner, 0, 0);
10439 return (GET_CODE (x) == SYMBOL_REF
10440 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
10441 case UNSPEC_MACHOPIC_OFFSET:
10442 return legitimate_pic_address_disp_p (x);
10443 default:
10444 return false;
10445 }
10446 /* FALLTHRU */
10447
10448 case SYMBOL_REF:
10449 case LABEL_REF:
10450 return legitimate_pic_address_disp_p (x);
10451
10452 default:
10453 return true;
10454 }
10455 }
10456
10457 /* Determine if a given CONST RTX is a valid memory displacement
10458 in PIC mode. */
10459
10460 int
10461 legitimate_pic_address_disp_p (rtx disp)
10462 {
10463 bool saw_plus;
10464
10465 /* In 64bit mode we can allow direct addresses of symbols and labels
10466 when they are not dynamic symbols. */
10467 if (TARGET_64BIT)
10468 {
10469 rtx op0 = disp, op1;
10470
10471 switch (GET_CODE (disp))
10472 {
10473 case LABEL_REF:
10474 return true;
10475
10476 case CONST:
10477 if (GET_CODE (XEXP (disp, 0)) != PLUS)
10478 break;
10479 op0 = XEXP (XEXP (disp, 0), 0);
10480 op1 = XEXP (XEXP (disp, 0), 1);
10481 if (!CONST_INT_P (op1)
10482 || INTVAL (op1) >= 16*1024*1024
10483 || INTVAL (op1) < -16*1024*1024)
10484 break;
10485 if (GET_CODE (op0) == LABEL_REF)
10486 return true;
10487 if (GET_CODE (op0) != SYMBOL_REF)
10488 break;
10489 /* FALLTHRU */
10490
10491 case SYMBOL_REF:
10492 /* TLS references should always be enclosed in UNSPEC. */
10493 if (SYMBOL_REF_TLS_MODEL (op0))
10494 return false;
10495 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
10496 && ix86_cmodel != CM_LARGE_PIC)
10497 return true;
10498 break;
10499
10500 default:
10501 break;
10502 }
10503 }
10504 if (GET_CODE (disp) != CONST)
10505 return 0;
10506 disp = XEXP (disp, 0);
10507
10508 if (TARGET_64BIT)
10509 {
10510 /* It is unsafe to allow PLUS expressions here. This limits the allowed
10511 distance of GOT table references. We should not need these anyway. */
10512 if (GET_CODE (disp) != UNSPEC
10513 || (XINT (disp, 1) != UNSPEC_GOTPCREL
10514 && XINT (disp, 1) != UNSPEC_GOTOFF
10515 && XINT (disp, 1) != UNSPEC_PLTOFF))
10516 return 0;
10517
10518 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
10519 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
10520 return 0;
10521 return 1;
10522 }
10523
10524 saw_plus = false;
10525 if (GET_CODE (disp) == PLUS)
10526 {
10527 if (!CONST_INT_P (XEXP (disp, 1)))
10528 return 0;
10529 disp = XEXP (disp, 0);
10530 saw_plus = true;
10531 }
10532
10533 if (TARGET_MACHO && darwin_local_data_pic (disp))
10534 return 1;
10535
10536 if (GET_CODE (disp) != UNSPEC)
10537 return 0;
10538
10539 switch (XINT (disp, 1))
10540 {
10541 case UNSPEC_GOT:
10542 if (saw_plus)
10543 return false;
10544 /* We need to check for both symbols and labels because VxWorks loads
10545 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
10546 details. */
10547 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
10548 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
10549 case UNSPEC_GOTOFF:
10550 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
10551 While the ABI also specifies a 32bit relocation, we don't produce it
10552 in the small PIC model at all. */
10553 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
10554 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
10555 && !TARGET_64BIT)
10556 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
10557 return false;
10558 case UNSPEC_GOTTPOFF:
10559 case UNSPEC_GOTNTPOFF:
10560 case UNSPEC_INDNTPOFF:
10561 if (saw_plus)
10562 return false;
10563 disp = XVECEXP (disp, 0, 0);
10564 return (GET_CODE (disp) == SYMBOL_REF
10565 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
10566 case UNSPEC_NTPOFF:
10567 disp = XVECEXP (disp, 0, 0);
10568 return (GET_CODE (disp) == SYMBOL_REF
10569 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
10570 case UNSPEC_DTPOFF:
10571 disp = XVECEXP (disp, 0, 0);
10572 return (GET_CODE (disp) == SYMBOL_REF
10573 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
10574 }
10575
10576 return 0;
10577 }
10578
10579 /* Recognizes RTL expressions that are valid memory addresses for an
10580 instruction. The MODE argument is the machine mode for the MEM
10581 expression that wants to use this address.
10582
10583 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
10584 convert common non-canonical forms to canonical form so that they will
10585 be recognized. */
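/* Illustrative note (not from the original source): a canonical x86
   address decomposes as base + index*scale + disp, i.e. the AT&T form
   disp(base,index,scale), with scale restricted to 1, 2, 4 or 8; the
   checks below validate each of those parts in turn. */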
10586
10587 static bool
10588 ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
10589 rtx addr, bool strict)
10590 {
10591 struct ix86_address parts;
10592 rtx base, index, disp;
10593 HOST_WIDE_INT scale;
10594
10595 if (ix86_decompose_address (addr, &parts) <= 0)
10596 /* Decomposition failed. */
10597 return false;
10598
10599 base = parts.base;
10600 index = parts.index;
10601 disp = parts.disp;
10602 scale = parts.scale;
10603
10604 /* Validate base register.
10605
10606 Don't allow SUBREG's that span more than a word here. It can lead to spill
10607 failures when the base is one word out of a two word structure, which is
10608 represented internally as a DImode int. */
10609
10610 if (base)
10611 {
10612 rtx reg;
10613
10614 if (REG_P (base))
10615 reg = base;
10616 else if (GET_CODE (base) == SUBREG
10617 && REG_P (SUBREG_REG (base))
10618 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
10619 <= UNITS_PER_WORD)
10620 reg = SUBREG_REG (base);
10621 else
10622 /* Base is not a register. */
10623 return false;
10624
10625 if (GET_MODE (base) != Pmode)
10626 /* Base is not in Pmode. */
10627 return false;
10628
10629 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
10630 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
10631 /* Base is not valid. */
10632 return false;
10633 }
10634
10635 /* Validate index register.
10636
10637 Don't allow SUBREG's that span more than a word here -- same as above. */
10638
10639 if (index)
10640 {
10641 rtx reg;
10642
10643 if (REG_P (index))
10644 reg = index;
10645 else if (GET_CODE (index) == SUBREG
10646 && REG_P (SUBREG_REG (index))
10647 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
10648 <= UNITS_PER_WORD)
10649 reg = SUBREG_REG (index);
10650 else
10651 /* Index is not a register. */
10652 return false;
10653
10654 if (GET_MODE (index) != Pmode)
10655 /* Index is not in Pmode. */
10656 return false;
10657
10658 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
10659 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
10660 /* Index is not valid. */
10661 return false;
10662 }
10663
10664 /* Validate scale factor. */
10665 if (scale != 1)
10666 {
10667 if (!index)
10668 /* Scale without index. */
10669 return false;
10670
10671 if (scale != 2 && scale != 4 && scale != 8)
10672 /* Scale is not a valid multiplier. */
10673 return false;
10674 }
10675
10676 /* Validate displacement. */
10677 if (disp)
10678 {
10679 if (GET_CODE (disp) == CONST
10680 && GET_CODE (XEXP (disp, 0)) == UNSPEC
10681 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
10682 switch (XINT (XEXP (disp, 0), 1))
10683 {
10684 /* Refuse GOTOFF and GOT in 64bit mode since they are always 64bit
10685 when used. While the ABI also specifies 32bit relocations, we don't
10686 produce them at all and use IP-relative addressing instead. */
10687 case UNSPEC_GOT:
10688 case UNSPEC_GOTOFF:
10689 gcc_assert (flag_pic);
10690 if (!TARGET_64BIT)
10691 goto is_legitimate_pic;
10692
10693 /* 64bit address unspec. */
10694 return false;
10695
10696 case UNSPEC_GOTPCREL:
10697 gcc_assert (flag_pic);
10698 goto is_legitimate_pic;
10699
10700 case UNSPEC_GOTTPOFF:
10701 case UNSPEC_GOTNTPOFF:
10702 case UNSPEC_INDNTPOFF:
10703 case UNSPEC_NTPOFF:
10704 case UNSPEC_DTPOFF:
10705 break;
10706
10707 default:
10708 /* Invalid address unspec. */
10709 return false;
10710 }
10711
10712 else if (SYMBOLIC_CONST (disp)
10713 && (flag_pic
10714 || (TARGET_MACHO
10715 #if TARGET_MACHO
10716 && MACHOPIC_INDIRECT
10717 && !machopic_operand_p (disp)
10718 #endif
10719 )))
10720 {
10721
10722 is_legitimate_pic:
10723 if (TARGET_64BIT && (index || base))
10724 {
10725 /* foo@dtpoff(%rX) is ok. */
10726 if (GET_CODE (disp) != CONST
10727 || GET_CODE (XEXP (disp, 0)) != PLUS
10728 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
10729 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
10730 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
10731 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
10732 /* Non-constant pic memory reference. */
10733 return false;
10734 }
10735 else if (! legitimate_pic_address_disp_p (disp))
10736 /* Displacement is an invalid pic construct. */
10737 return false;
10738
10739 /* This code used to verify that a symbolic pic displacement
10740 includes the pic_offset_table_rtx register.
10741
10742 While this is a good idea, unfortunately these constructs may
10743 be created by the "adds using lea" optimization for incorrect
10744 code like:
10745
10746 int a;
10747 int foo(int i)
10748 {
10749 return *(&a+i);
10750 }
10751
10752 This code is nonsensical, but results in addressing the
10753 GOT table with a pic_offset_table_rtx base. We can't
10754 easily refuse it, since it gets matched by the
10755 "addsi3" pattern, which later gets split into an lea when
10756 the output register differs from the input. While this
10757 could be handled by a separate addsi pattern for this case
10758 that never results in an lea, disabling this test seems to be
10759 the easier and correct fix for the crash. */
10760 }
10761 else if (GET_CODE (disp) != LABEL_REF
10762 && !CONST_INT_P (disp)
10763 && (GET_CODE (disp) != CONST
10764 || !legitimate_constant_p (disp))
10765 && (GET_CODE (disp) != SYMBOL_REF
10766 || !legitimate_constant_p (disp)))
10767 /* Displacement is not constant. */
10768 return false;
10769 else if (TARGET_64BIT
10770 && !x86_64_immediate_operand (disp, VOIDmode))
10771 /* Displacement is out of range. */
10772 return false;
10773 }
10774
10775 /* Everything looks valid. */
10776 return true;
10777 }
10778
10779 /* Determine if a given RTX is a valid constant address. */
10780
10781 bool
10782 constant_address_p (rtx x)
10783 {
10784 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
10785 }
10786 \f
10787 /* Return a unique alias set for the GOT. */
10788
10789 static alias_set_type
10790 ix86_GOT_alias_set (void)
10791 {
10792 static alias_set_type set = -1;
10793 if (set == -1)
10794 set = new_alias_set ();
10795 return set;
10796 }
10797
10798 /* Return a legitimate reference for ORIG (an address) using the
10799 register REG. If REG is 0, a new pseudo is generated.
10800
10801 There are two types of references that must be handled:
10802
10803 1. Global data references must load the address from the GOT, via
10804 the PIC reg. An insn is emitted to do this load, and the reg is
10805 returned.
10806
10807 2. Static data references, constant pool addresses, and code labels
10808 compute the address as an offset from the GOT, whose base is in
10809 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
10810 differentiate them from global data objects. The returned
10811 address is the PIC reg + an unspec constant.
10812
10813 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
10814 reg also appears in the address. */
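/* Illustrative note (not from the original source): for 32bit ELF PIC,
   case 1 typically ends up as "movl foo@GOT(%ebx), %reg" (a load of the
   symbol's address from the GOT), while case 2 becomes an offset such
   as "leal bar@GOTOFF(%ebx), %reg"; in 64bit mode the GOT load is
   instead the RIP-relative "movq foo@GOTPCREL(%rip), %reg". */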
10815
10816 static rtx
10817 legitimize_pic_address (rtx orig, rtx reg)
10818 {
10819 rtx addr = orig;
10820 rtx new_rtx = orig;
10821 rtx base;
10822
10823 #if TARGET_MACHO
10824 if (TARGET_MACHO && !TARGET_64BIT)
10825 {
10826 if (reg == 0)
10827 reg = gen_reg_rtx (Pmode);
10828 /* Use the generic Mach-O PIC machinery. */
10829 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
10830 }
10831 #endif
10832
10833 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
10834 new_rtx = addr;
10835 else if (TARGET_64BIT
10836 && ix86_cmodel != CM_SMALL_PIC
10837 && gotoff_operand (addr, Pmode))
10838 {
10839 rtx tmpreg;
10840 /* This symbol may be referenced via a displacement from the PIC
10841 base address (@GOTOFF). */
10842
10843 if (reload_in_progress)
10844 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
10845 if (GET_CODE (addr) == CONST)
10846 addr = XEXP (addr, 0);
10847 if (GET_CODE (addr) == PLUS)
10848 {
10849 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
10850 UNSPEC_GOTOFF);
10851 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
10852 }
10853 else
10854 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
10855 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
10856 if (!reg)
10857 tmpreg = gen_reg_rtx (Pmode);
10858 else
10859 tmpreg = reg;
10860 emit_move_insn (tmpreg, new_rtx);
10861
10862 if (reg != 0)
10863 {
10864 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
10865 tmpreg, 1, OPTAB_DIRECT);
10866 new_rtx = reg;
10867 }
10868 else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
10869 }
10870 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
10871 {
10872 /* This symbol may be referenced via a displacement from the PIC
10873 base address (@GOTOFF). */
10874
10875 if (reload_in_progress)
10876 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
10877 if (GET_CODE (addr) == CONST)
10878 addr = XEXP (addr, 0);
10879 if (GET_CODE (addr) == PLUS)
10880 {
10881 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
10882 UNSPEC_GOTOFF);
10883 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
10884 }
10885 else
10886 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
10887 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
10888 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
10889
10890 if (reg != 0)
10891 {
10892 emit_move_insn (reg, new_rtx);
10893 new_rtx = reg;
10894 }
10895 }
10896 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
10897 /* We can't use @GOTOFF for text labels on VxWorks;
10898 see gotoff_operand. */
10899 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
10900 {
10901 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
10902 {
10903 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
10904 return legitimize_dllimport_symbol (addr, true);
10905 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
10906 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
10907 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
10908 {
10909 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true);
10910 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
10911 }
10912 }
10913
10914 if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
10915 {
10916 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
10917 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
10918 new_rtx = gen_const_mem (Pmode, new_rtx);
10919 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
10920
10921 if (reg == 0)
10922 reg = gen_reg_rtx (Pmode);
10923 /* Use gen_movsi directly, otherwise the address is loaded
10924 into a register for CSE. We don't want to CSE these addresses;
10925 instead we CSE addresses from the GOT table, so skip this. */
10926 emit_insn (gen_movsi (reg, new_rtx));
10927 new_rtx = reg;
10928 }
10929 else
10930 {
10931 /* This symbol must be referenced via a load from the
10932 Global Offset Table (@GOT). */
10933
10934 if (reload_in_progress)
10935 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
10936 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
10937 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
10938 if (TARGET_64BIT)
10939 new_rtx = force_reg (Pmode, new_rtx);
10940 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
10941 new_rtx = gen_const_mem (Pmode, new_rtx);
10942 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
10943
10944 if (reg == 0)
10945 reg = gen_reg_rtx (Pmode);
10946 emit_move_insn (reg, new_rtx);
10947 new_rtx = reg;
10948 }
10949 }
10950 else
10951 {
10952 if (CONST_INT_P (addr)
10953 && !x86_64_immediate_operand (addr, VOIDmode))
10954 {
10955 if (reg)
10956 {
10957 emit_move_insn (reg, addr);
10958 new_rtx = reg;
10959 }
10960 else
10961 new_rtx = force_reg (Pmode, addr);
10962 }
10963 else if (GET_CODE (addr) == CONST)
10964 {
10965 addr = XEXP (addr, 0);
10966
10967 /* We must match the stuff we generated before. Assume the only
10968 unspecs that can get here are ours. Not that we could do
10969 anything with them anyway.... */
10970 if (GET_CODE (addr) == UNSPEC
10971 || (GET_CODE (addr) == PLUS
10972 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
10973 return orig;
10974 gcc_assert (GET_CODE (addr) == PLUS);
10975 }
10976 if (GET_CODE (addr) == PLUS)
10977 {
10978 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
10979
10980 /* Check first to see if this is a constant offset from a @GOTOFF
10981 symbol reference. */
10982 if (gotoff_operand (op0, Pmode)
10983 && CONST_INT_P (op1))
10984 {
10985 if (!TARGET_64BIT)
10986 {
10987 if (reload_in_progress)
10988 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
10989 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
10990 UNSPEC_GOTOFF);
10991 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
10992 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
10993 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
10994
10995 if (reg != 0)
10996 {
10997 emit_move_insn (reg, new_rtx);
10998 new_rtx = reg;
10999 }
11000 }
11001 else
11002 {
11003 if (INTVAL (op1) < -16*1024*1024
11004 || INTVAL (op1) >= 16*1024*1024)
11005 {
11006 if (!x86_64_immediate_operand (op1, Pmode))
11007 op1 = force_reg (Pmode, op1);
11008 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
11009 }
11010 }
11011 }
11012 else
11013 {
11014 base = legitimize_pic_address (XEXP (addr, 0), reg);
11015 new_rtx = legitimize_pic_address (XEXP (addr, 1),
11016 base == reg ? NULL_RTX : reg);
11017
11018 if (CONST_INT_P (new_rtx))
11019 new_rtx = plus_constant (base, INTVAL (new_rtx));
11020 else
11021 {
11022 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
11023 {
11024 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
11025 new_rtx = XEXP (new_rtx, 1);
11026 }
11027 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
11028 }
11029 }
11030 }
11031 }
11032 return new_rtx;
11033 }
11034 \f
11035 /* Load the thread pointer. If TO_REG is true, force it into a register. */
11036
11037 static rtx
11038 get_thread_pointer (int to_reg)
11039 {
11040 rtx tp, reg, insn;
11041
11042 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
11043 if (!to_reg)
11044 return tp;
11045
11046 reg = gen_reg_rtx (Pmode);
11047 insn = gen_rtx_SET (VOIDmode, reg, tp);
11048 insn = emit_insn (insn);
11049
11050 return reg;
11051 }
11052
11053 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
11054 false if we expect this to be used for a memory address and true if
11055 we expect to load the address into a register. */
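/* Background note (not from the original source, hedged summary): the
   global- and local-dynamic models below resolve addresses through a
   call into the TLS runtime (__tls_get_addr), initial-exec loads the
   thread-pointer offset from the GOT (@gottpoff and friends), and
   local-exec folds a link-time constant offset (@tpoff/@ntpoff) into
   an access relative to the thread pointer returned by
   get_thread_pointer above. */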
11056
11057 static rtx
11058 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
11059 {
11060 rtx dest, base, off, pic, tp;
11061 int type;
11062
11063 switch (model)
11064 {
11065 case TLS_MODEL_GLOBAL_DYNAMIC:
11066 dest = gen_reg_rtx (Pmode);
11067 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
11068
11069 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
11070 {
11071 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
11072
11073 start_sequence ();
11074 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
11075 insns = get_insns ();
11076 end_sequence ();
11077
11078 RTL_CONST_CALL_P (insns) = 1;
11079 emit_libcall_block (insns, dest, rax, x);
11080 }
11081 else if (TARGET_64BIT && TARGET_GNU2_TLS)
11082 emit_insn (gen_tls_global_dynamic_64 (dest, x));
11083 else
11084 emit_insn (gen_tls_global_dynamic_32 (dest, x));
11085
11086 if (TARGET_GNU2_TLS)
11087 {
11088 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
11089
11090 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
11091 }
11092 break;
11093
11094 case TLS_MODEL_LOCAL_DYNAMIC:
11095 base = gen_reg_rtx (Pmode);
11096 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
11097
11098 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
11099 {
11100 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, note;
11101
11102 start_sequence ();
11103 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
11104 insns = get_insns ();
11105 end_sequence ();
11106
11107 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
11108 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
11109 RTL_CONST_CALL_P (insns) = 1;
11110 emit_libcall_block (insns, base, rax, note);
11111 }
11112 else if (TARGET_64BIT && TARGET_GNU2_TLS)
11113 emit_insn (gen_tls_local_dynamic_base_64 (base));
11114 else
11115 emit_insn (gen_tls_local_dynamic_base_32 (base));
11116
11117 if (TARGET_GNU2_TLS)
11118 {
11119 rtx x = ix86_tls_module_base ();
11120
11121 set_unique_reg_note (get_last_insn (), REG_EQUIV,
11122 gen_rtx_MINUS (Pmode, x, tp));
11123 }
11124
11125 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
11126 off = gen_rtx_CONST (Pmode, off);
11127
11128 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
11129
11130 if (TARGET_GNU2_TLS)
11131 {
11132 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
11133
11134 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
11135 }
11136
11137 break;
11138
11139 case TLS_MODEL_INITIAL_EXEC:
11140 if (TARGET_64BIT)
11141 {
11142 pic = NULL;
11143 type = UNSPEC_GOTNTPOFF;
11144 }
11145 else if (flag_pic)
11146 {
11147 if (reload_in_progress)
11148 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
11149 pic = pic_offset_table_rtx;
11150 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
11151 }
11152 else if (!TARGET_ANY_GNU_TLS)
11153 {
11154 pic = gen_reg_rtx (Pmode);
11155 emit_insn (gen_set_got (pic));
11156 type = UNSPEC_GOTTPOFF;
11157 }
11158 else
11159 {
11160 pic = NULL;
11161 type = UNSPEC_INDNTPOFF;
11162 }
11163
11164 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
11165 off = gen_rtx_CONST (Pmode, off);
11166 if (pic)
11167 off = gen_rtx_PLUS (Pmode, pic, off);
11168 off = gen_const_mem (Pmode, off);
11169 set_mem_alias_set (off, ix86_GOT_alias_set ());
11170
11171 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
11172 {
11173 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
11174 off = force_reg (Pmode, off);
11175 return gen_rtx_PLUS (Pmode, base, off);
11176 }
11177 else
11178 {
11179 base = get_thread_pointer (true);
11180 dest = gen_reg_rtx (Pmode);
11181 emit_insn (gen_subsi3 (dest, base, off));
11182 }
11183 break;
11184
11185 case TLS_MODEL_LOCAL_EXEC:
11186 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
11187 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
11188 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
11189 off = gen_rtx_CONST (Pmode, off);
11190
11191 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
11192 {
11193 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
11194 return gen_rtx_PLUS (Pmode, base, off);
11195 }
11196 else
11197 {
11198 base = get_thread_pointer (true);
11199 dest = gen_reg_rtx (Pmode);
11200 emit_insn (gen_subsi3 (dest, base, off));
11201 }
11202 break;
11203
11204 default:
11205 gcc_unreachable ();
11206 }
11207
11208 return dest;
11209 }
11210
11211 /* Create or return the unique __imp_DECL dllimport symbol corresponding
11212 to symbol DECL. */
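/* Illustrative example (not from the original source): for a dllimport
   declaration "bar", the code below builds an artificial VAR_DECL whose
   RTL is a memory reference through the import-table symbol
   "__imp_bar" (or "__imp__bar", depending on the user label prefix),
   so uses of bar become loads through that pointer. */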
11213
11214 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
11215 htab_t dllimport_map;
11216
11217 static tree
11218 get_dllimport_decl (tree decl)
11219 {
11220 struct tree_map *h, in;
11221 void **loc;
11222 const char *name;
11223 const char *prefix;
11224 size_t namelen, prefixlen;
11225 char *imp_name;
11226 tree to;
11227 rtx rtl;
11228
11229 if (!dllimport_map)
11230 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
11231
11232 in.hash = htab_hash_pointer (decl);
11233 in.base.from = decl;
11234 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
11235 h = (struct tree_map *) *loc;
11236 if (h)
11237 return h->to;
11238
11239 *loc = h = ggc_alloc_tree_map ();
11240 h->hash = in.hash;
11241 h->base.from = decl;
11242 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
11243 VAR_DECL, NULL, ptr_type_node);
11244 DECL_ARTIFICIAL (to) = 1;
11245 DECL_IGNORED_P (to) = 1;
11246 DECL_EXTERNAL (to) = 1;
11247 TREE_READONLY (to) = 1;
11248
11249 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
11250 name = targetm.strip_name_encoding (name);
11251 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
11252 ? "*__imp_" : "*__imp__";
11253 namelen = strlen (name);
11254 prefixlen = strlen (prefix);
11255 imp_name = (char *) alloca (namelen + prefixlen + 1);
11256 memcpy (imp_name, prefix, prefixlen);
11257 memcpy (imp_name + prefixlen, name, namelen + 1);
11258
11259 name = ggc_alloc_string (imp_name, namelen + prefixlen);
11260 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
11261 SET_SYMBOL_REF_DECL (rtl, to);
11262 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
11263
11264 rtl = gen_const_mem (Pmode, rtl);
11265 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
11266
11267 SET_DECL_RTL (to, rtl);
11268 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
11269
11270 return to;
11271 }
11272
11273 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
11274 true if we require the result be a register. */
11275
11276 static rtx
11277 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
11278 {
11279 tree imp_decl;
11280 rtx x;
11281
11282 gcc_assert (SYMBOL_REF_DECL (symbol));
11283 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
11284
11285 x = DECL_RTL (imp_decl);
11286 if (want_reg)
11287 x = force_reg (Pmode, x);
11288 return x;
11289 }
11290
11291 /* Try machine-dependent ways of modifying an illegitimate address
11292 to be legitimate. If we find one, return the new, valid address.
11293 This macro is used in only one place: `memory_address' in explow.c.
11294
11295 OLDX is the address as it was before break_out_memory_refs was called.
11296 In some cases it is useful to look at this to decide what needs to be done.
11297
11298 It is always safe for this macro to do nothing. It exists to recognize
11299 opportunities to optimize the output.
11300
11301 For the 80386, we handle X+REG by loading X into a register R and
11302 using R+REG. R will go in a general reg and indexing will be used.
11303 However, if REG is a broken-out memory address or multiplication,
11304 nothing needs to be done because REG can certainly go in a general reg.
11305
11306 When -fpic is used, special handling is needed for symbolic references.
11307 See comments by legitimize_pic_address in i386.c for details. */
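/* Illustrative example (not from the original source): roughly, an
   address such as (plus (symbol_ref "x") (reg)) is handled below by
   loading the symbol into a fresh register R, yielding the indexable
   form (plus R (reg)) described above. */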
11308
11309 static rtx
11310 ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
11311 enum machine_mode mode)
11312 {
11313 int changed = 0;
11314 unsigned log;
11315
11316 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
11317 if (log)
11318 return legitimize_tls_address (x, (enum tls_model) log, false);
11319 if (GET_CODE (x) == CONST
11320 && GET_CODE (XEXP (x, 0)) == PLUS
11321 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
11322 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
11323 {
11324 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
11325 (enum tls_model) log, false);
11326 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
11327 }
11328
11329 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
11330 {
11331 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
11332 return legitimize_dllimport_symbol (x, true);
11333 if (GET_CODE (x) == CONST
11334 && GET_CODE (XEXP (x, 0)) == PLUS
11335 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
11336 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
11337 {
11338 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
11339 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
11340 }
11341 }
11342
11343 if (flag_pic && SYMBOLIC_CONST (x))
11344 return legitimize_pic_address (x, 0);
11345
11346 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
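/* Illustrative example (not from the original source): an address term
   (ashift (reg) (const_int 3)) is rewritten below as
   (mult (reg) (const_int 8)) so it can match the scaled-index form. */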
11347 if (GET_CODE (x) == ASHIFT
11348 && CONST_INT_P (XEXP (x, 1))
11349 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
11350 {
11351 changed = 1;
11352 log = INTVAL (XEXP (x, 1));
11353 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
11354 GEN_INT (1 << log));
11355 }
11356
11357 if (GET_CODE (x) == PLUS)
11358 {
11359 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
11360
11361 if (GET_CODE (XEXP (x, 0)) == ASHIFT
11362 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
11363 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
11364 {
11365 changed = 1;
11366 log = INTVAL (XEXP (XEXP (x, 0), 1));
11367 XEXP (x, 0) = gen_rtx_MULT (Pmode,
11368 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
11369 GEN_INT (1 << log));
11370 }
11371
11372 if (GET_CODE (XEXP (x, 1)) == ASHIFT
11373 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
11374 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
11375 {
11376 changed = 1;
11377 log = INTVAL (XEXP (XEXP (x, 1), 1));
11378 XEXP (x, 1) = gen_rtx_MULT (Pmode,
11379 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
11380 GEN_INT (1 << log));
11381 }
11382
11383 /* Put multiply first if it isn't already. */
11384 if (GET_CODE (XEXP (x, 1)) == MULT)
11385 {
11386 rtx tmp = XEXP (x, 0);
11387 XEXP (x, 0) = XEXP (x, 1);
11388 XEXP (x, 1) = tmp;
11389 changed = 1;
11390 }
11391
11392 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
11393 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
11394 created by virtual register instantiation, register elimination, and
11395 similar optimizations. */
11396 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
11397 {
11398 changed = 1;
11399 x = gen_rtx_PLUS (Pmode,
11400 gen_rtx_PLUS (Pmode, XEXP (x, 0),
11401 XEXP (XEXP (x, 1), 0)),
11402 XEXP (XEXP (x, 1), 1));
11403 }
11404
11405 /* Canonicalize
11406 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
11407 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
11408 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
11409 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
11410 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
11411 && CONSTANT_P (XEXP (x, 1)))
11412 {
11413 rtx constant;
11414 rtx other = NULL_RTX;
11415
11416 if (CONST_INT_P (XEXP (x, 1)))
11417 {
11418 constant = XEXP (x, 1);
11419 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
11420 }
11421 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
11422 {
11423 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
11424 other = XEXP (x, 1);
11425 }
11426 else
11427 constant = 0;
11428
11429 if (constant)
11430 {
11431 changed = 1;
11432 x = gen_rtx_PLUS (Pmode,
11433 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
11434 XEXP (XEXP (XEXP (x, 0), 1), 0)),
11435 plus_constant (other, INTVAL (constant)));
11436 }
11437 }
11438
11439 if (changed && ix86_legitimate_address_p (mode, x, FALSE))
11440 return x;
11441
11442 if (GET_CODE (XEXP (x, 0)) == MULT)
11443 {
11444 changed = 1;
11445 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
11446 }
11447
11448 if (GET_CODE (XEXP (x, 1)) == MULT)
11449 {
11450 changed = 1;
11451 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
11452 }
11453
11454 if (changed
11455 && REG_P (XEXP (x, 1))
11456 && REG_P (XEXP (x, 0)))
11457 return x;
11458
11459 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
11460 {
11461 changed = 1;
11462 x = legitimize_pic_address (x, 0);
11463 }
11464
11465 if (changed && ix86_legitimate_address_p (mode, x, FALSE))
11466 return x;
11467
11468 if (REG_P (XEXP (x, 0)))
11469 {
11470 rtx temp = gen_reg_rtx (Pmode);
11471 rtx val = force_operand (XEXP (x, 1), temp);
11472 if (val != temp)
11473 emit_move_insn (temp, val);
11474
11475 XEXP (x, 1) = temp;
11476 return x;
11477 }
11478
11479 else if (REG_P (XEXP (x, 1)))
11480 {
11481 rtx temp = gen_reg_rtx (Pmode);
11482 rtx val = force_operand (XEXP (x, 0), temp);
11483 if (val != temp)
11484 emit_move_insn (temp, val);
11485
11486 XEXP (x, 0) = temp;
11487 return x;
11488 }
11489 }
11490
11491 return x;
11492 }
11493 \f
11494 /* Print an integer constant expression in assembler syntax. Addition
11495 and subtraction are the only arithmetic that may appear in these
11496 expressions. FILE is the stdio stream to write to, X is the rtx, and
11497 CODE is the operand print code from the output string. */
11498
11499 static void
11500 output_pic_addr_const (FILE *file, rtx x, int code)
11501 {
11502 char buf[256];
11503
11504 switch (GET_CODE (x))
11505 {
11506 case PC:
11507 gcc_assert (flag_pic);
11508 putc ('.', file);
11509 break;
11510
11511 case SYMBOL_REF:
11512 if (! TARGET_MACHO || TARGET_64BIT)
11513 output_addr_const (file, x);
11514 else
11515 {
11516 const char *name = XSTR (x, 0);
11517
11518 /* Mark the decl as referenced so that cgraph will
11519 output the function. */
11520 if (SYMBOL_REF_DECL (x))
11521 mark_decl_referenced (SYMBOL_REF_DECL (x));
11522
11523 #if TARGET_MACHO
11524 if (MACHOPIC_INDIRECT
11525 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
11526 name = machopic_indirection_name (x, /*stub_p=*/true);
11527 #endif
11528 assemble_name (file, name);
11529 }
11530 if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI)
11531 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
11532 fputs ("@PLT", file);
11533 break;
11534
11535 case LABEL_REF:
11536 x = XEXP (x, 0);
11537 /* FALLTHRU */
11538 case CODE_LABEL:
11539 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
11540 assemble_name (asm_out_file, buf);
11541 break;
11542
11543 case CONST_INT:
11544 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
11545 break;
11546
11547 case CONST:
11548 /* This used to output parentheses around the expression,
11549 but that does not work on the 386 (either ATT or BSD assembler). */
11550 output_pic_addr_const (file, XEXP (x, 0), code);
11551 break;
11552
11553 case CONST_DOUBLE:
11554 if (GET_MODE (x) == VOIDmode)
11555 {
11556 /* We can use %d if the number is <32 bits and positive. */
11557 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
11558 fprintf (file, "0x%lx%08lx",
11559 (unsigned long) CONST_DOUBLE_HIGH (x),
11560 (unsigned long) CONST_DOUBLE_LOW (x));
11561 else
11562 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
11563 }
11564 else
11565 /* We can't handle floating point constants;
11566 TARGET_PRINT_OPERAND must handle them. */
11567 output_operand_lossage ("floating constant misused");
11568 break;
11569
11570 case PLUS:
11571 /* Some assemblers need integer constants to appear first. */
11572 if (CONST_INT_P (XEXP (x, 0)))
11573 {
11574 output_pic_addr_const (file, XEXP (x, 0), code);
11575 putc ('+', file);
11576 output_pic_addr_const (file, XEXP (x, 1), code);
11577 }
11578 else
11579 {
11580 gcc_assert (CONST_INT_P (XEXP (x, 1)));
11581 output_pic_addr_const (file, XEXP (x, 1), code);
11582 putc ('+', file);
11583 output_pic_addr_const (file, XEXP (x, 0), code);
11584 }
11585 break;
11586
11587 case MINUS:
11588 if (!TARGET_MACHO)
11589 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
11590 output_pic_addr_const (file, XEXP (x, 0), code);
11591 putc ('-', file);
11592 output_pic_addr_const (file, XEXP (x, 1), code);
11593 if (!TARGET_MACHO)
11594 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
11595 break;
11596
11597 case UNSPEC:
11598 gcc_assert (XVECLEN (x, 0) == 1);
11599 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
11600 switch (XINT (x, 1))
11601 {
11602 case UNSPEC_GOT:
11603 fputs ("@GOT", file);
11604 break;
11605 case UNSPEC_GOTOFF:
11606 fputs ("@GOTOFF", file);
11607 break;
11608 case UNSPEC_PLTOFF:
11609 fputs ("@PLTOFF", file);
11610 break;
11611 case UNSPEC_GOTPCREL:
11612 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
11613 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
11614 break;
11615 case UNSPEC_GOTTPOFF:
11616 /* FIXME: This might be @TPOFF in Sun ld too. */
11617 fputs ("@gottpoff", file);
11618 break;
11619 case UNSPEC_TPOFF:
11620 fputs ("@tpoff", file);
11621 break;
11622 case UNSPEC_NTPOFF:
11623 if (TARGET_64BIT)
11624 fputs ("@tpoff", file);
11625 else
11626 fputs ("@ntpoff", file);
11627 break;
11628 case UNSPEC_DTPOFF:
11629 fputs ("@dtpoff", file);
11630 break;
11631 case UNSPEC_GOTNTPOFF:
11632 if (TARGET_64BIT)
11633 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
11634 "@gottpoff(%rip)": "@gottpoff[rip]", file);
11635 else
11636 fputs ("@gotntpoff", file);
11637 break;
11638 case UNSPEC_INDNTPOFF:
11639 fputs ("@indntpoff", file);
11640 break;
11641 #if TARGET_MACHO
11642 case UNSPEC_MACHOPIC_OFFSET:
11643 putc ('-', file);
11644 machopic_output_function_base_name (file);
11645 break;
11646 #endif
11647 default:
11648 output_operand_lossage ("invalid UNSPEC as operand");
11649 break;
11650 }
11651 break;
11652
11653 default:
11654 output_operand_lossage ("invalid expression as operand");
11655 }
11656 }
11657
11658 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
11659 We need to emit DTP-relative relocations. */
11660
11661 static void ATTRIBUTE_UNUSED
11662 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
11663 {
11664 fputs (ASM_LONG, file);
11665 output_addr_const (file, x);
11666 fputs ("@dtpoff", file);
11667 switch (size)
11668 {
11669 case 4:
11670 break;
11671 case 8:
11672 fputs (", 0", file);
11673 break;
11674 default:
11675 gcc_unreachable ();
11676 }
11677 }
11678
11679 /* Return true if X is a representation of the PIC register. This copes
11680 with calls from ix86_find_base_term, where the register might have
11681 been replaced by a cselib value. */
11682
11683 static bool
11684 ix86_pic_register_p (rtx x)
11685 {
11686 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
11687 return (pic_offset_table_rtx
11688 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
11689 else
11690 return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
11691 }
11692
11693 /* In the name of slightly smaller debug output, and to cater to
11694 general assembler lossage, recognize PIC+GOTOFF and turn it back
11695 into a direct symbol reference.
11696
11697 On Darwin, this is necessary to avoid a crash, because Darwin
11698 has a different PIC label for each routine but the DWARF debugging
11699 information is not associated with any particular routine, so it's
11700 necessary to remove references to the PIC label from RTL stored by
11701 the DWARF output code. */
11702
11703 static rtx
11704 ix86_delegitimize_address (rtx x)
11705 {
11706 rtx orig_x = delegitimize_mem_from_attrs (x);
11707 /* addend is NULL or some rtx if x is something+GOTOFF where
11708 something doesn't include the PIC register. */
11709 rtx addend = NULL_RTX;
11710 /* reg_addend is NULL or a multiple of some register. */
11711 rtx reg_addend = NULL_RTX;
11712 /* const_addend is NULL or a const_int. */
11713 rtx const_addend = NULL_RTX;
11714 /* This is the result, or NULL. */
11715 rtx result = NULL_RTX;
11716
11717 x = orig_x;
11718
11719 if (MEM_P (x))
11720 x = XEXP (x, 0);
11721
11722 if (TARGET_64BIT)
11723 {
11724 if (GET_CODE (x) != CONST
11725 || GET_CODE (XEXP (x, 0)) != UNSPEC
11726 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
11727 || !MEM_P (orig_x))
11728 return orig_x;
11729 x = XVECEXP (XEXP (x, 0), 0, 0);
11730 if (GET_MODE (orig_x) != Pmode)
11731 return simplify_gen_subreg (GET_MODE (orig_x), x, Pmode, 0);
11732 return x;
11733 }
11734
11735 if (GET_CODE (x) != PLUS
11736 || GET_CODE (XEXP (x, 1)) != CONST)
11737 return orig_x;
11738
11739 if (ix86_pic_register_p (XEXP (x, 0)))
11740 /* %ebx + GOT/GOTOFF */
11741 ;
11742 else if (GET_CODE (XEXP (x, 0)) == PLUS)
11743 {
11744 /* %ebx + %reg * scale + GOT/GOTOFF */
11745 reg_addend = XEXP (x, 0);
11746 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
11747 reg_addend = XEXP (reg_addend, 1);
11748 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
11749 reg_addend = XEXP (reg_addend, 0);
11750 else
11751 {
11752 reg_addend = NULL_RTX;
11753 addend = XEXP (x, 0);
11754 }
11755 }
11756 else
11757 addend = XEXP (x, 0);
11758
11759 x = XEXP (XEXP (x, 1), 0);
11760 if (GET_CODE (x) == PLUS
11761 && CONST_INT_P (XEXP (x, 1)))
11762 {
11763 const_addend = XEXP (x, 1);
11764 x = XEXP (x, 0);
11765 }
11766
11767 if (GET_CODE (x) == UNSPEC
11768 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
11769 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
11770 result = XVECEXP (x, 0, 0);
11771
11772 if (TARGET_MACHO && darwin_local_data_pic (x)
11773 && !MEM_P (orig_x))
11774 result = XVECEXP (x, 0, 0);
11775
11776 if (! result)
11777 return orig_x;
11778
11779 if (const_addend)
11780 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
11781 if (reg_addend)
11782 result = gen_rtx_PLUS (Pmode, reg_addend, result);
11783 if (addend)
11784 {
11785 /* If the rest of original X doesn't involve the PIC register, add
11786 addend and subtract pic_offset_table_rtx. This can happen e.g.
11787 for code like:
11788 leal (%ebx, %ecx, 4), %ecx
11789 ...
11790 movl foo@GOTOFF(%ecx), %edx
11791 in which case we return (%ecx - %ebx) + foo. */
11792 if (pic_offset_table_rtx)
11793 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
11794 pic_offset_table_rtx),
11795 result);
11796 else
11797 return orig_x;
11798 }
11799 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
11800 return simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
11801 return result;
11802 }
11803
11804 /* If X is a machine specific address (i.e. a symbol or label being
11805 referenced as a displacement from the GOT implemented using an
11806 UNSPEC), then return the base term. Otherwise return X. */
11807
11808 rtx
11809 ix86_find_base_term (rtx x)
11810 {
11811 rtx term;
11812
11813 if (TARGET_64BIT)
11814 {
11815 if (GET_CODE (x) != CONST)
11816 return x;
11817 term = XEXP (x, 0);
11818 if (GET_CODE (term) == PLUS
11819 && (CONST_INT_P (XEXP (term, 1))
11820 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
11821 term = XEXP (term, 0);
11822 if (GET_CODE (term) != UNSPEC
11823 || XINT (term, 1) != UNSPEC_GOTPCREL)
11824 return x;
11825
11826 return XVECEXP (term, 0, 0);
11827 }
11828
11829 return ix86_delegitimize_address (x);
11830 }
11831 \f
11832 static void
11833 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
11834 int fp, FILE *file)
11835 {
11836 const char *suffix;
11837
11838 if (mode == CCFPmode || mode == CCFPUmode)
11839 {
11840 code = ix86_fp_compare_code_to_integer (code);
11841 mode = CCmode;
11842 }
11843 if (reverse)
11844 code = reverse_condition (code);
11845
11846 switch (code)
11847 {
11848 case EQ:
11849 switch (mode)
11850 {
11851 case CCAmode:
11852 suffix = "a";
11853 break;
11854
11855 case CCCmode:
11856 suffix = "c";
11857 break;
11858
11859 case CCOmode:
11860 suffix = "o";
11861 break;
11862
11863 case CCSmode:
11864 suffix = "s";
11865 break;
11866
11867 default:
11868 suffix = "e";
11869 }
11870 break;
11871 case NE:
11872 switch (mode)
11873 {
11874 case CCAmode:
11875 suffix = "na";
11876 break;
11877
11878 case CCCmode:
11879 suffix = "nc";
11880 break;
11881
11882 case CCOmode:
11883 suffix = "no";
11884 break;
11885
11886 case CCSmode:
11887 suffix = "ns";
11888 break;
11889
11890 default:
11891 suffix = "ne";
11892 }
11893 break;
11894 case GT:
11895 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
11896 suffix = "g";
11897 break;
11898 case GTU:
11899 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
11900 Those same assemblers have the same but opposite lossage on cmov. */
11901 if (mode == CCmode)
11902 suffix = fp ? "nbe" : "a";
11903 else if (mode == CCCmode)
11904 suffix = "b";
11905 else
11906 gcc_unreachable ();
11907 break;
11908 case LT:
11909 switch (mode)
11910 {
11911 case CCNOmode:
11912 case CCGOCmode:
11913 suffix = "s";
11914 break;
11915
11916 case CCmode:
11917 case CCGCmode:
11918 suffix = "l";
11919 break;
11920
11921 default:
11922 gcc_unreachable ();
11923 }
11924 break;
11925 case LTU:
11926 gcc_assert (mode == CCmode || mode == CCCmode);
11927 suffix = "b";
11928 break;
11929 case GE:
11930 switch (mode)
11931 {
11932 case CCNOmode:
11933 case CCGOCmode:
11934 suffix = "ns";
11935 break;
11936
11937 case CCmode:
11938 case CCGCmode:
11939 suffix = "ge";
11940 break;
11941
11942 default:
11943 gcc_unreachable ();
11944 }
11945 break;
11946 case GEU:
11947 /* ??? As above. */
11948 gcc_assert (mode == CCmode || mode == CCCmode);
11949 suffix = fp ? "nb" : "ae";
11950 break;
11951 case LE:
11952 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
11953 suffix = "le";
11954 break;
11955 case LEU:
11956 /* ??? As above. */
11957 if (mode == CCmode)
11958 suffix = "be";
11959 else if (mode == CCCmode)
11960 suffix = fp ? "nb" : "ae";
11961 else
11962 gcc_unreachable ();
11963 break;
11964 case UNORDERED:
11965 suffix = fp ? "u" : "p";
11966 break;
11967 case ORDERED:
11968 suffix = fp ? "nu" : "np";
11969 break;
11970 default:
11971 gcc_unreachable ();
11972 }
11973 fputs (suffix, file);
11974 }
11975
11976 /* Print the name of register X to FILE based on its machine mode and number.
11977 If CODE is 'w', pretend the mode is HImode.
11978 If CODE is 'b', pretend the mode is QImode.
11979 If CODE is 'k', pretend the mode is SImode.
11980 If CODE is 'q', pretend the mode is DImode.
11981 If CODE is 'x', pretend the mode is V4SFmode.
11982 If CODE is 't', pretend the mode is V8SFmode.
11983 If CODE is 'h', pretend the reg is the 'high' byte register.
11984 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
11985 If CODE is 'd', duplicate the operand for AVX instruction.
11986 */
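/* Illustrative examples (not from the original source): for the
   register %eax, code 'b' prints %al, 'w' prints %ax, 'k' prints %eax,
   'q' prints %rax in 64bit mode, and 'h' prints %ah. */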
11987
11988 void
11989 print_reg (rtx x, int code, FILE *file)
11990 {
11991 const char *reg;
11992 bool duplicated = code == 'd' && TARGET_AVX;
11993
11994 gcc_assert (x == pc_rtx
11995 || (REGNO (x) != ARG_POINTER_REGNUM
11996 && REGNO (x) != FRAME_POINTER_REGNUM
11997 && REGNO (x) != FLAGS_REG
11998 && REGNO (x) != FPSR_REG
11999 && REGNO (x) != FPCR_REG));
12000
12001 if (ASSEMBLER_DIALECT == ASM_ATT)
12002 putc ('%', file);
12003
12004 if (x == pc_rtx)
12005 {
12006 gcc_assert (TARGET_64BIT);
12007 fputs ("rip", file);
12008 return;
12009 }
12010
12011 if (code == 'w' || MMX_REG_P (x))
12012 code = 2;
12013 else if (code == 'b')
12014 code = 1;
12015 else if (code == 'k')
12016 code = 4;
12017 else if (code == 'q')
12018 code = 8;
12019 else if (code == 'y')
12020 code = 3;
12021 else if (code == 'h')
12022 code = 0;
12023 else if (code == 'x')
12024 code = 16;
12025 else if (code == 't')
12026 code = 32;
12027 else
12028 code = GET_MODE_SIZE (GET_MODE (x));
12029
12030 /* Irritatingly, the AMD extended registers use a different naming
12031 convention from the normal registers. */
12032 if (REX_INT_REG_P (x))
12033 {
12034 gcc_assert (TARGET_64BIT);
12035 switch (code)
12036 {
12037 case 0:
12038 error ("extended registers have no high halves");
12039 break;
12040 case 1:
12041 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
12042 break;
12043 case 2:
12044 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
12045 break;
12046 case 4:
12047 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
12048 break;
12049 case 8:
12050 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
12051 break;
12052 default:
12053 error ("unsupported operand size for extended register");
12054 break;
12055 }
12056 return;
12057 }
12058
12059 reg = NULL;
12060 switch (code)
12061 {
12062 case 3:
12063 if (STACK_TOP_P (x))
12064 {
12065 reg = "st(0)";
12066 break;
12067 }
12068 /* FALLTHRU */
12069 case 8:
12070 case 4:
12071 case 12:
12072 if (! ANY_FP_REG_P (x))
12073 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
12074 /* FALLTHRU */
12075 case 16:
12076 case 2:
12077 normal:
12078 reg = hi_reg_name[REGNO (x)];
12079 break;
12080 case 1:
12081 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
12082 goto normal;
12083 reg = qi_reg_name[REGNO (x)];
12084 break;
12085 case 0:
12086 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
12087 goto normal;
12088 reg = qi_high_reg_name[REGNO (x)];
12089 break;
12090 case 32:
12091 if (SSE_REG_P (x))
12092 {
12093 gcc_assert (!duplicated);
12094 putc ('y', file);
12095 fputs (hi_reg_name[REGNO (x)] + 1, file);
12096 return;
12097 }
12098 break;
12099 default:
12100 gcc_unreachable ();
12101 }
12102
12103 fputs (reg, file);
12104 if (duplicated)
12105 {
12106 if (ASSEMBLER_DIALECT == ASM_ATT)
12107 fprintf (file, ", %%%s", reg);
12108 else
12109 fprintf (file, ", %s", reg);
12110 }
12111 }
12112
12113 /* Locate some local-dynamic symbol still in use by this function
12114 so that we can print its name in some tls_local_dynamic_base
12115 pattern. */
12116
12117 static int
12118 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
12119 {
12120 rtx x = *px;
12121
12122 if (GET_CODE (x) == SYMBOL_REF
12123 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
12124 {
12125 cfun->machine->some_ld_name = XSTR (x, 0);
12126 return 1;
12127 }
12128
12129 return 0;
12130 }
12131
12132 static const char *
12133 get_some_local_dynamic_name (void)
12134 {
12135 rtx insn;
12136
12137 if (cfun->machine->some_ld_name)
12138 return cfun->machine->some_ld_name;
12139
12140 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
12141 if (NONDEBUG_INSN_P (insn)
12142 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
12143 return cfun->machine->some_ld_name;
12144
12145 return NULL;
12146 }
12147
12148 /* Meaning of CODE:
12149 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
12150 C -- print opcode suffix for set/cmov insn.
12151 c -- like C, but print reversed condition
12152 F,f -- likewise, but for floating-point.
12153 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
12154 otherwise nothing
12155 R -- print the prefix for register names.
12156 z -- print the opcode suffix for the size of the current operand.
12157 Z -- likewise, with special suffixes for x87 instructions.
12158 * -- print a star (in certain assembler syntax)
12159 A -- print an absolute memory reference.
12160 w -- print the operand as if it's a "word" (HImode) even if it isn't.
12161 s -- print a shift double count, followed by the assembler's argument
12162 delimiter.
12163 b -- print the QImode name of the register for the indicated operand.
12164 %b0 would print %al if operands[0] is reg 0.
12165 w -- likewise, print the HImode name of the register.
12166 k -- likewise, print the SImode name of the register.
12167 q -- likewise, print the DImode name of the register.
12168 x -- likewise, print the V4SFmode name of the register.
12169 t -- likewise, print the V8SFmode name of the register.
12170 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
12171 y -- print "st(0)" instead of "st" as a register.
12172 d -- print duplicated register operand for AVX instruction.
12173 D -- print condition for SSE cmp instruction.
12174 P -- if PIC, print an @PLT suffix.
12175 X -- don't print any sort of PIC '@' suffix for a symbol.
12176 & -- print some in-use local-dynamic symbol name.
12177 H -- print a memory address offset by 8; used for sse high-parts
12178 Y -- print condition for XOP pcom* instruction.
12179 + -- print a branch hint as 'cs' or 'ds' prefix
12180 ; -- print a semicolon (after prefixes due to bug in older gas).
12181 */
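/* Illustrative examples (not from the original source): with operand 0
   in SImode, "%z0" in an insn template prints the "l" size suffix, and
   "%k1" prints the SImode name of the register in operand 1, such as
   %eax. */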
12182
12183 void
12184 ix86_print_operand (FILE *file, rtx x, int code)
12185 {
12186 if (code)
12187 {
12188 switch (code)
12189 {
12190 case '*':
12191 if (ASSEMBLER_DIALECT == ASM_ATT)
12192 putc ('*', file);
12193 return;
12194
12195 case '&':
12196 {
12197 const char *name = get_some_local_dynamic_name ();
12198 if (name == NULL)
12199 output_operand_lossage ("'%%&' used without any "
12200 "local dynamic TLS references");
12201 else
12202 assemble_name (file, name);
12203 return;
12204 }
12205
12206 case 'A':
12207 switch (ASSEMBLER_DIALECT)
12208 {
12209 case ASM_ATT:
12210 putc ('*', file);
12211 break;
12212
12213 case ASM_INTEL:
12214 /* Intel syntax. For absolute addresses, registers should not
12215 be surrounded by brackets. */
12216 if (!REG_P (x))
12217 {
12218 putc ('[', file);
12219 ix86_print_operand (file, x, 0);
12220 putc (']', file);
12221 return;
12222 }
12223 break;
12224
12225 default:
12226 gcc_unreachable ();
12227 }
12228
12229 ix86_print_operand (file, x, 0);
12230 return;
12231
12232
12233 case 'L':
12234 if (ASSEMBLER_DIALECT == ASM_ATT)
12235 putc ('l', file);
12236 return;
12237
12238 case 'W':
12239 if (ASSEMBLER_DIALECT == ASM_ATT)
12240 putc ('w', file);
12241 return;
12242
12243 case 'B':
12244 if (ASSEMBLER_DIALECT == ASM_ATT)
12245 putc ('b', file);
12246 return;
12247
12248 case 'Q':
12249 if (ASSEMBLER_DIALECT == ASM_ATT)
12250 putc ('l', file);
12251 return;
12252
12253 case 'S':
12254 if (ASSEMBLER_DIALECT == ASM_ATT)
12255 putc ('s', file);
12256 return;
12257
12258 case 'T':
12259 if (ASSEMBLER_DIALECT == ASM_ATT)
12260 putc ('t', file);
12261 return;
12262
12263 case 'z':
12264 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
12265 {
12266 /* Opcodes don't get size suffixes when using Intel syntax. */
12267 if (ASSEMBLER_DIALECT == ASM_INTEL)
12268 return;
12269
12270 switch (GET_MODE_SIZE (GET_MODE (x)))
12271 {
12272 case 1:
12273 putc ('b', file);
12274 return;
12275
12276 case 2:
12277 putc ('w', file);
12278 return;
12279
12280 case 4:
12281 putc ('l', file);
12282 return;
12283
12284 case 8:
12285 putc ('q', file);
12286 return;
12287
12288 default:
12289 output_operand_lossage
12290 ("invalid operand size for operand code '%c'", code);
12291 return;
12292 }
12293 }
12294
12295 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
12296 warning
12297 (0, "non-integer operand used with operand code '%c'", code);
12298 /* FALLTHRU */
12299
12300 case 'Z':
12301 /* 387 opcodes don't get size suffixes when using Intel syntax. */
12302 if (ASSEMBLER_DIALECT == ASM_INTEL)
12303 return;
12304
12305 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
12306 {
12307 switch (GET_MODE_SIZE (GET_MODE (x)))
12308 {
12309 case 2:
12310 #ifdef HAVE_AS_IX86_FILDS
12311 putc ('s', file);
12312 #endif
12313 return;
12314
12315 case 4:
12316 putc ('l', file);
12317 return;
12318
12319 case 8:
12320 #ifdef HAVE_AS_IX86_FILDQ
12321 putc ('q', file);
12322 #else
12323 fputs ("ll", file);
12324 #endif
12325 return;
12326
12327 default:
12328 break;
12329 }
12330 }
12331 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
12332 {
12333 /* 387 opcodes don't get size suffixes
12334 if the operands are registers. */
12335 if (STACK_REG_P (x))
12336 return;
12337
12338 switch (GET_MODE_SIZE (GET_MODE (x)))
12339 {
12340 case 4:
12341 putc ('s', file);
12342 return;
12343
12344 case 8:
12345 putc ('l', file);
12346 return;
12347
12348 case 12:
12349 case 16:
12350 putc ('t', file);
12351 return;
12352
12353 default:
12354 break;
12355 }
12356 }
12357 else
12358 {
12359 output_operand_lossage
12360 ("invalid operand type used with operand code '%c'", code);
12361 return;
12362 }
12363
12364 output_operand_lossage
12365 ("invalid operand size for operand code '%c'", code);
12366 return;
12367
12368 case 'd':
12369 case 'b':
12370 case 'w':
12371 case 'k':
12372 case 'q':
12373 case 'h':
12374 case 't':
12375 case 'y':
12376 case 'x':
12377 case 'X':
12378 case 'P':
12379 break;
12380
12381 case 's':
12382 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
12383 {
12384 ix86_print_operand (file, x, 0);
12385 fputs (", ", file);
12386 }
12387 return;
12388
12389 case 'D':
12390 /* A little bit of braindamage here. The SSE compare instructions
12391 use completely different names for the comparisons than the
12392 fp conditional moves do. */
12393 if (TARGET_AVX)
12394 {
12395 switch (GET_CODE (x))
12396 {
12397 case EQ:
12398 fputs ("eq", file);
12399 break;
12400 case UNEQ:
12401 fputs ("eq_us", file);
12402 break;
12403 case LT:
12404 fputs ("lt", file);
12405 break;
12406 case UNLT:
12407 fputs ("nge", file);
12408 break;
12409 case LE:
12410 fputs ("le", file);
12411 break;
12412 case UNLE:
12413 fputs ("ngt", file);
12414 break;
12415 case UNORDERED:
12416 fputs ("unord", file);
12417 break;
12418 case NE:
12419 fputs ("neq", file);
12420 break;
12421 case LTGT:
12422 fputs ("neq_oq", file);
12423 break;
12424 case GE:
12425 fputs ("ge", file);
12426 break;
12427 case UNGE:
12428 fputs ("nlt", file);
12429 break;
12430 case GT:
12431 fputs ("gt", file);
12432 break;
12433 case UNGT:
12434 fputs ("nle", file);
12435 break;
12436 case ORDERED:
12437 fputs ("ord", file);
12438 break;
12439 default:
12440 output_operand_lossage ("operand is not a condition code, "
12441 "invalid operand code 'D'");
12442 return;
12443 }
12444 }
12445 else
12446 {
12447 switch (GET_CODE (x))
12448 {
12449 case EQ:
12450 case UNEQ:
12451 fputs ("eq", file);
12452 break;
12453 case LT:
12454 case UNLT:
12455 fputs ("lt", file);
12456 break;
12457 case LE:
12458 case UNLE:
12459 fputs ("le", file);
12460 break;
12461 case UNORDERED:
12462 fputs ("unord", file);
12463 break;
12464 case NE:
12465 case LTGT:
12466 fputs ("neq", file);
12467 break;
12468 case UNGE:
12469 case GE:
12470 fputs ("nlt", file);
12471 break;
12472 case UNGT:
12473 case GT:
12474 fputs ("nle", file);
12475 break;
12476 case ORDERED:
12477 fputs ("ord", file);
12478 break;
12479 default:
12480 output_operand_lossage ("operand is not a condition code, "
12481 "invalid operand code 'D'");
12482 return;
12483 }
12484 }
12485 return;
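    /* Illustrative example (added for clarity): under AVX a GE comparison
       prints "ge", so a hypothetical template "vcmp%D0ps" yields
       "vcmpgeps"; without AVX, GE and UNGE both map to "nlt", matching
       the smaller non-AVX predicate set.  */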
12486 case 'O':
12487 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
12488 if (ASSEMBLER_DIALECT == ASM_ATT)
12489 {
12490 switch (GET_MODE (x))
12491 {
12492 case HImode: putc ('w', file); break;
12493 case SImode:
12494 case SFmode: putc ('l', file); break;
12495 case DImode:
12496 case DFmode: putc ('q', file); break;
12497 default: gcc_unreachable ();
12498 }
12499 putc ('.', file);
12500 }
12501 #endif
12502 return;
12503 case 'C':
12504 if (!COMPARISON_P (x))
12505 {
12506 output_operand_lossage ("operand is neither a constant nor a "
12507 "condition code, invalid operand code "
12508 "'C'");
12509 return;
12510 }
12511 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
12512 return;
12513 case 'F':
12514 if (!COMPARISON_P (x))
12515 {
12516 output_operand_lossage ("operand is neither a constant nor a "
12517 "condition code, invalid operand code "
12518 "'F'");
12519 return;
12520 }
12521 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
12522 if (ASSEMBLER_DIALECT == ASM_ATT)
12523 putc ('.', file);
12524 #endif
12525 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
12526 return;
12527
12528 /* Like above, but reverse condition */
12529 case 'c':
12530 /* Check to see if argument to %c is really a constant
12531 and not a condition code which needs to be reversed. */
12532 if (!COMPARISON_P (x))
12533 {
12534 output_operand_lossage ("operand is neither a constant nor a "
12535 "condition code, invalid operand "
12536 "code 'c'");
12537 return;
12538 }
12539 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
12540 return;
12541 case 'f':
12542 if (!COMPARISON_P (x))
12543 {
12544 output_operand_lossage ("operand is neither a constant nor a "
12545 "condition code, invalid operand "
12546 "code 'f'");
12547 return;
12548 }
12549 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
12550 if (ASSEMBLER_DIALECT == ASM_ATT)
12551 putc ('.', file);
12552 #endif
12553 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
12554 return;
12555
12556 case 'H':
12557 /* It doesn't actually matter what mode we use here, as we're
12558 only going to use this for printing. */
12559 x = adjust_address_nv (x, DImode, 8);
12560 break;
12561
12562 case '+':
12563 {
12564 rtx x;
12565
12566 if (!optimize
12567 || optimize_function_for_size_p (cfun) || !TARGET_BRANCH_PREDICTION_HINTS)
12568 return;
12569
12570 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
12571 if (x)
12572 {
12573 int pred_val = INTVAL (XEXP (x, 0));
12574
12575 if (pred_val < REG_BR_PROB_BASE * 45 / 100
12576 || pred_val > REG_BR_PROB_BASE * 55 / 100)
12577 {
12578 int taken = pred_val > REG_BR_PROB_BASE / 2;
12579 int cputaken = final_forward_branch_p (current_output_insn) == 0;
12580
12581 /* Emit hints only when the default branch prediction
12582 heuristics would fail. */
12583 if (taken != cputaken)
12584 {
12585 /* We use 3e (DS) prefix for taken branches and
12586 2e (CS) prefix for not taken branches. */
12587 if (taken)
12588 fputs ("ds ; ", file);
12589 else
12590 fputs ("cs ; ", file);
12591 }
12592 }
12593 }
12594 return;
12595 }
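      /* Illustrative example (added): when the prediction disagrees with
	 the static forward/backward heuristic, the code above prefixes
	 the jump, e.g. "ds ; jne .L3" for a predicted-taken branch
	 (".L3" is a made-up label).  */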
12596
12597 case 'Y':
12598 switch (GET_CODE (x))
12599 {
12600 case NE:
12601 fputs ("neq", file);
12602 break;
12603 case EQ:
12604 fputs ("eq", file);
12605 break;
12606 case GE:
12607 case GEU:
12608 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
12609 break;
12610 case GT:
12611 case GTU:
12612 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
12613 break;
12614 case LE:
12615 case LEU:
12616 fputs ("le", file);
12617 break;
12618 case LT:
12619 case LTU:
12620 fputs ("lt", file);
12621 break;
12622 case UNORDERED:
12623 fputs ("unord", file);
12624 break;
12625 case ORDERED:
12626 fputs ("ord", file);
12627 break;
12628 case UNEQ:
12629 fputs ("ueq", file);
12630 break;
12631 case UNGE:
12632 fputs ("nlt", file);
12633 break;
12634 case UNGT:
12635 fputs ("nle", file);
12636 break;
12637 case UNLE:
12638 fputs ("ule", file);
12639 break;
12640 case UNLT:
12641 fputs ("ult", file);
12642 break;
12643 case LTGT:
12644 fputs ("une", file);
12645 break;
12646 default:
12647 output_operand_lossage ("operand is not a condition code, "
12648 "invalid operand code 'Y'");
12649 return;
12650 }
12651 return;
12652
12653 case ';':
12654 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
12655 fputs (";", file);
12656 #endif
12657 return;
12658
12659 default:
12660 output_operand_lossage ("invalid operand code '%c'", code);
12661 }
12662 }
12663
12664 if (REG_P (x))
12665 print_reg (x, code, file);
12666
12667 else if (MEM_P (x))
12668 {
12669 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
12670 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
12671 && GET_MODE (x) != BLKmode)
12672 {
12673 const char * size;
12674 switch (GET_MODE_SIZE (GET_MODE (x)))
12675 {
12676 case 1: size = "BYTE"; break;
12677 case 2: size = "WORD"; break;
12678 case 4: size = "DWORD"; break;
12679 case 8: size = "QWORD"; break;
12680 case 12: size = "TBYTE"; break;
12681 case 16:
12682 if (GET_MODE (x) == XFmode)
12683 size = "TBYTE";
12684 else
12685 size = "XMMWORD";
12686 break;
12687 case 32: size = "YMMWORD"; break;
12688 default:
12689 gcc_unreachable ();
12690 }
12691
12692 /* Check for an explicit size override (codes 'b', 'w' and 'k'). */
12693 if (code == 'b')
12694 size = "BYTE";
12695 else if (code == 'w')
12696 size = "WORD";
12697 else if (code == 'k')
12698 size = "DWORD";
12699
12700 fputs (size, file);
12701 fputs (" PTR ", file);
12702 }
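	  /* Illustrative note (added): a 4-byte memory operand therefore
	     prints as "DWORD PTR [...]" in Intel syntax; AT&T output has
	     no size keyword, since sizes are expressed by instruction
	     suffixes.  */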
12703
12704 x = XEXP (x, 0);
12705 /* Avoid (%rip) for call operands. */
12706 if (CONSTANT_ADDRESS_P (x) && code == 'P'
12707 && !CONST_INT_P (x))
12708 output_addr_const (file, x);
12709 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
12710 output_operand_lossage ("invalid constraints for operand");
12711 else
12712 output_address (x);
12713 }
12714
12715 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
12716 {
12717 REAL_VALUE_TYPE r;
12718 long l;
12719
12720 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
12721 REAL_VALUE_TO_TARGET_SINGLE (r, l);
12722
12723 if (ASSEMBLER_DIALECT == ASM_ATT)
12724 putc ('$', file);
12725 fprintf (file, "0x%08lx", (long unsigned int) l);
12726 }
12727
12728 /* These float cases don't actually occur as immediate operands. */
12729 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
12730 {
12731 char dstr[30];
12732
12733 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
12734 fputs (dstr, file);
12735 }
12736
12737 else if (GET_CODE (x) == CONST_DOUBLE
12738 && GET_MODE (x) == XFmode)
12739 {
12740 char dstr[30];
12741
12742 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
12743 fputs (dstr, file);
12744 }
12745
12746 else
12747 {
12748 /* We have patterns that allow zero sets of memory, for instance.
12749 In 64-bit mode, we should probably support all 8-byte vectors,
12750 since we can in fact encode that into an immediate. */
12751 if (GET_CODE (x) == CONST_VECTOR)
12752 {
12753 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
12754 x = const0_rtx;
12755 }
12756
12757 if (code != 'P')
12758 {
12759 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
12760 {
12761 if (ASSEMBLER_DIALECT == ASM_ATT)
12762 putc ('$', file);
12763 }
12764 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
12765 || GET_CODE (x) == LABEL_REF)
12766 {
12767 if (ASSEMBLER_DIALECT == ASM_ATT)
12768 putc ('$', file);
12769 else
12770 fputs ("OFFSET FLAT:", file);
12771 }
12772 }
12773 if (CONST_INT_P (x))
12774 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
12775 else if (flag_pic)
12776 output_pic_addr_const (file, x, code);
12777 else
12778 output_addr_const (file, x);
12779 }
12780 }
12781
12782 static bool
12783 ix86_print_operand_punct_valid_p (unsigned char code)
12784 {
12785 return (code == '*' || code == '+' || code == '&' || code == ';');
12786 }
12787 \f
12788 /* Print a memory operand whose address is ADDR. */
12789
12790 static void
12791 ix86_print_operand_address (FILE *file, rtx addr)
12792 {
12793 struct ix86_address parts;
12794 rtx base, index, disp;
12795 int scale;
12796 int ok = ix86_decompose_address (addr, &parts);
12797
12798 gcc_assert (ok);
12799
12800 base = parts.base;
12801 index = parts.index;
12802 disp = parts.disp;
12803 scale = parts.scale;
12804
12805 switch (parts.seg)
12806 {
12807 case SEG_DEFAULT:
12808 break;
12809 case SEG_FS:
12810 case SEG_GS:
12811 if (ASSEMBLER_DIALECT == ASM_ATT)
12812 putc ('%', file);
12813 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
12814 break;
12815 default:
12816 gcc_unreachable ();
12817 }
12818
12819 /* Use the one byte shorter RIP-relative addressing for 64-bit mode. */
12820 if (TARGET_64BIT && !base && !index)
12821 {
12822 rtx symbol = disp;
12823
12824 if (GET_CODE (disp) == CONST
12825 && GET_CODE (XEXP (disp, 0)) == PLUS
12826 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
12827 symbol = XEXP (XEXP (disp, 0), 0);
12828
12829 if (GET_CODE (symbol) == LABEL_REF
12830 || (GET_CODE (symbol) == SYMBOL_REF
12831 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
12832 base = pc_rtx;
12833 }
12834 if (!base && !index)
12835 {
12836 /* A displacement-only address requires special attention. */
12837
12838 if (CONST_INT_P (disp))
12839 {
12840 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
12841 fputs ("ds:", file);
12842 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
12843 }
12844 else if (flag_pic)
12845 output_pic_addr_const (file, disp, 0);
12846 else
12847 output_addr_const (file, disp);
12848 }
12849 else
12850 {
12851 if (ASSEMBLER_DIALECT == ASM_ATT)
12852 {
12853 if (disp)
12854 {
12855 if (flag_pic)
12856 output_pic_addr_const (file, disp, 0);
12857 else if (GET_CODE (disp) == LABEL_REF)
12858 output_asm_label (disp);
12859 else
12860 output_addr_const (file, disp);
12861 }
12862
12863 putc ('(', file);
12864 if (base)
12865 print_reg (base, 0, file);
12866 if (index)
12867 {
12868 putc (',', file);
12869 print_reg (index, 0, file);
12870 if (scale != 1)
12871 fprintf (file, ",%d", scale);
12872 }
12873 putc (')', file);
12874 }
12875 else
12876 {
12877 rtx offset = NULL_RTX;
12878
12879 if (disp)
12880 {
12881 /* Pull out the offset of a symbol; print any symbol itself. */
12882 if (GET_CODE (disp) == CONST
12883 && GET_CODE (XEXP (disp, 0)) == PLUS
12884 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
12885 {
12886 offset = XEXP (XEXP (disp, 0), 1);
12887 disp = gen_rtx_CONST (VOIDmode,
12888 XEXP (XEXP (disp, 0), 0));
12889 }
12890
12891 if (flag_pic)
12892 output_pic_addr_const (file, disp, 0);
12893 else if (GET_CODE (disp) == LABEL_REF)
12894 output_asm_label (disp);
12895 else if (CONST_INT_P (disp))
12896 offset = disp;
12897 else
12898 output_addr_const (file, disp);
12899 }
12900
12901 putc ('[', file);
12902 if (base)
12903 {
12904 print_reg (base, 0, file);
12905 if (offset)
12906 {
12907 if (INTVAL (offset) >= 0)
12908 putc ('+', file);
12909 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
12910 }
12911 }
12912 else if (offset)
12913 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
12914 else
12915 putc ('0', file);
12916
12917 if (index)
12918 {
12919 putc ('+', file);
12920 print_reg (index, 0, file);
12921 if (scale != 1)
12922 fprintf (file, "*%d", scale);
12923 }
12924 putc (']', file);
12925 }
12926 }
12927 }
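/* Illustrative example (not from the original source): an address of the
   form base + index*scale + displacement prints as "16(%ebx,%ecx,4)" in
   AT&T syntax and as "[ebx+ecx*4+16]" in Intel syntax; the registers and
   offset here are made up.  */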
12928
12929 bool
12930 output_addr_const_extra (FILE *file, rtx x)
12931 {
12932 rtx op;
12933
12934 if (GET_CODE (x) != UNSPEC)
12935 return false;
12936
12937 op = XVECEXP (x, 0, 0);
12938 switch (XINT (x, 1))
12939 {
12940 case UNSPEC_GOTTPOFF:
12941 output_addr_const (file, op);
12942 /* FIXME: This might be @TPOFF in Sun ld. */
12943 fputs ("@gottpoff", file);
12944 break;
12945 case UNSPEC_TPOFF:
12946 output_addr_const (file, op);
12947 fputs ("@tpoff", file);
12948 break;
12949 case UNSPEC_NTPOFF:
12950 output_addr_const (file, op);
12951 if (TARGET_64BIT)
12952 fputs ("@tpoff", file);
12953 else
12954 fputs ("@ntpoff", file);
12955 break;
12956 case UNSPEC_DTPOFF:
12957 output_addr_const (file, op);
12958 fputs ("@dtpoff", file);
12959 break;
12960 case UNSPEC_GOTNTPOFF:
12961 output_addr_const (file, op);
12962 if (TARGET_64BIT)
12963 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
12964 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
12965 else
12966 fputs ("@gotntpoff", file);
12967 break;
12968 case UNSPEC_INDNTPOFF:
12969 output_addr_const (file, op);
12970 fputs ("@indntpoff", file);
12971 break;
12972 #if TARGET_MACHO
12973 case UNSPEC_MACHOPIC_OFFSET:
12974 output_addr_const (file, op);
12975 putc ('-', file);
12976 machopic_output_function_base_name (file);
12977 break;
12978 #endif
12979
12980 default:
12981 return false;
12982 }
12983
12984 return true;
12985 }
12986 \f
12987 /* Split one or more DImode RTL references into pairs of SImode
12988 references. The RTL can be REG, offsettable MEM, integer constant, or
12989 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
12990 split and "num" is its length. lo_half and hi_half are output arrays
12991 that parallel "operands". */
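/* For example (illustrative): a DImode MEM is split via adjust_address
   into two SImode MEMs at offsets 0 and 4, while a DImode REG is split
   via simplify_gen_subreg into its low and high SImode subregs.  */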
12992
12993 void
12994 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
12995 {
12996 while (num--)
12997 {
12998 rtx op = operands[num];
12999
13000 /* simplify_subreg refuses to split volatile memory addresses,
13001 but we still have to handle them. */
13002 if (MEM_P (op))
13003 {
13004 lo_half[num] = adjust_address (op, SImode, 0);
13005 hi_half[num] = adjust_address (op, SImode, 4);
13006 }
13007 else
13008 {
13009 lo_half[num] = simplify_gen_subreg (SImode, op,
13010 GET_MODE (op) == VOIDmode
13011 ? DImode : GET_MODE (op), 0);
13012 hi_half[num] = simplify_gen_subreg (SImode, op,
13013 GET_MODE (op) == VOIDmode
13014 ? DImode : GET_MODE (op), 4);
13015 }
13016 }
13017 }
13018 /* Split one or more TImode RTL references into pairs of DImode
13019 references. The RTL can be REG, offsettable MEM, integer constant, or
13020 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
13021 split and "num" is its length. lo_half and hi_half are output arrays
13022 that parallel "operands". */
13023
13024 void
13025 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
13026 {
13027 while (num--)
13028 {
13029 rtx op = operands[num];
13030
13031 /* simplify_subreg refuses to split volatile memory addresses, but we
13032 still have to handle them. */
13033 if (MEM_P (op))
13034 {
13035 lo_half[num] = adjust_address (op, DImode, 0);
13036 hi_half[num] = adjust_address (op, DImode, 8);
13037 }
13038 else
13039 {
13040 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
13041 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
13042 }
13043 }
13044 }
13045 \f
13046 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
13047 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
13048 is the expression of the binary operation. The output may either be
13049 emitted here, or returned to the caller, like all output_* functions.
13050
13051 There is no guarantee that the operands are the same mode, as they
13052 might be within FLOAT or FLOAT_EXTEND expressions. */
13053
13054 #ifndef SYSV386_COMPAT
13055 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
13056 wants to fix the assemblers because that causes incompatibility
13057 with gcc. No-one wants to fix gcc because that causes
13058 incompatibility with assemblers... You can use the option of
13059 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
13060 #define SYSV386_COMPAT 1
13061 #endif
13062
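/* Illustrative example (added for clarity): for an SSE double-precision
   add with AVX enabled, the routine below returns the template
   "vaddsd\t{%2, %1, %0|%0, %1, %2}"; without AVX it returns
   "addsd\t{%2, %0|%0, %2}" (ssep + 1 skips the leading 'v').  */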
13063 const char *
13064 output_387_binary_op (rtx insn, rtx *operands)
13065 {
13066 static char buf[40];
13067 const char *p;
13068 const char *ssep;
13069 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
13070
13071 #ifdef ENABLE_CHECKING
13072 /* Even if we do not want to check the inputs, this documents the input
13073 constraints, which helps in understanding the following code. */
13074 if (STACK_REG_P (operands[0])
13075 && ((REG_P (operands[1])
13076 && REGNO (operands[0]) == REGNO (operands[1])
13077 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
13078 || (REG_P (operands[2])
13079 && REGNO (operands[0]) == REGNO (operands[2])
13080 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
13081 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
13082 ; /* ok */
13083 else
13084 gcc_assert (is_sse);
13085 #endif
13086
13087 switch (GET_CODE (operands[3]))
13088 {
13089 case PLUS:
13090 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
13091 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
13092 p = "fiadd";
13093 else
13094 p = "fadd";
13095 ssep = "vadd";
13096 break;
13097
13098 case MINUS:
13099 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
13100 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
13101 p = "fisub";
13102 else
13103 p = "fsub";
13104 ssep = "vsub";
13105 break;
13106
13107 case MULT:
13108 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
13109 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
13110 p = "fimul";
13111 else
13112 p = "fmul";
13113 ssep = "vmul";
13114 break;
13115
13116 case DIV:
13117 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
13118 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
13119 p = "fidiv";
13120 else
13121 p = "fdiv";
13122 ssep = "vdiv";
13123 break;
13124
13125 default:
13126 gcc_unreachable ();
13127 }
13128
13129 if (is_sse)
13130 {
13131 if (TARGET_AVX)
13132 {
13133 strcpy (buf, ssep);
13134 if (GET_MODE (operands[0]) == SFmode)
13135 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
13136 else
13137 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
13138 }
13139 else
13140 {
13141 strcpy (buf, ssep + 1);
13142 if (GET_MODE (operands[0]) == SFmode)
13143 strcat (buf, "ss\t{%2, %0|%0, %2}");
13144 else
13145 strcat (buf, "sd\t{%2, %0|%0, %2}");
13146 }
13147 return buf;
13148 }
13149 strcpy (buf, p);
13150
13151 switch (GET_CODE (operands[3]))
13152 {
13153 case MULT:
13154 case PLUS:
13155 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
13156 {
13157 rtx temp = operands[2];
13158 operands[2] = operands[1];
13159 operands[1] = temp;
13160 }
13161
13162 /* We now know operands[0] == operands[1]. */
13163
13164 if (MEM_P (operands[2]))
13165 {
13166 p = "%Z2\t%2";
13167 break;
13168 }
13169
13170 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
13171 {
13172 if (STACK_TOP_P (operands[0]))
13173 /* How is it that we are storing to a dead operand[2]?
13174 Well, presumably operands[1] is dead too. We can't
13175 store the result to st(0) as st(0) gets popped on this
13176 instruction. Instead store to operands[2] (which I
13177 think has to be st(1)). st(1) will be popped later.
13178 gcc <= 2.8.1 didn't have this check and generated
13179 assembly code that the Unixware assembler rejected. */
13180 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
13181 else
13182 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
13183 break;
13184 }
13185
13186 if (STACK_TOP_P (operands[0]))
13187 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
13188 else
13189 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
13190 break;
13191
13192 case MINUS:
13193 case DIV:
13194 if (MEM_P (operands[1]))
13195 {
13196 p = "r%Z1\t%1";
13197 break;
13198 }
13199
13200 if (MEM_P (operands[2]))
13201 {
13202 p = "%Z2\t%2";
13203 break;
13204 }
13205
13206 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
13207 {
13208 #if SYSV386_COMPAT
13209 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
13210 derived assemblers, confusingly reverse the direction of
13211 the operation for fsub{r} and fdiv{r} when the
13212 destination register is not st(0). The Intel assembler
13213 doesn't have this brain damage. Read !SYSV386_COMPAT to
13214 figure out what the hardware really does. */
13215 if (STACK_TOP_P (operands[0]))
13216 p = "{p\t%0, %2|rp\t%2, %0}";
13217 else
13218 p = "{rp\t%2, %0|p\t%0, %2}";
13219 #else
13220 if (STACK_TOP_P (operands[0]))
13221 /* As above for fmul/fadd, we can't store to st(0). */
13222 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
13223 else
13224 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
13225 #endif
13226 break;
13227 }
13228
13229 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
13230 {
13231 #if SYSV386_COMPAT
13232 if (STACK_TOP_P (operands[0]))
13233 p = "{rp\t%0, %1|p\t%1, %0}";
13234 else
13235 p = "{p\t%1, %0|rp\t%0, %1}";
13236 #else
13237 if (STACK_TOP_P (operands[0]))
13238 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
13239 else
13240 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
13241 #endif
13242 break;
13243 }
13244
13245 if (STACK_TOP_P (operands[0]))
13246 {
13247 if (STACK_TOP_P (operands[1]))
13248 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
13249 else
13250 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
13251 break;
13252 }
13253 else if (STACK_TOP_P (operands[1]))
13254 {
13255 #if SYSV386_COMPAT
13256 p = "{\t%1, %0|r\t%0, %1}";
13257 #else
13258 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
13259 #endif
13260 }
13261 else
13262 {
13263 #if SYSV386_COMPAT
13264 p = "{r\t%2, %0|\t%0, %2}";
13265 #else
13266 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
13267 #endif
13268 }
13269 break;
13270
13271 default:
13272 gcc_unreachable ();
13273 }
13274
13275 strcat (buf, p);
13276 return buf;
13277 }
13278
13279 /* Return needed mode for entity in optimize_mode_switching pass. */
13280
13281 int
13282 ix86_mode_needed (int entity, rtx insn)
13283 {
13284 enum attr_i387_cw mode;
13285
13286 /* The mode UNINITIALIZED is used to store the control word after a
13287 function call or ASM pattern. The mode ANY specifies that the
13288 function has no requirements on the control word and makes no changes
13289 in the bits we are interested in. */
13290
13291 if (CALL_P (insn)
13292 || (NONJUMP_INSN_P (insn)
13293 && (asm_noperands (PATTERN (insn)) >= 0
13294 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
13295 return I387_CW_UNINITIALIZED;
13296
13297 if (recog_memoized (insn) < 0)
13298 return I387_CW_ANY;
13299
13300 mode = get_attr_i387_cw (insn);
13301
13302 switch (entity)
13303 {
13304 case I387_TRUNC:
13305 if (mode == I387_CW_TRUNC)
13306 return mode;
13307 break;
13308
13309 case I387_FLOOR:
13310 if (mode == I387_CW_FLOOR)
13311 return mode;
13312 break;
13313
13314 case I387_CEIL:
13315 if (mode == I387_CW_CEIL)
13316 return mode;
13317 break;
13318
13319 case I387_MASK_PM:
13320 if (mode == I387_CW_MASK_PM)
13321 return mode;
13322 break;
13323
13324 default:
13325 gcc_unreachable ();
13326 }
13327
13328 return I387_CW_ANY;
13329 }
13330
13331 /* Output code to initialize the control word copies used by the
13332 trunc?f?i and rounding patterns. The stored control word is modified
13333 according to MODE and saved in the stack slot for that mode. */
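/* For reference (added): the x87 control word fields touched below are
   the rounding-control bits 10-11 (mask 0x0c00: 0x0000 round to nearest,
   0x0400 round down, 0x0800 round up, 0x0c00 truncate) and the
   precision-exception mask bit 5 (0x0020).  */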
13334
13335 void
13336 emit_i387_cw_initialization (int mode)
13337 {
13338 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
13339 rtx new_mode;
13340
13341 enum ix86_stack_slot slot;
13342
13343 rtx reg = gen_reg_rtx (HImode);
13344
13345 emit_insn (gen_x86_fnstcw_1 (stored_mode));
13346 emit_move_insn (reg, copy_rtx (stored_mode));
13347
13348 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
13349 || optimize_function_for_size_p (cfun))
13350 {
13351 switch (mode)
13352 {
13353 case I387_CW_TRUNC:
13354 /* round toward zero (truncate) */
13355 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
13356 slot = SLOT_CW_TRUNC;
13357 break;
13358
13359 case I387_CW_FLOOR:
13360 /* round down toward -oo */
13361 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
13362 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
13363 slot = SLOT_CW_FLOOR;
13364 break;
13365
13366 case I387_CW_CEIL:
13367 /* round up toward +oo */
13368 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
13369 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
13370 slot = SLOT_CW_CEIL;
13371 break;
13372
13373 case I387_CW_MASK_PM:
13374 /* mask precision exception for nearbyint() */
13375 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
13376 slot = SLOT_CW_MASK_PM;
13377 break;
13378
13379 default:
13380 gcc_unreachable ();
13381 }
13382 }
13383 else
13384 {
13385 switch (mode)
13386 {
13387 case I387_CW_TRUNC:
13388 /* round toward zero (truncate) */
13389 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
13390 slot = SLOT_CW_TRUNC;
13391 break;
13392
13393 case I387_CW_FLOOR:
13394 /* round down toward -oo */
13395 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
13396 slot = SLOT_CW_FLOOR;
13397 break;
13398
13399 case I387_CW_CEIL:
13400 /* round up toward +oo */
13401 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
13402 slot = SLOT_CW_CEIL;
13403 break;
13404
13405 case I387_CW_MASK_PM:
13406 /* mask precision exception for nearbyint() */
13407 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
13408 slot = SLOT_CW_MASK_PM;
13409 break;
13410
13411 default:
13412 gcc_unreachable ();
13413 }
13414 }
13415
13416 gcc_assert (slot < MAX_386_STACK_LOCALS);
13417
13418 new_mode = assign_386_stack_local (HImode, slot);
13419 emit_move_insn (new_mode, reg);
13420 }
13421
13422 /* Output code for INSN to convert a float to a signed int. OPERANDS
13423 are the insn operands. The output may be [HSD]Imode and the input
13424 operand may be [SDX]Fmode. */
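/* Illustrative example (added): for a DImode result with a non-default
   rounding mode the routine below emits, roughly, "fld %y1" to duplicate
   the stack top when needed, "fldcw %3" to load the new control word, a
   popping "fistp" with the appropriate size suffix, and "fldcw %2" to
   restore the old control word.  */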
13425
13426 const char *
13427 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
13428 {
13429 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
13430 int dimode_p = GET_MODE (operands[0]) == DImode;
13431 int round_mode = get_attr_i387_cw (insn);
13432
13433 /* Jump through a hoop or two for DImode, since the hardware has no
13434 non-popping instruction. We used to do this a different way, but
13435 that was somewhat fragile and broke with post-reload splitters. */
13436 if ((dimode_p || fisttp) && !stack_top_dies)
13437 output_asm_insn ("fld\t%y1", operands);
13438
13439 gcc_assert (STACK_TOP_P (operands[1]));
13440 gcc_assert (MEM_P (operands[0]));
13441 gcc_assert (GET_MODE (operands[1]) != TFmode);
13442
13443 if (fisttp)
13444 output_asm_insn ("fisttp%Z0\t%0", operands);
13445 else
13446 {
13447 if (round_mode != I387_CW_ANY)
13448 output_asm_insn ("fldcw\t%3", operands);
13449 if (stack_top_dies || dimode_p)
13450 output_asm_insn ("fistp%Z0\t%0", operands);
13451 else
13452 output_asm_insn ("fist%Z0\t%0", operands);
13453 if (round_mode != I387_CW_ANY)
13454 output_asm_insn ("fldcw\t%2", operands);
13455 }
13456
13457 return "";
13458 }
13459
13460 /* Output code for x87 ffreep insn. The OPNO argument, which may only
13461 have the values zero or one, indicates the ffreep insn's operand
13462 from the OPERANDS array. */
13463
13464 static const char *
13465 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
13466 {
13467 if (TARGET_USE_FFREEP)
13468 #ifdef HAVE_AS_IX86_FFREEP
13469 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
13470 #else
13471 {
13472 static char retval[32];
13473 int regno = REGNO (operands[opno]);
13474
13475 gcc_assert (FP_REGNO_P (regno));
13476
13477 regno -= FIRST_STACK_REG;
13478
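      /* Note (added for clarity): the 16-bit value 0xc0df + (regno << 8),
	 stored little-endian, is the byte sequence 0xdf, 0xc0+regno,
	 i.e. the raw encoding of "ffreep %st(regno)" for assemblers that
	 lack the mnemonic.  */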
13479 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
13480 return retval;
13481 }
13482 #endif
13483
13484 return opno ? "fstp\t%y1" : "fstp\t%y0";
13485 }
13486
13487
13488 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
13489 should be used. UNORDERED_P is true when fucom should be used. */
13490
13491 const char *
13492 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
13493 {
13494 int stack_top_dies;
13495 rtx cmp_op0, cmp_op1;
13496 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
13497
13498 if (eflags_p)
13499 {
13500 cmp_op0 = operands[0];
13501 cmp_op1 = operands[1];
13502 }
13503 else
13504 {
13505 cmp_op0 = operands[1];
13506 cmp_op1 = operands[2];
13507 }
13508
13509 if (is_sse)
13510 {
13511 static const char ucomiss[] = "vucomiss\t{%1, %0|%0, %1}";
13512 static const char ucomisd[] = "vucomisd\t{%1, %0|%0, %1}";
13513 static const char comiss[] = "vcomiss\t{%1, %0|%0, %1}";
13514 static const char comisd[] = "vcomisd\t{%1, %0|%0, %1}";
13515
13516 if (GET_MODE (operands[0]) == SFmode)
13517 if (unordered_p)
13518 return &ucomiss[TARGET_AVX ? 0 : 1];
13519 else
13520 return &comiss[TARGET_AVX ? 0 : 1];
13521 else
13522 if (unordered_p)
13523 return &ucomisd[TARGET_AVX ? 0 : 1];
13524 else
13525 return &comisd[TARGET_AVX ? 0 : 1];
13526 }
13527
13528 gcc_assert (STACK_TOP_P (cmp_op0));
13529
13530 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
13531
13532 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
13533 {
13534 if (stack_top_dies)
13535 {
13536 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
13537 return output_387_ffreep (operands, 1);
13538 }
13539 else
13540 return "ftst\n\tfnstsw\t%0";
13541 }
13542
13543 if (STACK_REG_P (cmp_op1)
13544 && stack_top_dies
13545 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
13546 && REGNO (cmp_op1) != FIRST_STACK_REG)
13547 {
13548 /* If the top of the 387 stack dies, and the other operand is also
13549 a stack register that dies, then this must be a
13550 `fcompp' float compare. */
13551
13552 if (eflags_p)
13553 {
13554 /* There is no double popping fcomi variant. Fortunately,
13555 eflags is immune from the fstp's cc clobbering. */
13556 if (unordered_p)
13557 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
13558 else
13559 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
13560 return output_387_ffreep (operands, 0);
13561 }
13562 else
13563 {
13564 if (unordered_p)
13565 return "fucompp\n\tfnstsw\t%0";
13566 else
13567 return "fcompp\n\tfnstsw\t%0";
13568 }
13569 }
13570 else
13571 {
13572 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
13573
13574 static const char * const alt[16] =
13575 {
13576 "fcom%Z2\t%y2\n\tfnstsw\t%0",
13577 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
13578 "fucom%Z2\t%y2\n\tfnstsw\t%0",
13579 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
13580
13581 "ficom%Z2\t%y2\n\tfnstsw\t%0",
13582 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
13583 NULL,
13584 NULL,
13585
13586 "fcomi\t{%y1, %0|%0, %y1}",
13587 "fcomip\t{%y1, %0|%0, %y1}",
13588 "fucomi\t{%y1, %0|%0, %y1}",
13589 "fucomip\t{%y1, %0|%0, %y1}",
13590
13591 NULL,
13592 NULL,
13593 NULL,
13594 NULL
13595 };
13596
13597 int mask;
13598 const char *ret;
13599
13600 mask = eflags_p << 3;
13601 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
13602 mask |= unordered_p << 1;
13603 mask |= stack_top_dies;
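      /* Illustrative example (added): eflags_p = 1, a floating-point
	 cmp_op1, unordered_p = 1 and a dying stack top give mask 11
	 (0b1011), selecting "fucomip\t{%y1, %0|%0, %y1}" from the table
	 above.  */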
13604
13605 gcc_assert (mask < 16);
13606 ret = alt[mask];
13607 gcc_assert (ret);
13608
13609 return ret;
13610 }
13611 }
13612
13613 void
13614 ix86_output_addr_vec_elt (FILE *file, int value)
13615 {
13616 const char *directive = ASM_LONG;
13617
13618 #ifdef ASM_QUAD
13619 if (TARGET_64BIT)
13620 directive = ASM_QUAD;
13621 #else
13622 gcc_assert (!TARGET_64BIT);
13623 #endif
13624
13625 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
13626 }
13627
13628 void
13629 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
13630 {
13631 const char *directive = ASM_LONG;
13632
13633 #ifdef ASM_QUAD
13634 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
13635 directive = ASM_QUAD;
13636 #else
13637 gcc_assert (!TARGET_64BIT);
13638 #endif
13639 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
13640 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
13641 fprintf (file, "%s%s%d-%s%d\n",
13642 directive, LPREFIX, value, LPREFIX, rel);
13643 else if (HAVE_AS_GOTOFF_IN_DATA)
13644 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
13645 #if TARGET_MACHO
13646 else if (TARGET_MACHO)
13647 {
13648 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
13649 machopic_output_function_base_name (file);
13650 putc ('\n', file);
13651 }
13652 #endif
13653 else
13654 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
13655 GOT_SYMBOL_NAME, LPREFIX, value);
13656 }
13657 \f
13658 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
13659 for the target. */
13660
13661 void
13662 ix86_expand_clear (rtx dest)
13663 {
13664 rtx tmp;
13665
13666 /* We play register width games, which are only valid after reload. */
13667 gcc_assert (reload_completed);
13668
13669 /* Avoid HImode and its attendant prefix byte. */
13670 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
13671 dest = gen_rtx_REG (SImode, REGNO (dest));
13672 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
13673
13674 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
13675 if (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ())
13676 {
13677 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
13678 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
13679 }
13680
13681 emit_insn (tmp);
13682 }
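/* Illustrative example (added): clearing %eax with this routine normally
   emits the xor form with a flags clobber, i.e. "xorl %eax, %eax"; the
   plain "movl $0, %eax" form is used only when TARGET_USE_MOV0 is set
   and the insn is not being optimized for speed.  */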
13683
13684 /* X is an unchanging MEM. If it is a constant pool reference, return
13685 the constant pool rtx, else NULL. */
13686
13687 rtx
13688 maybe_get_pool_constant (rtx x)
13689 {
13690 x = ix86_delegitimize_address (XEXP (x, 0));
13691
13692 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
13693 return get_pool_constant (x);
13694
13695 return NULL_RTX;
13696 }
13697
13698 void
13699 ix86_expand_move (enum machine_mode mode, rtx operands[])
13700 {
13701 rtx op0, op1;
13702 enum tls_model model;
13703
13704 op0 = operands[0];
13705 op1 = operands[1];
13706
13707 if (GET_CODE (op1) == SYMBOL_REF)
13708 {
13709 model = SYMBOL_REF_TLS_MODEL (op1);
13710 if (model)
13711 {
13712 op1 = legitimize_tls_address (op1, model, true);
13713 op1 = force_operand (op1, op0);
13714 if (op1 == op0)
13715 return;
13716 }
13717 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
13718 && SYMBOL_REF_DLLIMPORT_P (op1))
13719 op1 = legitimize_dllimport_symbol (op1, false);
13720 }
13721 else if (GET_CODE (op1) == CONST
13722 && GET_CODE (XEXP (op1, 0)) == PLUS
13723 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
13724 {
13725 rtx addend = XEXP (XEXP (op1, 0), 1);
13726 rtx symbol = XEXP (XEXP (op1, 0), 0);
13727 rtx tmp = NULL;
13728
13729 model = SYMBOL_REF_TLS_MODEL (symbol);
13730 if (model)
13731 tmp = legitimize_tls_address (symbol, model, true);
13732 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
13733 && SYMBOL_REF_DLLIMPORT_P (symbol))
13734 tmp = legitimize_dllimport_symbol (symbol, true);
13735
13736 if (tmp)
13737 {
13738 tmp = force_operand (tmp, NULL);
13739 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
13740 op0, 1, OPTAB_DIRECT);
13741 if (tmp == op0)
13742 return;
13743 }
13744 }
13745
13746 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
13747 {
13748 if (TARGET_MACHO && !TARGET_64BIT)
13749 {
13750 #if TARGET_MACHO
13751 if (MACHOPIC_PURE)
13752 {
13753 rtx temp = ((reload_in_progress
13754 || ((op0 && REG_P (op0))
13755 && mode == Pmode))
13756 ? op0 : gen_reg_rtx (Pmode));
13757 op1 = machopic_indirect_data_reference (op1, temp);
13758 op1 = machopic_legitimize_pic_address (op1, mode,
13759 temp == op1 ? 0 : temp);
13760 }
13761 else if (MACHOPIC_INDIRECT)
13762 op1 = machopic_indirect_data_reference (op1, 0);
13763 if (op0 == op1)
13764 return;
13765 #endif
13766 }
13767 else
13768 {
13769 if (MEM_P (op0))
13770 op1 = force_reg (Pmode, op1);
13771 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
13772 {
13773 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
13774 op1 = legitimize_pic_address (op1, reg);
13775 if (op0 == op1)
13776 return;
13777 }
13778 }
13779 }
13780 else
13781 {
13782 if (MEM_P (op0)
13783 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
13784 || !push_operand (op0, mode))
13785 && MEM_P (op1))
13786 op1 = force_reg (mode, op1);
13787
13788 if (push_operand (op0, mode)
13789 && ! general_no_elim_operand (op1, mode))
13790 op1 = copy_to_mode_reg (mode, op1);
13791
13792 /* Force large constants in 64-bit compilation into a register
13793 to get them CSEed. */
13794 if (can_create_pseudo_p ()
13795 && (mode == DImode) && TARGET_64BIT
13796 && immediate_operand (op1, mode)
13797 && !x86_64_zext_immediate_operand (op1, VOIDmode)
13798 && !register_operand (op0, mode)
13799 && optimize)
13800 op1 = copy_to_mode_reg (mode, op1);
13801
13802 if (can_create_pseudo_p ()
13803 && FLOAT_MODE_P (mode)
13804 && GET_CODE (op1) == CONST_DOUBLE)
13805 {
13806 /* If we are loading a floating point constant to a register,
13807 force the value to memory now, since we'll get better code
13808 out of the back end. */
13809
13810 op1 = validize_mem (force_const_mem (mode, op1));
13811 if (!register_operand (op0, mode))
13812 {
13813 rtx temp = gen_reg_rtx (mode);
13814 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
13815 emit_move_insn (op0, temp);
13816 return;
13817 }
13818 }
13819 }
13820
13821 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
13822 }
13823
13824 void
13825 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
13826 {
13827 rtx op0 = operands[0], op1 = operands[1];
13828 unsigned int align = GET_MODE_ALIGNMENT (mode);
13829
13830 /* Force constants other than zero into memory. We do not know how
13831 the instructions used to build constants modify the upper 64 bits
13832 of the register; once we have that information we may be able
13833 to handle some of them more efficiently. */
13834 if (can_create_pseudo_p ()
13835 && register_operand (op0, mode)
13836 && (CONSTANT_P (op1)
13837 || (GET_CODE (op1) == SUBREG
13838 && CONSTANT_P (SUBREG_REG (op1))))
13839 && !standard_sse_constant_p (op1))
13840 op1 = validize_mem (force_const_mem (mode, op1));
13841
13842 /* We need to check memory alignment for SSE modes since an attribute
13843 can make operands unaligned. */
13844 if (can_create_pseudo_p ()
13845 && SSE_REG_MODE_P (mode)
13846 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
13847 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
13848 {
13849 rtx tmp[2];
13850
13851 /* ix86_expand_vector_move_misalign() does not like constants ... */
13852 if (CONSTANT_P (op1)
13853 || (GET_CODE (op1) == SUBREG
13854 && CONSTANT_P (SUBREG_REG (op1))))
13855 op1 = validize_mem (force_const_mem (mode, op1));
13856
13857 /* ... nor both arguments in memory. */
13858 if (!register_operand (op0, mode)
13859 && !register_operand (op1, mode))
13860 op1 = force_reg (mode, op1);
13861
13862 tmp[0] = op0; tmp[1] = op1;
13863 ix86_expand_vector_move_misalign (mode, tmp);
13864 return;
13865 }
13866
13867 /* Make operand1 a register if it isn't already. */
13868 if (can_create_pseudo_p ()
13869 && !register_operand (op0, mode)
13870 && !register_operand (op1, mode))
13871 {
13872 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
13873 return;
13874 }
13875
13876 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
13877 }
13878
13879 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
13880 straight to ix86_expand_vector_move. */
13881 /* Code generation for scalar reg-reg moves of single and double precision data:
13882 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
13883 movaps reg, reg
13884 else
13885 movss reg, reg
13886 if (x86_sse_partial_reg_dependency == true)
13887 movapd reg, reg
13888 else
13889 movsd reg, reg
13890
13891 Code generation for scalar loads of double precision data:
13892 if (x86_sse_split_regs == true)
13893 movlpd mem, reg (gas syntax)
13894 else
13895 movsd mem, reg
13896
13897 Code generation for unaligned packed loads of single precision data
13898 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
13899 if (x86_sse_unaligned_move_optimal)
13900 movups mem, reg
13901
13902 if (x86_sse_partial_reg_dependency == true)
13903 {
13904 xorps reg, reg
13905 movlps mem, reg
13906 movhps mem+8, reg
13907 }
13908 else
13909 {
13910 movlps mem, reg
13911 movhps mem+8, reg
13912 }
13913
13914 Code generation for unaligned packed loads of double precision data
13915 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
13916 if (x86_sse_unaligned_move_optimal)
13917 movupd mem, reg
13918
13919 if (x86_sse_split_regs == true)
13920 {
13921 movlpd mem, reg
13922 movhpd mem+8, reg
13923 }
13924 else
13925 {
13926 movsd mem, reg
13927 movhpd mem+8, reg
13928 }
13929 */
13930
13931 void
13932 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
13933 {
13934 rtx op0, op1, m;
13935
13936 op0 = operands[0];
13937 op1 = operands[1];
13938
13939 if (TARGET_AVX)
13940 {
13941 switch (GET_MODE_CLASS (mode))
13942 {
13943 case MODE_VECTOR_INT:
13944 case MODE_INT:
13945 switch (GET_MODE_SIZE (mode))
13946 {
13947 case 16:
13948 /* If we're optimizing for size, movups is the smallest. */
13949 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
13950 {
13951 op0 = gen_lowpart (V4SFmode, op0);
13952 op1 = gen_lowpart (V4SFmode, op1);
13953 emit_insn (gen_avx_movups (op0, op1));
13954 return;
13955 }
13956 op0 = gen_lowpart (V16QImode, op0);
13957 op1 = gen_lowpart (V16QImode, op1);
13958 emit_insn (gen_avx_movdqu (op0, op1));
13959 break;
13960 case 32:
13961 op0 = gen_lowpart (V32QImode, op0);
13962 op1 = gen_lowpart (V32QImode, op1);
13963 emit_insn (gen_avx_movdqu256 (op0, op1));
13964 break;
13965 default:
13966 gcc_unreachable ();
13967 }
13968 break;
13969 case MODE_VECTOR_FLOAT:
13970 op0 = gen_lowpart (mode, op0);
13971 op1 = gen_lowpart (mode, op1);
13972
13973 switch (mode)
13974 {
13975 case V4SFmode:
13976 emit_insn (gen_avx_movups (op0, op1));
13977 break;
13978 case V8SFmode:
13979 emit_insn (gen_avx_movups256 (op0, op1));
13980 break;
13981 case V2DFmode:
13982 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
13983 {
13984 op0 = gen_lowpart (V4SFmode, op0);
13985 op1 = gen_lowpart (V4SFmode, op1);
13986 emit_insn (gen_avx_movups (op0, op1));
13987 return;
13988 }
13989 emit_insn (gen_avx_movupd (op0, op1));
13990 break;
13991 case V4DFmode:
13992 emit_insn (gen_avx_movupd256 (op0, op1));
13993 break;
13994 default:
13995 gcc_unreachable ();
13996 }
13997 break;
13998
13999 default:
14000 gcc_unreachable ();
14001 }
14002
14003 return;
14004 }
14005
14006 if (MEM_P (op1))
14007 {
14008 /* If we're optimizing for size, movups is the smallest. */
14009 if (optimize_insn_for_size_p ()
14010 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
14011 {
14012 op0 = gen_lowpart (V4SFmode, op0);
14013 op1 = gen_lowpart (V4SFmode, op1);
14014 emit_insn (gen_sse_movups (op0, op1));
14015 return;
14016 }
14017
14018 /* ??? If we have typed data, then it would appear that using
14019 movdqu is the only way to get unaligned data loaded with
14020 integer type. */
14021 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
14022 {
14023 op0 = gen_lowpart (V16QImode, op0);
14024 op1 = gen_lowpart (V16QImode, op1);
14025 emit_insn (gen_sse2_movdqu (op0, op1));
14026 return;
14027 }
14028
14029 if (TARGET_SSE2 && mode == V2DFmode)
14030 {
14031 rtx zero;
14032
14033 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
14034 {
14035 op0 = gen_lowpart (V2DFmode, op0);
14036 op1 = gen_lowpart (V2DFmode, op1);
14037 emit_insn (gen_sse2_movupd (op0, op1));
14038 return;
14039 }
14040
14041 /* When SSE registers are split into halves, we can avoid
14042 writing to the top half twice. */
14043 if (TARGET_SSE_SPLIT_REGS)
14044 {
14045 emit_clobber (op0);
14046 zero = op0;
14047 }
14048 else
14049 {
14050 /* ??? Not sure about the best option for the Intel chips.
14051 The following would seem to satisfy; the register is
14052 entirely cleared, breaking the dependency chain. We
14053 then store to the upper half, with a dependency depth
14054 of one. A rumor has it that Intel recommends two movsd
14055 followed by an unpacklpd, but this is unconfirmed. And
14056 given that the dependency depth of the unpacklpd would
14057 still be one, I'm not sure why this would be better. */
14058 zero = CONST0_RTX (V2DFmode);
14059 }
14060
14061 m = adjust_address (op1, DFmode, 0);
14062 emit_insn (gen_sse2_loadlpd (op0, zero, m));
14063 m = adjust_address (op1, DFmode, 8);
14064 emit_insn (gen_sse2_loadhpd (op0, op0, m));
14065 }
14066 else
14067 {
14068 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
14069 {
14070 op0 = gen_lowpart (V4SFmode, op0);
14071 op1 = gen_lowpart (V4SFmode, op1);
14072 emit_insn (gen_sse_movups (op0, op1));
14073 return;
14074 }
14075
14076 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
14077 emit_move_insn (op0, CONST0_RTX (mode));
14078 else
14079 emit_clobber (op0);
14080
14081 if (mode != V4SFmode)
14082 op0 = gen_lowpart (V4SFmode, op0);
14083 m = adjust_address (op1, V2SFmode, 0);
14084 emit_insn (gen_sse_loadlps (op0, op0, m));
14085 m = adjust_address (op1, V2SFmode, 8);
14086 emit_insn (gen_sse_loadhps (op0, op0, m));
14087 }
14088 }
14089 else if (MEM_P (op0))
14090 {
14091 /* If we're optimizing for size, movups is the smallest. */
14092 if (optimize_insn_for_size_p ()
14093 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
14094 {
14095 op0 = gen_lowpart (V4SFmode, op0);
14096 op1 = gen_lowpart (V4SFmode, op1);
14097 emit_insn (gen_sse_movups (op0, op1));
14098 return;
14099 }
14100
14101 /* ??? Similar to above, only less clear because of quote
14102 typeless stores unquote. */
14103 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
14104 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
14105 {
14106 op0 = gen_lowpart (V16QImode, op0);
14107 op1 = gen_lowpart (V16QImode, op1);
14108 emit_insn (gen_sse2_movdqu (op0, op1));
14109 return;
14110 }
14111
14112 if (TARGET_SSE2 && mode == V2DFmode)
14113 {
14114 if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
14115 {
14116 op0 = gen_lowpart (V2DFmode, op0);
14117 op1 = gen_lowpart (V2DFmode, op1);
14118 emit_insn (gen_sse2_movupd (op0, op1));
14119 }
14120 else
14121 {
14122 m = adjust_address (op0, DFmode, 0);
14123 emit_insn (gen_sse2_storelpd (m, op1));
14124 m = adjust_address (op0, DFmode, 8);
14125 emit_insn (gen_sse2_storehpd (m, op1));
14126 }
14127 }
14128 else
14129 {
14130 if (mode != V4SFmode)
14131 op1 = gen_lowpart (V4SFmode, op1);
14132
14133 if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
14134 {
14135 op0 = gen_lowpart (V4SFmode, op0);
14136 emit_insn (gen_sse_movups (op0, op1));
14137 }
14138 else
14139 {
14140 m = adjust_address (op0, V2SFmode, 0);
14141 emit_insn (gen_sse_storelps (m, op1));
14142 m = adjust_address (op0, V2SFmode, 8);
14143 emit_insn (gen_sse_storehps (m, op1));
14144 }
14145 }
14146 }
14147 else
14148 gcc_unreachable ();
14149 }
14150
14151 /* Expand a push in MODE. This is some mode for which we do not support
14152 proper push instructions, at least from the registers that we expect
14153 the value to live in. */
14154
14155 void
14156 ix86_expand_push (enum machine_mode mode, rtx x)
14157 {
14158 rtx tmp;
14159
14160 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
14161 GEN_INT (-GET_MODE_SIZE (mode)),
14162 stack_pointer_rtx, 1, OPTAB_DIRECT);
14163 if (tmp != stack_pointer_rtx)
14164 emit_move_insn (stack_pointer_rtx, tmp);
14165
14166 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
14167
14168 /* When we push an operand onto the stack, it has to be aligned at
14169 least at the function argument boundary. However, since we don't
14170 have the argument type, we can't determine the actual argument
14171 boundary. */
14172 emit_move_insn (tmp, x);
14173 }
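/* Illustrative sketch (added, an approximation): pushing a 16-byte vector
   mode this way expands to an explicit stack-pointer adjustment followed
   by a store, roughly "sub $16, %esp; movups %xmm0, (%esp)" on a 32-bit
   target; the exact move pattern depends on the mode.  */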
14174
14175 /* Helper function of ix86_fixup_binary_operands to canonicalize
14176 operand order. Returns true if the operands should be swapped. */
14177
14178 static bool
14179 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
14180 rtx operands[])
14181 {
14182 rtx dst = operands[0];
14183 rtx src1 = operands[1];
14184 rtx src2 = operands[2];
14185
14186 /* If the operation is not commutative, we can't do anything. */
14187 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
14188 return false;
14189
14190 /* Highest priority is that src1 should match dst. */
14191 if (rtx_equal_p (dst, src1))
14192 return false;
14193 if (rtx_equal_p (dst, src2))
14194 return true;
14195
14196 /* Next highest priority is that immediate constants come second. */
14197 if (immediate_operand (src2, mode))
14198 return false;
14199 if (immediate_operand (src1, mode))
14200 return true;
14201
14202 /* Lowest priority is that memory references should come second. */
14203 if (MEM_P (src2))
14204 return false;
14205 if (MEM_P (src1))
14206 return true;
14207
14208 return false;
14209 }
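/* Illustrative example (added): for a commutative PLUS where operands[0]
   and operands[2] are the same register while operands[1] is a MEM, the
   predicate above returns true, so the caller swaps the sources and src1
   then matches the destination as the matching constraint requires.  */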
14210
14211
14212 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
14213 destination to use for the operation. If different from the true
14214 destination in operands[0], a copy operation will be required. */
14215
14216 rtx
14217 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
14218 rtx operands[])
14219 {
14220 rtx dst = operands[0];
14221 rtx src1 = operands[1];
14222 rtx src2 = operands[2];
14223
14224 /* Canonicalize operand order. */
14225 if (ix86_swap_binary_operands_p (code, mode, operands))
14226 {
14227 rtx temp;
14228
14229 /* It is invalid to swap operands of different modes. */
14230 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
14231
14232 temp = src1;
14233 src1 = src2;
14234 src2 = temp;
14235 }
14236
14237 /* Both source operands cannot be in memory. */
14238 if (MEM_P (src1) && MEM_P (src2))
14239 {
14240 /* Optimization: Only read from memory once. */
14241 if (rtx_equal_p (src1, src2))
14242 {
14243 src2 = force_reg (mode, src2);
14244 src1 = src2;
14245 }
14246 else
14247 src2 = force_reg (mode, src2);
14248 }
14249
14250 /* If the destination is memory, and we do not have matching source
14251 operands, do things in registers. */
14252 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
14253 dst = gen_reg_rtx (mode);
14254
14255 /* Source 1 cannot be a constant. */
14256 if (CONSTANT_P (src1))
14257 src1 = force_reg (mode, src1);
14258
14259 /* Source 1 cannot be a non-matching memory. */
14260 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
14261 src1 = force_reg (mode, src1);
14262
14263 operands[1] = src1;
14264 operands[2] = src2;
14265 return dst;
14266 }
14267
14268 /* Similarly, but assume that the destination has already been
14269 set up properly. */
14270
14271 void
14272 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
14273 enum machine_mode mode, rtx operands[])
14274 {
14275 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
14276 gcc_assert (dst == operands[0]);
14277 }
14278
14279 /* Attempt to expand a binary operator. Make the expansion closer to the
14280 actual machine than just general_operand, which would allow 3 separate
14281 memory references (one output, two input) in a single insn. */
14282
14283 void
14284 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
14285 rtx operands[])
14286 {
14287 rtx src1, src2, dst, op, clob;
14288
14289 dst = ix86_fixup_binary_operands (code, mode, operands);
14290 src1 = operands[1];
14291 src2 = operands[2];
14292
14293 /* Emit the instruction. */
14294
14295 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
14296 if (reload_in_progress)
14297 {
14298 /* Reload doesn't know about the flags register, and doesn't know that
14299 it doesn't want to clobber it. We can only do this with PLUS. */
14300 gcc_assert (code == PLUS);
14301 emit_insn (op);
14302 }
14303 else
14304 {
14305 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
14306 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
14307 }
14308
14309 /* Fix up the destination if needed. */
14310 if (dst != operands[0])
14311 emit_move_insn (operands[0], dst);
14312 }
14313
14314 /* Return TRUE or FALSE depending on whether the binary operator meets the
14315 appropriate constraints. */
14316
14317 int
14318 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
14319 rtx operands[3])
14320 {
14321 rtx dst = operands[0];
14322 rtx src1 = operands[1];
14323 rtx src2 = operands[2];
14324
14325 /* Both source operands cannot be in memory. */
14326 if (MEM_P (src1) && MEM_P (src2))
14327 return 0;
14328
14329 /* Canonicalize operand order for commutative operators. */
14330 if (ix86_swap_binary_operands_p (code, mode, operands))
14331 {
14332 rtx temp = src1;
14333 src1 = src2;
14334 src2 = temp;
14335 }
14336
14337 /* If the destination is memory, we must have a matching source operand. */
14338 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
14339 return 0;
14340
14341 /* Source 1 cannot be a constant. */
14342 if (CONSTANT_P (src1))
14343 return 0;
14344
14345 /* Source 1 cannot be a non-matching memory. */
14346 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
14347 return 0;
14348
14349 return 1;
14350 }
14351
14352 /* Attempt to expand a unary operator. Make the expansion closer to the
14353 actual machine than just general_operand, which would allow 2 separate
14354 memory references (one output, one input) in a single insn. */
14355
14356 void
14357 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
14358 rtx operands[])
14359 {
14360 int matching_memory;
14361 rtx src, dst, op, clob;
14362
14363 dst = operands[0];
14364 src = operands[1];
14365
14366 /* If the destination is memory, and we do not have matching source
14367 operands, do things in registers. */
14368 matching_memory = 0;
14369 if (MEM_P (dst))
14370 {
14371 if (rtx_equal_p (dst, src))
14372 matching_memory = 1;
14373 else
14374 dst = gen_reg_rtx (mode);
14375 }
14376
14377 /* When the source operand is memory, the destination must match. */
14378 if (MEM_P (src) && !matching_memory)
14379 src = force_reg (mode, src);
14380
14381 /* Emit the instruction. */
14382
14383 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
14384 if (reload_in_progress || code == NOT)
14385 {
14386 /* Reload doesn't know about the flags register, and doesn't know that
14387 it doesn't want to clobber it. */
14388 gcc_assert (code == NOT);
14389 emit_insn (op);
14390 }
14391 else
14392 {
14393 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
14394 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
14395 }
14396
14397 /* Fix up the destination if needed. */
14398 if (dst != operands[0])
14399 emit_move_insn (operands[0], dst);
14400 }
14401
14402 #define LEA_SEARCH_THRESHOLD 12
14403
14404 /* Search backward for non-agu definition of register number REGNO1
14405 or register number REGNO2 in INSN's basic block until
14406 1. Pass LEA_SEARCH_THRESHOLD instructions, or
14407 2. Reach BB boundary, or
14408 3. Reach agu definition.
14409 Returns the distance between the non-agu definition point and INSN.
14410 If no definition point, returns -1. */
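/* Note (added; an assumption about intent): together with
   distance_agu_use below, this appears to support deciding whether an
   address computation should remain an LEA (executed on the AGU) or be
   split into ALU instructions, based on how close the defining and
   using instructions are.  */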
14411
14412 static int
14413 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
14414 rtx insn)
14415 {
14416 basic_block bb = BLOCK_FOR_INSN (insn);
14417 int distance = 0;
14418 df_ref *def_rec;
14419 enum attr_type insn_type;
14420
14421 if (insn != BB_HEAD (bb))
14422 {
14423 rtx prev = PREV_INSN (insn);
14424 while (prev && distance < LEA_SEARCH_THRESHOLD)
14425 {
14426 if (NONDEBUG_INSN_P (prev))
14427 {
14428 distance++;
14429 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
14430 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
14431 && !DF_REF_IS_ARTIFICIAL (*def_rec)
14432 && (regno1 == DF_REF_REGNO (*def_rec)
14433 || regno2 == DF_REF_REGNO (*def_rec)))
14434 {
14435 insn_type = get_attr_type (prev);
14436 if (insn_type != TYPE_LEA)
14437 goto done;
14438 }
14439 }
14440 if (prev == BB_HEAD (bb))
14441 break;
14442 prev = PREV_INSN (prev);
14443 }
14444 }
14445
14446 if (distance < LEA_SEARCH_THRESHOLD)
14447 {
14448 edge e;
14449 edge_iterator ei;
14450 bool simple_loop = false;
14451
14452 FOR_EACH_EDGE (e, ei, bb->preds)
14453 if (e->src == bb)
14454 {
14455 simple_loop = true;
14456 break;
14457 }
14458
14459 if (simple_loop)
14460 {
14461 rtx prev = BB_END (bb);
14462 while (prev
14463 && prev != insn
14464 && distance < LEA_SEARCH_THRESHOLD)
14465 {
14466 if (NONDEBUG_INSN_P (prev))
14467 {
14468 distance++;
14469 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
14470 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
14471 && !DF_REF_IS_ARTIFICIAL (*def_rec)
14472 && (regno1 == DF_REF_REGNO (*def_rec)
14473 || regno2 == DF_REF_REGNO (*def_rec)))
14474 {
14475 insn_type = get_attr_type (prev);
14476 if (insn_type != TYPE_LEA)
14477 goto done;
14478 }
14479 }
14480 prev = PREV_INSN (prev);
14481 }
14482 }
14483 }
14484
14485 distance = -1;
14486
14487 done:
14488 /* get_attr_type may modify recog data. We want to make sure
14489 that recog data is valid for instruction INSN, on which
14490 distance_non_agu_define is called. INSN is unchanged here. */
14491 extract_insn_cached (insn);
14492 return distance;
14493 }
14494
14495 /* Return the distance between INSN and the next insn that uses
14496 register number REGNO0 in a memory address.  Return -1 if no such
14497 use is found within LEA_SEARCH_THRESHOLD or if REGNO0 is set first. */
14498
14499 static int
14500 distance_agu_use (unsigned int regno0, rtx insn)
14501 {
14502 basic_block bb = BLOCK_FOR_INSN (insn);
14503 int distance = 0;
14504 df_ref *def_rec;
14505 df_ref *use_rec;
14506
14507 if (insn != BB_END (bb))
14508 {
14509 rtx next = NEXT_INSN (insn);
14510 while (next && distance < LEA_SEARCH_THRESHOLD)
14511 {
14512 if (NONDEBUG_INSN_P (next))
14513 {
14514 distance++;
14515
14516 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
14517 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
14518 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
14519 && regno0 == DF_REF_REGNO (*use_rec))
14520 {
14521 /* Return DISTANCE if OP0 is used in memory
14522 address in NEXT. */
14523 return distance;
14524 }
14525
14526 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
14527 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
14528 && !DF_REF_IS_ARTIFICIAL (*def_rec)
14529 && regno0 == DF_REF_REGNO (*def_rec))
14530 {
14531 /* Return -1 if OP0 is set in NEXT. */
14532 return -1;
14533 }
14534 }
14535 if (next == BB_END (bb))
14536 break;
14537 next = NEXT_INSN (next);
14538 }
14539 }
14540
14541 if (distance < LEA_SEARCH_THRESHOLD)
14542 {
14543 edge e;
14544 edge_iterator ei;
14545 bool simple_loop = false;
14546
14547 FOR_EACH_EDGE (e, ei, bb->succs)
14548 if (e->dest == bb)
14549 {
14550 simple_loop = true;
14551 break;
14552 }
14553
14554 if (simple_loop)
14555 {
14556 rtx next = BB_HEAD (bb);
14557 while (next
14558 && next != insn
14559 && distance < LEA_SEARCH_THRESHOLD)
14560 {
14561 if (NONDEBUG_INSN_P (next))
14562 {
14563 distance++;
14564
14565 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
14566 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
14567 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
14568 && regno0 == DF_REF_REGNO (*use_rec))
14569 {
14570 /* Return DISTANCE if OP0 is used in memory
14571 address in NEXT. */
14572 return distance;
14573 }
14574
14575 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
14576 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
14577 && !DF_REF_IS_ARTIFICIAL (*def_rec)
14578 && regno0 == DF_REF_REGNO (*def_rec))
14579 {
14580 /* Return -1 if OP0 is set in NEXT. */
14581 return -1;
14582 }
14583
14584 }
14585 next = NEXT_INSN (next);
14586 }
14587 }
14588 }
14589
14590 return -1;
14591 }
14592
14593 /* Define this macro to tune LEA priority vs. ADD; it takes effect when
14594 there is a choice between LEA and ADD:
14595 Negative value: ADD is preferred over LEA
14596 Zero: neutral
14597 Positive value: LEA is preferred over ADD.  */
14598 #define IX86_LEA_PRIORITY 2
14599
14600 /* Return true if it is ok to optimize an ADD operation to LEA
14601 operation to avoid flag register consumption.  On processors
14602 like Atom, if the destination register of the LEA holds an actual
14603 address that will be used soon, LEA is better; otherwise ADD
14604 is better. */
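/* A hypothetical example of the trade-off (illustrative register names and
   offsets, not a sequence emitted by this function):
     lea    0x8(%rbx), %rax
     ...
     mov    (%rax), %rcx       <- result feeds a memory address soon: LEA wins
   versus
     add    $0x8, %rbx         <- result only feeds further ALU operations
                                   and no nearby address: ADD wins.  */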
14605
14606 bool
14607 ix86_lea_for_add_ok (enum rtx_code code ATTRIBUTE_UNUSED,
14608 rtx insn, rtx operands[])
14609 {
14610 unsigned int regno0 = true_regnum (operands[0]);
14611 unsigned int regno1 = true_regnum (operands[1]);
14612 unsigned int regno2;
14613
14614 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
14615 return regno0 != regno1;
14616
14617 regno2 = true_regnum (operands[2]);
14618
14619 /* If a = b + c with a != b and a != c, we must use the lea form. */
14620 if (regno0 != regno1 && regno0 != regno2)
14621 return true;
14622 else
14623 {
14624 int dist_define, dist_use;
14625 dist_define = distance_non_agu_define (regno1, regno2, insn);
14626 if (dist_define <= 0)
14627 return true;
14628
14629 /* If this insn has both backward non-agu dependence and forward
14630 agu dependence, the one with the shorter distance takes effect. */
14631 dist_use = distance_agu_use (regno0, insn);
14632 if (dist_use <= 0
14633 || (dist_define + IX86_LEA_PRIORITY) < dist_use)
14634 return false;
14635
14636 return true;
14637 }
14638 }
14639
14640 /* Return true if destination reg of SET_BODY is shift count of
14641 USE_BODY. */
14642
14643 static bool
14644 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
14645 {
14646 rtx set_dest;
14647 rtx shift_rtx;
14648 int i;
14649
14650 /* Retrieve destination of SET_BODY. */
14651 switch (GET_CODE (set_body))
14652 {
14653 case SET:
14654 set_dest = SET_DEST (set_body);
14655 if (!set_dest || !REG_P (set_dest))
14656 return false;
14657 break;
14658 case PARALLEL:
14659 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
14660 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
14661 use_body))
14662 return true;
14663 default:
14664 return false;
14665 break;
14666 }
14667
14668 /* Retrieve shift count of USE_BODY. */
14669 switch (GET_CODE (use_body))
14670 {
14671 case SET:
14672 shift_rtx = XEXP (use_body, 1);
14673 break;
14674 case PARALLEL:
14675 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
14676 if (ix86_dep_by_shift_count_body (set_body,
14677 XVECEXP (use_body, 0, i)))
14678 return true;
14679 default:
14680 return false;
14681 break;
14682 }
14683
14684 if (shift_rtx
14685 && (GET_CODE (shift_rtx) == ASHIFT
14686 || GET_CODE (shift_rtx) == LSHIFTRT
14687 || GET_CODE (shift_rtx) == ASHIFTRT
14688 || GET_CODE (shift_rtx) == ROTATE
14689 || GET_CODE (shift_rtx) == ROTATERT))
14690 {
14691 rtx shift_count = XEXP (shift_rtx, 1);
14692
14693 /* Return true if shift count is dest of SET_BODY. */
14694 if (REG_P (shift_count)
14695 && true_regnum (set_dest) == true_regnum (shift_count))
14696 return true;
14697 }
14698
14699 return false;
14700 }
14701
14702 /* Return true if destination reg of SET_INSN is shift count of
14703 USE_INSN. */
14704
14705 bool
14706 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
14707 {
14708 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
14709 PATTERN (use_insn));
14710 }
14711
14712 /* Return TRUE or FALSE depending on whether the unary operator meets the
14713 appropriate constraints. */
14714
14715 int
14716 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
14717 enum machine_mode mode ATTRIBUTE_UNUSED,
14718 rtx operands[2] ATTRIBUTE_UNUSED)
14719 {
14720 /* If one of operands is memory, source and destination must match. */
14721 if ((MEM_P (operands[0])
14722 || MEM_P (operands[1]))
14723 && ! rtx_equal_p (operands[0], operands[1]))
14724 return FALSE;
14725 return TRUE;
14726 }
14727
14728 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
14729 are ok, keeping in mind the possible movddup alternative. */
14730
14731 bool
14732 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
14733 {
14734 if (MEM_P (operands[0]))
14735 return rtx_equal_p (operands[0], operands[1 + high]);
14736 if (MEM_P (operands[1]) && MEM_P (operands[2]))
14737 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
14738 return true;
14739 }
14740
14741 /* Post-reload splitter for converting an SF or DFmode value in an
14742 SSE register into an unsigned SImode. */
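/* A sketch of the idea, with illustrative values (not emitted here): to
   convert a float/double V in [0, 2^32) to unsigned SImode using a signed
   cvtt instruction, subtract 2^31 from values that are >= 2^31, convert,
   and xor the sign bit back in.  E.g. for V = 3000000000.0:
     V >= 2^31, so V - 2147483648.0 = 852516352.0
     cvttps2dq/cvttpd2dq          -> 852516352
     xor with 0x80000000          -> 3000000000.  */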
14743
14744 void
14745 ix86_split_convert_uns_si_sse (rtx operands[])
14746 {
14747 enum machine_mode vecmode;
14748 rtx value, large, zero_or_two31, input, two31, x;
14749
14750 large = operands[1];
14751 zero_or_two31 = operands[2];
14752 input = operands[3];
14753 two31 = operands[4];
14754 vecmode = GET_MODE (large);
14755 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
14756
14757 /* Load up the value into the low element. We must ensure that the other
14758 elements are valid floats -- zero is the easiest such value. */
14759 if (MEM_P (input))
14760 {
14761 if (vecmode == V4SFmode)
14762 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
14763 else
14764 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
14765 }
14766 else
14767 {
14768 input = gen_rtx_REG (vecmode, REGNO (input));
14769 emit_move_insn (value, CONST0_RTX (vecmode));
14770 if (vecmode == V4SFmode)
14771 emit_insn (gen_sse_movss (value, value, input));
14772 else
14773 emit_insn (gen_sse2_movsd (value, value, input));
14774 }
14775
14776 emit_move_insn (large, two31);
14777 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
14778
14779 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
14780 emit_insn (gen_rtx_SET (VOIDmode, large, x));
14781
14782 x = gen_rtx_AND (vecmode, zero_or_two31, large);
14783 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
14784
14785 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
14786 emit_insn (gen_rtx_SET (VOIDmode, value, x));
14787
14788 large = gen_rtx_REG (V4SImode, REGNO (large));
14789 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
14790
14791 x = gen_rtx_REG (V4SImode, REGNO (value));
14792 if (vecmode == V4SFmode)
14793 emit_insn (gen_sse2_cvttps2dq (x, value));
14794 else
14795 emit_insn (gen_sse2_cvttpd2dq (x, value));
14796 value = x;
14797
14798 emit_insn (gen_xorv4si3 (value, value, large));
14799 }
14800
14801 /* Convert an unsigned DImode value into a DFmode, using only SSE.
14802 Expects the 64-bit DImode to be supplied in a pair of integral
14803 registers. Requires SSE2; will use SSE3 if available. For x86_32,
14804 -mfpmath=sse, !optimize_size only. */
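/* A worked example of the bias trick used below, assuming the usual IEEE-754
   double layout (illustrative numbers): for input = 2^40, hi = 0x100 and
   lo = 0.  Pasting the exponent words onto the integer halves gives the
   doubles (2^52 + lo) = 2^52 and (2^84 + hi * 2^32) = 2^84 + 2^40.
   Subtracting the 2^52 and 2^84 biases leaves 0.0 and 2^40; their sum is
   the desired 1099511627776.0 (= 2^40).  */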
14805
14806 void
14807 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
14808 {
14809 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
14810 rtx int_xmm, fp_xmm;
14811 rtx biases, exponents;
14812 rtx x;
14813
14814 int_xmm = gen_reg_rtx (V4SImode);
14815 if (TARGET_INTER_UNIT_MOVES)
14816 emit_insn (gen_movdi_to_sse (int_xmm, input));
14817 else if (TARGET_SSE_SPLIT_REGS)
14818 {
14819 emit_clobber (int_xmm);
14820 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
14821 }
14822 else
14823 {
14824 x = gen_reg_rtx (V2DImode);
14825 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
14826 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
14827 }
14828
14829 x = gen_rtx_CONST_VECTOR (V4SImode,
14830 gen_rtvec (4, GEN_INT (0x43300000UL),
14831 GEN_INT (0x45300000UL),
14832 const0_rtx, const0_rtx));
14833 exponents = validize_mem (force_const_mem (V4SImode, x));
14834
14835 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
14836 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
14837
14838 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
14839 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
14840 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
14841 (0x1.0p84 + double(fp_value_hi_xmm)).
14842 Note these exponents differ by 32. */
14843
14844 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
14845
14846 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
14847 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
14848 real_ldexp (&bias_lo_rvt, &dconst1, 52);
14849 real_ldexp (&bias_hi_rvt, &dconst1, 84);
14850 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
14851 x = const_double_from_real_value (bias_hi_rvt, DFmode);
14852 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
14853 biases = validize_mem (force_const_mem (V2DFmode, biases));
14854 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
14855
14856 /* Add the upper and lower DFmode values together. */
14857 if (TARGET_SSE3)
14858 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
14859 else
14860 {
14861 x = copy_to_mode_reg (V2DFmode, fp_xmm);
14862 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
14863 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
14864 }
14865
14866 ix86_expand_vector_extract (false, target, fp_xmm, 0);
14867 }
14868
14869 /* Not used, but eases macroization of patterns. */
14870 void
14871 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
14872 rtx input ATTRIBUTE_UNUSED)
14873 {
14874 gcc_unreachable ();
14875 }
14876
14877 /* Convert an unsigned SImode value into a DFmode. Only currently used
14878 for SSE, but applicable anywhere. */
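/* A worked example (illustrative only): for input = 0xffffffff the PLUS
   below computes 0xffffffff - 2^31 = 0x7fffffff, floatsidf2 turns that
   signed value into 2147483647.0, and adding back 2^31 as a DFmode constant
   yields 4294967295.0, the unsigned value of the input.  */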
14879
14880 void
14881 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
14882 {
14883 REAL_VALUE_TYPE TWO31r;
14884 rtx x, fp;
14885
14886 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
14887 NULL, 1, OPTAB_DIRECT);
14888
14889 fp = gen_reg_rtx (DFmode);
14890 emit_insn (gen_floatsidf2 (fp, x));
14891
14892 real_ldexp (&TWO31r, &dconst1, 31);
14893 x = const_double_from_real_value (TWO31r, DFmode);
14894
14895 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
14896 if (x != target)
14897 emit_move_insn (target, x);
14898 }
14899
14900 /* Convert a signed DImode value into a DFmode. Only used for SSE in
14901 32-bit mode; otherwise we have a direct convert instruction. */
14902
14903 void
14904 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
14905 {
14906 REAL_VALUE_TYPE TWO32r;
14907 rtx fp_lo, fp_hi, x;
14908
14909 fp_lo = gen_reg_rtx (DFmode);
14910 fp_hi = gen_reg_rtx (DFmode);
14911
14912 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
14913
14914 real_ldexp (&TWO32r, &dconst1, 32);
14915 x = const_double_from_real_value (TWO32r, DFmode);
14916 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
14917
14918 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
14919
14920 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
14921 0, OPTAB_DIRECT);
14922 if (x != target)
14923 emit_move_insn (target, x);
14924 }
14925
14926 /* Convert an unsigned SImode value into a SFmode, using only SSE.
14927 For x86_32, -mfpmath=sse, !optimize_size only. */
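/* A worked example (illustrative only): for input = 0x80001000 we get
   int_hi = 0x8000 (32768) and int_lo = 0x1000 (4096); the result is
   32768.0f * 65536.0f + 4096.0f = 2147487744.0f, which happens to be exact
   here because the value needs no more than 24 significant bits.  */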
14928 void
14929 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
14930 {
14931 REAL_VALUE_TYPE ONE16r;
14932 rtx fp_hi, fp_lo, int_hi, int_lo, x;
14933
14934 real_ldexp (&ONE16r, &dconst1, 16);
14935 x = const_double_from_real_value (ONE16r, SFmode);
14936 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
14937 NULL, 0, OPTAB_DIRECT);
14938 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
14939 NULL, 0, OPTAB_DIRECT);
14940 fp_hi = gen_reg_rtx (SFmode);
14941 fp_lo = gen_reg_rtx (SFmode);
14942 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
14943 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
14944 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
14945 0, OPTAB_DIRECT);
14946 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
14947 0, OPTAB_DIRECT);
14948 if (!rtx_equal_p (target, fp_hi))
14949 emit_move_insn (target, fp_hi);
14950 }
14951
14952 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
14953 then replicate the value for all elements of the vector
14954 register. */
14955
14956 rtx
14957 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
14958 {
14959 rtvec v;
14960 switch (mode)
14961 {
14962 case SImode:
14963 gcc_assert (vect);
14964 v = gen_rtvec (4, value, value, value, value);
14965 return gen_rtx_CONST_VECTOR (V4SImode, v);
14966
14967 case DImode:
14968 gcc_assert (vect);
14969 v = gen_rtvec (2, value, value);
14970 return gen_rtx_CONST_VECTOR (V2DImode, v);
14971
14972 case SFmode:
14973 if (vect)
14974 v = gen_rtvec (4, value, value, value, value);
14975 else
14976 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
14977 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
14978 return gen_rtx_CONST_VECTOR (V4SFmode, v);
14979
14980 case DFmode:
14981 if (vect)
14982 v = gen_rtvec (2, value, value);
14983 else
14984 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
14985 return gen_rtx_CONST_VECTOR (V2DFmode, v);
14986
14987 default:
14988 gcc_unreachable ();
14989 }
14990 }
14991
14992 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
14993 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
14994 for an SSE register. If VECT is true, then replicate the mask for
14995 all elements of the vector register. If INVERT is true, then create
14996 a mask excluding the sign bit. */
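/* For illustration (the values follow from the IEEE-754 layouts): for SFmode
   the mask is 0x80000000 (or 0x7fffffff when INVERT); for DFmode it has only
   bit 63 set (or every bit except bit 63 when INVERT).  With VECT the same
   element is replicated into every lane of the vector register.  */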
14997
14998 rtx
14999 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
15000 {
15001 enum machine_mode vec_mode, imode;
15002 HOST_WIDE_INT hi, lo;
15003 int shift = 63;
15004 rtx v;
15005 rtx mask;
15006
15007 /* Find the sign bit, sign extended to 2*HWI. */
15008 switch (mode)
15009 {
15010 case SImode:
15011 case SFmode:
15012 imode = SImode;
15013 vec_mode = (mode == SImode) ? V4SImode : V4SFmode;
15014 lo = 0x80000000, hi = lo < 0;
15015 break;
15016
15017 case DImode:
15018 case DFmode:
15019 imode = DImode;
15020 vec_mode = (mode == DImode) ? V2DImode : V2DFmode;
15021 if (HOST_BITS_PER_WIDE_INT >= 64)
15022 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
15023 else
15024 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
15025 break;
15026
15027 case TImode:
15028 case TFmode:
15029 vec_mode = VOIDmode;
15030 if (HOST_BITS_PER_WIDE_INT >= 64)
15031 {
15032 imode = TImode;
15033 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
15034 }
15035 else
15036 {
15037 rtvec vec;
15038
15039 imode = DImode;
15040 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
15041
15042 if (invert)
15043 {
15044 lo = ~lo, hi = ~hi;
15045 v = constm1_rtx;
15046 }
15047 else
15048 v = const0_rtx;
15049
15050 mask = immed_double_const (lo, hi, imode);
15051
15052 vec = gen_rtvec (2, v, mask);
15053 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
15054 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
15055
15056 return v;
15057 }
15058 break;
15059
15060 default:
15061 gcc_unreachable ();
15062 }
15063
15064 if (invert)
15065 lo = ~lo, hi = ~hi;
15066
15067 /* Force this value into the low part of a fp vector constant. */
15068 mask = immed_double_const (lo, hi, imode);
15069 mask = gen_lowpart (mode, mask);
15070
15071 if (vec_mode == VOIDmode)
15072 return force_reg (mode, mask);
15073
15074 v = ix86_build_const_vector (mode, vect, mask);
15075 return force_reg (vec_mode, v);
15076 }
15077
15078 /* Generate code for floating point ABS or NEG. */
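/* The SSE path below relies on the usual sign-bit identities, sketched here
   for reference:
     neg:  dst = src XOR  sign_mask   (flip the sign bit)
     abs:  dst = src AND ~sign_mask   (clear the sign bit)
   where sign_mask is the value built by ix86_build_signbit_mask above.  */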
15079
15080 void
15081 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
15082 rtx operands[])
15083 {
15084 rtx mask, set, use, clob, dst, src;
15085 bool use_sse = false;
15086 bool vector_mode = VECTOR_MODE_P (mode);
15087 enum machine_mode elt_mode = mode;
15088
15089 if (vector_mode)
15090 {
15091 elt_mode = GET_MODE_INNER (mode);
15092 use_sse = true;
15093 }
15094 else if (mode == TFmode)
15095 use_sse = true;
15096 else if (TARGET_SSE_MATH)
15097 use_sse = SSE_FLOAT_MODE_P (mode);
15098
15099 /* NEG and ABS performed with SSE use bitwise mask operations.
15100 Create the appropriate mask now. */
15101 if (use_sse)
15102 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
15103 else
15104 mask = NULL_RTX;
15105
15106 dst = operands[0];
15107 src = operands[1];
15108
15109 if (vector_mode)
15110 {
15111 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
15112 set = gen_rtx_SET (VOIDmode, dst, set);
15113 emit_insn (set);
15114 }
15115 else
15116 {
15117 set = gen_rtx_fmt_e (code, mode, src);
15118 set = gen_rtx_SET (VOIDmode, dst, set);
15119 if (mask)
15120 {
15121 use = gen_rtx_USE (VOIDmode, mask);
15122 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
15123 emit_insn (gen_rtx_PARALLEL (VOIDmode,
15124 gen_rtvec (3, set, use, clob)));
15125 }
15126 else
15127 emit_insn (set);
15128 }
15129 }
15130
15131 /* Expand a copysign operation. Special case operand 0 being a constant. */
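/* For reference, the expansion below implements the usual bit-mask form of
   copysign (a sketch, not literal RTL):
     copysign (a, b) = (a AND ~sign_mask) OR (b AND sign_mask)
   with the constant-magnitude case folding the first AND away, since |a| is
   then known at compile time.  */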
15132
15133 void
15134 ix86_expand_copysign (rtx operands[])
15135 {
15136 enum machine_mode mode;
15137 rtx dest, op0, op1, mask, nmask;
15138
15139 dest = operands[0];
15140 op0 = operands[1];
15141 op1 = operands[2];
15142
15143 mode = GET_MODE (dest);
15144
15145 if (GET_CODE (op0) == CONST_DOUBLE)
15146 {
15147 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
15148
15149 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
15150 op0 = simplify_unary_operation (ABS, mode, op0, mode);
15151
15152 if (mode == SFmode || mode == DFmode)
15153 {
15154 enum machine_mode vmode;
15155
15156 vmode = mode == SFmode ? V4SFmode : V2DFmode;
15157
15158 if (op0 == CONST0_RTX (mode))
15159 op0 = CONST0_RTX (vmode);
15160 else
15161 {
15162 rtx v = ix86_build_const_vector (mode, false, op0);
15163
15164 op0 = force_reg (vmode, v);
15165 }
15166 }
15167 else if (op0 != CONST0_RTX (mode))
15168 op0 = force_reg (mode, op0);
15169
15170 mask = ix86_build_signbit_mask (mode, 0, 0);
15171
15172 if (mode == SFmode)
15173 copysign_insn = gen_copysignsf3_const;
15174 else if (mode == DFmode)
15175 copysign_insn = gen_copysigndf3_const;
15176 else
15177 copysign_insn = gen_copysigntf3_const;
15178
15179 emit_insn (copysign_insn (dest, op0, op1, mask));
15180 }
15181 else
15182 {
15183 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
15184
15185 nmask = ix86_build_signbit_mask (mode, 0, 1);
15186 mask = ix86_build_signbit_mask (mode, 0, 0);
15187
15188 if (mode == SFmode)
15189 copysign_insn = gen_copysignsf3_var;
15190 else if (mode == DFmode)
15191 copysign_insn = gen_copysigndf3_var;
15192 else
15193 copysign_insn = gen_copysigntf3_var;
15194
15195 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
15196 }
15197 }
15198
15199 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
15200 be a constant, and so has already been expanded into a vector constant. */
15201
15202 void
15203 ix86_split_copysign_const (rtx operands[])
15204 {
15205 enum machine_mode mode, vmode;
15206 rtx dest, op0, mask, x;
15207
15208 dest = operands[0];
15209 op0 = operands[1];
15210 mask = operands[3];
15211
15212 mode = GET_MODE (dest);
15213 vmode = GET_MODE (mask);
15214
15215 dest = simplify_gen_subreg (vmode, dest, mode, 0);
15216 x = gen_rtx_AND (vmode, dest, mask);
15217 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15218
15219 if (op0 != CONST0_RTX (vmode))
15220 {
15221 x = gen_rtx_IOR (vmode, dest, op0);
15222 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15223 }
15224 }
15225
15226 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
15227 so we have to do two masks. */
15228
15229 void
15230 ix86_split_copysign_var (rtx operands[])
15231 {
15232 enum machine_mode mode, vmode;
15233 rtx dest, scratch, op0, op1, mask, nmask, x;
15234
15235 dest = operands[0];
15236 scratch = operands[1];
15237 op0 = operands[2];
15238 op1 = operands[3];
15239 nmask = operands[4];
15240 mask = operands[5];
15241
15242 mode = GET_MODE (dest);
15243 vmode = GET_MODE (mask);
15244
15245 if (rtx_equal_p (op0, op1))
15246 {
15247 /* Shouldn't happen often (it's useless, obviously), but when it does
15248 we'd generate incorrect code if we continue below. */
15249 emit_move_insn (dest, op0);
15250 return;
15251 }
15252
15253 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
15254 {
15255 gcc_assert (REGNO (op1) == REGNO (scratch));
15256
15257 x = gen_rtx_AND (vmode, scratch, mask);
15258 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
15259
15260 dest = mask;
15261 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
15262 x = gen_rtx_NOT (vmode, dest);
15263 x = gen_rtx_AND (vmode, x, op0);
15264 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15265 }
15266 else
15267 {
15268 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
15269 {
15270 x = gen_rtx_AND (vmode, scratch, mask);
15271 }
15272 else /* alternative 2,4 */
15273 {
15274 gcc_assert (REGNO (mask) == REGNO (scratch));
15275 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
15276 x = gen_rtx_AND (vmode, scratch, op1);
15277 }
15278 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
15279
15280 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
15281 {
15282 dest = simplify_gen_subreg (vmode, op0, mode, 0);
15283 x = gen_rtx_AND (vmode, dest, nmask);
15284 }
15285 else /* alternative 3,4 */
15286 {
15287 gcc_assert (REGNO (nmask) == REGNO (dest));
15288 dest = nmask;
15289 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
15290 x = gen_rtx_AND (vmode, dest, op0);
15291 }
15292 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15293 }
15294
15295 x = gen_rtx_IOR (vmode, dest, scratch);
15296 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15297 }
15298
15299 /* Return TRUE or FALSE depending on whether the first SET in INSN
15300 has source and destination with matching CC modes, and that the
15301 CC mode is at least as constrained as REQ_MODE. */
15302
15303 int
15304 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
15305 {
15306 rtx set;
15307 enum machine_mode set_mode;
15308
15309 set = PATTERN (insn);
15310 if (GET_CODE (set) == PARALLEL)
15311 set = XVECEXP (set, 0, 0);
15312 gcc_assert (GET_CODE (set) == SET);
15313 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
15314
15315 set_mode = GET_MODE (SET_DEST (set));
15316 switch (set_mode)
15317 {
15318 case CCNOmode:
15319 if (req_mode != CCNOmode
15320 && (req_mode != CCmode
15321 || XEXP (SET_SRC (set), 1) != const0_rtx))
15322 return 0;
15323 break;
15324 case CCmode:
15325 if (req_mode == CCGCmode)
15326 return 0;
15327 /* FALLTHRU */
15328 case CCGCmode:
15329 if (req_mode == CCGOCmode || req_mode == CCNOmode)
15330 return 0;
15331 /* FALLTHRU */
15332 case CCGOCmode:
15333 if (req_mode == CCZmode)
15334 return 0;
15335 /* FALLTHRU */
15336 case CCAmode:
15337 case CCCmode:
15338 case CCOmode:
15339 case CCSmode:
15340 case CCZmode:
15341 break;
15342
15343 default:
15344 gcc_unreachable ();
15345 }
15346
15347 return (GET_MODE (SET_SRC (set)) == set_mode);
15348 }
15349
15350 /* Generate insn patterns to do an integer compare of OPERANDS. */
15351
15352 static rtx
15353 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
15354 {
15355 enum machine_mode cmpmode;
15356 rtx tmp, flags;
15357
15358 cmpmode = SELECT_CC_MODE (code, op0, op1);
15359 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
15360
15361 /* This is very simple, but making the interface the same as in the
15362 FP case makes the rest of the code easier. */
15363 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
15364 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
15365
15366 /* Return the test that should be put into the flags user, i.e.
15367 the bcc, scc, or cmov instruction. */
15368 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
15369 }
15370
15371 /* Figure out whether to use ordered or unordered fp comparisons.
15372 Return the appropriate mode to use. */
15373
15374 enum machine_mode
15375 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
15376 {
15377 /* ??? In order to make all comparisons reversible, we do all comparisons
15378 non-trapping when compiling for IEEE.  Once gcc is able to distinguish
15379 between trapping and nontrapping forms of comparisons, we can make inequality
15380 comparisons trapping again, since that results in better code when using
15381 FCOM-based compares. */
15382 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
15383 }
15384
15385 enum machine_mode
15386 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
15387 {
15388 enum machine_mode mode = GET_MODE (op0);
15389
15390 if (SCALAR_FLOAT_MODE_P (mode))
15391 {
15392 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
15393 return ix86_fp_compare_mode (code);
15394 }
15395
15396 switch (code)
15397 {
15398 /* Only zero flag is needed. */
15399 case EQ: /* ZF=0 */
15400 case NE: /* ZF!=0 */
15401 return CCZmode;
15402 /* Codes needing carry flag. */
15403 case GEU: /* CF=0 */
15404 case LTU: /* CF=1 */
15405 /* Detect overflow checks. They need just the carry flag. */
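/* E.g. the unsigned-overflow idiom "a + b < a" reaches here as
   (ltu (plus a b) a); only the carry flag of the addition is needed,
   hence CCCmode.  */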
15406 if (GET_CODE (op0) == PLUS
15407 && rtx_equal_p (op1, XEXP (op0, 0)))
15408 return CCCmode;
15409 else
15410 return CCmode;
15411 case GTU: /* CF=0 & ZF=0 */
15412 case LEU: /* CF=1 | ZF=1 */
15413 /* Detect overflow checks. They need just the carry flag. */
15414 if (GET_CODE (op0) == MINUS
15415 && rtx_equal_p (op1, XEXP (op0, 0)))
15416 return CCCmode;
15417 else
15418 return CCmode;
15419 /* Codes possibly doable only with sign flag when
15420 comparing against zero. */
15421 case GE: /* SF=OF or SF=0 */
15422 case LT: /* SF<>OF or SF=1 */
15423 if (op1 == const0_rtx)
15424 return CCGOCmode;
15425 else
15426 /* For other cases Carry flag is not required. */
15427 return CCGCmode;
15428 /* Codes doable only with the sign flag when comparing
15429 against zero, but for which we lack a jump instruction,
15430 so we need to use relational tests against overflow,
15431 which thus needs to be zero. */
15432 case GT: /* ZF=0 & SF=OF */
15433 case LE: /* ZF=1 | SF<>OF */
15434 if (op1 == const0_rtx)
15435 return CCNOmode;
15436 else
15437 return CCGCmode;
15438 /* The strcmp pattern does (use flags), and combine may ask us for the
15439 proper mode. */
15440 case USE:
15441 return CCmode;
15442 default:
15443 gcc_unreachable ();
15444 }
15445 }
15446
15447 /* Return the fixed registers used for condition codes. */
15448
15449 static bool
15450 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
15451 {
15452 *p1 = FLAGS_REG;
15453 *p2 = FPSR_REG;
15454 return true;
15455 }
15456
15457 /* If two condition code modes are compatible, return a condition code
15458 mode which is compatible with both. Otherwise, return
15459 VOIDmode. */
15460
15461 static enum machine_mode
15462 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
15463 {
15464 if (m1 == m2)
15465 return m1;
15466
15467 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
15468 return VOIDmode;
15469
15470 if ((m1 == CCGCmode && m2 == CCGOCmode)
15471 || (m1 == CCGOCmode && m2 == CCGCmode))
15472 return CCGCmode;
15473
15474 switch (m1)
15475 {
15476 default:
15477 gcc_unreachable ();
15478
15479 case CCmode:
15480 case CCGCmode:
15481 case CCGOCmode:
15482 case CCNOmode:
15483 case CCAmode:
15484 case CCCmode:
15485 case CCOmode:
15486 case CCSmode:
15487 case CCZmode:
15488 switch (m2)
15489 {
15490 default:
15491 return VOIDmode;
15492
15493 case CCmode:
15494 case CCGCmode:
15495 case CCGOCmode:
15496 case CCNOmode:
15497 case CCAmode:
15498 case CCCmode:
15499 case CCOmode:
15500 case CCSmode:
15501 case CCZmode:
15502 return CCmode;
15503 }
15504
15505 case CCFPmode:
15506 case CCFPUmode:
15507 /* These are only compatible with themselves, which we already
15508 checked above. */
15509 return VOIDmode;
15510 }
15511 }
15512
15513
15514 /* Return a comparison we can do that is equivalent to
15515 swap_condition (code), except possibly for orderedness.
15516 However, never change orderedness if TARGET_IEEE_FP, returning
15517 UNKNOWN in that case if necessary. */
15518
15519 static enum rtx_code
15520 ix86_fp_swap_condition (enum rtx_code code)
15521 {
15522 switch (code)
15523 {
15524 case GT: /* GTU - CF=0 & ZF=0 */
15525 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
15526 case GE: /* GEU - CF=0 */
15527 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
15528 case UNLT: /* LTU - CF=1 */
15529 return TARGET_IEEE_FP ? UNKNOWN : GT;
15530 case UNLE: /* LEU - CF=1 | ZF=1 */
15531 return TARGET_IEEE_FP ? UNKNOWN : GE;
15532 default:
15533 return swap_condition (code);
15534 }
15535 }
15536
15537 /* Return the cost of comparison CODE using the best strategy for performance.
15538 All following functions use the number of instructions as a cost metric.
15539 In the future this should be tweaked to compute bytes for optimize_size and
15540 to take into account the performance of various instructions on various CPUs. */
15541
15542 static int
15543 ix86_fp_comparison_cost (enum rtx_code code)
15544 {
15545 int arith_cost;
15546
15547 /* The cost of code using bit-twiddling on %ah. */
15548 switch (code)
15549 {
15550 case UNLE:
15551 case UNLT:
15552 case LTGT:
15553 case GT:
15554 case GE:
15555 case UNORDERED:
15556 case ORDERED:
15557 case UNEQ:
15558 arith_cost = 4;
15559 break;
15560 case LT:
15561 case NE:
15562 case EQ:
15563 case UNGE:
15564 arith_cost = TARGET_IEEE_FP ? 5 : 4;
15565 break;
15566 case LE:
15567 case UNGT:
15568 arith_cost = TARGET_IEEE_FP ? 6 : 4;
15569 break;
15570 default:
15571 gcc_unreachable ();
15572 }
15573
15574 switch (ix86_fp_comparison_strategy (code))
15575 {
15576 case IX86_FPCMP_COMI:
15577 return arith_cost > 4 ? 3 : 2;
15578 case IX86_FPCMP_SAHF:
15579 return arith_cost > 4 ? 4 : 3;
15580 default:
15581 return arith_cost;
15582 }
15583 }
15584
15585 /* Return the strategy to use for floating-point comparisons.  We assume that
15586 fcomi is always preferable where available, since that is also true when looking at size
15587 (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
15588
15589 enum ix86_fpcmp_strategy
15590 ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED)
15591 {
15592 /* Do fcomi/sahf based test when profitable. */
15593
15594 if (TARGET_CMOVE)
15595 return IX86_FPCMP_COMI;
15596
15597 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_function_for_size_p (cfun)))
15598 return IX86_FPCMP_SAHF;
15599
15600 return IX86_FPCMP_ARITH;
15601 }
15602
15603 /* Swap, force into registers, or otherwise massage the two operands
15604 to a fp comparison. The operands are updated in place; the new
15605 comparison code is returned. */
15606
15607 static enum rtx_code
15608 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
15609 {
15610 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
15611 rtx op0 = *pop0, op1 = *pop1;
15612 enum machine_mode op_mode = GET_MODE (op0);
15613 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
15614
15615 /* All of the unordered compare instructions only work on registers.
15616 The same is true of the fcomi compare instructions. The XFmode
15617 compare instructions require registers except when comparing
15618 against zero or when converting operand 1 from fixed point to
15619 floating point. */
15620
15621 if (!is_sse
15622 && (fpcmp_mode == CCFPUmode
15623 || (op_mode == XFmode
15624 && ! (standard_80387_constant_p (op0) == 1
15625 || standard_80387_constant_p (op1) == 1)
15626 && GET_CODE (op1) != FLOAT)
15627 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
15628 {
15629 op0 = force_reg (op_mode, op0);
15630 op1 = force_reg (op_mode, op1);
15631 }
15632 else
15633 {
15634 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
15635 things around if they appear profitable, otherwise force op0
15636 into a register. */
15637
15638 if (standard_80387_constant_p (op0) == 0
15639 || (MEM_P (op0)
15640 && ! (standard_80387_constant_p (op1) == 0
15641 || MEM_P (op1))))
15642 {
15643 enum rtx_code new_code = ix86_fp_swap_condition (code);
15644 if (new_code != UNKNOWN)
15645 {
15646 rtx tmp;
15647 tmp = op0, op0 = op1, op1 = tmp;
15648 code = new_code;
15649 }
15650 }
15651
15652 if (!REG_P (op0))
15653 op0 = force_reg (op_mode, op0);
15654
15655 if (CONSTANT_P (op1))
15656 {
15657 int tmp = standard_80387_constant_p (op1);
15658 if (tmp == 0)
15659 op1 = validize_mem (force_const_mem (op_mode, op1));
15660 else if (tmp == 1)
15661 {
15662 if (TARGET_CMOVE)
15663 op1 = force_reg (op_mode, op1);
15664 }
15665 else
15666 op1 = force_reg (op_mode, op1);
15667 }
15668 }
15669
15670 /* Try to rearrange the comparison to make it cheaper. */
15671 if (ix86_fp_comparison_cost (code)
15672 > ix86_fp_comparison_cost (swap_condition (code))
15673 && (REG_P (op1) || can_create_pseudo_p ()))
15674 {
15675 rtx tmp;
15676 tmp = op0, op0 = op1, op1 = tmp;
15677 code = swap_condition (code);
15678 if (!REG_P (op0))
15679 op0 = force_reg (op_mode, op0);
15680 }
15681
15682 *pop0 = op0;
15683 *pop1 = op1;
15684 return code;
15685 }
15686
15687 /* Convert the comparison codes we use to represent FP comparisons to the
15688 integer code that will result in a proper branch.  Return UNKNOWN if no such code
15689 is available. */
15690
15691 enum rtx_code
15692 ix86_fp_compare_code_to_integer (enum rtx_code code)
15693 {
15694 switch (code)
15695 {
15696 case GT:
15697 return GTU;
15698 case GE:
15699 return GEU;
15700 case ORDERED:
15701 case UNORDERED:
15702 return code;
15703 break;
15704 case UNEQ:
15705 return EQ;
15706 break;
15707 case UNLT:
15708 return LTU;
15709 break;
15710 case UNLE:
15711 return LEU;
15712 break;
15713 case LTGT:
15714 return NE;
15715 break;
15716 default:
15717 return UNKNOWN;
15718 }
15719 }
15720
15721 /* Generate insn patterns to do a floating point compare of OPERANDS. */
15722
15723 static rtx
15724 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
15725 {
15726 enum machine_mode fpcmp_mode, intcmp_mode;
15727 rtx tmp, tmp2;
15728
15729 fpcmp_mode = ix86_fp_compare_mode (code);
15730 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
15731
15732 /* Do fcomi/sahf based test when profitable. */
15733 switch (ix86_fp_comparison_strategy (code))
15734 {
15735 case IX86_FPCMP_COMI:
15736 intcmp_mode = fpcmp_mode;
15737 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
15738 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
15739 tmp);
15740 emit_insn (tmp);
15741 break;
15742
15743 case IX86_FPCMP_SAHF:
15744 intcmp_mode = fpcmp_mode;
15745 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
15746 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
15747 tmp);
15748
15749 if (!scratch)
15750 scratch = gen_reg_rtx (HImode);
15751 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
15752 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
15753 break;
15754
15755 case IX86_FPCMP_ARITH:
15756 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
15757 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
15758 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
15759 if (!scratch)
15760 scratch = gen_reg_rtx (HImode);
15761 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
15762
15763 /* In the unordered case, we have to check C2 for NaNs, which
15764 doesn't happen to work out to anything nice combination-wise.
15765 So do some bit twiddling on the value we've got in AH to come
15766 up with an appropriate set of condition codes. */
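/* For reference (the standard x87 layout, assumed here for illustration):
   after fnstsw the condition codes sit in AH as C0 = 0x01, C2 = 0x04,
   C3 = 0x40, so 0x45 tests all three.  fcom/fucom set them as
     op0 > op1:  C3=0 C2=0 C0=0
     op0 < op1:  C3=0 C2=0 C0=1
     op0 = op1:  C3=1 C2=0 C0=0
     unordered:  C3=1 C2=1 C0=1
   which is what the masks 0x01, 0x04, 0x05, 0x40, 0x44 and 0x45 below
   pick apart.  */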
15767
15768 intcmp_mode = CCNOmode;
15769 switch (code)
15770 {
15771 case GT:
15772 case UNGT:
15773 if (code == GT || !TARGET_IEEE_FP)
15774 {
15775 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
15776 code = EQ;
15777 }
15778 else
15779 {
15780 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
15781 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
15782 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
15783 intcmp_mode = CCmode;
15784 code = GEU;
15785 }
15786 break;
15787 case LT:
15788 case UNLT:
15789 if (code == LT && TARGET_IEEE_FP)
15790 {
15791 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
15792 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
15793 intcmp_mode = CCmode;
15794 code = EQ;
15795 }
15796 else
15797 {
15798 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
15799 code = NE;
15800 }
15801 break;
15802 case GE:
15803 case UNGE:
15804 if (code == GE || !TARGET_IEEE_FP)
15805 {
15806 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
15807 code = EQ;
15808 }
15809 else
15810 {
15811 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
15812 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
15813 code = NE;
15814 }
15815 break;
15816 case LE:
15817 case UNLE:
15818 if (code == LE && TARGET_IEEE_FP)
15819 {
15820 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
15821 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
15822 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
15823 intcmp_mode = CCmode;
15824 code = LTU;
15825 }
15826 else
15827 {
15828 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
15829 code = NE;
15830 }
15831 break;
15832 case EQ:
15833 case UNEQ:
15834 if (code == EQ && TARGET_IEEE_FP)
15835 {
15836 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
15837 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
15838 intcmp_mode = CCmode;
15839 code = EQ;
15840 }
15841 else
15842 {
15843 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
15844 code = NE;
15845 }
15846 break;
15847 case NE:
15848 case LTGT:
15849 if (code == NE && TARGET_IEEE_FP)
15850 {
15851 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
15852 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
15853 GEN_INT (0x40)));
15854 code = NE;
15855 }
15856 else
15857 {
15858 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
15859 code = EQ;
15860 }
15861 break;
15862
15863 case UNORDERED:
15864 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
15865 code = NE;
15866 break;
15867 case ORDERED:
15868 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
15869 code = EQ;
15870 break;
15871
15872 default:
15873 gcc_unreachable ();
15874 }
15875 break;
15876
15877 default:
15878 gcc_unreachable();
15879 }
15880
15881 /* Return the test that should be put into the flags user, i.e.
15882 the bcc, scc, or cmov instruction. */
15883 return gen_rtx_fmt_ee (code, VOIDmode,
15884 gen_rtx_REG (intcmp_mode, FLAGS_REG),
15885 const0_rtx);
15886 }
15887
15888 rtx
15889 ix86_expand_compare (enum rtx_code code)
15890 {
15891 rtx op0, op1, ret;
15892 op0 = ix86_compare_op0;
15893 op1 = ix86_compare_op1;
15894
15895 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_CC)
15896 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_op0, ix86_compare_op1);
15897
15898 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
15899 {
15900 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
15901 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
15902 }
15903 else
15904 ret = ix86_expand_int_compare (code, op0, op1);
15905
15906 return ret;
15907 }
15908
15909 void
15910 ix86_expand_branch (enum rtx_code code, rtx label)
15911 {
15912 rtx tmp;
15913
15914 switch (GET_MODE (ix86_compare_op0))
15915 {
15916 case SFmode:
15917 case DFmode:
15918 case XFmode:
15919 case QImode:
15920 case HImode:
15921 case SImode:
15922 simple:
15923 tmp = ix86_expand_compare (code);
15924 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
15925 gen_rtx_LABEL_REF (VOIDmode, label),
15926 pc_rtx);
15927 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
15928 return;
15929
15930 case DImode:
15931 if (TARGET_64BIT)
15932 goto simple;
15933 case TImode:
15934 /* Expand DImode branch into multiple compare+branch. */
15935 {
15936 rtx lo[2], hi[2], label2;
15937 enum rtx_code code1, code2, code3;
15938 enum machine_mode submode;
15939
15940 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
15941 {
15942 tmp = ix86_compare_op0;
15943 ix86_compare_op0 = ix86_compare_op1;
15944 ix86_compare_op1 = tmp;
15945 code = swap_condition (code);
15946 }
15947 if (GET_MODE (ix86_compare_op0) == DImode)
15948 {
15949 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
15950 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
15951 submode = SImode;
15952 }
15953 else
15954 {
15955 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
15956 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
15957 submode = DImode;
15958 }
15959
15960 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
15961 avoid two branches. This costs one extra insn, so disable when
15962 optimizing for size. */
15963
15964 if ((code == EQ || code == NE)
15965 && (!optimize_insn_for_size_p ()
15966 || hi[1] == const0_rtx || lo[1] == const0_rtx))
15967 {
15968 rtx xor0, xor1;
15969
15970 xor1 = hi[0];
15971 if (hi[1] != const0_rtx)
15972 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
15973 NULL_RTX, 0, OPTAB_WIDEN);
15974
15975 xor0 = lo[0];
15976 if (lo[1] != const0_rtx)
15977 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
15978 NULL_RTX, 0, OPTAB_WIDEN);
15979
15980 tmp = expand_binop (submode, ior_optab, xor1, xor0,
15981 NULL_RTX, 0, OPTAB_WIDEN);
15982
15983 ix86_compare_op0 = tmp;
15984 ix86_compare_op1 = const0_rtx;
15985 ix86_expand_branch (code, label);
15986 return;
15987 }
15988
15989 /* Otherwise, if we are doing a less-than or greater-or-equal comparison,
15990 op1 is a constant, and its low word is zero, then we can just
15991 examine the high word.  Similarly when the low word is -1 for
15992 less-or-equal or greater-than. */
15993
15994 if (CONST_INT_P (hi[1]))
15995 switch (code)
15996 {
15997 case LT: case LTU: case GE: case GEU:
15998 if (lo[1] == const0_rtx)
15999 {
16000 ix86_compare_op0 = hi[0];
16001 ix86_compare_op1 = hi[1];
16002 ix86_expand_branch (code, label);
16003 return;
16004 }
16005 break;
16006 case LE: case LEU: case GT: case GTU:
16007 if (lo[1] == constm1_rtx)
16008 {
16009 ix86_compare_op0 = hi[0];
16010 ix86_compare_op1 = hi[1];
16011 ix86_expand_branch (code, label);
16012 return;
16013 }
16014 break;
16015 default:
16016 break;
16017 }
16018
16019 /* Otherwise, we need two or three jumps. */
16020
16021 label2 = gen_label_rtx ();
16022
16023 code1 = code;
16024 code2 = swap_condition (code);
16025 code3 = unsigned_condition (code);
16026
16027 switch (code)
16028 {
16029 case LT: case GT: case LTU: case GTU:
16030 break;
16031
16032 case LE: code1 = LT; code2 = GT; break;
16033 case GE: code1 = GT; code2 = LT; break;
16034 case LEU: code1 = LTU; code2 = GTU; break;
16035 case GEU: code1 = GTU; code2 = LTU; break;
16036
16037 case EQ: code1 = UNKNOWN; code2 = NE; break;
16038 case NE: code2 = UNKNOWN; break;
16039
16040 default:
16041 gcc_unreachable ();
16042 }
16043
16044 /*
16045 * a < b =>
16046 * if (hi(a) < hi(b)) goto true;
16047 * if (hi(a) > hi(b)) goto false;
16048 * if (lo(a) < lo(b)) goto true;
16049 * false:
16050 */
16051
16052 ix86_compare_op0 = hi[0];
16053 ix86_compare_op1 = hi[1];
16054
16055 if (code1 != UNKNOWN)
16056 ix86_expand_branch (code1, label);
16057 if (code2 != UNKNOWN)
16058 ix86_expand_branch (code2, label2);
16059
16060 ix86_compare_op0 = lo[0];
16061 ix86_compare_op1 = lo[1];
16062 ix86_expand_branch (code3, label);
16063
16064 if (code2 != UNKNOWN)
16065 emit_label (label2);
16066 return;
16067 }
16068
16069 default:
16070 /* If we have already emitted a compare insn, go straight to simple.
16071 ix86_expand_compare won't emit anything if ix86_compare_emitted
16072 is non-NULL. */
16073 gcc_assert (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_CC);
16074 goto simple;
16075 }
16076 }
16077
16078 /* Split branch based on floating point condition. */
16079 void
16080 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
16081 rtx target1, rtx target2, rtx tmp, rtx pushed)
16082 {
16083 rtx condition;
16084 rtx i;
16085
16086 if (target2 != pc_rtx)
16087 {
16088 rtx tmp = target2;
16089 code = reverse_condition_maybe_unordered (code);
16090 target2 = target1;
16091 target1 = tmp;
16092 }
16093
16094 condition = ix86_expand_fp_compare (code, op1, op2,
16095 tmp);
16096
16097 /* Remove pushed operand from stack. */
16098 if (pushed)
16099 ix86_free_from_memory (GET_MODE (pushed));
16100
16101 i = emit_jump_insn (gen_rtx_SET
16102 (VOIDmode, pc_rtx,
16103 gen_rtx_IF_THEN_ELSE (VOIDmode,
16104 condition, target1, target2)));
16105 if (split_branch_probability >= 0)
16106 add_reg_note (i, REG_BR_PROB, GEN_INT (split_branch_probability));
16107 }
16108
16109 void
16110 ix86_expand_setcc (enum rtx_code code, rtx dest)
16111 {
16112 rtx ret;
16113
16114 gcc_assert (GET_MODE (dest) == QImode);
16115
16116 ret = ix86_expand_compare (code);
16117 PUT_MODE (ret, QImode);
16118 emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
16119 }
16120
16121 /* Expand comparison setting or clearing carry flag. Return true when
16122 successful and set pop for the operation. */
16123 static bool
16124 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
16125 {
16126 enum machine_mode mode =
16127 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
16128
16129 /* Do not handle DImode compares that go through special path. */
16130 if (mode == (TARGET_64BIT ? TImode : DImode))
16131 return false;
16132
16133 if (SCALAR_FLOAT_MODE_P (mode))
16134 {
16135 rtx compare_op, compare_seq;
16136
16137 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
16138
16139 /* Shortcut: the following common codes never translate
16140 into carry-flag compares. */
16141 if (code == EQ || code == NE || code == UNEQ || code == LTGT
16142 || code == ORDERED || code == UNORDERED)
16143 return false;
16144
16145 /* These comparisons require zero flag; swap operands so they won't. */
16146 if ((code == GT || code == UNLE || code == LE || code == UNGT)
16147 && !TARGET_IEEE_FP)
16148 {
16149 rtx tmp = op0;
16150 op0 = op1;
16151 op1 = tmp;
16152 code = swap_condition (code);
16153 }
16154
16155 /* Try to expand the comparison and verify that we end up with
16156 a carry-flag-based comparison.  This fails to be true only when
16157 we decide to expand the comparison using arithmetic, which is not
16158 a common scenario. */
16159 start_sequence ();
16160 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
16161 compare_seq = get_insns ();
16162 end_sequence ();
16163
16164 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
16165 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
16166 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
16167 else
16168 code = GET_CODE (compare_op);
16169
16170 if (code != LTU && code != GEU)
16171 return false;
16172
16173 emit_insn (compare_seq);
16174 *pop = compare_op;
16175 return true;
16176 }
16177
16178 if (!INTEGRAL_MODE_P (mode))
16179 return false;
16180
16181 switch (code)
16182 {
16183 case LTU:
16184 case GEU:
16185 break;
16186
16187 /* Convert a==0 into (unsigned)a<1. */
16188 case EQ:
16189 case NE:
16190 if (op1 != const0_rtx)
16191 return false;
16192 op1 = const1_rtx;
16193 code = (code == EQ ? LTU : GEU);
16194 break;
16195
16196 /* Convert a>b into b<a or a>=b-1. */
16197 case GTU:
16198 case LEU:
16199 if (CONST_INT_P (op1))
16200 {
16201 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
16202 /* Bail out on overflow.  We could still swap the operands, but that
16203 would force loading the constant into a register. */
16204 if (op1 == const0_rtx
16205 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
16206 return false;
16207 code = (code == GTU ? GEU : LTU);
16208 }
16209 else
16210 {
16211 rtx tmp = op1;
16212 op1 = op0;
16213 op0 = tmp;
16214 code = (code == GTU ? LTU : GEU);
16215 }
16216 break;
16217
16218 /* Convert a>=0 into (unsigned)a<0x80000000. */
16219 case LT:
16220 case GE:
16221 if (mode == DImode || op1 != const0_rtx)
16222 return false;
16223 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
16224 code = (code == LT ? GEU : LTU);
16225 break;
16226 case LE:
16227 case GT:
16228 if (mode == DImode || op1 != constm1_rtx)
16229 return false;
16230 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
16231 code = (code == LE ? GEU : LTU);
16232 break;
16233
16234 default:
16235 return false;
16236 }
16237 /* Swapping operands may cause a constant to appear as the first operand. */
16238 if (!nonimmediate_operand (op0, VOIDmode))
16239 {
16240 if (!can_create_pseudo_p ())
16241 return false;
16242 op0 = force_reg (mode, op0);
16243 }
16244 ix86_compare_op0 = op0;
16245 ix86_compare_op1 = op1;
16246 *pop = ix86_expand_compare (code);
16247 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
16248 return true;
16249 }
16250
16251 int
16252 ix86_expand_int_movcc (rtx operands[])
16253 {
16254 enum rtx_code code = GET_CODE (operands[1]), compare_code;
16255 rtx compare_seq, compare_op;
16256 enum machine_mode mode = GET_MODE (operands[0]);
16257 bool sign_bit_compare_p = false;
16258
16259 start_sequence ();
16260 ix86_compare_op0 = XEXP (operands[1], 0);
16261 ix86_compare_op1 = XEXP (operands[1], 1);
16262 compare_op = ix86_expand_compare (code);
16263 compare_seq = get_insns ();
16264 end_sequence ();
16265
16266 compare_code = GET_CODE (compare_op);
16267
16268 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
16269 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
16270 sign_bit_compare_p = true;
16271
16272 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
16273 HImode insns, we'd be swallowed in word prefix ops. */
16274
16275 if ((mode != HImode || TARGET_FAST_PREFIX)
16276 && (mode != (TARGET_64BIT ? TImode : DImode))
16277 && CONST_INT_P (operands[2])
16278 && CONST_INT_P (operands[3]))
16279 {
16280 rtx out = operands[0];
16281 HOST_WIDE_INT ct = INTVAL (operands[2]);
16282 HOST_WIDE_INT cf = INTVAL (operands[3]);
16283 HOST_WIDE_INT diff;
16284
16285 diff = ct - cf;
16286 /* Sign bit compares are better done using shifts than by using
16287 sbb. */
16288 if (sign_bit_compare_p
16289 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
16290 ix86_compare_op1, &compare_op))
16291 {
16292 /* Detect overlap between destination and compare sources. */
16293 rtx tmp = out;
16294
16295 if (!sign_bit_compare_p)
16296 {
16297 rtx flags;
16298 bool fpcmp = false;
16299
16300 compare_code = GET_CODE (compare_op);
16301
16302 flags = XEXP (compare_op, 0);
16303
16304 if (GET_MODE (flags) == CCFPmode
16305 || GET_MODE (flags) == CCFPUmode)
16306 {
16307 fpcmp = true;
16308 compare_code
16309 = ix86_fp_compare_code_to_integer (compare_code);
16310 }
16311
16312 /* To simplify rest of code, restrict to the GEU case. */
16313 if (compare_code == LTU)
16314 {
16315 HOST_WIDE_INT tmp = ct;
16316 ct = cf;
16317 cf = tmp;
16318 compare_code = reverse_condition (compare_code);
16319 code = reverse_condition (code);
16320 }
16321 else
16322 {
16323 if (fpcmp)
16324 PUT_CODE (compare_op,
16325 reverse_condition_maybe_unordered
16326 (GET_CODE (compare_op)));
16327 else
16328 PUT_CODE (compare_op,
16329 reverse_condition (GET_CODE (compare_op)));
16330 }
16331 diff = ct - cf;
16332
16333 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
16334 || reg_overlap_mentioned_p (out, ix86_compare_op1))
16335 tmp = gen_reg_rtx (mode);
16336
16337 if (mode == DImode)
16338 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
16339 else
16340 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
16341 flags, compare_op));
16342 }
16343 else
16344 {
16345 if (code == GT || code == GE)
16346 code = reverse_condition (code);
16347 else
16348 {
16349 HOST_WIDE_INT tmp = ct;
16350 ct = cf;
16351 cf = tmp;
16352 diff = ct - cf;
16353 }
16354 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
16355 ix86_compare_op1, VOIDmode, 0, -1);
16356 }
16357
16358 if (diff == 1)
16359 {
16360 /*
16361 * cmpl op0,op1
16362 * sbbl dest,dest
16363 * [addl dest, ct]
16364 *
16365 * Size 5 - 8.
16366 */
16367 if (ct)
16368 tmp = expand_simple_binop (mode, PLUS,
16369 tmp, GEN_INT (ct),
16370 copy_rtx (tmp), 1, OPTAB_DIRECT);
16371 }
16372 else if (cf == -1)
16373 {
16374 /*
16375 * cmpl op0,op1
16376 * sbbl dest,dest
16377 * orl $ct, dest
16378 *
16379 * Size 8.
16380 */
16381 tmp = expand_simple_binop (mode, IOR,
16382 tmp, GEN_INT (ct),
16383 copy_rtx (tmp), 1, OPTAB_DIRECT);
16384 }
16385 else if (diff == -1 && ct)
16386 {
16387 /*
16388 * cmpl op0,op1
16389 * sbbl dest,dest
16390 * notl dest
16391 * [addl dest, cf]
16392 *
16393 * Size 8 - 11.
16394 */
16395 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
16396 if (cf)
16397 tmp = expand_simple_binop (mode, PLUS,
16398 copy_rtx (tmp), GEN_INT (cf),
16399 copy_rtx (tmp), 1, OPTAB_DIRECT);
16400 }
16401 else
16402 {
16403 /*
16404 * cmpl op0,op1
16405 * sbbl dest,dest
16406 * [notl dest]
16407 * andl cf - ct, dest
16408 * [addl dest, ct]
16409 *
16410 * Size 8 - 11.
16411 */
16412
16413 if (cf == 0)
16414 {
16415 cf = ct;
16416 ct = 0;
16417 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
16418 }
16419
16420 tmp = expand_simple_binop (mode, AND,
16421 copy_rtx (tmp),
16422 gen_int_mode (cf - ct, mode),
16423 copy_rtx (tmp), 1, OPTAB_DIRECT);
16424 if (ct)
16425 tmp = expand_simple_binop (mode, PLUS,
16426 copy_rtx (tmp), GEN_INT (ct),
16427 copy_rtx (tmp), 1, OPTAB_DIRECT);
16428 }
16429
16430 if (!rtx_equal_p (tmp, out))
16431 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
16432
16433 return 1; /* DONE */
16434 }
16435
16436 if (diff < 0)
16437 {
16438 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
16439
16440 HOST_WIDE_INT tmp;
16441 tmp = ct, ct = cf, cf = tmp;
16442 diff = -diff;
16443
16444 if (SCALAR_FLOAT_MODE_P (cmp_mode))
16445 {
16446 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
16447
16448 /* We may be reversing an unordered compare to a normal compare, which
16449 is not valid in general (we may convert a non-trapping condition
16450 to a trapping one); however, on i386 we currently emit all
16451 comparisons unordered. */
16452 compare_code = reverse_condition_maybe_unordered (compare_code);
16453 code = reverse_condition_maybe_unordered (code);
16454 }
16455 else
16456 {
16457 compare_code = reverse_condition (compare_code);
16458 code = reverse_condition (code);
16459 }
16460 }
16461
16462 compare_code = UNKNOWN;
16463 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
16464 && CONST_INT_P (ix86_compare_op1))
16465 {
16466 if (ix86_compare_op1 == const0_rtx
16467 && (code == LT || code == GE))
16468 compare_code = code;
16469 else if (ix86_compare_op1 == constm1_rtx)
16470 {
16471 if (code == LE)
16472 compare_code = LT;
16473 else if (code == GT)
16474 compare_code = GE;
16475 }
16476 }
16477
16478 /* Optimize dest = (op0 < 0) ? -1 : cf. */
16479 if (compare_code != UNKNOWN
16480 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
16481 && (cf == -1 || ct == -1))
16482 {
16483 /* If lea code below could be used, only optimize
16484 if it results in a 2 insn sequence. */
16485
16486 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
16487 || diff == 3 || diff == 5 || diff == 9)
16488 || (compare_code == LT && ct == -1)
16489 || (compare_code == GE && cf == -1))
16490 {
16491 /*
16492 * notl op1 (if necessary)
16493 * sarl $31, op1
16494 * orl cf, op1
16495 */
16496 if (ct != -1)
16497 {
16498 cf = ct;
16499 ct = -1;
16500 code = reverse_condition (code);
16501 }
16502
16503 out = emit_store_flag (out, code, ix86_compare_op0,
16504 ix86_compare_op1, VOIDmode, 0, -1);
16505
16506 out = expand_simple_binop (mode, IOR,
16507 out, GEN_INT (cf),
16508 out, 1, OPTAB_DIRECT);
16509 if (out != operands[0])
16510 emit_move_insn (operands[0], out);
16511
16512 return 1; /* DONE */
16513 }
16514 }
16515
16516
16517 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
16518 || diff == 3 || diff == 5 || diff == 9)
16519 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
16520 && (mode != DImode
16521 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
16522 {
16523 /*
16524 * xorl dest,dest
16525 * cmpl op1,op2
16526 * setcc dest
16527 * lea cf(dest*(ct-cf)),dest
16528 *
16529 * Size 14.
16530 *
16531 * This also catches the degenerate setcc-only case.
16532 */
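/* For example (illustrative only): "a == b ? 7 : 3" has diff = 4, so after
   setcc leaves 0/1 in dest, a single lea 3(,dest,4), dest selects 3 or 7. */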
16533
16534 rtx tmp;
16535 int nops;
16536
16537 out = emit_store_flag (out, code, ix86_compare_op0,
16538 ix86_compare_op1, VOIDmode, 0, 1);
16539
16540 nops = 0;
16541 /* On x86_64 the lea instruction operates on Pmode, so we need
16542 to get the arithmetic done in the proper mode to match. */
16543 if (diff == 1)
16544 tmp = copy_rtx (out);
16545 else
16546 {
16547 rtx out1;
16548 out1 = copy_rtx (out);
16549 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
16550 nops++;
16551 if (diff & 1)
16552 {
16553 tmp = gen_rtx_PLUS (mode, tmp, out1);
16554 nops++;
16555 }
16556 }
16557 if (cf != 0)
16558 {
16559 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
16560 nops++;
16561 }
16562 if (!rtx_equal_p (tmp, out))
16563 {
16564 if (nops == 1)
16565 out = force_operand (tmp, copy_rtx (out));
16566 else
16567 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
16568 }
16569 if (!rtx_equal_p (out, operands[0]))
16570 emit_move_insn (operands[0], copy_rtx (out));
16571
16572 return 1; /* DONE */
16573 }
16574
16575 /*
16576 * General case: Jumpful:
16577 * xorl dest,dest cmpl op1, op2
16578 * cmpl op1, op2 movl ct, dest
16579 * setcc dest jcc 1f
16580 * decl dest movl cf, dest
16581 * andl (cf-ct),dest 1:
16582 * addl ct,dest
16583 *
16584 * Size 20. Size 14.
16585 *
16586 * This is reasonably steep, but branch mispredict costs are
16587 * high on modern cpus, so consider failing only if optimizing
16588 * for space.
16589 */
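/* Worked example of the branchless sequence above (illustrative):
   "a != b ? 10 : 3" - setcc gives 0/1, the decrement turns that into -1/0,
   the AND with (3 - 10) keeps -7 only in the false case, and the final
   add of 10 produces 10 or 3. */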
16590
16591 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
16592 && BRANCH_COST (optimize_insn_for_speed_p (),
16593 false) >= 2)
16594 {
16595 if (cf == 0)
16596 {
16597 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
16598
16599 cf = ct;
16600 ct = 0;
16601
16602 if (SCALAR_FLOAT_MODE_P (cmp_mode))
16603 {
16604 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
16605
16606 /* We may be reversing an unordered compare to a normal compare,
16607 which is not valid in general (we may convert a non-trapping
16608 condition to a trapping one); however, on i386 we currently
16609 emit all comparisons unordered. */
16610 code = reverse_condition_maybe_unordered (code);
16611 }
16612 else
16613 {
16614 code = reverse_condition (code);
16615 if (compare_code != UNKNOWN)
16616 compare_code = reverse_condition (compare_code);
16617 }
16618 }
16619
16620 if (compare_code != UNKNOWN)
16621 {
16622 /* notl op1 (if needed)
16623 sarl $31, op1
16624 andl (cf-ct), op1
16625 addl ct, op1
16626
16627 For x < 0 (resp. x <= -1) there will be no notl,
16628 so if possible swap the constants to get rid of the
16629 complement.
16630 True/false will be -1/0 while code below (store flag
16631 followed by decrement) is 0/-1, so the constants need
16632 to be exchanged once more. */
16633
16634 if (compare_code == GE || !cf)
16635 {
16636 code = reverse_condition (code);
16637 compare_code = LT;
16638 }
16639 else
16640 {
16641 HOST_WIDE_INT tmp = cf;
16642 cf = ct;
16643 ct = tmp;
16644 }
16645
16646 out = emit_store_flag (out, code, ix86_compare_op0,
16647 ix86_compare_op1, VOIDmode, 0, -1);
16648 }
16649 else
16650 {
16651 out = emit_store_flag (out, code, ix86_compare_op0,
16652 ix86_compare_op1, VOIDmode, 0, 1);
16653
16654 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
16655 copy_rtx (out), 1, OPTAB_DIRECT);
16656 }
16657
16658 out = expand_simple_binop (mode, AND, copy_rtx (out),
16659 gen_int_mode (cf - ct, mode),
16660 copy_rtx (out), 1, OPTAB_DIRECT);
16661 if (ct)
16662 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
16663 copy_rtx (out), 1, OPTAB_DIRECT);
16664 if (!rtx_equal_p (out, operands[0]))
16665 emit_move_insn (operands[0], copy_rtx (out));
16666
16667 return 1; /* DONE */
16668 }
16669 }
16670
16671 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
16672 {
16673 /* Try a few things more with specific constants and a variable. */
16674
16675 optab op;
16676 rtx var, orig_out, out, tmp;
16677
16678 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
16679 return 0; /* FAIL */
16680
16681 /* If one of the two operands is an interesting constant, load a
16682 constant with the above and mask it in with a logical operation. */
16683
16684 if (CONST_INT_P (operands[2]))
16685 {
16686 var = operands[3];
16687 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
16688 operands[3] = constm1_rtx, op = and_optab;
16689 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
16690 operands[3] = const0_rtx, op = ior_optab;
16691 else
16692 return 0; /* FAIL */
16693 }
16694 else if (CONST_INT_P (operands[3]))
16695 {
16696 var = operands[2];
16697 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
16698 operands[2] = constm1_rtx, op = and_optab;
16699 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
16700 operands[2] = const0_rtx, op = ior_optab;
16701 else
16702 return 0; /* FAIL */
16703 }
16704 else
16705 return 0; /* FAIL */
16706
16707 orig_out = operands[0];
16708 tmp = gen_reg_rtx (mode);
16709 operands[0] = tmp;
16710
16711 /* Recurse to get the constant loaded. */
16712 if (ix86_expand_int_movcc (operands) == 0)
16713 return 0; /* FAIL */
16714
16715 /* Mask in the interesting variable. */
16716 out = expand_binop (mode, op, var, tmp, orig_out, 0,
16717 OPTAB_WIDEN);
16718 if (!rtx_equal_p (out, orig_out))
16719 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
16720
16721 return 1; /* DONE */
16722 }
16723
16724 /*
16725 * For comparison with above,
16726 *
16727 * movl cf,dest
16728 * movl ct,tmp
16729 * cmpl op1,op2
16730 * cmovcc tmp,dest
16731 *
16732 * Size 15.
16733 */
16734
16735 if (! nonimmediate_operand (operands[2], mode))
16736 operands[2] = force_reg (mode, operands[2]);
16737 if (! nonimmediate_operand (operands[3], mode))
16738 operands[3] = force_reg (mode, operands[3]);
16739
16740 if (! register_operand (operands[2], VOIDmode)
16741 && (mode == QImode
16742 || ! register_operand (operands[3], VOIDmode)))
16743 operands[2] = force_reg (mode, operands[2]);
16744
16745 if (mode == QImode
16746 && ! register_operand (operands[3], VOIDmode))
16747 operands[3] = force_reg (mode, operands[3]);
16748
16749 emit_insn (compare_seq);
16750 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
16751 gen_rtx_IF_THEN_ELSE (mode,
16752 compare_op, operands[2],
16753 operands[3])));
16754
16755 return 1; /* DONE */
16756 }
16757
16758 /* Swap, force into registers, or otherwise massage the two operands
16759 to an sse comparison with a mask result. Thus we differ a bit from
16760 ix86_prepare_fp_compare_args which expects to produce a flags result.
16761
16762 The DEST operand exists to help determine whether to commute commutative
16763 operators. The POP0/POP1 operands are updated in place. The new
16764 comparison code is returned, or UNKNOWN if not implementable. */
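/* For example, a GT comparison is handled by swapping the operands and
   using LT, which the SSE compare insns (e.g. cmpltps/cmpltss) encode
   directly; this is only an illustration - the actual insn selection is
   done by the patterns in sse.md. */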
16765
16766 static enum rtx_code
16767 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
16768 rtx *pop0, rtx *pop1)
16769 {
16770 rtx tmp;
16771
16772 switch (code)
16773 {
16774 case LTGT:
16775 case UNEQ:
16776 /* We have no LTGT as an operator. We could implement it with
16777 NE & ORDERED, but this requires an extra temporary. It's
16778 not clear that it's worth it. */
16779 return UNKNOWN;
16780
16781 case LT:
16782 case LE:
16783 case UNGT:
16784 case UNGE:
16785 /* These are supported directly. */
16786 break;
16787
16788 case EQ:
16789 case NE:
16790 case UNORDERED:
16791 case ORDERED:
16792 /* For commutative operators, try to canonicalize the destination
16793 operand to be first in the comparison - this helps reload to
16794 avoid extra moves. */
16795 if (!dest || !rtx_equal_p (dest, *pop1))
16796 break;
16797 /* FALLTHRU */
16798
16799 case GE:
16800 case GT:
16801 case UNLE:
16802 case UNLT:
16803 /* These are not supported directly. Swap the comparison operands
16804 to transform into something that is supported. */
16805 tmp = *pop0;
16806 *pop0 = *pop1;
16807 *pop1 = tmp;
16808 code = swap_condition (code);
16809 break;
16810
16811 default:
16812 gcc_unreachable ();
16813 }
16814
16815 return code;
16816 }
16817
16818 /* Detect conditional moves that exactly match min/max operational
16819 semantics. Note that this is IEEE safe, as long as we don't
16820 interchange the operands.
16821
16822 Returns FALSE if this conditional move doesn't match a MIN/MAX,
16823 and TRUE if the operation is successful and instructions are emitted. */
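/* For example, "a < b ? a : b" matches the is_min case below; with
   finite-math-only and unsafe-math-optimizations a plain SMIN rtx is used,
   otherwise the UNSPEC form preserves the IEEE operand order (typically
   ending up as minss/minps). */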
16824
16825 static bool
16826 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
16827 rtx cmp_op1, rtx if_true, rtx if_false)
16828 {
16829 enum machine_mode mode;
16830 bool is_min;
16831 rtx tmp;
16832
16833 if (code == LT)
16834 ;
16835 else if (code == UNGE)
16836 {
16837 tmp = if_true;
16838 if_true = if_false;
16839 if_false = tmp;
16840 }
16841 else
16842 return false;
16843
16844 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
16845 is_min = true;
16846 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
16847 is_min = false;
16848 else
16849 return false;
16850
16851 mode = GET_MODE (dest);
16852
16853 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
16854 but MODE may be a vector mode and thus not appropriate. */
16855 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
16856 {
16857 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
16858 rtvec v;
16859
16860 if_true = force_reg (mode, if_true);
16861 v = gen_rtvec (2, if_true, if_false);
16862 tmp = gen_rtx_UNSPEC (mode, v, u);
16863 }
16864 else
16865 {
16866 code = is_min ? SMIN : SMAX;
16867 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
16868 }
16869
16870 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
16871 return true;
16872 }
16873
16874 /* Expand an sse vector comparison. Return the register with the result. */
16875
16876 static rtx
16877 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
16878 rtx op_true, rtx op_false)
16879 {
16880 enum machine_mode mode = GET_MODE (dest);
16881 rtx x;
16882
16883 cmp_op0 = force_reg (mode, cmp_op0);
16884 if (!nonimmediate_operand (cmp_op1, mode))
16885 cmp_op1 = force_reg (mode, cmp_op1);
16886
16887 if (optimize
16888 || reg_overlap_mentioned_p (dest, op_true)
16889 || reg_overlap_mentioned_p (dest, op_false))
16890 dest = gen_reg_rtx (mode);
16891
16892 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
16893 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16894
16895 return dest;
16896 }
16897
16898 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
16899 operations. This is used for both scalar and vector conditional moves. */
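/* In the general case this computes dest = (cmp & op_true) | (~cmp & op_false)
   with three logical ops (pand/pandn/por for vectors); on XOP targets a single
   vpcmov conditional move is used instead, and when one arm is the zero vector
   the whole thing collapses to a single AND or ANDN. */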
16900
16901 static void
16902 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
16903 {
16904 enum machine_mode mode = GET_MODE (dest);
16905 rtx t2, t3, x;
16906
16907 if (op_false == CONST0_RTX (mode))
16908 {
16909 op_true = force_reg (mode, op_true);
16910 x = gen_rtx_AND (mode, cmp, op_true);
16911 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16912 }
16913 else if (op_true == CONST0_RTX (mode))
16914 {
16915 op_false = force_reg (mode, op_false);
16916 x = gen_rtx_NOT (mode, cmp);
16917 x = gen_rtx_AND (mode, x, op_false);
16918 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16919 }
16920 else if (TARGET_XOP)
16921 {
16922 rtx pcmov = gen_rtx_SET (mode, dest,
16923 gen_rtx_IF_THEN_ELSE (mode, cmp,
16924 op_true,
16925 op_false));
16926 emit_insn (pcmov);
16927 }
16928 else
16929 {
16930 op_true = force_reg (mode, op_true);
16931 op_false = force_reg (mode, op_false);
16932
16933 t2 = gen_reg_rtx (mode);
16934 if (optimize)
16935 t3 = gen_reg_rtx (mode);
16936 else
16937 t3 = dest;
16938
16939 x = gen_rtx_AND (mode, op_true, cmp);
16940 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
16941
16942 x = gen_rtx_NOT (mode, cmp);
16943 x = gen_rtx_AND (mode, x, op_false);
16944 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
16945
16946 x = gen_rtx_IOR (mode, t3, t2);
16947 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16948 }
16949 }
16950
16951 /* Expand a floating-point conditional move. Return true if successful. */
16952
16953 int
16954 ix86_expand_fp_movcc (rtx operands[])
16955 {
16956 enum machine_mode mode = GET_MODE (operands[0]);
16957 enum rtx_code code = GET_CODE (operands[1]);
16958 rtx tmp, compare_op;
16959
16960 ix86_compare_op0 = XEXP (operands[1], 0);
16961 ix86_compare_op1 = XEXP (operands[1], 1);
16962 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
16963 {
16964 enum machine_mode cmode;
16965
16966 /* Since we have no cmove for sse registers, don't force bad register
16967 allocation just to gain access to it. Deny movcc when the
16968 comparison mode doesn't match the move mode. */
16969 cmode = GET_MODE (ix86_compare_op0);
16970 if (cmode == VOIDmode)
16971 cmode = GET_MODE (ix86_compare_op1);
16972 if (cmode != mode)
16973 return 0;
16974
16975 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
16976 &ix86_compare_op0,
16977 &ix86_compare_op1);
16978 if (code == UNKNOWN)
16979 return 0;
16980
16981 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
16982 ix86_compare_op1, operands[2],
16983 operands[3]))
16984 return 1;
16985
16986 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
16987 ix86_compare_op1, operands[2], operands[3]);
16988 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
16989 return 1;
16990 }
16991
16992 /* The floating point conditional move instructions don't directly
16993 support conditions resulting from a signed integer comparison. */
16994
16995 compare_op = ix86_expand_compare (code);
16996 if (!fcmov_comparison_operator (compare_op, VOIDmode))
16997 {
16998 tmp = gen_reg_rtx (QImode);
16999 ix86_expand_setcc (code, tmp);
17000 code = NE;
17001 ix86_compare_op0 = tmp;
17002 ix86_compare_op1 = const0_rtx;
17003 compare_op = ix86_expand_compare (code);
17004 }
17005
17006 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
17007 gen_rtx_IF_THEN_ELSE (mode, compare_op,
17008 operands[2], operands[3])));
17009
17010 return 1;
17011 }
17012
17013 /* Expand a floating-point vector conditional move; a vcond operation
17014 rather than a movcc operation. */
17015
17016 bool
17017 ix86_expand_fp_vcond (rtx operands[])
17018 {
17019 enum rtx_code code = GET_CODE (operands[3]);
17020 rtx cmp;
17021
17022 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
17023 &operands[4], &operands[5]);
17024 if (code == UNKNOWN)
17025 return false;
17026
17027 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
17028 operands[5], operands[1], operands[2]))
17029 return true;
17030
17031 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
17032 operands[1], operands[2]);
17033 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
17034 return true;
17035 }
17036
17037 /* Expand a signed/unsigned integral vector conditional move. */
17038
17039 bool
17040 ix86_expand_int_vcond (rtx operands[])
17041 {
17042 enum machine_mode mode = GET_MODE (operands[0]);
17043 enum rtx_code code = GET_CODE (operands[3]);
17044 bool negate = false;
17045 rtx x, cop0, cop1;
17046
17047 cop0 = operands[4];
17048 cop1 = operands[5];
17049
17050 /* XOP supports all of the comparisons on all vector int types. */
17051 if (!TARGET_XOP)
17052 {
17053 /* Canonicalize the comparison to EQ, GT, GTU. */
17054 switch (code)
17055 {
17056 case EQ:
17057 case GT:
17058 case GTU:
17059 break;
17060
17061 case NE:
17062 case LE:
17063 case LEU:
17064 code = reverse_condition (code);
17065 negate = true;
17066 break;
17067
17068 case GE:
17069 case GEU:
17070 code = reverse_condition (code);
17071 negate = true;
17072 /* FALLTHRU */
17073
17074 case LT:
17075 case LTU:
17076 code = swap_condition (code);
17077 x = cop0, cop0 = cop1, cop1 = x;
17078 break;
17079
17080 default:
17081 gcc_unreachable ();
17082 }
17083
17084 /* Only SSE4.1/SSE4.2 supports V2DImode. */
17085 if (mode == V2DImode)
17086 {
17087 switch (code)
17088 {
17089 case EQ:
17090 /* SSE4.1 supports EQ. */
17091 if (!TARGET_SSE4_1)
17092 return false;
17093 break;
17094
17095 case GT:
17096 case GTU:
17097 /* SSE4.2 supports GT/GTU. */
17098 if (!TARGET_SSE4_2)
17099 return false;
17100 break;
17101
17102 default:
17103 gcc_unreachable ();
17104 }
17105 }
17106
17107 /* Unsigned parallel compare is not supported by the hardware.
17108 Play some tricks to turn this into a signed comparison
17109 against 0. */
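/* Illustrative summary of the tricks below: for V4SI/V2DI, subtracting the
   sign-bit mask from both operands turns an unsigned GTU into an ordinary
   signed GT; for V16QI/V8HI, an unsigned saturating subtract followed by a
   compare against zero computes LEU, and the NEGATE flag is flipped to
   recover GTU. */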
17110 if (code == GTU)
17111 {
17112 cop0 = force_reg (mode, cop0);
17113
17114 switch (mode)
17115 {
17116 case V4SImode:
17117 case V2DImode:
17118 {
17119 rtx t1, t2, mask;
17120 rtx (*gen_sub3) (rtx, rtx, rtx);
17121
17122 /* Subtract (-(INT MAX) - 1) from both operands to make
17123 them signed. */
17124 mask = ix86_build_signbit_mask (GET_MODE_INNER (mode),
17125 true, false);
17126 gen_sub3 = (mode == V4SImode
17127 ? gen_subv4si3 : gen_subv2di3);
17128 t1 = gen_reg_rtx (mode);
17129 emit_insn (gen_sub3 (t1, cop0, mask));
17130
17131 t2 = gen_reg_rtx (mode);
17132 emit_insn (gen_sub3 (t2, cop1, mask));
17133
17134 cop0 = t1;
17135 cop1 = t2;
17136 code = GT;
17137 }
17138 break;
17139
17140 case V16QImode:
17141 case V8HImode:
17142 /* Perform a parallel unsigned saturating subtraction. */
17143 x = gen_reg_rtx (mode);
17144 emit_insn (gen_rtx_SET (VOIDmode, x,
17145 gen_rtx_US_MINUS (mode, cop0, cop1)));
17146
17147 cop0 = x;
17148 cop1 = CONST0_RTX (mode);
17149 code = EQ;
17150 negate = !negate;
17151 break;
17152
17153 default:
17154 gcc_unreachable ();
17155 }
17156 }
17157 }
17158
17159 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
17160 operands[1+negate], operands[2-negate]);
17161
17162 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
17163 operands[2-negate]);
17164 return true;
17165 }
17166
17167 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
17168 true if we should do zero extension, else sign extension. HIGH_P is
17169 true if we want the N/2 high elements, else the low elements. */
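/* For example, unpacking the low half of a V8HImode vector with UNSIGNED_P
   emits punpcklwd against a zero vector, so each 16-bit element becomes the
   low half of a zero-extended 32-bit element; in the signed case the
   interleave partner is a GT-against-zero mask carrying the sign bits. */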
17170
17171 void
17172 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
17173 {
17174 enum machine_mode imode = GET_MODE (operands[1]);
17175 rtx (*unpack)(rtx, rtx, rtx);
17176 rtx se, dest;
17177
17178 switch (imode)
17179 {
17180 case V16QImode:
17181 if (high_p)
17182 unpack = gen_vec_interleave_highv16qi;
17183 else
17184 unpack = gen_vec_interleave_lowv16qi;
17185 break;
17186 case V8HImode:
17187 if (high_p)
17188 unpack = gen_vec_interleave_highv8hi;
17189 else
17190 unpack = gen_vec_interleave_lowv8hi;
17191 break;
17192 case V4SImode:
17193 if (high_p)
17194 unpack = gen_vec_interleave_highv4si;
17195 else
17196 unpack = gen_vec_interleave_lowv4si;
17197 break;
17198 default:
17199 gcc_unreachable ();
17200 }
17201
17202 dest = gen_lowpart (imode, operands[0]);
17203
17204 if (unsigned_p)
17205 se = force_reg (imode, CONST0_RTX (imode));
17206 else
17207 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
17208 operands[1], pc_rtx, pc_rtx);
17209
17210 emit_insn (unpack (dest, operands[1], se));
17211 }
17212
17213 /* This function performs the same task as ix86_expand_sse_unpack,
17214 but with SSE4.1 instructions. */
17215
17216 void
17217 ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
17218 {
17219 enum machine_mode imode = GET_MODE (operands[1]);
17220 rtx (*unpack)(rtx, rtx);
17221 rtx src, dest;
17222
17223 switch (imode)
17224 {
17225 case V16QImode:
17226 if (unsigned_p)
17227 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
17228 else
17229 unpack = gen_sse4_1_extendv8qiv8hi2;
17230 break;
17231 case V8HImode:
17232 if (unsigned_p)
17233 unpack = gen_sse4_1_zero_extendv4hiv4si2;
17234 else
17235 unpack = gen_sse4_1_extendv4hiv4si2;
17236 break;
17237 case V4SImode:
17238 if (unsigned_p)
17239 unpack = gen_sse4_1_zero_extendv2siv2di2;
17240 else
17241 unpack = gen_sse4_1_extendv2siv2di2;
17242 break;
17243 default:
17244 gcc_unreachable ();
17245 }
17246
17247 dest = operands[0];
17248 if (high_p)
17249 {
17250 /* Shift higher 8 bytes to lower 8 bytes. */
17251 src = gen_reg_rtx (imode);
17252 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, src),
17253 gen_lowpart (V1TImode, operands[1]),
17254 GEN_INT (64)));
17255 }
17256 else
17257 src = operands[1];
17258
17259 emit_insn (unpack (dest, src));
17260 }
17261
17262 /* Expand conditional increment or decrement using adc/sbb instructions.
17263 The default case using setcc followed by the conditional move can be
17264 done by generic code. */
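/* For example, "x = y + ((unsigned) a < b)" can come out as a compare of a
   and b followed by adc $0 into x: the compare sets the carry flag exactly
   when a < b, and adc adds it in.  This is illustrative; the precise
   sequence depends on ix86_expand_carry_flag_compare. */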
17265 int
17266 ix86_expand_int_addcc (rtx operands[])
17267 {
17268 enum rtx_code code = GET_CODE (operands[1]);
17269 rtx flags;
17270 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
17271 rtx compare_op;
17272 rtx val = const0_rtx;
17273 bool fpcmp = false;
17274 enum machine_mode mode;
17275
17276 ix86_compare_op0 = XEXP (operands[1], 0);
17277 ix86_compare_op1 = XEXP (operands[1], 1);
17278 if (operands[3] != const1_rtx
17279 && operands[3] != constm1_rtx)
17280 return 0;
17281 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
17282 ix86_compare_op1, &compare_op))
17283 return 0;
17284 code = GET_CODE (compare_op);
17285
17286 flags = XEXP (compare_op, 0);
17287
17288 if (GET_MODE (flags) == CCFPmode
17289 || GET_MODE (flags) == CCFPUmode)
17290 {
17291 fpcmp = true;
17292 code = ix86_fp_compare_code_to_integer (code);
17293 }
17294
17295 if (code != LTU)
17296 {
17297 val = constm1_rtx;
17298 if (fpcmp)
17299 PUT_CODE (compare_op,
17300 reverse_condition_maybe_unordered
17301 (GET_CODE (compare_op)));
17302 else
17303 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
17304 }
17305
17306 mode = GET_MODE (operands[0]);
17307
17308 /* Construct either adc or sbb insn. */
17309 if ((code == LTU) == (operands[3] == constm1_rtx))
17310 {
17311 switch (mode)
17312 {
17313 case QImode:
17314 insn = gen_subqi3_carry;
17315 break;
17316 case HImode:
17317 insn = gen_subhi3_carry;
17318 break;
17319 case SImode:
17320 insn = gen_subsi3_carry;
17321 break;
17322 case DImode:
17323 insn = gen_subdi3_carry;
17324 break;
17325 default:
17326 gcc_unreachable ();
17327 }
17328 }
17329 else
17330 {
17331 switch (mode)
17332 {
17333 case QImode:
17334 insn = gen_addqi3_carry;
17335 break;
17336 case HImode:
17337 insn = gen_addhi3_carry;
17338 break;
17339 case SImode:
17340 insn = gen_addsi3_carry;
17341 break;
17342 case DImode:
17343 insn = gen_adddi3_carry;
17344 break;
17345 default:
17346 gcc_unreachable ();
17347 }
17348 }
17349 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
17350
17351 return 1; /* DONE */
17352 }
17353
17354
17355 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
17356 works for floating point parameters and non-offsettable memories.
17357 For pushes, it returns just stack offsets; the values will be saved
17358 in the right order. At most four parts are generated. */
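/* For example, on a 32-bit target an XFmode operand is returned as three
   SImode parts and a TFmode constant as four; on a 64-bit target an XFmode
   or TFmode operand splits into a DImode part plus an SImode (XFmode) or
   DImode (TFmode) upper part. */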
17359
17360 static int
17361 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
17362 {
17363 int size;
17364
17365 if (!TARGET_64BIT)
17366 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
17367 else
17368 size = (GET_MODE_SIZE (mode) + 4) / 8;
17369
17370 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
17371 gcc_assert (size >= 2 && size <= 4);
17372
17373 /* Optimize constant pool reference to immediates. This is used by fp
17374 moves, that force all constants to memory to allow combining. */
17375 if (MEM_P (operand) && MEM_READONLY_P (operand))
17376 {
17377 rtx tmp = maybe_get_pool_constant (operand);
17378 if (tmp)
17379 operand = tmp;
17380 }
17381
17382 if (MEM_P (operand) && !offsettable_memref_p (operand))
17383 {
17384 /* The only non-offsettable memories we handle are pushes. */
17385 int ok = push_operand (operand, VOIDmode);
17386
17387 gcc_assert (ok);
17388
17389 operand = copy_rtx (operand);
17390 PUT_MODE (operand, Pmode);
17391 parts[0] = parts[1] = parts[2] = parts[3] = operand;
17392 return size;
17393 }
17394
17395 if (GET_CODE (operand) == CONST_VECTOR)
17396 {
17397 enum machine_mode imode = int_mode_for_mode (mode);
17398 /* Caution: if we looked through a constant pool memory above,
17399 the operand may actually have a different mode now. That's
17400 ok, since we want to pun this all the way back to an integer. */
17401 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
17402 gcc_assert (operand != NULL);
17403 mode = imode;
17404 }
17405
17406 if (!TARGET_64BIT)
17407 {
17408 if (mode == DImode)
17409 split_di (&operand, 1, &parts[0], &parts[1]);
17410 else
17411 {
17412 int i;
17413
17414 if (REG_P (operand))
17415 {
17416 gcc_assert (reload_completed);
17417 for (i = 0; i < size; i++)
17418 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
17419 }
17420 else if (offsettable_memref_p (operand))
17421 {
17422 operand = adjust_address (operand, SImode, 0);
17423 parts[0] = operand;
17424 for (i = 1; i < size; i++)
17425 parts[i] = adjust_address (operand, SImode, 4 * i);
17426 }
17427 else if (GET_CODE (operand) == CONST_DOUBLE)
17428 {
17429 REAL_VALUE_TYPE r;
17430 long l[4];
17431
17432 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
17433 switch (mode)
17434 {
17435 case TFmode:
17436 real_to_target (l, &r, mode);
17437 parts[3] = gen_int_mode (l[3], SImode);
17438 parts[2] = gen_int_mode (l[2], SImode);
17439 break;
17440 case XFmode:
17441 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
17442 parts[2] = gen_int_mode (l[2], SImode);
17443 break;
17444 case DFmode:
17445 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
17446 break;
17447 default:
17448 gcc_unreachable ();
17449 }
17450 parts[1] = gen_int_mode (l[1], SImode);
17451 parts[0] = gen_int_mode (l[0], SImode);
17452 }
17453 else
17454 gcc_unreachable ();
17455 }
17456 }
17457 else
17458 {
17459 if (mode == TImode)
17460 split_ti (&operand, 1, &parts[0], &parts[1]);
17461 if (mode == XFmode || mode == TFmode)
17462 {
17463 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
17464 if (REG_P (operand))
17465 {
17466 gcc_assert (reload_completed);
17467 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
17468 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
17469 }
17470 else if (offsettable_memref_p (operand))
17471 {
17472 operand = adjust_address (operand, DImode, 0);
17473 parts[0] = operand;
17474 parts[1] = adjust_address (operand, upper_mode, 8);
17475 }
17476 else if (GET_CODE (operand) == CONST_DOUBLE)
17477 {
17478 REAL_VALUE_TYPE r;
17479 long l[4];
17480
17481 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
17482 real_to_target (l, &r, mode);
17483
17484 /* Do not use shift by 32 to avoid warning on 32bit systems. */
17485 if (HOST_BITS_PER_WIDE_INT >= 64)
17486 parts[0]
17487 = gen_int_mode
17488 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
17489 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
17490 DImode);
17491 else
17492 parts[0] = immed_double_const (l[0], l[1], DImode);
17493
17494 if (upper_mode == SImode)
17495 parts[1] = gen_int_mode (l[2], SImode);
17496 else if (HOST_BITS_PER_WIDE_INT >= 64)
17497 parts[1]
17498 = gen_int_mode
17499 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
17500 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
17501 DImode);
17502 else
17503 parts[1] = immed_double_const (l[2], l[3], DImode);
17504 }
17505 else
17506 gcc_unreachable ();
17507 }
17508 }
17509
17510 return size;
17511 }
17512
17513 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
17514 All required insns are emitted directly; nothing is left for the
17515 caller. Operands 2-4 contain the input values
17516 in the correct order; operands 5-7 contain the output values. */
17517
17518 void
17519 ix86_split_long_move (rtx operands[])
17520 {
17521 rtx part[2][4];
17522 int nparts, i, j;
17523 int push = 0;
17524 int collisions = 0;
17525 enum machine_mode mode = GET_MODE (operands[0]);
17526 bool collisionparts[4];
17527
17528 /* The DFmode expanders may ask us to move a double.
17529 For a 64-bit target this is a single move. By hiding that fact
17530 here we simplify the i386.md splitters. */
17531 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
17532 {
17533 /* Optimize constant pool reference to immediates. This is used by
17534 fp moves, that force all constants to memory to allow combining. */
17535
17536 if (MEM_P (operands[1])
17537 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
17538 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
17539 operands[1] = get_pool_constant (XEXP (operands[1], 0));
17540 if (push_operand (operands[0], VOIDmode))
17541 {
17542 operands[0] = copy_rtx (operands[0]);
17543 PUT_MODE (operands[0], Pmode);
17544 }
17545 else
17546 operands[0] = gen_lowpart (DImode, operands[0]);
17547 operands[1] = gen_lowpart (DImode, operands[1]);
17548 emit_move_insn (operands[0], operands[1]);
17549 return;
17550 }
17551
17552 /* The only non-offsettable memory we handle is push. */
17553 if (push_operand (operands[0], VOIDmode))
17554 push = 1;
17555 else
17556 gcc_assert (!MEM_P (operands[0])
17557 || offsettable_memref_p (operands[0]));
17558
17559 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
17560 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
17561
17562 /* When emitting push, take care for source operands on the stack. */
17563 if (push && MEM_P (operands[1])
17564 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
17565 {
17566 rtx src_base = XEXP (part[1][nparts - 1], 0);
17567
17568 /* Compensate for the stack decrement by 4. */
17569 if (!TARGET_64BIT && nparts == 3
17570 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
17571 src_base = plus_constant (src_base, 4);
17572
17573 /* src_base refers to the stack pointer and is
17574 automatically decreased by emitted push. */
17575 for (i = 0; i < nparts; i++)
17576 part[1][i] = change_address (part[1][i],
17577 GET_MODE (part[1][i]), src_base);
17578 }
17579
17580 /* We need to do the copy in the right order in case an address register
17581 of the source overlaps the destination. */
17582 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
17583 {
17584 rtx tmp;
17585
17586 for (i = 0; i < nparts; i++)
17587 {
17588 collisionparts[i]
17589 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
17590 if (collisionparts[i])
17591 collisions++;
17592 }
17593
17594 /* Collision in the middle part can be handled by reordering. */
17595 if (collisions == 1 && nparts == 3 && collisionparts [1])
17596 {
17597 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
17598 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
17599 }
17600 else if (collisions == 1
17601 && nparts == 4
17602 && (collisionparts [1] || collisionparts [2]))
17603 {
17604 if (collisionparts [1])
17605 {
17606 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
17607 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
17608 }
17609 else
17610 {
17611 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
17612 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
17613 }
17614 }
17615
17616 /* If there are more collisions, we can't handle it by reordering.
17617 Do an lea to the last part and use only one colliding move. */
17618 else if (collisions > 1)
17619 {
17620 rtx base;
17621
17622 collisions = 1;
17623
17624 base = part[0][nparts - 1];
17625
17626 /* Handle the case when the last part isn't valid for lea.
17627 Happens in 64-bit mode storing the 12-byte XFmode. */
17628 if (GET_MODE (base) != Pmode)
17629 base = gen_rtx_REG (Pmode, REGNO (base));
17630
17631 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
17632 part[1][0] = replace_equiv_address (part[1][0], base);
17633 for (i = 1; i < nparts; i++)
17634 {
17635 tmp = plus_constant (base, UNITS_PER_WORD * i);
17636 part[1][i] = replace_equiv_address (part[1][i], tmp);
17637 }
17638 }
17639 }
17640
17641 if (push)
17642 {
17643 if (!TARGET_64BIT)
17644 {
17645 if (nparts == 3)
17646 {
17647 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
17648 emit_insn (gen_addsi3 (stack_pointer_rtx,
17649 stack_pointer_rtx, GEN_INT (-4)));
17650 emit_move_insn (part[0][2], part[1][2]);
17651 }
17652 else if (nparts == 4)
17653 {
17654 emit_move_insn (part[0][3], part[1][3]);
17655 emit_move_insn (part[0][2], part[1][2]);
17656 }
17657 }
17658 else
17659 {
17660 /* In 64-bit mode there is no 32-bit push available. If the part is
17661 a register, that is fine - we simply use the larger counterpart.
17662 We also retype memory - this comes from an attempt to avoid a REX
17663 prefix when moving the second half of a TFmode value. */
17664 if (GET_MODE (part[1][1]) == SImode)
17665 {
17666 switch (GET_CODE (part[1][1]))
17667 {
17668 case MEM:
17669 part[1][1] = adjust_address (part[1][1], DImode, 0);
17670 break;
17671
17672 case REG:
17673 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
17674 break;
17675
17676 default:
17677 gcc_unreachable ();
17678 }
17679
17680 if (GET_MODE (part[1][0]) == SImode)
17681 part[1][0] = part[1][1];
17682 }
17683 }
17684 emit_move_insn (part[0][1], part[1][1]);
17685 emit_move_insn (part[0][0], part[1][0]);
17686 return;
17687 }
17688
17689 /* Choose correct order to not overwrite the source before it is copied. */
17690 if ((REG_P (part[0][0])
17691 && REG_P (part[1][1])
17692 && (REGNO (part[0][0]) == REGNO (part[1][1])
17693 || (nparts == 3
17694 && REGNO (part[0][0]) == REGNO (part[1][2]))
17695 || (nparts == 4
17696 && REGNO (part[0][0]) == REGNO (part[1][3]))))
17697 || (collisions > 0
17698 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
17699 {
17700 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
17701 {
17702 operands[2 + i] = part[0][j];
17703 operands[6 + i] = part[1][j];
17704 }
17705 }
17706 else
17707 {
17708 for (i = 0; i < nparts; i++)
17709 {
17710 operands[2 + i] = part[0][i];
17711 operands[6 + i] = part[1][i];
17712 }
17713 }
17714
17715 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
17716 if (optimize_insn_for_size_p ())
17717 {
17718 for (j = 0; j < nparts - 1; j++)
17719 if (CONST_INT_P (operands[6 + j])
17720 && operands[6 + j] != const0_rtx
17721 && REG_P (operands[2 + j]))
17722 for (i = j; i < nparts - 1; i++)
17723 if (CONST_INT_P (operands[7 + i])
17724 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
17725 operands[7 + i] = operands[2 + j];
17726 }
17727
17728 for (i = 0; i < nparts; i++)
17729 emit_move_insn (operands[2 + i], operands[6 + i]);
17730
17731 return;
17732 }
17733
17734 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
17735 left shift by a constant, either using a single shift or
17736 a sequence of add instructions. */
17737
17738 static void
17739 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
17740 {
17741 if (count == 1)
17742 {
17743 emit_insn ((mode == DImode
17744 ? gen_addsi3
17745 : gen_adddi3) (operand, operand, operand));
17746 }
17747 else if (!optimize_insn_for_size_p ()
17748 && count * ix86_cost->add <= ix86_cost->shift_const)
17749 {
17750 int i;
17751 for (i=0; i<count; i++)
17752 {
17753 emit_insn ((mode == DImode
17754 ? gen_addsi3
17755 : gen_adddi3) (operand, operand, operand));
17756 }
17757 }
17758 else
17759 emit_insn ((mode == DImode
17760 ? gen_ashlsi3
17761 : gen_ashldi3) (operand, operand, GEN_INT (count)));
17762 }
17763
17764 void
17765 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
17766 {
17767 rtx low[2], high[2];
17768 int count;
17769 const int single_width = mode == DImode ? 32 : 64;
17770
17771 if (CONST_INT_P (operands[2]))
17772 {
17773 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
17774 count = INTVAL (operands[2]) & (single_width * 2 - 1);
17775
17776 if (count >= single_width)
17777 {
17778 emit_move_insn (high[0], low[1]);
17779 emit_move_insn (low[0], const0_rtx);
17780
17781 if (count > single_width)
17782 ix86_expand_ashl_const (high[0], count - single_width, mode);
17783 }
17784 else
17785 {
17786 if (!rtx_equal_p (operands[0], operands[1]))
17787 emit_move_insn (operands[0], operands[1]);
17788 emit_insn ((mode == DImode
17789 ? gen_x86_shld
17790 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
17791 ix86_expand_ashl_const (low[0], count, mode);
17792 }
17793 return;
17794 }
17795
17796 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
17797
17798 if (operands[1] == const1_rtx)
17799 {
17800 /* Assuming we've chosen QImode-capable registers, 1 << N
17801 can be done with two 32/64-bit shifts, no branches, no cmoves. */
17802 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
17803 {
17804 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
17805
17806 ix86_expand_clear (low[0]);
17807 ix86_expand_clear (high[0]);
17808 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
17809
17810 d = gen_lowpart (QImode, low[0]);
17811 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
17812 s = gen_rtx_EQ (QImode, flags, const0_rtx);
17813 emit_insn (gen_rtx_SET (VOIDmode, d, s));
17814
17815 d = gen_lowpart (QImode, high[0]);
17816 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
17817 s = gen_rtx_NE (QImode, flags, const0_rtx);
17818 emit_insn (gen_rtx_SET (VOIDmode, d, s));
17819 }
17820
17821 /* Otherwise, we can get the same results by manually performing
17822 a bit extract operation on bit 5/6, and then performing the two
17823 shifts. The two methods of getting 0/1 into low/high are exactly
17824 the same size. Avoiding the shift in the bit extract case helps
17825 pentium4 a bit; no one else seems to care much either way. */
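/* Concretely, for a DImode "1 << n" on ia32 this copies bit 5 of the count
   into HIGH (0 if n < 32, 1 otherwise), XORs it into LOW to get the opposite
   bit, and then shifts both halves by the count; the hardware shift masks
   the count to 0..31, so exactly one half ends up holding 1 << (n & 31). */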
17826 else
17827 {
17828 rtx x;
17829
17830 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
17831 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
17832 else
17833 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
17834 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
17835
17836 emit_insn ((mode == DImode
17837 ? gen_lshrsi3
17838 : gen_lshrdi3) (high[0], high[0],
17839 GEN_INT (mode == DImode ? 5 : 6)));
17840 emit_insn ((mode == DImode
17841 ? gen_andsi3
17842 : gen_anddi3) (high[0], high[0], const1_rtx));
17843 emit_move_insn (low[0], high[0]);
17844 emit_insn ((mode == DImode
17845 ? gen_xorsi3
17846 : gen_xordi3) (low[0], low[0], const1_rtx));
17847 }
17848
17849 emit_insn ((mode == DImode
17850 ? gen_ashlsi3
17851 : gen_ashldi3) (low[0], low[0], operands[2]));
17852 emit_insn ((mode == DImode
17853 ? gen_ashlsi3
17854 : gen_ashldi3) (high[0], high[0], operands[2]));
17855 return;
17856 }
17857
17858 if (operands[1] == constm1_rtx)
17859 {
17860 /* For -1 << N, we can avoid the shld instruction, because we
17861 know that we're shifting 0...31/63 ones into a -1. */
17862 emit_move_insn (low[0], constm1_rtx);
17863 if (optimize_insn_for_size_p ())
17864 emit_move_insn (high[0], low[0]);
17865 else
17866 emit_move_insn (high[0], constm1_rtx);
17867 }
17868 else
17869 {
17870 if (!rtx_equal_p (operands[0], operands[1]))
17871 emit_move_insn (operands[0], operands[1]);
17872
17873 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
17874 emit_insn ((mode == DImode
17875 ? gen_x86_shld
17876 : gen_x86_64_shld) (high[0], low[0], operands[2]));
17877 }
17878
17879 emit_insn ((mode == DImode
17880 ? gen_ashlsi3
17881 : gen_ashldi3) (low[0], low[0], operands[2]));
17882
17883 if (TARGET_CMOVE && scratch)
17884 {
17885 ix86_expand_clear (scratch);
17886 emit_insn ((mode == DImode
17887 ? gen_x86_shiftsi_adj_1
17888 : gen_x86_shiftdi_adj_1) (high[0], low[0], operands[2],
17889 scratch));
17890 }
17891 else
17892 emit_insn ((mode == DImode
17893 ? gen_x86_shiftsi_adj_2
17894 : gen_x86_shiftdi_adj_2) (high[0], low[0], operands[2]));
17895 }
17896
17897 void
17898 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
17899 {
17900 rtx low[2], high[2];
17901 int count;
17902 const int single_width = mode == DImode ? 32 : 64;
17903
17904 if (CONST_INT_P (operands[2]))
17905 {
17906 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
17907 count = INTVAL (operands[2]) & (single_width * 2 - 1);
17908
17909 if (count == single_width * 2 - 1)
17910 {
17911 emit_move_insn (high[0], high[1]);
17912 emit_insn ((mode == DImode
17913 ? gen_ashrsi3
17914 : gen_ashrdi3) (high[0], high[0],
17915 GEN_INT (single_width - 1)));
17916 emit_move_insn (low[0], high[0]);
17917
17918 }
17919 else if (count >= single_width)
17920 {
17921 emit_move_insn (low[0], high[1]);
17922 emit_move_insn (high[0], low[0]);
17923 emit_insn ((mode == DImode
17924 ? gen_ashrsi3
17925 : gen_ashrdi3) (high[0], high[0],
17926 GEN_INT (single_width - 1)));
17927 if (count > single_width)
17928 emit_insn ((mode == DImode
17929 ? gen_ashrsi3
17930 : gen_ashrdi3) (low[0], low[0],
17931 GEN_INT (count - single_width)));
17932 }
17933 else
17934 {
17935 if (!rtx_equal_p (operands[0], operands[1]))
17936 emit_move_insn (operands[0], operands[1]);
17937 emit_insn ((mode == DImode
17938 ? gen_x86_shrd
17939 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
17940 emit_insn ((mode == DImode
17941 ? gen_ashrsi3
17942 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
17943 }
17944 }
17945 else
17946 {
17947 if (!rtx_equal_p (operands[0], operands[1]))
17948 emit_move_insn (operands[0], operands[1]);
17949
17950 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
17951
17952 emit_insn ((mode == DImode
17953 ? gen_x86_shrd
17954 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
17955 emit_insn ((mode == DImode
17956 ? gen_ashrsi3
17957 : gen_ashrdi3) (high[0], high[0], operands[2]));
17958
17959 if (TARGET_CMOVE && scratch)
17960 {
17961 emit_move_insn (scratch, high[0]);
17962 emit_insn ((mode == DImode
17963 ? gen_ashrsi3
17964 : gen_ashrdi3) (scratch, scratch,
17965 GEN_INT (single_width - 1)));
17966 emit_insn ((mode == DImode
17967 ? gen_x86_shiftsi_adj_1
17968 : gen_x86_shiftdi_adj_1) (low[0], high[0], operands[2],
17969 scratch));
17970 }
17971 else
17972 emit_insn ((mode == DImode
17973 ? gen_x86_shiftsi_adj_3
17974 : gen_x86_shiftdi_adj_3) (low[0], high[0], operands[2]));
17975 }
17976 }
17977
17978 void
17979 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
17980 {
17981 rtx low[2], high[2];
17982 int count;
17983 const int single_width = mode == DImode ? 32 : 64;
17984
17985 if (CONST_INT_P (operands[2]))
17986 {
17987 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
17988 count = INTVAL (operands[2]) & (single_width * 2 - 1);
17989
17990 if (count >= single_width)
17991 {
17992 emit_move_insn (low[0], high[1]);
17993 ix86_expand_clear (high[0]);
17994
17995 if (count > single_width)
17996 emit_insn ((mode == DImode
17997 ? gen_lshrsi3
17998 : gen_lshrdi3) (low[0], low[0],
17999 GEN_INT (count - single_width)));
18000 }
18001 else
18002 {
18003 if (!rtx_equal_p (operands[0], operands[1]))
18004 emit_move_insn (operands[0], operands[1]);
18005 emit_insn ((mode == DImode
18006 ? gen_x86_shrd
18007 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
18008 emit_insn ((mode == DImode
18009 ? gen_lshrsi3
18010 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
18011 }
18012 }
18013 else
18014 {
18015 if (!rtx_equal_p (operands[0], operands[1]))
18016 emit_move_insn (operands[0], operands[1]);
18017
18018 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
18019
18020 emit_insn ((mode == DImode
18021 ? gen_x86_shrd
18022 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
18023 emit_insn ((mode == DImode
18024 ? gen_lshrsi3
18025 : gen_lshrdi3) (high[0], high[0], operands[2]));
18026
18027 /* Heh. By reversing the arguments, we can reuse this pattern. */
18028 if (TARGET_CMOVE && scratch)
18029 {
18030 ix86_expand_clear (scratch);
18031 emit_insn ((mode == DImode
18032 ? gen_x86_shiftsi_adj_1
18033 : gen_x86_shiftdi_adj_1) (low[0], high[0], operands[2],
18034 scratch));
18035 }
18036 else
18037 emit_insn ((mode == DImode
18038 ? gen_x86_shiftsi_adj_2
18039 : gen_x86_shiftdi_adj_2) (low[0], high[0], operands[2]));
18040 }
18041 }
18042
18043 /* Predict the just-emitted jump instruction to be taken with probability PROB. */
18044 static void
18045 predict_jump (int prob)
18046 {
18047 rtx insn = get_last_insn ();
18048 gcc_assert (JUMP_P (insn));
18049 add_reg_note (insn, REG_BR_PROB, GEN_INT (prob));
18050 }
18051
18052 /* Helper function for the string operations below. Test whether VARIABLE
18053 is aligned to VALUE bytes. If so, jump to the returned label. */
18054 static rtx
18055 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
18056 {
18057 rtx label = gen_label_rtx ();
18058 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
18059 if (GET_MODE (variable) == DImode)
18060 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
18061 else
18062 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
18063 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
18064 1, label);
18065 if (epilogue)
18066 predict_jump (REG_BR_PROB_BASE * 50 / 100);
18067 else
18068 predict_jump (REG_BR_PROB_BASE * 90 / 100);
18069 return label;
18070 }
18071
18072 /* Adjust COUNTER by the VALUE. */
18073 static void
18074 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
18075 {
18076 if (GET_MODE (countreg) == DImode)
18077 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
18078 else
18079 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
18080 }
18081
18082 /* Zero extend possibly SImode EXP to Pmode register. */
18083 rtx
18084 ix86_zero_extend_to_Pmode (rtx exp)
18085 {
18086 rtx r;
18087 if (GET_MODE (exp) == VOIDmode)
18088 return force_reg (Pmode, exp);
18089 if (GET_MODE (exp) == Pmode)
18090 return copy_to_mode_reg (Pmode, exp);
18091 r = gen_reg_rtx (Pmode);
18092 emit_insn (gen_zero_extendsidi2 (r, exp));
18093 return r;
18094 }
18095
18096 /* Divide COUNTREG by SCALE. */
18097 static rtx
18098 scale_counter (rtx countreg, int scale)
18099 {
18100 rtx sc;
18101
18102 if (scale == 1)
18103 return countreg;
18104 if (CONST_INT_P (countreg))
18105 return GEN_INT (INTVAL (countreg) / scale);
18106 gcc_assert (REG_P (countreg));
18107
18108 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
18109 GEN_INT (exact_log2 (scale)),
18110 NULL, 1, OPTAB_DIRECT);
18111 return sc;
18112 }
18113
18114 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
18115 DImode for constant loop counts. */
18116
18117 static enum machine_mode
18118 counter_mode (rtx count_exp)
18119 {
18120 if (GET_MODE (count_exp) != VOIDmode)
18121 return GET_MODE (count_exp);
18122 if (!CONST_INT_P (count_exp))
18123 return Pmode;
18124 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
18125 return DImode;
18126 return SImode;
18127 }
18128
18129 /* When SRCPTR is non-NULL, output a simple loop that moves memory
18130 pointed to by SRCPTR to DESTPTR in chunks of MODE, unrolled UNROLL times;
18131 the overall size is COUNT, specified in bytes. When SRCPTR is NULL, output
18132 an equivalent loop that sets memory to VALUE (assumed to be in MODE).
18133
18134 The size is rounded down to a whole number of chunks moved at once.
18135 SRCMEM and DESTMEM provide MEM rtxes to feed proper aliasing info. */
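/* For example, a copy with MODE = DImode and UNROLL = 4 moves 32 bytes per
   iteration; COUNT is masked down to a multiple of 32 and any remaining tail
   bytes are left for the epilogue code emitted elsewhere. */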
18136
18137
18138 static void
18139 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
18140 rtx destptr, rtx srcptr, rtx value,
18141 rtx count, enum machine_mode mode, int unroll,
18142 int expected_size)
18143 {
18144 rtx out_label, top_label, iter, tmp;
18145 enum machine_mode iter_mode = counter_mode (count);
18146 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
18147 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
18148 rtx size;
18149 rtx x_addr;
18150 rtx y_addr;
18151 int i;
18152
18153 top_label = gen_label_rtx ();
18154 out_label = gen_label_rtx ();
18155 iter = gen_reg_rtx (iter_mode);
18156
18157 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
18158 NULL, 1, OPTAB_DIRECT);
18159 /* Those two should combine. */
18160 if (piece_size == const1_rtx)
18161 {
18162 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
18163 true, out_label);
18164 predict_jump (REG_BR_PROB_BASE * 10 / 100);
18165 }
18166 emit_move_insn (iter, const0_rtx);
18167
18168 emit_label (top_label);
18169
18170 tmp = convert_modes (Pmode, iter_mode, iter, true);
18171 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
18172 destmem = change_address (destmem, mode, x_addr);
18173
18174 if (srcmem)
18175 {
18176 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
18177 srcmem = change_address (srcmem, mode, y_addr);
18178
18179 /* When unrolling for chips that reorder memory reads and writes,
18180 we can save registers by using a single temporary.
18181 Also, using 4 temporaries is overkill in 32-bit mode. */
18182 if (!TARGET_64BIT && 0)
18183 {
18184 for (i = 0; i < unroll; i++)
18185 {
18186 if (i)
18187 {
18188 destmem =
18189 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
18190 srcmem =
18191 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
18192 }
18193 emit_move_insn (destmem, srcmem);
18194 }
18195 }
18196 else
18197 {
18198 rtx tmpreg[4];
18199 gcc_assert (unroll <= 4);
18200 for (i = 0; i < unroll; i++)
18201 {
18202 tmpreg[i] = gen_reg_rtx (mode);
18203 if (i)
18204 {
18205 srcmem =
18206 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
18207 }
18208 emit_move_insn (tmpreg[i], srcmem);
18209 }
18210 for (i = 0; i < unroll; i++)
18211 {
18212 if (i)
18213 {
18214 destmem =
18215 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
18216 }
18217 emit_move_insn (destmem, tmpreg[i]);
18218 }
18219 }
18220 }
18221 else
18222 for (i = 0; i < unroll; i++)
18223 {
18224 if (i)
18225 destmem =
18226 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
18227 emit_move_insn (destmem, value);
18228 }
18229
18230 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
18231 true, OPTAB_LIB_WIDEN);
18232 if (tmp != iter)
18233 emit_move_insn (iter, tmp);
18234
18235 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
18236 true, top_label);
18237 if (expected_size != -1)
18238 {
18239 expected_size /= GET_MODE_SIZE (mode) * unroll;
18240 if (expected_size == 0)
18241 predict_jump (0);
18242 else if (expected_size > REG_BR_PROB_BASE)
18243 predict_jump (REG_BR_PROB_BASE - 1);
18244 else
18245 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
18246 }
18247 else
18248 predict_jump (REG_BR_PROB_BASE * 80 / 100);
18249 iter = ix86_zero_extend_to_Pmode (iter);
18250 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
18251 true, OPTAB_LIB_WIDEN);
18252 if (tmp != destptr)
18253 emit_move_insn (destptr, tmp);
18254 if (srcptr)
18255 {
18256 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
18257 true, OPTAB_LIB_WIDEN);
18258 if (tmp != srcptr)
18259 emit_move_insn (srcptr, tmp);
18260 }
18261 emit_label (out_label);
18262 }
18263
18264 /* Output "rep; mov" instruction.
18265 Arguments have same meaning as for previous function */
18266 static void
18267 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
18268 rtx destptr, rtx srcptr,
18269 rtx count,
18270 enum machine_mode mode)
18271 {
18272 rtx destexp;
18273 rtx srcexp;
18274 rtx countreg;
18275
18276 /* If the size is known and a multiple of 4, use SImode chunks; rep movsl is shorter than rep movsb. */
18277 if (mode == QImode && CONST_INT_P (count)
18278 && !(INTVAL (count) & 3))
18279 mode = SImode;
18280
18281 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
18282 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
18283 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
18284 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
18285 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
18286 if (mode != QImode)
18287 {
18288 destexp = gen_rtx_ASHIFT (Pmode, countreg,
18289 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
18290 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
18291 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
18292 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
18293 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
18294 }
18295 else
18296 {
18297 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
18298 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
18299 }
18300 if (CONST_INT_P (count))
18301 {
18302 count = GEN_INT (INTVAL (count)
18303 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
18304 destmem = shallow_copy_rtx (destmem);
18305 srcmem = shallow_copy_rtx (srcmem);
18306 set_mem_size (destmem, count);
18307 set_mem_size (srcmem, count);
18308 }
18309 else
18310 {
18311 if (MEM_SIZE (destmem))
18312 set_mem_size (destmem, NULL_RTX);
18313 if (MEM_SIZE (srcmem))
18314 set_mem_size (srcmem, NULL_RTX);
18315 }
18316 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
18317 destexp, srcexp));
18318 }
18319
18320 /* Output "rep; stos" instruction.
18321 Arguments have same meaning as for previous function */
18322 static void
18323 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
18324 rtx count, enum machine_mode mode,
18325 rtx orig_value)
18326 {
18327 rtx destexp;
18328 rtx countreg;
18329
18330 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
18331 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
18332 value = force_reg (mode, gen_lowpart (mode, value));
18333 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
18334 if (mode != QImode)
18335 {
18336 destexp = gen_rtx_ASHIFT (Pmode, countreg,
18337 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
18338 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
18339 }
18340 else
18341 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
18342 if (orig_value == const0_rtx && CONST_INT_P (count))
18343 {
18344 count = GEN_INT (INTVAL (count)
18345 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
18346 destmem = shallow_copy_rtx (destmem);
18347 set_mem_size (destmem, count);
18348 }
18349 else if (MEM_SIZE (destmem))
18350 set_mem_size (destmem, NULL_RTX);
18351 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
18352 }
18353
18354 static void
18355 emit_strmov (rtx destmem, rtx srcmem,
18356 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
18357 {
18358 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
18359 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
18360 emit_insn (gen_strmov (destptr, dest, srcptr, src));
18361 }
18362
18363 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
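/* Illustrative example: a constant COUNT of 13 with MAX_SIZE 16 on a
   64-bit target emits a DImode move at offset 0, an SImode move at
   offset 8 and a QImode move at offset 12, with no branches.  */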
18364 static void
18365 expand_movmem_epilogue (rtx destmem, rtx srcmem,
18366 rtx destptr, rtx srcptr, rtx count, int max_size)
18367 {
18368 rtx src, dest;
18369 if (CONST_INT_P (count))
18370 {
18371 HOST_WIDE_INT countval = INTVAL (count);
18372 int offset = 0;
18373
18374 if ((countval & 0x10) && max_size > 16)
18375 {
18376 if (TARGET_64BIT)
18377 {
18378 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
18379 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
18380 }
18381 else
18382 gcc_unreachable ();
18383 offset += 16;
18384 }
18385 if ((countval & 0x08) && max_size > 8)
18386 {
18387 if (TARGET_64BIT)
18388 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
18389 else
18390 {
18391 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
18392 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
18393 }
18394 offset += 8;
18395 }
18396 if ((countval & 0x04) && max_size > 4)
18397 {
18398 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
18399 offset += 4;
18400 }
18401 if ((countval & 0x02) && max_size > 2)
18402 {
18403 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
18404 offset += 2;
18405 }
18406 if ((countval & 0x01) && max_size > 1)
18407 {
18408 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
18409 offset += 1;
18410 }
18411 return;
18412 }
18413 if (max_size > 8)
18414 {
18415 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
18416 count, 1, OPTAB_DIRECT);
18417 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
18418 count, QImode, 1, 4);
18419 return;
18420 }
18421
18422 /* When single string operations are available (TARGET_SINGLE_STRINGOP), we
18423 can cheaply advance the dest and src pointers. Otherwise we save code
18424 size by maintaining an offset register (zero is readily available from the
18425 preceding rep operation) and using x86 addressing modes. */
18426 if (TARGET_SINGLE_STRINGOP)
18427 {
18428 if (max_size > 4)
18429 {
18430 rtx label = ix86_expand_aligntest (count, 4, true);
18431 src = change_address (srcmem, SImode, srcptr);
18432 dest = change_address (destmem, SImode, destptr);
18433 emit_insn (gen_strmov (destptr, dest, srcptr, src));
18434 emit_label (label);
18435 LABEL_NUSES (label) = 1;
18436 }
18437 if (max_size > 2)
18438 {
18439 rtx label = ix86_expand_aligntest (count, 2, true);
18440 src = change_address (srcmem, HImode, srcptr);
18441 dest = change_address (destmem, HImode, destptr);
18442 emit_insn (gen_strmov (destptr, dest, srcptr, src));
18443 emit_label (label);
18444 LABEL_NUSES (label) = 1;
18445 }
18446 if (max_size > 1)
18447 {
18448 rtx label = ix86_expand_aligntest (count, 1, true);
18449 src = change_address (srcmem, QImode, srcptr);
18450 dest = change_address (destmem, QImode, destptr);
18451 emit_insn (gen_strmov (destptr, dest, srcptr, src));
18452 emit_label (label);
18453 LABEL_NUSES (label) = 1;
18454 }
18455 }
18456 else
18457 {
18458 rtx offset = force_reg (Pmode, const0_rtx);
18459 rtx tmp;
18460
18461 if (max_size > 4)
18462 {
18463 rtx label = ix86_expand_aligntest (count, 4, true);
18464 src = change_address (srcmem, SImode, srcptr);
18465 dest = change_address (destmem, SImode, destptr);
18466 emit_move_insn (dest, src);
18467 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
18468 true, OPTAB_LIB_WIDEN);
18469 if (tmp != offset)
18470 emit_move_insn (offset, tmp);
18471 emit_label (label);
18472 LABEL_NUSES (label) = 1;
18473 }
18474 if (max_size > 2)
18475 {
18476 rtx label = ix86_expand_aligntest (count, 2, true);
18477 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
18478 src = change_address (srcmem, HImode, tmp);
18479 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
18480 dest = change_address (destmem, HImode, tmp);
18481 emit_move_insn (dest, src);
18482 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
18483 true, OPTAB_LIB_WIDEN);
18484 if (tmp != offset)
18485 emit_move_insn (offset, tmp);
18486 emit_label (label);
18487 LABEL_NUSES (label) = 1;
18488 }
18489 if (max_size > 1)
18490 {
18491 rtx label = ix86_expand_aligntest (count, 1, true);
18492 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
18493 src = change_address (srcmem, QImode, tmp);
18494 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
18495 dest = change_address (destmem, QImode, tmp);
18496 emit_move_insn (dest, src);
18497 emit_label (label);
18498 LABEL_NUSES (label) = 1;
18499 }
18500 }
18501 }
18502
18503 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
18504 static void
18505 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
18506 rtx count, int max_size)
18507 {
18508 count =
18509 expand_simple_binop (counter_mode (count), AND, count,
18510 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
18511 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
18512 gen_lowpart (QImode, value), count, QImode,
18513 1, max_size / 2);
18514 }
18515
18516 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
18517 static void
18518 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
18519 {
18520 rtx dest;
18521
18522 if (CONST_INT_P (count))
18523 {
18524 HOST_WIDE_INT countval = INTVAL (count);
18525 int offset = 0;
18526
18527 if ((countval & 0x10) && max_size > 16)
18528 {
18529 if (TARGET_64BIT)
18530 {
18531 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
18532 emit_insn (gen_strset (destptr, dest, value));
18533 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
18534 emit_insn (gen_strset (destptr, dest, value));
18535 }
18536 else
18537 gcc_unreachable ();
18538 offset += 16;
18539 }
18540 if ((countval & 0x08) && max_size > 8)
18541 {
18542 if (TARGET_64BIT)
18543 {
18544 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
18545 emit_insn (gen_strset (destptr, dest, value));
18546 }
18547 else
18548 {
18549 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
18550 emit_insn (gen_strset (destptr, dest, value));
18551 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
18552 emit_insn (gen_strset (destptr, dest, value));
18553 }
18554 offset += 8;
18555 }
18556 if ((countval & 0x04) && max_size > 4)
18557 {
18558 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
18559 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
18560 offset += 4;
18561 }
18562 if ((countval & 0x02) && max_size > 2)
18563 {
18564 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
18565 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
18566 offset += 2;
18567 }
18568 if ((countval & 0x01) && max_size > 1)
18569 {
18570 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
18571 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
18572 offset += 1;
18573 }
18574 return;
18575 }
18576 if (max_size > 32)
18577 {
18578 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
18579 return;
18580 }
18581 if (max_size > 16)
18582 {
18583 rtx label = ix86_expand_aligntest (count, 16, true);
18584 if (TARGET_64BIT)
18585 {
18586 dest = change_address (destmem, DImode, destptr);
18587 emit_insn (gen_strset (destptr, dest, value));
18588 emit_insn (gen_strset (destptr, dest, value));
18589 }
18590 else
18591 {
18592 dest = change_address (destmem, SImode, destptr);
18593 emit_insn (gen_strset (destptr, dest, value));
18594 emit_insn (gen_strset (destptr, dest, value));
18595 emit_insn (gen_strset (destptr, dest, value));
18596 emit_insn (gen_strset (destptr, dest, value));
18597 }
18598 emit_label (label);
18599 LABEL_NUSES (label) = 1;
18600 }
18601 if (max_size > 8)
18602 {
18603 rtx label = ix86_expand_aligntest (count, 8, true);
18604 if (TARGET_64BIT)
18605 {
18606 dest = change_address (destmem, DImode, destptr);
18607 emit_insn (gen_strset (destptr, dest, value));
18608 }
18609 else
18610 {
18611 dest = change_address (destmem, SImode, destptr);
18612 emit_insn (gen_strset (destptr, dest, value));
18613 emit_insn (gen_strset (destptr, dest, value));
18614 }
18615 emit_label (label);
18616 LABEL_NUSES (label) = 1;
18617 }
18618 if (max_size > 4)
18619 {
18620 rtx label = ix86_expand_aligntest (count, 4, true);
18621 dest = change_address (destmem, SImode, destptr);
18622 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
18623 emit_label (label);
18624 LABEL_NUSES (label) = 1;
18625 }
18626 if (max_size > 2)
18627 {
18628 rtx label = ix86_expand_aligntest (count, 2, true);
18629 dest = change_address (destmem, HImode, destptr);
18630 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
18631 emit_label (label);
18632 LABEL_NUSES (label) = 1;
18633 }
18634 if (max_size > 1)
18635 {
18636 rtx label = ix86_expand_aligntest (count, 1, true);
18637 dest = change_address (destmem, QImode, destptr);
18638 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
18639 emit_label (label);
18640 LABEL_NUSES (label) = 1;
18641 }
18642 }
18643
18644 /* Copy enough from SRC to DEST to align DEST, known to be aligned by ALIGN,
18645 to DESIRED_ALIGNMENT. */
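/* Illustrative example: with ALIGN == 1 and DESIRED_ALIGNMENT == 8 this emits
   up to one QImode, one HImode and one SImode copy, each guarded by a runtime
   test of the low bits of DESTPTR, adjusting COUNT as it goes.  */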
18646 static void
18647 expand_movmem_prologue (rtx destmem, rtx srcmem,
18648 rtx destptr, rtx srcptr, rtx count,
18649 int align, int desired_alignment)
18650 {
18651 if (align <= 1 && desired_alignment > 1)
18652 {
18653 rtx label = ix86_expand_aligntest (destptr, 1, false);
18654 srcmem = change_address (srcmem, QImode, srcptr);
18655 destmem = change_address (destmem, QImode, destptr);
18656 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
18657 ix86_adjust_counter (count, 1);
18658 emit_label (label);
18659 LABEL_NUSES (label) = 1;
18660 }
18661 if (align <= 2 && desired_alignment > 2)
18662 {
18663 rtx label = ix86_expand_aligntest (destptr, 2, false);
18664 srcmem = change_address (srcmem, HImode, srcptr);
18665 destmem = change_address (destmem, HImode, destptr);
18666 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
18667 ix86_adjust_counter (count, 2);
18668 emit_label (label);
18669 LABEL_NUSES (label) = 1;
18670 }
18671 if (align <= 4 && desired_alignment > 4)
18672 {
18673 rtx label = ix86_expand_aligntest (destptr, 4, false);
18674 srcmem = change_address (srcmem, SImode, srcptr);
18675 destmem = change_address (destmem, SImode, destptr);
18676 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
18677 ix86_adjust_counter (count, 4);
18678 emit_label (label);
18679 LABEL_NUSES (label) = 1;
18680 }
18681 gcc_assert (desired_alignment <= 8);
18682 }
18683
18684 /* Copy enough from SRC to DST to align DST to DESIRED_ALIGN.
18685 ALIGN_BYTES is how many bytes need to be copied. */
18686 static rtx
18687 expand_constant_movmem_prologue (rtx dst, rtx *srcp, rtx destreg, rtx srcreg,
18688 int desired_align, int align_bytes)
18689 {
18690 rtx src = *srcp;
18691 rtx src_size, dst_size;
18692 int off = 0;
18693 int src_align_bytes = get_mem_align_offset (src, desired_align * BITS_PER_UNIT);
18694 if (src_align_bytes >= 0)
18695 src_align_bytes = desired_align - src_align_bytes;
18696 src_size = MEM_SIZE (src);
18697 dst_size = MEM_SIZE (dst);
18698 if (align_bytes & 1)
18699 {
18700 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
18701 src = adjust_automodify_address_nv (src, QImode, srcreg, 0);
18702 off = 1;
18703 emit_insn (gen_strmov (destreg, dst, srcreg, src));
18704 }
18705 if (align_bytes & 2)
18706 {
18707 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
18708 src = adjust_automodify_address_nv (src, HImode, srcreg, off);
18709 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
18710 set_mem_align (dst, 2 * BITS_PER_UNIT);
18711 if (src_align_bytes >= 0
18712 && (src_align_bytes & 1) == (align_bytes & 1)
18713 && MEM_ALIGN (src) < 2 * BITS_PER_UNIT)
18714 set_mem_align (src, 2 * BITS_PER_UNIT);
18715 off = 2;
18716 emit_insn (gen_strmov (destreg, dst, srcreg, src));
18717 }
18718 if (align_bytes & 4)
18719 {
18720 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
18721 src = adjust_automodify_address_nv (src, SImode, srcreg, off);
18722 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
18723 set_mem_align (dst, 4 * BITS_PER_UNIT);
18724 if (src_align_bytes >= 0)
18725 {
18726 unsigned int src_align = 0;
18727 if ((src_align_bytes & 3) == (align_bytes & 3))
18728 src_align = 4;
18729 else if ((src_align_bytes & 1) == (align_bytes & 1))
18730 src_align = 2;
18731 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
18732 set_mem_align (src, src_align * BITS_PER_UNIT);
18733 }
18734 off = 4;
18735 emit_insn (gen_strmov (destreg, dst, srcreg, src));
18736 }
18737 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
18738 src = adjust_automodify_address_nv (src, BLKmode, srcreg, off);
18739 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
18740 set_mem_align (dst, desired_align * BITS_PER_UNIT);
18741 if (src_align_bytes >= 0)
18742 {
18743 unsigned int src_align = 0;
18744 if ((src_align_bytes & 7) == (align_bytes & 7))
18745 src_align = 8;
18746 else if ((src_align_bytes & 3) == (align_bytes & 3))
18747 src_align = 4;
18748 else if ((src_align_bytes & 1) == (align_bytes & 1))
18749 src_align = 2;
18750 if (src_align > (unsigned int) desired_align)
18751 src_align = desired_align;
18752 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
18753 set_mem_align (src, src_align * BITS_PER_UNIT);
18754 }
18755 if (dst_size)
18756 set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
18757 if (src_size)
18758 set_mem_size (src, GEN_INT (INTVAL (src_size) - align_bytes));
18759 *srcp = src;
18760 return dst;
18761 }
18762
18763 /* Set enough of DEST to align DEST, known to be aligned by ALIGN,
18764 to DESIRED_ALIGNMENT. */
18765 static void
18766 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
18767 int align, int desired_alignment)
18768 {
18769 if (align <= 1 && desired_alignment > 1)
18770 {
18771 rtx label = ix86_expand_aligntest (destptr, 1, false);
18772 destmem = change_address (destmem, QImode, destptr);
18773 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
18774 ix86_adjust_counter (count, 1);
18775 emit_label (label);
18776 LABEL_NUSES (label) = 1;
18777 }
18778 if (align <= 2 && desired_alignment > 2)
18779 {
18780 rtx label = ix86_expand_aligntest (destptr, 2, false);
18781 destmem = change_address (destmem, HImode, destptr);
18782 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
18783 ix86_adjust_counter (count, 2);
18784 emit_label (label);
18785 LABEL_NUSES (label) = 1;
18786 }
18787 if (align <= 4 && desired_alignment > 4)
18788 {
18789 rtx label = ix86_expand_aligntest (destptr, 4, false);
18790 destmem = change_address (destmem, SImode, destptr);
18791 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
18792 ix86_adjust_counter (count, 4);
18793 emit_label (label);
18794 LABEL_NUSES (label) = 1;
18795 }
18796 gcc_assert (desired_alignment <= 8);
18797 }
18798
18799 /* Set enough of DST to align DST, known to be aligned by ALIGN, to
18800 DESIRED_ALIGN. ALIGN_BYTES is how many bytes need to be stored. */
18801 static rtx
18802 expand_constant_setmem_prologue (rtx dst, rtx destreg, rtx value,
18803 int desired_align, int align_bytes)
18804 {
18805 int off = 0;
18806 rtx dst_size = MEM_SIZE (dst);
18807 if (align_bytes & 1)
18808 {
18809 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
18810 off = 1;
18811 emit_insn (gen_strset (destreg, dst,
18812 gen_lowpart (QImode, value)));
18813 }
18814 if (align_bytes & 2)
18815 {
18816 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
18817 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
18818 set_mem_align (dst, 2 * BITS_PER_UNIT);
18819 off = 2;
18820 emit_insn (gen_strset (destreg, dst,
18821 gen_lowpart (HImode, value)));
18822 }
18823 if (align_bytes & 4)
18824 {
18825 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
18826 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
18827 set_mem_align (dst, 4 * BITS_PER_UNIT);
18828 off = 4;
18829 emit_insn (gen_strset (destreg, dst,
18830 gen_lowpart (SImode, value)));
18831 }
18832 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
18833 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
18834 set_mem_align (dst, desired_align * BITS_PER_UNIT);
18835 if (dst_size)
18836 set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
18837 return dst;
18838 }
18839
18840 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
18841 static enum stringop_alg
18842 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
18843 int *dynamic_check)
18844 {
18845 const struct stringop_algs * algs;
18846 bool optimize_for_speed;
18847 /* Algorithms using the rep prefix want at least edi and ecx;
18848 additionally, memset wants eax and memcpy wants esi. Don't
18849 consider such algorithms if the user has appropriated those
18850 registers for their own purposes. */
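/* Illustrative example: compiling with -ffixed-ecx, or declaring a global
   register variable in %edi, makes the rep-based algorithms unusable here,
   so a loop variant or a libcall is chosen instead.  */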
18851 bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
18852 || (memset
18853 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
18854
18855 #define ALG_USABLE_P(alg) (rep_prefix_usable \
18856 || (alg != rep_prefix_1_byte \
18857 && alg != rep_prefix_4_byte \
18858 && alg != rep_prefix_8_byte))
18859 const struct processor_costs *cost;
18860
18861 /* Even if the string operation call is cold, we still might spend a lot
18862 of time processing large blocks. */
18863 if (optimize_function_for_size_p (cfun)
18864 || (optimize_insn_for_size_p ()
18865 && expected_size != -1 && expected_size < 256))
18866 optimize_for_speed = false;
18867 else
18868 optimize_for_speed = true;
18869
18870 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
18871
18872 *dynamic_check = -1;
18873 if (memset)
18874 algs = &cost->memset[TARGET_64BIT != 0];
18875 else
18876 algs = &cost->memcpy[TARGET_64BIT != 0];
18877 if (stringop_alg != no_stringop && ALG_USABLE_P (stringop_alg))
18878 return stringop_alg;
18879 /* rep; movq or rep; movl is the smallest variant. */
18880 else if (!optimize_for_speed)
18881 {
18882 if (!count || (count & 3))
18883 return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
18884 else
18885 return rep_prefix_usable ? rep_prefix_4_byte : loop;
18886 }
18887 /* Very tiny blocks are best handled via the loop; REP is expensive to set up.
18888 */
18889 else if (expected_size != -1 && expected_size < 4)
18890 return loop_1_byte;
18891 else if (expected_size != -1)
18892 {
18893 unsigned int i;
18894 enum stringop_alg alg = libcall;
18895 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
18896 {
18897 /* We get here if the algorithms that were not libcall-based
18898 were rep-prefix based and we are unable to use rep prefixes
18899 based on global register usage. Break out of the loop and
18900 use the heuristic below. */
18901 if (algs->size[i].max == 0)
18902 break;
18903 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
18904 {
18905 enum stringop_alg candidate = algs->size[i].alg;
18906
18907 if (candidate != libcall && ALG_USABLE_P (candidate))
18908 alg = candidate;
18909 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
18910 last non-libcall inline algorithm. */
18911 if (TARGET_INLINE_ALL_STRINGOPS)
18912 {
18913 /* When the current size is best copied by a libcall, but we
18914 are still forced to inline, run the heuristic below that
18915 will pick code for medium-sized blocks. */
18916 if (alg != libcall)
18917 return alg;
18918 break;
18919 }
18920 else if (ALG_USABLE_P (candidate))
18921 return candidate;
18922 }
18923 }
18924 gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
18925 }
18926 /* When asked to inline the call anyway, try to pick a meaningful choice.
18927 We look for the maximal size of block that is faster to copy by hand and
18928 take blocks of at most that size, guessing that the average size will
18929 be roughly half of the block.
18930
18931 If this turns out to be bad, we might simply specify the preferred
18932 choice in ix86_costs. */
18933 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
18934 && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
18935 {
18936 int max = -1;
18937 enum stringop_alg alg;
18938 int i;
18939 bool any_alg_usable_p = true;
18940
18941 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
18942 {
18943 enum stringop_alg candidate = algs->size[i].alg;
18944 any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);
18945
18946 if (candidate != libcall && candidate
18947 && ALG_USABLE_P (candidate))
18948 max = algs->size[i].max;
18949 }
18950 /* If there aren't any usable algorithms, then recursing on
18951 smaller sizes isn't going to find anything. Just return the
18952 simple byte-at-a-time copy loop. */
18953 if (!any_alg_usable_p)
18954 {
18955 /* Pick something reasonable. */
18956 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
18957 *dynamic_check = 128;
18958 return loop_1_byte;
18959 }
18960 if (max == -1)
18961 max = 4096;
18962 alg = decide_alg (count, max / 2, memset, dynamic_check);
18963 gcc_assert (*dynamic_check == -1);
18964 gcc_assert (alg != libcall);
18965 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
18966 *dynamic_check = max;
18967 return alg;
18968 }
18969 return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
18970 #undef ALG_USABLE_P
18971 }
18972
18973 /* Decide on alignment. We know that the operand is already aligned to ALIGN
18974 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
18975 static int
18976 decide_alignment (int align,
18977 enum stringop_alg alg,
18978 int expected_size)
18979 {
18980 int desired_align = 0;
18981 switch (alg)
18982 {
18983 case no_stringop:
18984 gcc_unreachable ();
18985 case loop:
18986 case unrolled_loop:
18987 desired_align = GET_MODE_SIZE (Pmode);
18988 break;
18989 case rep_prefix_8_byte:
18990 desired_align = 8;
18991 break;
18992 case rep_prefix_4_byte:
18993 /* PentiumPro has special logic triggering for 8 byte aligned blocks,
18994 copying whole cacheline at once. */
18995 if (TARGET_PENTIUMPRO)
18996 desired_align = 8;
18997 else
18998 desired_align = 4;
18999 break;
19000 case rep_prefix_1_byte:
19001 /* PentiumPro has special logic triggering for 8 byte aligned blocks,
19002 copying whole cacheline at once. */
19003 if (TARGET_PENTIUMPRO)
19004 desired_align = 8;
19005 else
19006 desired_align = 1;
19007 break;
19008 case loop_1_byte:
19009 desired_align = 1;
19010 break;
19011 case libcall:
19012 return 0;
19013 }
19014
19015 if (optimize_size)
19016 desired_align = 1;
19017 if (desired_align < align)
19018 desired_align = align;
19019 if (expected_size != -1 && expected_size < 4)
19020 desired_align = align;
19021 return desired_align;
19022 }
19023
19024 /* Return the smallest power of 2 greater than VAL. */
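/* E.g. smallest_pow2_greater_than (7) == 8, (8) == 16 and (0) == 1.  */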
19025 static int
19026 smallest_pow2_greater_than (int val)
19027 {
19028 int ret = 1;
19029 while (ret <= val)
19030 ret <<= 1;
19031 return ret;
19032 }
19033
19034 /* Expand string move (memcpy) operation. Use i386 string operations when
19035 profitable. expand_setmem contains similar code. The code depends upon
19036 architecture, block size and alignment, but always has the same
19037 overall structure:
19038
19039 1) Prologue guard: Conditional that jumps up to epilogues for small
19040 blocks that can be handled by epilogue alone. This is faster but
19041 also needed for correctness, since the prologue assumes the block is larger
19042 than the desired alignment.
19043
19044 Optional dynamic check for size and libcall for large
19045 blocks is emitted here too, with -minline-stringops-dynamically.
19046
19047 2) Prologue: copy first few bytes in order to get destination aligned
19048 to DESIRED_ALIGN. It is emitted only when ALIGN is less than
19049 DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be copied.
19050 We emit either a jump tree on power of two sized blocks, or a byte loop.
19051
19052 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
19053 with specified algorithm.
19054
19055 4) Epilogue: code copying tail of the block that is too small to be
19056 handled by main body (or up to size guarded by prologue guard). */
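/* A hedged illustration (pseudocode, not literal compiler output) of a copy
   expanded with the rep_prefix_8_byte algorithm and DESIRED_ALIGN == 8:

	if (count < 8) goto epilogue;              (1) prologue guard
	conditionally copy 1, 2 and 4 bytes        (2) alignment prologue
	  until dest is 8-byte aligned, adjusting count;
	ecx = count >> 3; rep movsq;               (3) main body
     epilogue:
	copy the remaining count & 7 bytes         (4) epilogue
	  with a jump tree of 4-, 2- and 1-byte moves.  */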
19057
19058 int
19059 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
19060 rtx expected_align_exp, rtx expected_size_exp)
19061 {
19062 rtx destreg;
19063 rtx srcreg;
19064 rtx label = NULL;
19065 rtx tmp;
19066 rtx jump_around_label = NULL;
19067 HOST_WIDE_INT align = 1;
19068 unsigned HOST_WIDE_INT count = 0;
19069 HOST_WIDE_INT expected_size = -1;
19070 int size_needed = 0, epilogue_size_needed;
19071 int desired_align = 0, align_bytes = 0;
19072 enum stringop_alg alg;
19073 int dynamic_check;
19074 bool need_zero_guard = false;
19075
19076 if (CONST_INT_P (align_exp))
19077 align = INTVAL (align_exp);
19078 /* i386 can do misaligned access at reasonably increased cost. */
19079 if (CONST_INT_P (expected_align_exp)
19080 && INTVAL (expected_align_exp) > align)
19081 align = INTVAL (expected_align_exp);
19082 /* ALIGN is the minimum of destination and source alignment, but we care here
19083 just about destination alignment. */
19084 else if (MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
19085 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
19086
19087 if (CONST_INT_P (count_exp))
19088 count = expected_size = INTVAL (count_exp);
19089 if (CONST_INT_P (expected_size_exp) && count == 0)
19090 expected_size = INTVAL (expected_size_exp);
19091
19092 /* Make sure we don't need to care about overflow later on. */
19093 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
19094 return 0;
19095
19096 /* Step 0: Decide on preferred algorithm, desired alignment and
19097 size of chunks to be copied by main loop. */
19098
19099 alg = decide_alg (count, expected_size, false, &dynamic_check);
19100 desired_align = decide_alignment (align, alg, expected_size);
19101
19102 if (!TARGET_ALIGN_STRINGOPS)
19103 align = desired_align;
19104
19105 if (alg == libcall)
19106 return 0;
19107 gcc_assert (alg != no_stringop);
19108 if (!count)
19109 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
19110 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
19111 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
19112 switch (alg)
19113 {
19114 case libcall:
19115 case no_stringop:
19116 gcc_unreachable ();
19117 case loop:
19118 need_zero_guard = true;
19119 size_needed = GET_MODE_SIZE (Pmode);
19120 break;
19121 case unrolled_loop:
19122 need_zero_guard = true;
19123 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
19124 break;
19125 case rep_prefix_8_byte:
19126 size_needed = 8;
19127 break;
19128 case rep_prefix_4_byte:
19129 size_needed = 4;
19130 break;
19131 case rep_prefix_1_byte:
19132 size_needed = 1;
19133 break;
19134 case loop_1_byte:
19135 need_zero_guard = true;
19136 size_needed = 1;
19137 break;
19138 }
19139
19140 epilogue_size_needed = size_needed;
19141
19142 /* Step 1: Prologue guard. */
19143
19144 /* Alignment code needs count to be in register. */
19145 if (CONST_INT_P (count_exp) && desired_align > align)
19146 {
19147 if (INTVAL (count_exp) > desired_align
19148 && INTVAL (count_exp) > size_needed)
19149 {
19150 align_bytes
19151 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
19152 if (align_bytes <= 0)
19153 align_bytes = 0;
19154 else
19155 align_bytes = desired_align - align_bytes;
19156 }
19157 if (align_bytes == 0)
19158 count_exp = force_reg (counter_mode (count_exp), count_exp);
19159 }
19160 gcc_assert (desired_align >= 1 && align >= 1);
19161
19162 /* Ensure that alignment prologue won't copy past end of block. */
19163 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
19164 {
19165 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
19166 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
19167 Make sure it is a power of 2. */
19168 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
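/* Illustrative example: an unrolled_loop copy on a 64-bit target has
   size_needed == 32, so MAX (31, desired_align - align) rounds up to an
   epilogue_size_needed of 32.  */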
19169
19170 if (count)
19171 {
19172 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
19173 {
19174 /* If main algorithm works on QImode, no epilogue is needed.
19175 For small sizes just don't align anything. */
19176 if (size_needed == 1)
19177 desired_align = align;
19178 else
19179 goto epilogue;
19180 }
19181 }
19182 else
19183 {
19184 label = gen_label_rtx ();
19185 emit_cmp_and_jump_insns (count_exp,
19186 GEN_INT (epilogue_size_needed),
19187 LTU, 0, counter_mode (count_exp), 1, label);
19188 if (expected_size == -1 || expected_size < epilogue_size_needed)
19189 predict_jump (REG_BR_PROB_BASE * 60 / 100);
19190 else
19191 predict_jump (REG_BR_PROB_BASE * 20 / 100);
19192 }
19193 }
19194
19195 /* Emit code to decide on runtime whether library call or inline should be
19196 used. */
19197 if (dynamic_check != -1)
19198 {
19199 if (CONST_INT_P (count_exp))
19200 {
19201 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
19202 {
19203 emit_block_move_via_libcall (dst, src, count_exp, false);
19204 count_exp = const0_rtx;
19205 goto epilogue;
19206 }
19207 }
19208 else
19209 {
19210 rtx hot_label = gen_label_rtx ();
19211 jump_around_label = gen_label_rtx ();
19212 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
19213 LEU, 0, GET_MODE (count_exp), 1, hot_label);
19214 predict_jump (REG_BR_PROB_BASE * 90 / 100);
19215 emit_block_move_via_libcall (dst, src, count_exp, false);
19216 emit_jump (jump_around_label);
19217 emit_label (hot_label);
19218 }
19219 }
19220
19221 /* Step 2: Alignment prologue. */
19222
19223 if (desired_align > align)
19224 {
19225 if (align_bytes == 0)
19226 {
19227 /* Except for the first move in epilogue, we no longer know
19228 constant offset in aliasing info. It doesn't seem worth
19229 the pain to maintain it for the first move, so throw away
19230 the info early. */
19231 src = change_address (src, BLKmode, srcreg);
19232 dst = change_address (dst, BLKmode, destreg);
19233 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
19234 desired_align);
19235 }
19236 else
19237 {
19238 /* If we know how many bytes need to be stored before dst is
19239 sufficiently aligned, maintain aliasing info accurately. */
19240 dst = expand_constant_movmem_prologue (dst, &src, destreg, srcreg,
19241 desired_align, align_bytes);
19242 count_exp = plus_constant (count_exp, -align_bytes);
19243 count -= align_bytes;
19244 }
19245 if (need_zero_guard
19246 && (count < (unsigned HOST_WIDE_INT) size_needed
19247 || (align_bytes == 0
19248 && count < ((unsigned HOST_WIDE_INT) size_needed
19249 + desired_align - align))))
19250 {
19251 /* It is possible that we copied enough so the main loop will not
19252 execute. */
19253 gcc_assert (size_needed > 1);
19254 if (label == NULL_RTX)
19255 label = gen_label_rtx ();
19256 emit_cmp_and_jump_insns (count_exp,
19257 GEN_INT (size_needed),
19258 LTU, 0, counter_mode (count_exp), 1, label);
19259 if (expected_size == -1
19260 || expected_size < (desired_align - align) / 2 + size_needed)
19261 predict_jump (REG_BR_PROB_BASE * 20 / 100);
19262 else
19263 predict_jump (REG_BR_PROB_BASE * 60 / 100);
19264 }
19265 }
19266 if (label && size_needed == 1)
19267 {
19268 emit_label (label);
19269 LABEL_NUSES (label) = 1;
19270 label = NULL;
19271 epilogue_size_needed = 1;
19272 }
19273 else if (label == NULL_RTX)
19274 epilogue_size_needed = size_needed;
19275
19276 /* Step 3: Main loop. */
19277
19278 switch (alg)
19279 {
19280 case libcall:
19281 case no_stringop:
19282 gcc_unreachable ();
19283 case loop_1_byte:
19284 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
19285 count_exp, QImode, 1, expected_size);
19286 break;
19287 case loop:
19288 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
19289 count_exp, Pmode, 1, expected_size);
19290 break;
19291 case unrolled_loop:
19292 /* Unroll only by a factor of 2 in 32-bit mode, since we don't have enough
19293 registers for 4 temporaries anyway. */
19294 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
19295 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
19296 expected_size);
19297 break;
19298 case rep_prefix_8_byte:
19299 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
19300 DImode);
19301 break;
19302 case rep_prefix_4_byte:
19303 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
19304 SImode);
19305 break;
19306 case rep_prefix_1_byte:
19307 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
19308 QImode);
19309 break;
19310 }
19311 /* Properly adjust the offset of src and dest memory for aliasing. */
19312 if (CONST_INT_P (count_exp))
19313 {
19314 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
19315 (count / size_needed) * size_needed);
19316 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
19317 (count / size_needed) * size_needed);
19318 }
19319 else
19320 {
19321 src = change_address (src, BLKmode, srcreg);
19322 dst = change_address (dst, BLKmode, destreg);
19323 }
19324
19325 /* Step 4: Epilogue to copy the remaining bytes. */
19326 epilogue:
19327 if (label)
19328 {
19329 /* When the main loop is done, COUNT_EXP might hold original count,
19330 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
19331 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
19332 bytes. Compensate if needed. */
19333
19334 if (size_needed < epilogue_size_needed)
19335 {
19336 tmp =
19337 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
19338 GEN_INT (size_needed - 1), count_exp, 1,
19339 OPTAB_DIRECT);
19340 if (tmp != count_exp)
19341 emit_move_insn (count_exp, tmp);
19342 }
19343 emit_label (label);
19344 LABEL_NUSES (label) = 1;
19345 }
19346
19347 if (count_exp != const0_rtx && epilogue_size_needed > 1)
19348 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
19349 epilogue_size_needed);
19350 if (jump_around_label)
19351 emit_label (jump_around_label);
19352 return 1;
19353 }
19354
19355 /* Helper function for memset. For QImode value 0xXY produce
19356 0xXYXYXYXY of the width specified by MODE. This is essentially
19357 a * 0x01010101, but we can do slightly better than
19358 synth_mult by unwinding the sequence by hand on CPUs with
19359 slow multiply. */
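/* Illustrative example: promoting val == 0xab to SImode yields 0xabababab;
   in DImode two more shift/or steps (or the multiply path) extend this to
   0xabababababababab.  */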
19360 static rtx
19361 promote_duplicated_reg (enum machine_mode mode, rtx val)
19362 {
19363 enum machine_mode valmode = GET_MODE (val);
19364 rtx tmp;
19365 int nops = mode == DImode ? 3 : 2;
19366
19367 gcc_assert (mode == SImode || mode == DImode);
19368 if (val == const0_rtx)
19369 return copy_to_mode_reg (mode, const0_rtx);
19370 if (CONST_INT_P (val))
19371 {
19372 HOST_WIDE_INT v = INTVAL (val) & 255;
19373
19374 v |= v << 8;
19375 v |= v << 16;
19376 if (mode == DImode)
19377 v |= (v << 16) << 16;
19378 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
19379 }
19380
19381 if (valmode == VOIDmode)
19382 valmode = QImode;
19383 if (valmode != QImode)
19384 val = gen_lowpart (QImode, val);
19385 if (mode == QImode)
19386 return val;
19387 if (!TARGET_PARTIAL_REG_STALL)
19388 nops--;
19389 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
19390 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
19391 <= (ix86_cost->shift_const + ix86_cost->add) * nops
19392 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
19393 {
19394 rtx reg = convert_modes (mode, QImode, val, true);
19395 tmp = promote_duplicated_reg (mode, const1_rtx);
19396 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
19397 OPTAB_DIRECT);
19398 }
19399 else
19400 {
19401 rtx reg = convert_modes (mode, QImode, val, true);
19402
19403 if (!TARGET_PARTIAL_REG_STALL)
19404 if (mode == SImode)
19405 emit_insn (gen_movsi_insv_1 (reg, reg));
19406 else
19407 emit_insn (gen_movdi_insv_1 (reg, reg));
19408 else
19409 {
19410 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
19411 NULL, 1, OPTAB_DIRECT);
19412 reg =
19413 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
19414 }
19415 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
19416 NULL, 1, OPTAB_DIRECT);
19417 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
19418 if (mode == SImode)
19419 return reg;
19420 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
19421 NULL, 1, OPTAB_DIRECT);
19422 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
19423 return reg;
19424 }
19425 }
19426
19427 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
19428 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
19429 alignment from ALIGN to DESIRED_ALIGN. */
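/* Illustrative cases: a rep_prefix_4_byte memset (SIZE_NEEDED == 4) with no
   extra alignment work broadcasts VAL only to SImode, even on 64-bit targets,
   while an unrolled_loop on x86-64 (SIZE_NEEDED == 32) uses DImode.  */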
19430 static rtx
19431 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
19432 {
19433 rtx promoted_val;
19434
19435 if (TARGET_64BIT
19436 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
19437 promoted_val = promote_duplicated_reg (DImode, val);
19438 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
19439 promoted_val = promote_duplicated_reg (SImode, val);
19440 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
19441 promoted_val = promote_duplicated_reg (HImode, val);
19442 else
19443 promoted_val = val;
19444
19445 return promoted_val;
19446 }
19447
19448 /* Expand string set operation (memset). Use i386 string operations when
19449 profitable. See expand_movmem comment for explanation of individual
19450 steps performed. */
19451 int
19452 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
19453 rtx expected_align_exp, rtx expected_size_exp)
19454 {
19455 rtx destreg;
19456 rtx label = NULL;
19457 rtx tmp;
19458 rtx jump_around_label = NULL;
19459 HOST_WIDE_INT align = 1;
19460 unsigned HOST_WIDE_INT count = 0;
19461 HOST_WIDE_INT expected_size = -1;
19462 int size_needed = 0, epilogue_size_needed;
19463 int desired_align = 0, align_bytes = 0;
19464 enum stringop_alg alg;
19465 rtx promoted_val = NULL;
19466 bool force_loopy_epilogue = false;
19467 int dynamic_check;
19468 bool need_zero_guard = false;
19469
19470 if (CONST_INT_P (align_exp))
19471 align = INTVAL (align_exp);
19472 /* i386 can do misaligned access at reasonably increased cost. */
19473 if (CONST_INT_P (expected_align_exp)
19474 && INTVAL (expected_align_exp) > align)
19475 align = INTVAL (expected_align_exp);
19476 if (CONST_INT_P (count_exp))
19477 count = expected_size = INTVAL (count_exp);
19478 if (CONST_INT_P (expected_size_exp) && count == 0)
19479 expected_size = INTVAL (expected_size_exp);
19480
19481 /* Make sure we don't need to care about overflow later on. */
19482 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
19483 return 0;
19484
19485 /* Step 0: Decide on preferred algorithm, desired alignment and
19486 size of chunks to be copied by main loop. */
19487
19488 alg = decide_alg (count, expected_size, true, &dynamic_check);
19489 desired_align = decide_alignment (align, alg, expected_size);
19490
19491 if (!TARGET_ALIGN_STRINGOPS)
19492 align = desired_align;
19493
19494 if (alg == libcall)
19495 return 0;
19496 gcc_assert (alg != no_stringop);
19497 if (!count)
19498 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
19499 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
19500 switch (alg)
19501 {
19502 case libcall:
19503 case no_stringop:
19504 gcc_unreachable ();
19505 case loop:
19506 need_zero_guard = true;
19507 size_needed = GET_MODE_SIZE (Pmode);
19508 break;
19509 case unrolled_loop:
19510 need_zero_guard = true;
19511 size_needed = GET_MODE_SIZE (Pmode) * 4;
19512 break;
19513 case rep_prefix_8_byte:
19514 size_needed = 8;
19515 break;
19516 case rep_prefix_4_byte:
19517 size_needed = 4;
19518 break;
19519 case rep_prefix_1_byte:
19520 size_needed = 1;
19521 break;
19522 case loop_1_byte:
19523 need_zero_guard = true;
19524 size_needed = 1;
19525 break;
19526 }
19527 epilogue_size_needed = size_needed;
19528
19529 /* Step 1: Prologue guard. */
19530
19531 /* Alignment code needs count to be in register. */
19532 if (CONST_INT_P (count_exp) && desired_align > align)
19533 {
19534 if (INTVAL (count_exp) > desired_align
19535 && INTVAL (count_exp) > size_needed)
19536 {
19537 align_bytes
19538 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
19539 if (align_bytes <= 0)
19540 align_bytes = 0;
19541 else
19542 align_bytes = desired_align - align_bytes;
19543 }
19544 if (align_bytes == 0)
19545 {
19546 enum machine_mode mode = SImode;
19547 if (TARGET_64BIT && (count & ~0xffffffff))
19548 mode = DImode;
19549 count_exp = force_reg (mode, count_exp);
19550 }
19551 }
19552 /* Do the cheap promotion to allow better CSE across the
19553 main loop and epilogue (i.e. one load of the big constant in
19554 front of all the code). */
19555 if (CONST_INT_P (val_exp))
19556 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
19557 desired_align, align);
19558 /* Ensure that alignment prologue won't copy past end of block. */
19559 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
19560 {
19561 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
19562 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
19563 Make sure it is a power of 2. */
19564 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
19565
19566 /* To improve performance of small blocks, we jump around the VAL
19567 promoting code. This means that if the promoted VAL is not constant,
19568 we might not use it in the epilogue and have to use the byte
19569 loop variant. */
19570 if (epilogue_size_needed > 2 && !promoted_val)
19571 force_loopy_epilogue = true;
19572 if (count)
19573 {
19574 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
19575 {
19576 /* If main algorithm works on QImode, no epilogue is needed.
19577 For small sizes just don't align anything. */
19578 if (size_needed == 1)
19579 desired_align = align;
19580 else
19581 goto epilogue;
19582 }
19583 }
19584 else
19585 {
19586 label = gen_label_rtx ();
19587 emit_cmp_and_jump_insns (count_exp,
19588 GEN_INT (epilogue_size_needed),
19589 LTU, 0, counter_mode (count_exp), 1, label);
19590 if (expected_size == -1 || expected_size <= epilogue_size_needed)
19591 predict_jump (REG_BR_PROB_BASE * 60 / 100);
19592 else
19593 predict_jump (REG_BR_PROB_BASE * 20 / 100);
19594 }
19595 }
19596 if (dynamic_check != -1)
19597 {
19598 rtx hot_label = gen_label_rtx ();
19599 jump_around_label = gen_label_rtx ();
19600 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
19601 LEU, 0, counter_mode (count_exp), 1, hot_label);
19602 predict_jump (REG_BR_PROB_BASE * 90 / 100);
19603 set_storage_via_libcall (dst, count_exp, val_exp, false);
19604 emit_jump (jump_around_label);
19605 emit_label (hot_label);
19606 }
19607
19608 /* Step 2: Alignment prologue. */
19609
19610 /* Do the expensive promotion once we branched off the small blocks. */
19611 if (!promoted_val)
19612 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
19613 desired_align, align);
19614 gcc_assert (desired_align >= 1 && align >= 1);
19615
19616 if (desired_align > align)
19617 {
19618 if (align_bytes == 0)
19619 {
19620 /* Except for the first move in epilogue, we no longer know
19621 constant offset in aliasing info. It doesn't seem worth
19622 the pain to maintain it for the first move, so throw away
19623 the info early. */
19624 dst = change_address (dst, BLKmode, destreg);
19625 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
19626 desired_align);
19627 }
19628 else
19629 {
19630 /* If we know how many bytes need to be stored before dst is
19631 sufficiently aligned, maintain aliasing info accurately. */
19632 dst = expand_constant_setmem_prologue (dst, destreg, promoted_val,
19633 desired_align, align_bytes);
19634 count_exp = plus_constant (count_exp, -align_bytes);
19635 count -= align_bytes;
19636 }
19637 if (need_zero_guard
19638 && (count < (unsigned HOST_WIDE_INT) size_needed
19639 || (align_bytes == 0
19640 && count < ((unsigned HOST_WIDE_INT) size_needed
19641 + desired_align - align))))
19642 {
19643 /* It is possible that we copied enough so the main loop will not
19644 execute. */
19645 gcc_assert (size_needed > 1);
19646 if (label == NULL_RTX)
19647 label = gen_label_rtx ();
19648 emit_cmp_and_jump_insns (count_exp,
19649 GEN_INT (size_needed),
19650 LTU, 0, counter_mode (count_exp), 1, label);
19651 if (expected_size == -1
19652 || expected_size < (desired_align - align) / 2 + size_needed)
19653 predict_jump (REG_BR_PROB_BASE * 20 / 100);
19654 else
19655 predict_jump (REG_BR_PROB_BASE * 60 / 100);
19656 }
19657 }
19658 if (label && size_needed == 1)
19659 {
19660 emit_label (label);
19661 LABEL_NUSES (label) = 1;
19662 label = NULL;
19663 promoted_val = val_exp;
19664 epilogue_size_needed = 1;
19665 }
19666 else if (label == NULL_RTX)
19667 epilogue_size_needed = size_needed;
19668
19669 /* Step 3: Main loop. */
19670
19671 switch (alg)
19672 {
19673 case libcall:
19674 case no_stringop:
19675 gcc_unreachable ();
19676 case loop_1_byte:
19677 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
19678 count_exp, QImode, 1, expected_size);
19679 break;
19680 case loop:
19681 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
19682 count_exp, Pmode, 1, expected_size);
19683 break;
19684 case unrolled_loop:
19685 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
19686 count_exp, Pmode, 4, expected_size);
19687 break;
19688 case rep_prefix_8_byte:
19689 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
19690 DImode, val_exp);
19691 break;
19692 case rep_prefix_4_byte:
19693 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
19694 SImode, val_exp);
19695 break;
19696 case rep_prefix_1_byte:
19697 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
19698 QImode, val_exp);
19699 break;
19700 }
19701 /* Properly adjust the offset of dest memory for aliasing. */
19702 if (CONST_INT_P (count_exp))
19703 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
19704 (count / size_needed) * size_needed);
19705 else
19706 dst = change_address (dst, BLKmode, destreg);
19707
19708 /* Step 4: Epilogue to copy the remaining bytes. */
19709
19710 if (label)
19711 {
19712 /* When the main loop is done, COUNT_EXP might hold original count,
19713 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
19714 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
19715 bytes. Compensate if needed. */
19716
19717 if (size_needed < epilogue_size_needed)
19718 {
19719 tmp =
19720 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
19721 GEN_INT (size_needed - 1), count_exp, 1,
19722 OPTAB_DIRECT);
19723 if (tmp != count_exp)
19724 emit_move_insn (count_exp, tmp);
19725 }
19726 emit_label (label);
19727 LABEL_NUSES (label) = 1;
19728 }
19729 epilogue:
19730 if (count_exp != const0_rtx && epilogue_size_needed > 1)
19731 {
19732 if (force_loopy_epilogue)
19733 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
19734 epilogue_size_needed);
19735 else
19736 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
19737 epilogue_size_needed);
19738 }
19739 if (jump_around_label)
19740 emit_label (jump_around_label);
19741 return 1;
19742 }
19743
19744 /* Expand the appropriate insns for doing strlen if not just doing
19745 repnz; scasb
19746
19747 out = result, initialized with the start address
19748 align_rtx = alignment of the address.
19749 scratch = scratch register, initialized with the start address when
19750 not aligned, otherwise undefined
19751
19752 This is just the body. It needs the initializations mentioned above and
19753 some address computing at the end. These things are done in i386.md. */
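/* Roughly, the body below byte-compares up to three leading bytes until OUT
   is 4-byte aligned, then loops over SImode loads using the zero-byte test
   described further down, and finally adjusts OUT so that it points at the
   terminating zero (descriptive summary only).  */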
19754
19755 static void
19756 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
19757 {
19758 int align;
19759 rtx tmp;
19760 rtx align_2_label = NULL_RTX;
19761 rtx align_3_label = NULL_RTX;
19762 rtx align_4_label = gen_label_rtx ();
19763 rtx end_0_label = gen_label_rtx ();
19764 rtx mem;
19765 rtx tmpreg = gen_reg_rtx (SImode);
19766 rtx scratch = gen_reg_rtx (SImode);
19767 rtx cmp;
19768
19769 align = 0;
19770 if (CONST_INT_P (align_rtx))
19771 align = INTVAL (align_rtx);
19772
19773 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
19774
19775 /* Is there a known alignment and is it less than 4? */
19776 if (align < 4)
19777 {
19778 rtx scratch1 = gen_reg_rtx (Pmode);
19779 emit_move_insn (scratch1, out);
19780 /* Is there a known alignment and is it not 2? */
19781 if (align != 2)
19782 {
19783 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
19784 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
19785
19786 /* Leave just the 3 lower bits. */
19787 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
19788 NULL_RTX, 0, OPTAB_WIDEN);
19789
19790 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
19791 Pmode, 1, align_4_label);
19792 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
19793 Pmode, 1, align_2_label);
19794 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
19795 Pmode, 1, align_3_label);
19796 }
19797 else
19798 {
19799 /* Since the alignment is 2, we have to check 2 or 0 bytes;
19800 check whether it is aligned to 4 bytes. */
19801
19802 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
19803 NULL_RTX, 0, OPTAB_WIDEN);
19804
19805 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
19806 Pmode, 1, align_4_label);
19807 }
19808
19809 mem = change_address (src, QImode, out);
19810
19811 /* Now compare the bytes. */
19812
19813 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
19814 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
19815 QImode, 1, end_0_label);
19816
19817 /* Increment the address. */
19818 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
19819
19820 /* Not needed with an alignment of 2 */
19821 if (align != 2)
19822 {
19823 emit_label (align_2_label);
19824
19825 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
19826 end_0_label);
19827
19828 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
19829
19830 emit_label (align_3_label);
19831 }
19832
19833 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
19834 end_0_label);
19835
19836 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
19837 }
19838
19839 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
19840 align this loop; that only makes programs bigger and does not help to
19841 speed them up. */
19842 emit_label (align_4_label);
19843
19844 mem = change_address (src, SImode, out);
19845 emit_move_insn (scratch, mem);
19846 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
19847
19848 /* This formula yields a nonzero result iff one of the bytes is zero.
19849 This saves three branches inside the loop and many cycles. */
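/* Worked example (illustrative values): for scratch == 0x12003456,
   scratch - 0x01010101 == 0x10ff3355 and ~scratch == 0xedffcba9; their AND
   is 0x00ff0301, and masking with 0x80808080 leaves 0x00800000 -- the 0x80
   sits in the position of the zero byte.  A word with no zero byte, e.g.
   0x12343456, ends up as 0 after the final mask.  */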
19850
19851 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
19852 emit_insn (gen_one_cmplsi2 (scratch, scratch));
19853 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
19854 emit_insn (gen_andsi3 (tmpreg, tmpreg,
19855 gen_int_mode (0x80808080, SImode)));
19856 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
19857 align_4_label);
19858
19859 if (TARGET_CMOVE)
19860 {
19861 rtx reg = gen_reg_rtx (SImode);
19862 rtx reg2 = gen_reg_rtx (Pmode);
19863 emit_move_insn (reg, tmpreg);
19864 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
19865
19866 /* If zero is not in the first two bytes, move two bytes forward. */
19867 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
19868 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
19869 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
19870 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
19871 gen_rtx_IF_THEN_ELSE (SImode, tmp,
19872 reg,
19873 tmpreg)));
19874 /* Emit lea manually to avoid clobbering of flags. */
19875 emit_insn (gen_rtx_SET (SImode, reg2,
19876 gen_rtx_PLUS (Pmode, out, const2_rtx)));
19877
19878 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
19879 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
19880 emit_insn (gen_rtx_SET (VOIDmode, out,
19881 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
19882 reg2,
19883 out)));
19884 }
19885 else
19886 {
19887 rtx end_2_label = gen_label_rtx ();
19888 /* Is zero in the first two bytes? */
19889
19890 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
19891 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
19892 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
19893 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
19894 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
19895 pc_rtx);
19896 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
19897 JUMP_LABEL (tmp) = end_2_label;
19898
19899 /* Not in the first two. Move two bytes forward. */
19900 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
19901 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
19902
19903 emit_label (end_2_label);
19904
19905 }
19906
19907 /* Avoid branch in fixing the byte. */
19908 tmpreg = gen_lowpart (QImode, tmpreg);
19909 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
19910 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
19911 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
19912 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
19913
19914 emit_label (end_0_label);
19915 }
19916
19917 /* Expand strlen. */
19918
19919 int
19920 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
19921 {
19922 rtx addr, scratch1, scratch2, scratch3, scratch4;
19923
19924 /* The generic case of the strlen expander is long. Avoid expanding
19925 it unless TARGET_INLINE_ALL_STRINGOPS. */
19926
19927 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
19928 && !TARGET_INLINE_ALL_STRINGOPS
19929 && !optimize_insn_for_size_p ()
19930 && (!CONST_INT_P (align) || INTVAL (align) < 4))
19931 return 0;
19932
19933 addr = force_reg (Pmode, XEXP (src, 0));
19934 scratch1 = gen_reg_rtx (Pmode);
19935
19936 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
19937 && !optimize_insn_for_size_p ())
19938 {
19939 /* Well it seems that some optimizer does not combine a call like
19940 foo(strlen(bar), strlen(bar));
19941 when the move and the subtraction are done here. It does calculate
19942 the length just once when these instructions are done inside
19943 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
19944 often used and I use one fewer register for the lifetime of
19945 output_strlen_unroll() this is better. */
19946
19947 emit_move_insn (out, addr);
19948
19949 ix86_expand_strlensi_unroll_1 (out, src, align);
19950
19951 /* strlensi_unroll_1 returns the address of the zero at the end of
19952 the string, like memchr(), so compute the length by subtracting
19953 the start address. */
19954 emit_insn (ix86_gen_sub3 (out, out, addr));
19955 }
19956 else
19957 {
19958 rtx unspec;
19959
19960 /* Can't use this if the user has appropriated eax, ecx, or edi. */
19961 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
19962 return false;
19963
19964 scratch2 = gen_reg_rtx (Pmode);
19965 scratch3 = gen_reg_rtx (Pmode);
19966 scratch4 = force_reg (Pmode, constm1_rtx);
19967
19968 emit_move_insn (scratch3, addr);
19969 eoschar = force_reg (QImode, eoschar);
19970
19971 src = replace_equiv_address_nv (src, scratch3);
19972
19973 /* If .md starts supporting :P, this can be done in .md. */
19974 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
19975 scratch4), UNSPEC_SCAS);
19976 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
19977 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
19978 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
19979 }
19980 return 1;
19981 }
19982
19983 /* For a given symbol (function), construct code to compute the address of its
19984 PLT entry in the large x86-64 PIC model. */
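/* Illustrative only: in the large PIC model the sequence built below
typically assembles to something like

movabs $symbol@PLTOFF, %reg        64-bit offset of the PLT entry
add    %pic_base, %reg             pic_offset_table_rtx holds the GOT base

where the concrete registers are whatever the register allocator picks. */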
19985 rtx
19986 construct_plt_address (rtx symbol)
19987 {
19988 rtx tmp = gen_reg_rtx (Pmode);
19989 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
19990
19991 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
19992 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
19993
19994 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
19995 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
19996 return tmp;
19997 }
19998
19999 void
20000 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
20001 rtx callarg2,
20002 rtx pop, int sibcall)
20003 {
20004 rtx use = NULL, call;
20005
20006 if (pop == const0_rtx)
20007 pop = NULL;
20008 gcc_assert (!TARGET_64BIT || !pop);
20009
20010 if (TARGET_MACHO && !TARGET_64BIT)
20011 {
20012 #if TARGET_MACHO
20013 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
20014 fnaddr = machopic_indirect_call_target (fnaddr);
20015 #endif
20016 }
20017 else
20018 {
20019 /* Static functions and indirect calls don't need the pic register. */
20020 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
20021 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
20022 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
20023 use_reg (&use, pic_offset_table_rtx);
20024 }
20025
20026 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
20027 {
20028 rtx al = gen_rtx_REG (QImode, AX_REG);
20029 emit_move_insn (al, callarg2);
20030 use_reg (&use, al);
20031 }
20032
20033 if (ix86_cmodel == CM_LARGE_PIC
20034 && MEM_P (fnaddr)
20035 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
20036 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
20037 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
20038 else if (sibcall
20039 ? !sibcall_insn_operand (XEXP (fnaddr, 0), Pmode)
20040 : !call_insn_operand (XEXP (fnaddr, 0), Pmode))
20041 {
20042 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
20043 fnaddr = gen_rtx_MEM (QImode, fnaddr);
20044 }
20045
20046 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
20047 if (retval)
20048 call = gen_rtx_SET (VOIDmode, retval, call);
20049 if (pop)
20050 {
20051 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
20052 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
20053 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
20054 }
20055 if (TARGET_64BIT
20056 && ix86_cfun_abi () == MS_ABI
20057 && (!callarg2 || INTVAL (callarg2) != -2))
20058 {
20059 /* SYSV-ABI callees clobber SI, DI and XMM6-XMM15, which are call-saved
20060 under the MS ABI; represent those clobbers explicitly. */
20061 static int clobbered_registers[] = {
20062 XMM6_REG, XMM7_REG, XMM8_REG,
20063 XMM9_REG, XMM10_REG, XMM11_REG,
20064 XMM12_REG, XMM13_REG, XMM14_REG,
20065 XMM15_REG, SI_REG, DI_REG
20066 };
20067 unsigned int i;
20068 rtx vec[ARRAY_SIZE (clobbered_registers) + 2];
20069 rtx unspec = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
20070 UNSPEC_MS_TO_SYSV_CALL);
20071
20072 vec[0] = call;
20073 vec[1] = unspec;
20074 for (i = 0; i < ARRAY_SIZE (clobbered_registers); i++)
20075 vec[i + 2] = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers[i])
20076 ? TImode : DImode,
20077 gen_rtx_REG
20078 (SSE_REGNO_P (clobbered_registers[i])
20079 ? TImode : DImode,
20080 clobbered_registers[i]));
20081
20082 call = gen_rtx_PARALLEL (VOIDmode,
20083 gen_rtvec_v (ARRAY_SIZE (clobbered_registers)
20084 + 2, vec));
20085 }
20086
20087 call = emit_call_insn (call);
20088 if (use)
20089 CALL_INSN_FUNCTION_USAGE (call) = use;
20090 }
20091
20092 \f
20093 /* Clear stack slot assignments remembered from previous functions.
20094 This is called from INIT_EXPANDERS once before RTL is emitted for each
20095 function. */
20096
20097 static struct machine_function *
20098 ix86_init_machine_status (void)
20099 {
20100 struct machine_function *f;
20101
20102 f = ggc_alloc_cleared_machine_function ();
20103 f->use_fast_prologue_epilogue_nregs = -1;
20104 f->tls_descriptor_call_expanded_p = 0;
20105 f->call_abi = ix86_abi;
20106
20107 return f;
20108 }
20109
20110 /* Return a MEM corresponding to a stack slot with mode MODE.
20111 Allocate a new slot if necessary.
20112
20113 The RTL for a function can have several slots available: N is
20114 which slot to use. */
20115
20116 rtx
20117 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
20118 {
20119 struct stack_local_entry *s;
20120
20121 gcc_assert (n < MAX_386_STACK_LOCALS);
20122
20123 /* Virtual slot is valid only before vregs are instantiated. */
20124 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
20125
20126 for (s = ix86_stack_locals; s; s = s->next)
20127 if (s->mode == mode && s->n == n)
20128 return copy_rtx (s->rtl);
20129
20130 s = ggc_alloc_stack_local_entry ();
20131 s->n = n;
20132 s->mode = mode;
20133 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
20134
20135 s->next = ix86_stack_locals;
20136 ix86_stack_locals = s;
20137 return s->rtl;
20138 }
20139
20140 /* Construct the SYMBOL_REF for the tls_get_addr function. */
20141
20142 static GTY(()) rtx ix86_tls_symbol;
20143 rtx
20144 ix86_tls_get_addr (void)
20145 {
20146
20147 if (!ix86_tls_symbol)
20148 {
20149 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
20150 (TARGET_ANY_GNU_TLS
20151 && !TARGET_64BIT)
20152 ? "___tls_get_addr"
20153 : "__tls_get_addr");
20154 }
20155
20156 return ix86_tls_symbol;
20157 }
20158
20159 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
20160
20161 static GTY(()) rtx ix86_tls_module_base_symbol;
20162 rtx
20163 ix86_tls_module_base (void)
20164 {
20165
20166 if (!ix86_tls_module_base_symbol)
20167 {
20168 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
20169 "_TLS_MODULE_BASE_");
20170 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
20171 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
20172 }
20173
20174 return ix86_tls_module_base_symbol;
20175 }
20176 \f
20177 /* Calculate the length of the memory address in the instruction
20178 encoding. Does not include the one-byte modrm, opcode, or prefix. */
20179
20180 int
20181 memory_address_length (rtx addr)
20182 {
20183 struct ix86_address parts;
20184 rtx base, index, disp;
20185 int len;
20186 int ok;
20187
20188 if (GET_CODE (addr) == PRE_DEC
20189 || GET_CODE (addr) == POST_INC
20190 || GET_CODE (addr) == PRE_MODIFY
20191 || GET_CODE (addr) == POST_MODIFY)
20192 return 0;
20193
20194 ok = ix86_decompose_address (addr, &parts);
20195 gcc_assert (ok);
20196
20197 if (parts.base && GET_CODE (parts.base) == SUBREG)
20198 parts.base = SUBREG_REG (parts.base);
20199 if (parts.index && GET_CODE (parts.index) == SUBREG)
20200 parts.index = SUBREG_REG (parts.index);
20201
20202 base = parts.base;
20203 index = parts.index;
20204 disp = parts.disp;
20205 len = 0;
20206
20207 /* Rule of thumb:
20208 - esp as the base always wants an index,
20209 - ebp as the base always wants a displacement,
20210 - r12 as the base always wants an index,
20211 - r13 as the base always wants a displacement. */
20212
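/* A few worked examples of the value computed below (address bytes only,
excluding opcode, modrm and prefixes); illustrative, not exhaustive:

(%eax)            -> 0   plain register indirect
(%esp), (%ebp)    -> 1   SIB byte resp. a forced disp8 of zero
8(%eax)           -> 1   disp8
0x1000(%eax)      -> 4   disp32
8(%eax,%ebx,4)    -> 2   disp8 plus SIB byte
symbol            -> 4   disp32, or disp32(%rip) in 64-bit code
absolute constant -> 5   in 64-bit code: disp32 plus SIB, since mod 00 r/m 5
means rip-relative there. */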
20213 /* Register Indirect. */
20214 if (base && !index && !disp)
20215 {
20216 /* esp (for its index) and ebp (for its displacement) need
20217 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
20218 code. */
20219 if (REG_P (addr)
20220 && (addr == arg_pointer_rtx
20221 || addr == frame_pointer_rtx
20222 || REGNO (addr) == SP_REG
20223 || REGNO (addr) == BP_REG
20224 || REGNO (addr) == R12_REG
20225 || REGNO (addr) == R13_REG))
20226 len = 1;
20227 }
20228
20229 /* Direct Addressing.  In 64-bit mode, mod 00 r/m 5
20230 is not disp32 but disp32(%rip), so a plain disp32
20231 needs a SIB byte, unless print_operand_address
20232 optimizes it into disp32(%rip) or (%rip) is implied
20233 by an UNSPEC. */
20234 else if (disp && !base && !index)
20235 {
20236 len = 4;
20237 if (TARGET_64BIT)
20238 {
20239 rtx symbol = disp;
20240
20241 if (GET_CODE (disp) == CONST)
20242 symbol = XEXP (disp, 0);
20243 if (GET_CODE (symbol) == PLUS
20244 && CONST_INT_P (XEXP (symbol, 1)))
20245 symbol = XEXP (symbol, 0);
20246
20247 if (GET_CODE (symbol) != LABEL_REF
20248 && (GET_CODE (symbol) != SYMBOL_REF
20249 || SYMBOL_REF_TLS_MODEL (symbol) != 0)
20250 && (GET_CODE (symbol) != UNSPEC
20251 || (XINT (symbol, 1) != UNSPEC_GOTPCREL
20252 && XINT (symbol, 1) != UNSPEC_GOTNTPOFF)))
20253 len += 1;
20254 }
20255 }
20256
20257 else
20258 {
20259 /* Find the length of the displacement constant. */
20260 if (disp)
20261 {
20262 if (base && satisfies_constraint_K (disp))
20263 len = 1;
20264 else
20265 len = 4;
20266 }
20267 /* ebp always wants a displacement. Similarly r13. */
20268 else if (base && REG_P (base)
20269 && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
20270 len = 1;
20271
20272 /* An index requires the two-byte modrm form.... */
20273 if (index
20274 /* ...like esp (or r12), which always wants an index. */
20275 || base == arg_pointer_rtx
20276 || base == frame_pointer_rtx
20277 || (base && REG_P (base)
20278 && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
20279 len += 1;
20280 }
20281
20282 switch (parts.seg)
20283 {
20284 case SEG_FS:
20285 case SEG_GS:
20286 len += 1;
20287 break;
20288 default:
20289 break;
20290 }
20291
20292 return len;
20293 }
20294
20295 /* Compute the default value for the "length_immediate" attribute.  When
20296 SHORTFORM is set, expect that the insn has an 8-bit immediate alternative. */
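/* For example (illustrative): with SHORTFORM set, "add $100, %eax" fits the
sign-extended 8-bit immediate form and contributes 1 byte, while
"add $1000, %eax" needs the full 32-bit immediate and contributes 4 bytes;
DImode immediates also count as 4 since they are 32-bit sign-extended. */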
20297 int
20298 ix86_attr_length_immediate_default (rtx insn, int shortform)
20299 {
20300 int len = 0;
20301 int i;
20302 extract_insn_cached (insn);
20303 for (i = recog_data.n_operands - 1; i >= 0; --i)
20304 if (CONSTANT_P (recog_data.operand[i]))
20305 {
20306 enum attr_mode mode = get_attr_mode (insn);
20307
20308 gcc_assert (!len);
20309 if (shortform && CONST_INT_P (recog_data.operand[i]))
20310 {
20311 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
20312 switch (mode)
20313 {
20314 case MODE_QI:
20315 len = 1;
20316 continue;
20317 case MODE_HI:
20318 ival = trunc_int_for_mode (ival, HImode);
20319 break;
20320 case MODE_SI:
20321 ival = trunc_int_for_mode (ival, SImode);
20322 break;
20323 default:
20324 break;
20325 }
20326 if (IN_RANGE (ival, -128, 127))
20327 {
20328 len = 1;
20329 continue;
20330 }
20331 }
20332 switch (mode)
20333 {
20334 case MODE_QI:
20335 len = 1;
20336 break;
20337 case MODE_HI:
20338 len = 2;
20339 break;
20340 case MODE_SI:
20341 len = 4;
20342 break;
20343 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
20344 case MODE_DI:
20345 len = 4;
20346 break;
20347 default:
20348 fatal_insn ("unknown insn mode", insn);
20349 }
20350 }
20351 return len;
20352 }
20353 /* Compute default value for "length_address" attribute. */
20354 int
20355 ix86_attr_length_address_default (rtx insn)
20356 {
20357 int i;
20358
20359 if (get_attr_type (insn) == TYPE_LEA)
20360 {
20361 rtx set = PATTERN (insn), addr;
20362
20363 if (GET_CODE (set) == PARALLEL)
20364 set = XVECEXP (set, 0, 0);
20365
20366 gcc_assert (GET_CODE (set) == SET);
20367
20368 addr = SET_SRC (set);
20369 if (TARGET_64BIT && get_attr_mode (insn) == MODE_SI)
20370 {
20371 if (GET_CODE (addr) == ZERO_EXTEND)
20372 addr = XEXP (addr, 0);
20373 if (GET_CODE (addr) == SUBREG)
20374 addr = SUBREG_REG (addr);
20375 }
20376
20377 return memory_address_length (addr);
20378 }
20379
20380 extract_insn_cached (insn);
20381 for (i = recog_data.n_operands - 1; i >= 0; --i)
20382 if (MEM_P (recog_data.operand[i]))
20383 {
20384 constrain_operands_cached (reload_completed);
20385 if (which_alternative != -1)
20386 {
20387 const char *constraints = recog_data.constraints[i];
20388 int alt = which_alternative;
20389
20390 while (*constraints == '=' || *constraints == '+')
20391 constraints++;
20392 while (alt-- > 0)
20393 while (*constraints++ != ',')
20394 ;
20395 /* Skip ignored operands. */
20396 if (*constraints == 'X')
20397 continue;
20398 }
20399 return memory_address_length (XEXP (recog_data.operand[i], 0));
20400 }
20401 return 0;
20402 }
20403
20404 /* Compute the default value for the "length_vex" attribute.  It includes
20405 the 2- or 3-byte VEX prefix and 1 opcode byte. */
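/* Reader aid (illustrative summary of the cases handled below; the lengths
include the opcode byte):

non-0f opcode, or VEX.W needed                -> 3-byte VEX + opcode = 4
32-bit code, 0f opcode, no VEX.W              -> 2-byte VEX + opcode = 3
64-bit, DImode general-register operand       -> 4 (needs REX.W)
64-bit, extended register in a memory operand -> 4 (needs REX.X or REX.B)
any other case                                -> 2-byte VEX + opcode = 3 */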
20406
20407 int
20408 ix86_attr_length_vex_default (rtx insn, int has_0f_opcode,
20409 int has_vex_w)
20410 {
20411 int i;
20412
20413 /* Only the 0f opcode can use the 2-byte VEX prefix, and the VEX.W bit
20414 requires the 3-byte VEX prefix. */
20415 if (!has_0f_opcode || has_vex_w)
20416 return 3 + 1;
20417
20418 /* We can always use 2 byte VEX prefix in 32bit. */
20419 if (!TARGET_64BIT)
20420 return 2 + 1;
20421
20422 extract_insn_cached (insn);
20423
20424 for (i = recog_data.n_operands - 1; i >= 0; --i)
20425 if (REG_P (recog_data.operand[i]))
20426 {
20427 /* REX.W bit uses 3 byte VEX prefix. */
20428 if (GET_MODE (recog_data.operand[i]) == DImode
20429 && GENERAL_REG_P (recog_data.operand[i]))
20430 return 3 + 1;
20431 }
20432 else
20433 {
20434 /* REX.X or REX.B bits use 3 byte VEX prefix. */
20435 if (MEM_P (recog_data.operand[i])
20436 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
20437 return 3 + 1;
20438 }
20439
20440 return 2 + 1;
20441 }
20442 \f
20443 /* Return the maximum number of instructions a cpu can issue. */
20444
20445 static int
20446 ix86_issue_rate (void)
20447 {
20448 switch (ix86_tune)
20449 {
20450 case PROCESSOR_PENTIUM:
20451 case PROCESSOR_ATOM:
20452 case PROCESSOR_K6:
20453 return 2;
20454
20455 case PROCESSOR_PENTIUMPRO:
20456 case PROCESSOR_PENTIUM4:
20457 case PROCESSOR_ATHLON:
20458 case PROCESSOR_K8:
20459 case PROCESSOR_AMDFAM10:
20460 case PROCESSOR_NOCONA:
20461 case PROCESSOR_GENERIC32:
20462 case PROCESSOR_GENERIC64:
20463 case PROCESSOR_BDVER1:
20464 return 3;
20465
20466 case PROCESSOR_CORE2:
20467 return 4;
20468
20469 default:
20470 return 1;
20471 }
20472 }
20473
20474 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads the flags
20475 set by DEP_INSN and nothing else set by DEP_INSN. */
20476
20477 static int
20478 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
20479 {
20480 rtx set, set2;
20481
20482 /* Simplify the test for uninteresting insns. */
20483 if (insn_type != TYPE_SETCC
20484 && insn_type != TYPE_ICMOV
20485 && insn_type != TYPE_FCMOV
20486 && insn_type != TYPE_IBR)
20487 return 0;
20488
20489 if ((set = single_set (dep_insn)) != 0)
20490 {
20491 set = SET_DEST (set);
20492 set2 = NULL_RTX;
20493 }
20494 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
20495 && XVECLEN (PATTERN (dep_insn), 0) == 2
20496 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
20497 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
20498 {
20499 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
20500 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
20501 }
20502 else
20503 return 0;
20504
20505 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
20506 return 0;
20507
20508 /* This test is true if the dependent insn reads the flags but
20509 not any other potentially set register. */
20510 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
20511 return 0;
20512
20513 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
20514 return 0;
20515
20516 return 1;
20517 }
20518
20519 /* Return true iff USE_INSN has a memory address with operands set by
20520 SET_INSN. */
20521
20522 bool
20523 ix86_agi_dependent (rtx set_insn, rtx use_insn)
20524 {
20525 int i;
20526 extract_insn_cached (use_insn);
20527 for (i = recog_data.n_operands - 1; i >= 0; --i)
20528 if (MEM_P (recog_data.operand[i]))
20529 {
20530 rtx addr = XEXP (recog_data.operand[i], 0);
20531 return modified_in_p (addr, set_insn) != 0;
20532 }
20533 return false;
20534 }
20535
20536 static int
20537 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
20538 {
20539 enum attr_type insn_type, dep_insn_type;
20540 enum attr_memory memory;
20541 rtx set, set2;
20542 int dep_insn_code_number;
20543
20544 /* Anti and output dependencies have zero cost on all CPUs. */
20545 if (REG_NOTE_KIND (link) != 0)
20546 return 0;
20547
20548 dep_insn_code_number = recog_memoized (dep_insn);
20549
20550 /* If we can't recognize the insns, we can't really do anything. */
20551 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
20552 return cost;
20553
20554 insn_type = get_attr_type (insn);
20555 dep_insn_type = get_attr_type (dep_insn);
20556
20557 switch (ix86_tune)
20558 {
20559 case PROCESSOR_PENTIUM:
20560 /* Address Generation Interlock adds a cycle of latency. */
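/* For instance (illustrative): on the original Pentium the pair
add $4, %eax ; mov (%eax), %ebx
pays an extra cycle because the load address depends on the result of the
immediately preceding add; the cost adjustments below model that stall. */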
20561 if (insn_type == TYPE_LEA)
20562 {
20563 rtx addr = PATTERN (insn);
20564
20565 if (GET_CODE (addr) == PARALLEL)
20566 addr = XVECEXP (addr, 0, 0);
20567
20568 gcc_assert (GET_CODE (addr) == SET);
20569
20570 addr = SET_SRC (addr);
20571 if (modified_in_p (addr, dep_insn))
20572 cost += 1;
20573 }
20574 else if (ix86_agi_dependent (dep_insn, insn))
20575 cost += 1;
20576
20577 /* ??? Compares pair with jump/setcc. */
20578 if (ix86_flags_dependent (insn, dep_insn, insn_type))
20579 cost = 0;
20580
20581 /* Floating point stores require value to be ready one cycle earlier. */
20582 if (insn_type == TYPE_FMOV
20583 && get_attr_memory (insn) == MEMORY_STORE
20584 && !ix86_agi_dependent (dep_insn, insn))
20585 cost += 1;
20586 break;
20587
20588 case PROCESSOR_PENTIUMPRO:
20589 memory = get_attr_memory (insn);
20590
20591 /* INT->FP conversion is expensive. */
20592 if (get_attr_fp_int_src (dep_insn))
20593 cost += 5;
20594
20595 /* There is one cycle extra latency between an FP op and a store. */
20596 if (insn_type == TYPE_FMOV
20597 && (set = single_set (dep_insn)) != NULL_RTX
20598 && (set2 = single_set (insn)) != NULL_RTX
20599 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
20600 && MEM_P (SET_DEST (set2)))
20601 cost += 1;
20602
20603 /* Model the ability of the reorder buffer to hide the latency of a load by
20604 executing it in parallel with the previous instruction when the previous
20605 instruction is not needed to compute the address. */
20606 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
20607 && !ix86_agi_dependent (dep_insn, insn))
20608 {
20609 /* Claim that moves take one cycle, as the core can issue one load
20610 at a time and the next load can start a cycle later. */
20611 if (dep_insn_type == TYPE_IMOV
20612 || dep_insn_type == TYPE_FMOV)
20613 cost = 1;
20614 else if (cost > 1)
20615 cost--;
20616 }
20617 break;
20618
20619 case PROCESSOR_K6:
20620 memory = get_attr_memory (insn);
20621
20622 /* The esp dependency is resolved before the instruction is really
20623 finished. */
20624 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
20625 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
20626 return 1;
20627
20628 /* INT->FP conversion is expensive. */
20629 if (get_attr_fp_int_src (dep_insn))
20630 cost += 5;
20631
20632 /* Model the ability of the reorder buffer to hide the latency of a load by
20633 executing it in parallel with the previous instruction when the previous
20634 instruction is not needed to compute the address. */
20635 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
20636 && !ix86_agi_dependent (dep_insn, insn))
20637 {
20638 /* Claim that moves take one cycle, as the core can issue one load
20639 at a time and the next load can start a cycle later. */
20640 if (dep_insn_type == TYPE_IMOV
20641 || dep_insn_type == TYPE_FMOV)
20642 cost = 1;
20643 else if (cost > 2)
20644 cost -= 2;
20645 else
20646 cost = 1;
20647 }
20648 break;
20649
20650 case PROCESSOR_ATHLON:
20651 case PROCESSOR_K8:
20652 case PROCESSOR_AMDFAM10:
20653 case PROCESSOR_BDVER1:
20654 case PROCESSOR_ATOM:
20655 case PROCESSOR_GENERIC32:
20656 case PROCESSOR_GENERIC64:
20657 memory = get_attr_memory (insn);
20658
20659 /* Model the ability of the reorder buffer to hide the latency of a load by
20660 executing it in parallel with the previous instruction when the previous
20661 instruction is not needed to compute the address. */
20662 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
20663 && !ix86_agi_dependent (dep_insn, insn))
20664 {
20665 enum attr_unit unit = get_attr_unit (insn);
20666 int loadcost = 3;
20667
20668 /* Because of the difference between the length of integer and
20669 floating unit pipeline preparation stages, the memory operands
20670 for floating point are cheaper.
20671
20672 ??? For Athlon the difference is most probably 2. */
20673 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
20674 loadcost = 3;
20675 else
20676 loadcost = TARGET_ATHLON ? 2 : 0;
20677
20678 if (cost >= loadcost)
20679 cost -= loadcost;
20680 else
20681 cost = 0;
20682 }
20683
20684 default:
20685 break;
20686 }
20687
20688 return cost;
20689 }
20690
20691 /* How many alternative schedules to try. This should be as wide as the
20692 scheduling freedom in the DFA, but no wider. Making this value too
20693 large results in extra work for the scheduler. */
20694
20695 static int
20696 ia32_multipass_dfa_lookahead (void)
20697 {
20698 switch (ix86_tune)
20699 {
20700 case PROCESSOR_PENTIUM:
20701 return 2;
20702
20703 case PROCESSOR_PENTIUMPRO:
20704 case PROCESSOR_K6:
20705 return 1;
20706
20707 default:
20708 return 0;
20709 }
20710 }
20711
20712 \f
20713 /* Compute the alignment given to a constant that is being placed in memory.
20714 EXP is the constant and ALIGN is the alignment that the object would
20715 ordinarily have.
20716 The value of this function is used instead of that alignment to align
20717 the object. */
20718
20719 int
20720 ix86_constant_alignment (tree exp, int align)
20721 {
20722 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
20723 || TREE_CODE (exp) == INTEGER_CST)
20724 {
20725 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
20726 return 64;
20727 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
20728 return 128;
20729 }
20730 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
20731 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
20732 return BITS_PER_WORD;
20733
20734 return align;
20735 }
20736
20737 /* Compute the alignment for a static variable.
20738 TYPE is the data type, and ALIGN is the alignment that
20739 the object would ordinarily have. The value of this function is used
20740 instead of that alignment to align the object. */
20741
20742 int
20743 ix86_data_alignment (tree type, int align)
20744 {
20745 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
20746
20747 if (AGGREGATE_TYPE_P (type)
20748 && TYPE_SIZE (type)
20749 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
20750 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
20751 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
20752 && align < max_align)
20753 align = max_align;
20754
20755 /* The x86-64 ABI requires arrays of at least 16 bytes to be aligned
20756 to a 16-byte boundary. */
20757 if (TARGET_64BIT)
20758 {
20759 if (AGGREGATE_TYPE_P (type)
20760 && TYPE_SIZE (type)
20761 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
20762 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
20763 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
20764 return 128;
20765 }
20766
20767 if (TREE_CODE (type) == ARRAY_TYPE)
20768 {
20769 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
20770 return 64;
20771 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
20772 return 128;
20773 }
20774 else if (TREE_CODE (type) == COMPLEX_TYPE)
20775 {
20776
20777 if (TYPE_MODE (type) == DCmode && align < 64)
20778 return 64;
20779 if ((TYPE_MODE (type) == XCmode
20780 || TYPE_MODE (type) == TCmode) && align < 128)
20781 return 128;
20782 }
20783 else if ((TREE_CODE (type) == RECORD_TYPE
20784 || TREE_CODE (type) == UNION_TYPE
20785 || TREE_CODE (type) == QUAL_UNION_TYPE)
20786 && TYPE_FIELDS (type))
20787 {
20788 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
20789 return 64;
20790 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
20791 return 128;
20792 }
20793 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
20794 || TREE_CODE (type) == INTEGER_TYPE)
20795 {
20796 if (TYPE_MODE (type) == DFmode && align < 64)
20797 return 64;
20798 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
20799 return 128;
20800 }
20801
20802 return align;
20803 }
20804
20805 /* Compute the alignment for a local variable or a stack slot. EXP is
20806 the data type or decl itself, MODE is the widest mode available and
20807 ALIGN is the alignment that the object would ordinarily have. The
20808 value of this macro is used instead of that alignment to align the
20809 object. */
20810
20811 unsigned int
20812 ix86_local_alignment (tree exp, enum machine_mode mode,
20813 unsigned int align)
20814 {
20815 tree type, decl;
20816
20817 if (exp && DECL_P (exp))
20818 {
20819 type = TREE_TYPE (exp);
20820 decl = exp;
20821 }
20822 else
20823 {
20824 type = exp;
20825 decl = NULL;
20826 }
20827
20828 /* Don't do dynamic stack realignment for long long objects with
20829 -mpreferred-stack-boundary=2. */
20830 if (!TARGET_64BIT
20831 && align == 64
20832 && ix86_preferred_stack_boundary < 64
20833 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
20834 && (!type || !TYPE_USER_ALIGN (type))
20835 && (!decl || !DECL_USER_ALIGN (decl)))
20836 align = 32;
20837
20838 /* If TYPE is NULL, we are allocating a stack slot for caller-save
20839 register in MODE. We will return the largest alignment of XF
20840 and DF. */
20841 if (!type)
20842 {
20843 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
20844 align = GET_MODE_ALIGNMENT (DFmode);
20845 return align;
20846 }
20847
20848 /* The x86-64 ABI requires arrays of at least 16 bytes to be aligned
20849 to a 16-byte boundary.  The exact wording is:
20850
20851 An array uses the same alignment as its elements, except that a local or
20852 global array variable of length at least 16 bytes or
20853 a C99 variable-length array variable always has alignment of at least 16 bytes.
20854
20855 This was added to allow use of aligned SSE instructions on arrays.  The
20856 rule is meant for static storage (where the compiler cannot do the
20857 analysis by itself).  We follow it for automatic variables only when it
20858 is convenient: we fully control everything in the function being
20859 compiled, and functions from other units cannot rely on the alignment.
20860
20861 Exclude the va_list type.  It is the common case of a local array for
20862 which we cannot benefit from the alignment. */
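/* Concrete example (illustrative): when optimizing for speed on x86-64 with
SSE enabled, a local "char buf[32];" is given 128-bit alignment by the
check below, whereas a local array smaller than 16 bytes keeps the
alignment of its element type. */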
20863 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
20864 && TARGET_SSE)
20865 {
20866 if (AGGREGATE_TYPE_P (type)
20867 && (TYPE_MAIN_VARIANT (type)
20868 != TYPE_MAIN_VARIANT (va_list_type_node))
20869 && TYPE_SIZE (type)
20870 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
20871 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
20872 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
20873 return 128;
20874 }
20875 if (TREE_CODE (type) == ARRAY_TYPE)
20876 {
20877 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
20878 return 64;
20879 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
20880 return 128;
20881 }
20882 else if (TREE_CODE (type) == COMPLEX_TYPE)
20883 {
20884 if (TYPE_MODE (type) == DCmode && align < 64)
20885 return 64;
20886 if ((TYPE_MODE (type) == XCmode
20887 || TYPE_MODE (type) == TCmode) && align < 128)
20888 return 128;
20889 }
20890 else if ((TREE_CODE (type) == RECORD_TYPE
20891 || TREE_CODE (type) == UNION_TYPE
20892 || TREE_CODE (type) == QUAL_UNION_TYPE)
20893 && TYPE_FIELDS (type))
20894 {
20895 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
20896 return 64;
20897 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
20898 return 128;
20899 }
20900 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
20901 || TREE_CODE (type) == INTEGER_TYPE)
20902 {
20903
20904 if (TYPE_MODE (type) == DFmode && align < 64)
20905 return 64;
20906 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
20907 return 128;
20908 }
20909 return align;
20910 }
20911
20912 /* Compute the minimum required alignment for dynamic stack realignment
20913 purposes for a local variable, parameter or a stack slot. EXP is
20914 the data type or decl itself, MODE is its mode and ALIGN is the
20915 alignment that the object would ordinarily have. */
20916
20917 unsigned int
20918 ix86_minimum_alignment (tree exp, enum machine_mode mode,
20919 unsigned int align)
20920 {
20921 tree type, decl;
20922
20923 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
20924 return align;
20925
20926 if (exp && DECL_P (exp))
20927 {
20928 type = TREE_TYPE (exp);
20929 decl = exp;
20930 }
20931 else
20932 {
20933 type = exp;
20934 decl = NULL;
20935 }
20936
20937 /* Don't do dynamic stack realignment for long long objects with
20938 -mpreferred-stack-boundary=2. */
20939 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
20940 && (!type || !TYPE_USER_ALIGN (type))
20941 && (!decl || !DECL_USER_ALIGN (decl)))
20942 return 32;
20943
20944 return align;
20945 }
20946 \f
20947 /* Find a location for the static chain incoming to a nested function.
20948 This is a register, unless all free registers are used by arguments. */
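/* In short (reader aid; the details follow below): 64-bit code always uses
%r10; 32-bit code uses %ecx by default, %eax for fastcall and thiscall
functions (whose %ecx/%edx are taken by arguments), and falls back to a
stack slot plus an %esi-based alternate entry for regparm(3) functions. */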
20949
20950 static rtx
20951 ix86_static_chain (const_tree fndecl, bool incoming_p)
20952 {
20953 unsigned regno;
20954
20955 if (!DECL_STATIC_CHAIN (fndecl))
20956 return NULL;
20957
20958 if (TARGET_64BIT)
20959 {
20960 /* We always use R10 in 64-bit mode. */
20961 regno = R10_REG;
20962 }
20963 else
20964 {
20965 tree fntype;
20966 /* By default in 32-bit mode we use ECX to pass the static chain. */
20967 regno = CX_REG;
20968
20969 fntype = TREE_TYPE (fndecl);
20970 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
20971 {
20972 /* Fastcall functions use ecx/edx for arguments, which leaves
20973 us with EAX for the static chain. */
20974 regno = AX_REG;
20975 }
20976 else if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)))
20977 {
20978 /* Thiscall functions use ecx for arguments, which leaves
20979 us with EAX for the static chain. */
20980 regno = AX_REG;
20981 }
20982 else if (ix86_function_regparm (fntype, fndecl) == 3)
20983 {
20984 /* For regparm 3, we have no free call-clobbered registers in
20985 which to store the static chain. In order to implement this,
20986 we have the trampoline push the static chain to the stack.
20987 However, we can't push a value below the return address when
20988 we call the nested function directly, so we have to use an
20989 alternate entry point. For this we use ESI, and have the
20990 alternate entry point push ESI, so that things appear the
20991 same once we're executing the nested function. */
20992 if (incoming_p)
20993 {
20994 if (fndecl == current_function_decl)
20995 ix86_static_chain_on_stack = true;
20996 return gen_frame_mem (SImode,
20997 plus_constant (arg_pointer_rtx, -8));
20998 }
20999 regno = SI_REG;
21000 }
21001 }
21002
21003 return gen_rtx_REG (Pmode, regno);
21004 }
21005
21006 /* Emit RTL insns to initialize the variable parts of a trampoline.
21007 FNDECL is the decl of the target address; M_TRAMP is a MEM for
21008 the trampoline, and CHAIN_VALUE is an RTX for the static chain
21009 to be passed to the target function. */
21010
21011 static void
21012 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
21013 {
21014 rtx mem, fnaddr;
21015
21016 fnaddr = XEXP (DECL_RTL (fndecl), 0);
21017
21018 if (!TARGET_64BIT)
21019 {
21020 rtx disp, chain;
21021 int opcode;
21022
21023 /* Depending on the static chain location, either load a register
21024 with a constant, or push the constant to the stack. All of the
21025 instructions are the same size. */
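/* Byte layout of the 32-bit trampoline written below (illustrative):

offset 0:  b9 / b8 / 68     mov $chain,%ecx / mov $chain,%eax / push $chain
offset 1:  <chain_value>    32-bit immediate
offset 5:  e9               jmp rel32
offset 6:  <disp32>         displacement to the target (see the offset
computation below for the push-on-stack adjustment)

ten bytes in total, matching the stores that follow. */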
21026 chain = ix86_static_chain (fndecl, true);
21027 if (REG_P (chain))
21028 {
21029 if (REGNO (chain) == CX_REG)
21030 opcode = 0xb9;
21031 else if (REGNO (chain) == AX_REG)
21032 opcode = 0xb8;
21033 else
21034 gcc_unreachable ();
21035 }
21036 else
21037 opcode = 0x68;
21038
21039 mem = adjust_address (m_tramp, QImode, 0);
21040 emit_move_insn (mem, gen_int_mode (opcode, QImode));
21041
21042 mem = adjust_address (m_tramp, SImode, 1);
21043 emit_move_insn (mem, chain_value);
21044
21045 /* Compute offset from the end of the jmp to the target function.
21046 In the case in which the trampoline stores the static chain on
21047 the stack, we need to skip the first insn which pushes the
21048 (call-saved) register static chain; this push is 1 byte. */
21049 disp = expand_binop (SImode, sub_optab, fnaddr,
21050 plus_constant (XEXP (m_tramp, 0),
21051 MEM_P (chain) ? 9 : 10),
21052 NULL_RTX, 1, OPTAB_DIRECT);
21053
21054 mem = adjust_address (m_tramp, QImode, 5);
21055 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
21056
21057 mem = adjust_address (m_tramp, SImode, 6);
21058 emit_move_insn (mem, disp);
21059 }
21060 else
21061 {
21062 int offset = 0;
21063
21064 /* Load the function address into r11.  Try to load the address using
21065 the shorter movl instead of movabs.  We may want to support
21066 movq for kernel mode, but the kernel does not use trampolines at
21067 the moment. */
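/* Byte layout of the 64-bit trampoline (illustrative):

41 bb <imm32>    movl $fnaddr, %r11d   (short form), or
49 bb <imm64>    movabs $fnaddr, %r11
49 ba <imm64>    movabs $chain, %r10
49 ff e3 90      jmp *%r11, plus a padding nop

which matches the HImode/SImode/DImode stores below. */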
21068 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
21069 {
21070 fnaddr = copy_to_mode_reg (DImode, fnaddr);
21071
21072 mem = adjust_address (m_tramp, HImode, offset);
21073 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
21074
21075 mem = adjust_address (m_tramp, SImode, offset + 2);
21076 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
21077 offset += 6;
21078 }
21079 else
21080 {
21081 mem = adjust_address (m_tramp, HImode, offset);
21082 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
21083
21084 mem = adjust_address (m_tramp, DImode, offset + 2);
21085 emit_move_insn (mem, fnaddr);
21086 offset += 10;
21087 }
21088
21089 /* Load static chain using movabs to r10. */
21090 mem = adjust_address (m_tramp, HImode, offset);
21091 emit_move_insn (mem, gen_int_mode (0xba49, HImode));
21092
21093 mem = adjust_address (m_tramp, DImode, offset + 2);
21094 emit_move_insn (mem, chain_value);
21095 offset += 10;
21096
21097 /* Jump to r11; the last (unused) byte is a nop, only there to
21098 pad the write out to a single 32-bit store. */
21099 mem = adjust_address (m_tramp, SImode, offset);
21100 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
21101 offset += 4;
21102
21103 gcc_assert (offset <= TRAMPOLINE_SIZE);
21104 }
21105
21106 #ifdef ENABLE_EXECUTE_STACK
21107 #ifdef CHECK_EXECUTE_STACK_ENABLED
21108 if (CHECK_EXECUTE_STACK_ENABLED)
21109 #endif
21110 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
21111 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
21112 #endif
21113 }
21114 \f
21115 /* The following file contains several enumerations and data structures
21116 built from the definitions in i386-builtin-types.def. */
21117
21118 #include "i386-builtin-types.inc"
21119
21120 /* Table for the ix86 builtin non-function types. */
21121 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
21122
21123 /* Retrieve an element from the above table, building some of
21124 the types lazily. */
21125
21126 static tree
21127 ix86_get_builtin_type (enum ix86_builtin_type tcode)
21128 {
21129 unsigned int index;
21130 tree type, itype;
21131
21132 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
21133
21134 type = ix86_builtin_type_tab[(int) tcode];
21135 if (type != NULL)
21136 return type;
21137
21138 gcc_assert (tcode > IX86_BT_LAST_PRIM);
21139 if (tcode <= IX86_BT_LAST_VECT)
21140 {
21141 enum machine_mode mode;
21142
21143 index = tcode - IX86_BT_LAST_PRIM - 1;
21144 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
21145 mode = ix86_builtin_type_vect_mode[index];
21146
21147 type = build_vector_type_for_mode (itype, mode);
21148 }
21149 else
21150 {
21151 int quals;
21152
21153 index = tcode - IX86_BT_LAST_VECT - 1;
21154 if (tcode <= IX86_BT_LAST_PTR)
21155 quals = TYPE_UNQUALIFIED;
21156 else
21157 quals = TYPE_QUAL_CONST;
21158
21159 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
21160 if (quals != TYPE_UNQUALIFIED)
21161 itype = build_qualified_type (itype, quals);
21162
21163 type = build_pointer_type (itype);
21164 }
21165
21166 ix86_builtin_type_tab[(int) tcode] = type;
21167 return type;
21168 }
21169
21170 /* Table for the ix86 builtin function types. */
21171 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
21172
21173 /* Retrieve an element from the above table, building some of
21174 the types lazily. */
21175
21176 static tree
21177 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
21178 {
21179 tree type;
21180
21181 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
21182
21183 type = ix86_builtin_func_type_tab[(int) tcode];
21184 if (type != NULL)
21185 return type;
21186
21187 if (tcode <= IX86_BT_LAST_FUNC)
21188 {
21189 unsigned start = ix86_builtin_func_start[(int) tcode];
21190 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
21191 tree rtype, atype, args = void_list_node;
21192 unsigned i;
21193
21194 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
21195 for (i = after - 1; i > start; --i)
21196 {
21197 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
21198 args = tree_cons (NULL, atype, args);
21199 }
21200
21201 type = build_function_type (rtype, args);
21202 }
21203 else
21204 {
21205 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
21206 enum ix86_builtin_func_type icode;
21207
21208 icode = ix86_builtin_func_alias_base[index];
21209 type = ix86_get_builtin_func_type (icode);
21210 }
21211
21212 ix86_builtin_func_type_tab[(int) tcode] = type;
21213 return type;
21214 }
21215
21216
21217 /* Codes for all the SSE/MMX builtins. */
21218 enum ix86_builtins
21219 {
21220 IX86_BUILTIN_ADDPS,
21221 IX86_BUILTIN_ADDSS,
21222 IX86_BUILTIN_DIVPS,
21223 IX86_BUILTIN_DIVSS,
21224 IX86_BUILTIN_MULPS,
21225 IX86_BUILTIN_MULSS,
21226 IX86_BUILTIN_SUBPS,
21227 IX86_BUILTIN_SUBSS,
21228
21229 IX86_BUILTIN_CMPEQPS,
21230 IX86_BUILTIN_CMPLTPS,
21231 IX86_BUILTIN_CMPLEPS,
21232 IX86_BUILTIN_CMPGTPS,
21233 IX86_BUILTIN_CMPGEPS,
21234 IX86_BUILTIN_CMPNEQPS,
21235 IX86_BUILTIN_CMPNLTPS,
21236 IX86_BUILTIN_CMPNLEPS,
21237 IX86_BUILTIN_CMPNGTPS,
21238 IX86_BUILTIN_CMPNGEPS,
21239 IX86_BUILTIN_CMPORDPS,
21240 IX86_BUILTIN_CMPUNORDPS,
21241 IX86_BUILTIN_CMPEQSS,
21242 IX86_BUILTIN_CMPLTSS,
21243 IX86_BUILTIN_CMPLESS,
21244 IX86_BUILTIN_CMPNEQSS,
21245 IX86_BUILTIN_CMPNLTSS,
21246 IX86_BUILTIN_CMPNLESS,
21247 IX86_BUILTIN_CMPNGTSS,
21248 IX86_BUILTIN_CMPNGESS,
21249 IX86_BUILTIN_CMPORDSS,
21250 IX86_BUILTIN_CMPUNORDSS,
21251
21252 IX86_BUILTIN_COMIEQSS,
21253 IX86_BUILTIN_COMILTSS,
21254 IX86_BUILTIN_COMILESS,
21255 IX86_BUILTIN_COMIGTSS,
21256 IX86_BUILTIN_COMIGESS,
21257 IX86_BUILTIN_COMINEQSS,
21258 IX86_BUILTIN_UCOMIEQSS,
21259 IX86_BUILTIN_UCOMILTSS,
21260 IX86_BUILTIN_UCOMILESS,
21261 IX86_BUILTIN_UCOMIGTSS,
21262 IX86_BUILTIN_UCOMIGESS,
21263 IX86_BUILTIN_UCOMINEQSS,
21264
21265 IX86_BUILTIN_CVTPI2PS,
21266 IX86_BUILTIN_CVTPS2PI,
21267 IX86_BUILTIN_CVTSI2SS,
21268 IX86_BUILTIN_CVTSI642SS,
21269 IX86_BUILTIN_CVTSS2SI,
21270 IX86_BUILTIN_CVTSS2SI64,
21271 IX86_BUILTIN_CVTTPS2PI,
21272 IX86_BUILTIN_CVTTSS2SI,
21273 IX86_BUILTIN_CVTTSS2SI64,
21274
21275 IX86_BUILTIN_MAXPS,
21276 IX86_BUILTIN_MAXSS,
21277 IX86_BUILTIN_MINPS,
21278 IX86_BUILTIN_MINSS,
21279
21280 IX86_BUILTIN_LOADUPS,
21281 IX86_BUILTIN_STOREUPS,
21282 IX86_BUILTIN_MOVSS,
21283
21284 IX86_BUILTIN_MOVHLPS,
21285 IX86_BUILTIN_MOVLHPS,
21286 IX86_BUILTIN_LOADHPS,
21287 IX86_BUILTIN_LOADLPS,
21288 IX86_BUILTIN_STOREHPS,
21289 IX86_BUILTIN_STORELPS,
21290
21291 IX86_BUILTIN_MASKMOVQ,
21292 IX86_BUILTIN_MOVMSKPS,
21293 IX86_BUILTIN_PMOVMSKB,
21294
21295 IX86_BUILTIN_MOVNTPS,
21296 IX86_BUILTIN_MOVNTQ,
21297
21298 IX86_BUILTIN_LOADDQU,
21299 IX86_BUILTIN_STOREDQU,
21300
21301 IX86_BUILTIN_PACKSSWB,
21302 IX86_BUILTIN_PACKSSDW,
21303 IX86_BUILTIN_PACKUSWB,
21304
21305 IX86_BUILTIN_PADDB,
21306 IX86_BUILTIN_PADDW,
21307 IX86_BUILTIN_PADDD,
21308 IX86_BUILTIN_PADDQ,
21309 IX86_BUILTIN_PADDSB,
21310 IX86_BUILTIN_PADDSW,
21311 IX86_BUILTIN_PADDUSB,
21312 IX86_BUILTIN_PADDUSW,
21313 IX86_BUILTIN_PSUBB,
21314 IX86_BUILTIN_PSUBW,
21315 IX86_BUILTIN_PSUBD,
21316 IX86_BUILTIN_PSUBQ,
21317 IX86_BUILTIN_PSUBSB,
21318 IX86_BUILTIN_PSUBSW,
21319 IX86_BUILTIN_PSUBUSB,
21320 IX86_BUILTIN_PSUBUSW,
21321
21322 IX86_BUILTIN_PAND,
21323 IX86_BUILTIN_PANDN,
21324 IX86_BUILTIN_POR,
21325 IX86_BUILTIN_PXOR,
21326
21327 IX86_BUILTIN_PAVGB,
21328 IX86_BUILTIN_PAVGW,
21329
21330 IX86_BUILTIN_PCMPEQB,
21331 IX86_BUILTIN_PCMPEQW,
21332 IX86_BUILTIN_PCMPEQD,
21333 IX86_BUILTIN_PCMPGTB,
21334 IX86_BUILTIN_PCMPGTW,
21335 IX86_BUILTIN_PCMPGTD,
21336
21337 IX86_BUILTIN_PMADDWD,
21338
21339 IX86_BUILTIN_PMAXSW,
21340 IX86_BUILTIN_PMAXUB,
21341 IX86_BUILTIN_PMINSW,
21342 IX86_BUILTIN_PMINUB,
21343
21344 IX86_BUILTIN_PMULHUW,
21345 IX86_BUILTIN_PMULHW,
21346 IX86_BUILTIN_PMULLW,
21347
21348 IX86_BUILTIN_PSADBW,
21349 IX86_BUILTIN_PSHUFW,
21350
21351 IX86_BUILTIN_PSLLW,
21352 IX86_BUILTIN_PSLLD,
21353 IX86_BUILTIN_PSLLQ,
21354 IX86_BUILTIN_PSRAW,
21355 IX86_BUILTIN_PSRAD,
21356 IX86_BUILTIN_PSRLW,
21357 IX86_BUILTIN_PSRLD,
21358 IX86_BUILTIN_PSRLQ,
21359 IX86_BUILTIN_PSLLWI,
21360 IX86_BUILTIN_PSLLDI,
21361 IX86_BUILTIN_PSLLQI,
21362 IX86_BUILTIN_PSRAWI,
21363 IX86_BUILTIN_PSRADI,
21364 IX86_BUILTIN_PSRLWI,
21365 IX86_BUILTIN_PSRLDI,
21366 IX86_BUILTIN_PSRLQI,
21367
21368 IX86_BUILTIN_PUNPCKHBW,
21369 IX86_BUILTIN_PUNPCKHWD,
21370 IX86_BUILTIN_PUNPCKHDQ,
21371 IX86_BUILTIN_PUNPCKLBW,
21372 IX86_BUILTIN_PUNPCKLWD,
21373 IX86_BUILTIN_PUNPCKLDQ,
21374
21375 IX86_BUILTIN_SHUFPS,
21376
21377 IX86_BUILTIN_RCPPS,
21378 IX86_BUILTIN_RCPSS,
21379 IX86_BUILTIN_RSQRTPS,
21380 IX86_BUILTIN_RSQRTPS_NR,
21381 IX86_BUILTIN_RSQRTSS,
21382 IX86_BUILTIN_RSQRTF,
21383 IX86_BUILTIN_SQRTPS,
21384 IX86_BUILTIN_SQRTPS_NR,
21385 IX86_BUILTIN_SQRTSS,
21386
21387 IX86_BUILTIN_UNPCKHPS,
21388 IX86_BUILTIN_UNPCKLPS,
21389
21390 IX86_BUILTIN_ANDPS,
21391 IX86_BUILTIN_ANDNPS,
21392 IX86_BUILTIN_ORPS,
21393 IX86_BUILTIN_XORPS,
21394
21395 IX86_BUILTIN_EMMS,
21396 IX86_BUILTIN_LDMXCSR,
21397 IX86_BUILTIN_STMXCSR,
21398 IX86_BUILTIN_SFENCE,
21399
21400 /* 3DNow! Original */
21401 IX86_BUILTIN_FEMMS,
21402 IX86_BUILTIN_PAVGUSB,
21403 IX86_BUILTIN_PF2ID,
21404 IX86_BUILTIN_PFACC,
21405 IX86_BUILTIN_PFADD,
21406 IX86_BUILTIN_PFCMPEQ,
21407 IX86_BUILTIN_PFCMPGE,
21408 IX86_BUILTIN_PFCMPGT,
21409 IX86_BUILTIN_PFMAX,
21410 IX86_BUILTIN_PFMIN,
21411 IX86_BUILTIN_PFMUL,
21412 IX86_BUILTIN_PFRCP,
21413 IX86_BUILTIN_PFRCPIT1,
21414 IX86_BUILTIN_PFRCPIT2,
21415 IX86_BUILTIN_PFRSQIT1,
21416 IX86_BUILTIN_PFRSQRT,
21417 IX86_BUILTIN_PFSUB,
21418 IX86_BUILTIN_PFSUBR,
21419 IX86_BUILTIN_PI2FD,
21420 IX86_BUILTIN_PMULHRW,
21421
21422 /* 3DNow! Athlon Extensions */
21423 IX86_BUILTIN_PF2IW,
21424 IX86_BUILTIN_PFNACC,
21425 IX86_BUILTIN_PFPNACC,
21426 IX86_BUILTIN_PI2FW,
21427 IX86_BUILTIN_PSWAPDSI,
21428 IX86_BUILTIN_PSWAPDSF,
21429
21430 /* SSE2 */
21431 IX86_BUILTIN_ADDPD,
21432 IX86_BUILTIN_ADDSD,
21433 IX86_BUILTIN_DIVPD,
21434 IX86_BUILTIN_DIVSD,
21435 IX86_BUILTIN_MULPD,
21436 IX86_BUILTIN_MULSD,
21437 IX86_BUILTIN_SUBPD,
21438 IX86_BUILTIN_SUBSD,
21439
21440 IX86_BUILTIN_CMPEQPD,
21441 IX86_BUILTIN_CMPLTPD,
21442 IX86_BUILTIN_CMPLEPD,
21443 IX86_BUILTIN_CMPGTPD,
21444 IX86_BUILTIN_CMPGEPD,
21445 IX86_BUILTIN_CMPNEQPD,
21446 IX86_BUILTIN_CMPNLTPD,
21447 IX86_BUILTIN_CMPNLEPD,
21448 IX86_BUILTIN_CMPNGTPD,
21449 IX86_BUILTIN_CMPNGEPD,
21450 IX86_BUILTIN_CMPORDPD,
21451 IX86_BUILTIN_CMPUNORDPD,
21452 IX86_BUILTIN_CMPEQSD,
21453 IX86_BUILTIN_CMPLTSD,
21454 IX86_BUILTIN_CMPLESD,
21455 IX86_BUILTIN_CMPNEQSD,
21456 IX86_BUILTIN_CMPNLTSD,
21457 IX86_BUILTIN_CMPNLESD,
21458 IX86_BUILTIN_CMPORDSD,
21459 IX86_BUILTIN_CMPUNORDSD,
21460
21461 IX86_BUILTIN_COMIEQSD,
21462 IX86_BUILTIN_COMILTSD,
21463 IX86_BUILTIN_COMILESD,
21464 IX86_BUILTIN_COMIGTSD,
21465 IX86_BUILTIN_COMIGESD,
21466 IX86_BUILTIN_COMINEQSD,
21467 IX86_BUILTIN_UCOMIEQSD,
21468 IX86_BUILTIN_UCOMILTSD,
21469 IX86_BUILTIN_UCOMILESD,
21470 IX86_BUILTIN_UCOMIGTSD,
21471 IX86_BUILTIN_UCOMIGESD,
21472 IX86_BUILTIN_UCOMINEQSD,
21473
21474 IX86_BUILTIN_MAXPD,
21475 IX86_BUILTIN_MAXSD,
21476 IX86_BUILTIN_MINPD,
21477 IX86_BUILTIN_MINSD,
21478
21479 IX86_BUILTIN_ANDPD,
21480 IX86_BUILTIN_ANDNPD,
21481 IX86_BUILTIN_ORPD,
21482 IX86_BUILTIN_XORPD,
21483
21484 IX86_BUILTIN_SQRTPD,
21485 IX86_BUILTIN_SQRTSD,
21486
21487 IX86_BUILTIN_UNPCKHPD,
21488 IX86_BUILTIN_UNPCKLPD,
21489
21490 IX86_BUILTIN_SHUFPD,
21491
21492 IX86_BUILTIN_LOADUPD,
21493 IX86_BUILTIN_STOREUPD,
21494 IX86_BUILTIN_MOVSD,
21495
21496 IX86_BUILTIN_LOADHPD,
21497 IX86_BUILTIN_LOADLPD,
21498
21499 IX86_BUILTIN_CVTDQ2PD,
21500 IX86_BUILTIN_CVTDQ2PS,
21501
21502 IX86_BUILTIN_CVTPD2DQ,
21503 IX86_BUILTIN_CVTPD2PI,
21504 IX86_BUILTIN_CVTPD2PS,
21505 IX86_BUILTIN_CVTTPD2DQ,
21506 IX86_BUILTIN_CVTTPD2PI,
21507
21508 IX86_BUILTIN_CVTPI2PD,
21509 IX86_BUILTIN_CVTSI2SD,
21510 IX86_BUILTIN_CVTSI642SD,
21511
21512 IX86_BUILTIN_CVTSD2SI,
21513 IX86_BUILTIN_CVTSD2SI64,
21514 IX86_BUILTIN_CVTSD2SS,
21515 IX86_BUILTIN_CVTSS2SD,
21516 IX86_BUILTIN_CVTTSD2SI,
21517 IX86_BUILTIN_CVTTSD2SI64,
21518
21519 IX86_BUILTIN_CVTPS2DQ,
21520 IX86_BUILTIN_CVTPS2PD,
21521 IX86_BUILTIN_CVTTPS2DQ,
21522
21523 IX86_BUILTIN_MOVNTI,
21524 IX86_BUILTIN_MOVNTPD,
21525 IX86_BUILTIN_MOVNTDQ,
21526
21527 IX86_BUILTIN_MOVQ128,
21528
21529 /* SSE2 MMX */
21530 IX86_BUILTIN_MASKMOVDQU,
21531 IX86_BUILTIN_MOVMSKPD,
21532 IX86_BUILTIN_PMOVMSKB128,
21533
21534 IX86_BUILTIN_PACKSSWB128,
21535 IX86_BUILTIN_PACKSSDW128,
21536 IX86_BUILTIN_PACKUSWB128,
21537
21538 IX86_BUILTIN_PADDB128,
21539 IX86_BUILTIN_PADDW128,
21540 IX86_BUILTIN_PADDD128,
21541 IX86_BUILTIN_PADDQ128,
21542 IX86_BUILTIN_PADDSB128,
21543 IX86_BUILTIN_PADDSW128,
21544 IX86_BUILTIN_PADDUSB128,
21545 IX86_BUILTIN_PADDUSW128,
21546 IX86_BUILTIN_PSUBB128,
21547 IX86_BUILTIN_PSUBW128,
21548 IX86_BUILTIN_PSUBD128,
21549 IX86_BUILTIN_PSUBQ128,
21550 IX86_BUILTIN_PSUBSB128,
21551 IX86_BUILTIN_PSUBSW128,
21552 IX86_BUILTIN_PSUBUSB128,
21553 IX86_BUILTIN_PSUBUSW128,
21554
21555 IX86_BUILTIN_PAND128,
21556 IX86_BUILTIN_PANDN128,
21557 IX86_BUILTIN_POR128,
21558 IX86_BUILTIN_PXOR128,
21559
21560 IX86_BUILTIN_PAVGB128,
21561 IX86_BUILTIN_PAVGW128,
21562
21563 IX86_BUILTIN_PCMPEQB128,
21564 IX86_BUILTIN_PCMPEQW128,
21565 IX86_BUILTIN_PCMPEQD128,
21566 IX86_BUILTIN_PCMPGTB128,
21567 IX86_BUILTIN_PCMPGTW128,
21568 IX86_BUILTIN_PCMPGTD128,
21569
21570 IX86_BUILTIN_PMADDWD128,
21571
21572 IX86_BUILTIN_PMAXSW128,
21573 IX86_BUILTIN_PMAXUB128,
21574 IX86_BUILTIN_PMINSW128,
21575 IX86_BUILTIN_PMINUB128,
21576
21577 IX86_BUILTIN_PMULUDQ,
21578 IX86_BUILTIN_PMULUDQ128,
21579 IX86_BUILTIN_PMULHUW128,
21580 IX86_BUILTIN_PMULHW128,
21581 IX86_BUILTIN_PMULLW128,
21582
21583 IX86_BUILTIN_PSADBW128,
21584 IX86_BUILTIN_PSHUFHW,
21585 IX86_BUILTIN_PSHUFLW,
21586 IX86_BUILTIN_PSHUFD,
21587
21588 IX86_BUILTIN_PSLLDQI128,
21589 IX86_BUILTIN_PSLLWI128,
21590 IX86_BUILTIN_PSLLDI128,
21591 IX86_BUILTIN_PSLLQI128,
21592 IX86_BUILTIN_PSRAWI128,
21593 IX86_BUILTIN_PSRADI128,
21594 IX86_BUILTIN_PSRLDQI128,
21595 IX86_BUILTIN_PSRLWI128,
21596 IX86_BUILTIN_PSRLDI128,
21597 IX86_BUILTIN_PSRLQI128,
21598
21599 IX86_BUILTIN_PSLLDQ128,
21600 IX86_BUILTIN_PSLLW128,
21601 IX86_BUILTIN_PSLLD128,
21602 IX86_BUILTIN_PSLLQ128,
21603 IX86_BUILTIN_PSRAW128,
21604 IX86_BUILTIN_PSRAD128,
21605 IX86_BUILTIN_PSRLW128,
21606 IX86_BUILTIN_PSRLD128,
21607 IX86_BUILTIN_PSRLQ128,
21608
21609 IX86_BUILTIN_PUNPCKHBW128,
21610 IX86_BUILTIN_PUNPCKHWD128,
21611 IX86_BUILTIN_PUNPCKHDQ128,
21612 IX86_BUILTIN_PUNPCKHQDQ128,
21613 IX86_BUILTIN_PUNPCKLBW128,
21614 IX86_BUILTIN_PUNPCKLWD128,
21615 IX86_BUILTIN_PUNPCKLDQ128,
21616 IX86_BUILTIN_PUNPCKLQDQ128,
21617
21618 IX86_BUILTIN_CLFLUSH,
21619 IX86_BUILTIN_MFENCE,
21620 IX86_BUILTIN_LFENCE,
21621
21622 IX86_BUILTIN_BSRSI,
21623 IX86_BUILTIN_BSRDI,
21624 IX86_BUILTIN_RDPMC,
21625 IX86_BUILTIN_RDTSC,
21626 IX86_BUILTIN_RDTSCP,
21627 IX86_BUILTIN_ROLQI,
21628 IX86_BUILTIN_ROLHI,
21629 IX86_BUILTIN_RORQI,
21630 IX86_BUILTIN_RORHI,
21631
21632 /* SSE3. */
21633 IX86_BUILTIN_ADDSUBPS,
21634 IX86_BUILTIN_HADDPS,
21635 IX86_BUILTIN_HSUBPS,
21636 IX86_BUILTIN_MOVSHDUP,
21637 IX86_BUILTIN_MOVSLDUP,
21638 IX86_BUILTIN_ADDSUBPD,
21639 IX86_BUILTIN_HADDPD,
21640 IX86_BUILTIN_HSUBPD,
21641 IX86_BUILTIN_LDDQU,
21642
21643 IX86_BUILTIN_MONITOR,
21644 IX86_BUILTIN_MWAIT,
21645
21646 /* SSSE3. */
21647 IX86_BUILTIN_PHADDW,
21648 IX86_BUILTIN_PHADDD,
21649 IX86_BUILTIN_PHADDSW,
21650 IX86_BUILTIN_PHSUBW,
21651 IX86_BUILTIN_PHSUBD,
21652 IX86_BUILTIN_PHSUBSW,
21653 IX86_BUILTIN_PMADDUBSW,
21654 IX86_BUILTIN_PMULHRSW,
21655 IX86_BUILTIN_PSHUFB,
21656 IX86_BUILTIN_PSIGNB,
21657 IX86_BUILTIN_PSIGNW,
21658 IX86_BUILTIN_PSIGND,
21659 IX86_BUILTIN_PALIGNR,
21660 IX86_BUILTIN_PABSB,
21661 IX86_BUILTIN_PABSW,
21662 IX86_BUILTIN_PABSD,
21663
21664 IX86_BUILTIN_PHADDW128,
21665 IX86_BUILTIN_PHADDD128,
21666 IX86_BUILTIN_PHADDSW128,
21667 IX86_BUILTIN_PHSUBW128,
21668 IX86_BUILTIN_PHSUBD128,
21669 IX86_BUILTIN_PHSUBSW128,
21670 IX86_BUILTIN_PMADDUBSW128,
21671 IX86_BUILTIN_PMULHRSW128,
21672 IX86_BUILTIN_PSHUFB128,
21673 IX86_BUILTIN_PSIGNB128,
21674 IX86_BUILTIN_PSIGNW128,
21675 IX86_BUILTIN_PSIGND128,
21676 IX86_BUILTIN_PALIGNR128,
21677 IX86_BUILTIN_PABSB128,
21678 IX86_BUILTIN_PABSW128,
21679 IX86_BUILTIN_PABSD128,
21680
21681 /* AMDFAM10 - SSE4A New Instructions. */
21682 IX86_BUILTIN_MOVNTSD,
21683 IX86_BUILTIN_MOVNTSS,
21684 IX86_BUILTIN_EXTRQI,
21685 IX86_BUILTIN_EXTRQ,
21686 IX86_BUILTIN_INSERTQI,
21687 IX86_BUILTIN_INSERTQ,
21688
21689 /* SSE4.1. */
21690 IX86_BUILTIN_BLENDPD,
21691 IX86_BUILTIN_BLENDPS,
21692 IX86_BUILTIN_BLENDVPD,
21693 IX86_BUILTIN_BLENDVPS,
21694 IX86_BUILTIN_PBLENDVB128,
21695 IX86_BUILTIN_PBLENDW128,
21696
21697 IX86_BUILTIN_DPPD,
21698 IX86_BUILTIN_DPPS,
21699
21700 IX86_BUILTIN_INSERTPS128,
21701
21702 IX86_BUILTIN_MOVNTDQA,
21703 IX86_BUILTIN_MPSADBW128,
21704 IX86_BUILTIN_PACKUSDW128,
21705 IX86_BUILTIN_PCMPEQQ,
21706 IX86_BUILTIN_PHMINPOSUW128,
21707
21708 IX86_BUILTIN_PMAXSB128,
21709 IX86_BUILTIN_PMAXSD128,
21710 IX86_BUILTIN_PMAXUD128,
21711 IX86_BUILTIN_PMAXUW128,
21712
21713 IX86_BUILTIN_PMINSB128,
21714 IX86_BUILTIN_PMINSD128,
21715 IX86_BUILTIN_PMINUD128,
21716 IX86_BUILTIN_PMINUW128,
21717
21718 IX86_BUILTIN_PMOVSXBW128,
21719 IX86_BUILTIN_PMOVSXBD128,
21720 IX86_BUILTIN_PMOVSXBQ128,
21721 IX86_BUILTIN_PMOVSXWD128,
21722 IX86_BUILTIN_PMOVSXWQ128,
21723 IX86_BUILTIN_PMOVSXDQ128,
21724
21725 IX86_BUILTIN_PMOVZXBW128,
21726 IX86_BUILTIN_PMOVZXBD128,
21727 IX86_BUILTIN_PMOVZXBQ128,
21728 IX86_BUILTIN_PMOVZXWD128,
21729 IX86_BUILTIN_PMOVZXWQ128,
21730 IX86_BUILTIN_PMOVZXDQ128,
21731
21732 IX86_BUILTIN_PMULDQ128,
21733 IX86_BUILTIN_PMULLD128,
21734
21735 IX86_BUILTIN_ROUNDPD,
21736 IX86_BUILTIN_ROUNDPS,
21737 IX86_BUILTIN_ROUNDSD,
21738 IX86_BUILTIN_ROUNDSS,
21739
21740 IX86_BUILTIN_PTESTZ,
21741 IX86_BUILTIN_PTESTC,
21742 IX86_BUILTIN_PTESTNZC,
21743
21744 IX86_BUILTIN_VEC_INIT_V2SI,
21745 IX86_BUILTIN_VEC_INIT_V4HI,
21746 IX86_BUILTIN_VEC_INIT_V8QI,
21747 IX86_BUILTIN_VEC_EXT_V2DF,
21748 IX86_BUILTIN_VEC_EXT_V2DI,
21749 IX86_BUILTIN_VEC_EXT_V4SF,
21750 IX86_BUILTIN_VEC_EXT_V4SI,
21751 IX86_BUILTIN_VEC_EXT_V8HI,
21752 IX86_BUILTIN_VEC_EXT_V2SI,
21753 IX86_BUILTIN_VEC_EXT_V4HI,
21754 IX86_BUILTIN_VEC_EXT_V16QI,
21755 IX86_BUILTIN_VEC_SET_V2DI,
21756 IX86_BUILTIN_VEC_SET_V4SF,
21757 IX86_BUILTIN_VEC_SET_V4SI,
21758 IX86_BUILTIN_VEC_SET_V8HI,
21759 IX86_BUILTIN_VEC_SET_V4HI,
21760 IX86_BUILTIN_VEC_SET_V16QI,
21761
21762 IX86_BUILTIN_VEC_PACK_SFIX,
21763
21764 /* SSE4.2. */
21765 IX86_BUILTIN_CRC32QI,
21766 IX86_BUILTIN_CRC32HI,
21767 IX86_BUILTIN_CRC32SI,
21768 IX86_BUILTIN_CRC32DI,
21769
21770 IX86_BUILTIN_PCMPESTRI128,
21771 IX86_BUILTIN_PCMPESTRM128,
21772 IX86_BUILTIN_PCMPESTRA128,
21773 IX86_BUILTIN_PCMPESTRC128,
21774 IX86_BUILTIN_PCMPESTRO128,
21775 IX86_BUILTIN_PCMPESTRS128,
21776 IX86_BUILTIN_PCMPESTRZ128,
21777 IX86_BUILTIN_PCMPISTRI128,
21778 IX86_BUILTIN_PCMPISTRM128,
21779 IX86_BUILTIN_PCMPISTRA128,
21780 IX86_BUILTIN_PCMPISTRC128,
21781 IX86_BUILTIN_PCMPISTRO128,
21782 IX86_BUILTIN_PCMPISTRS128,
21783 IX86_BUILTIN_PCMPISTRZ128,
21784
21785 IX86_BUILTIN_PCMPGTQ,
21786
21787 /* AES instructions */
21788 IX86_BUILTIN_AESENC128,
21789 IX86_BUILTIN_AESENCLAST128,
21790 IX86_BUILTIN_AESDEC128,
21791 IX86_BUILTIN_AESDECLAST128,
21792 IX86_BUILTIN_AESIMC128,
21793 IX86_BUILTIN_AESKEYGENASSIST128,
21794
21795 /* PCLMUL instruction */
21796 IX86_BUILTIN_PCLMULQDQ128,
21797
21798 /* AVX */
21799 IX86_BUILTIN_ADDPD256,
21800 IX86_BUILTIN_ADDPS256,
21801 IX86_BUILTIN_ADDSUBPD256,
21802 IX86_BUILTIN_ADDSUBPS256,
21803 IX86_BUILTIN_ANDPD256,
21804 IX86_BUILTIN_ANDPS256,
21805 IX86_BUILTIN_ANDNPD256,
21806 IX86_BUILTIN_ANDNPS256,
21807 IX86_BUILTIN_BLENDPD256,
21808 IX86_BUILTIN_BLENDPS256,
21809 IX86_BUILTIN_BLENDVPD256,
21810 IX86_BUILTIN_BLENDVPS256,
21811 IX86_BUILTIN_DIVPD256,
21812 IX86_BUILTIN_DIVPS256,
21813 IX86_BUILTIN_DPPS256,
21814 IX86_BUILTIN_HADDPD256,
21815 IX86_BUILTIN_HADDPS256,
21816 IX86_BUILTIN_HSUBPD256,
21817 IX86_BUILTIN_HSUBPS256,
21818 IX86_BUILTIN_MAXPD256,
21819 IX86_BUILTIN_MAXPS256,
21820 IX86_BUILTIN_MINPD256,
21821 IX86_BUILTIN_MINPS256,
21822 IX86_BUILTIN_MULPD256,
21823 IX86_BUILTIN_MULPS256,
21824 IX86_BUILTIN_ORPD256,
21825 IX86_BUILTIN_ORPS256,
21826 IX86_BUILTIN_SHUFPD256,
21827 IX86_BUILTIN_SHUFPS256,
21828 IX86_BUILTIN_SUBPD256,
21829 IX86_BUILTIN_SUBPS256,
21830 IX86_BUILTIN_XORPD256,
21831 IX86_BUILTIN_XORPS256,
21832 IX86_BUILTIN_CMPSD,
21833 IX86_BUILTIN_CMPSS,
21834 IX86_BUILTIN_CMPPD,
21835 IX86_BUILTIN_CMPPS,
21836 IX86_BUILTIN_CMPPD256,
21837 IX86_BUILTIN_CMPPS256,
21838 IX86_BUILTIN_CVTDQ2PD256,
21839 IX86_BUILTIN_CVTDQ2PS256,
21840 IX86_BUILTIN_CVTPD2PS256,
21841 IX86_BUILTIN_CVTPS2DQ256,
21842 IX86_BUILTIN_CVTPS2PD256,
21843 IX86_BUILTIN_CVTTPD2DQ256,
21844 IX86_BUILTIN_CVTPD2DQ256,
21845 IX86_BUILTIN_CVTTPS2DQ256,
21846 IX86_BUILTIN_EXTRACTF128PD256,
21847 IX86_BUILTIN_EXTRACTF128PS256,
21848 IX86_BUILTIN_EXTRACTF128SI256,
21849 IX86_BUILTIN_VZEROALL,
21850 IX86_BUILTIN_VZEROUPPER,
21851 IX86_BUILTIN_VPERMILVARPD,
21852 IX86_BUILTIN_VPERMILVARPS,
21853 IX86_BUILTIN_VPERMILVARPD256,
21854 IX86_BUILTIN_VPERMILVARPS256,
21855 IX86_BUILTIN_VPERMILPD,
21856 IX86_BUILTIN_VPERMILPS,
21857 IX86_BUILTIN_VPERMILPD256,
21858 IX86_BUILTIN_VPERMILPS256,
21859 IX86_BUILTIN_VPERMIL2PD,
21860 IX86_BUILTIN_VPERMIL2PS,
21861 IX86_BUILTIN_VPERMIL2PD256,
21862 IX86_BUILTIN_VPERMIL2PS256,
21863 IX86_BUILTIN_VPERM2F128PD256,
21864 IX86_BUILTIN_VPERM2F128PS256,
21865 IX86_BUILTIN_VPERM2F128SI256,
21866 IX86_BUILTIN_VBROADCASTSS,
21867 IX86_BUILTIN_VBROADCASTSD256,
21868 IX86_BUILTIN_VBROADCASTSS256,
21869 IX86_BUILTIN_VBROADCASTPD256,
21870 IX86_BUILTIN_VBROADCASTPS256,
21871 IX86_BUILTIN_VINSERTF128PD256,
21872 IX86_BUILTIN_VINSERTF128PS256,
21873 IX86_BUILTIN_VINSERTF128SI256,
21874 IX86_BUILTIN_LOADUPD256,
21875 IX86_BUILTIN_LOADUPS256,
21876 IX86_BUILTIN_STOREUPD256,
21877 IX86_BUILTIN_STOREUPS256,
21878 IX86_BUILTIN_LDDQU256,
21879 IX86_BUILTIN_MOVNTDQ256,
21880 IX86_BUILTIN_MOVNTPD256,
21881 IX86_BUILTIN_MOVNTPS256,
21882 IX86_BUILTIN_LOADDQU256,
21883 IX86_BUILTIN_STOREDQU256,
21884 IX86_BUILTIN_MASKLOADPD,
21885 IX86_BUILTIN_MASKLOADPS,
21886 IX86_BUILTIN_MASKSTOREPD,
21887 IX86_BUILTIN_MASKSTOREPS,
21888 IX86_BUILTIN_MASKLOADPD256,
21889 IX86_BUILTIN_MASKLOADPS256,
21890 IX86_BUILTIN_MASKSTOREPD256,
21891 IX86_BUILTIN_MASKSTOREPS256,
21892 IX86_BUILTIN_MOVSHDUP256,
21893 IX86_BUILTIN_MOVSLDUP256,
21894 IX86_BUILTIN_MOVDDUP256,
21895
21896 IX86_BUILTIN_SQRTPD256,
21897 IX86_BUILTIN_SQRTPS256,
21898 IX86_BUILTIN_SQRTPS_NR256,
21899 IX86_BUILTIN_RSQRTPS256,
21900 IX86_BUILTIN_RSQRTPS_NR256,
21901
21902 IX86_BUILTIN_RCPPS256,
21903
21904 IX86_BUILTIN_ROUNDPD256,
21905 IX86_BUILTIN_ROUNDPS256,
21906
21907 IX86_BUILTIN_UNPCKHPD256,
21908 IX86_BUILTIN_UNPCKLPD256,
21909 IX86_BUILTIN_UNPCKHPS256,
21910 IX86_BUILTIN_UNPCKLPS256,
21911
21912 IX86_BUILTIN_SI256_SI,
21913 IX86_BUILTIN_PS256_PS,
21914 IX86_BUILTIN_PD256_PD,
21915 IX86_BUILTIN_SI_SI256,
21916 IX86_BUILTIN_PS_PS256,
21917 IX86_BUILTIN_PD_PD256,
21918
21919 IX86_BUILTIN_VTESTZPD,
21920 IX86_BUILTIN_VTESTCPD,
21921 IX86_BUILTIN_VTESTNZCPD,
21922 IX86_BUILTIN_VTESTZPS,
21923 IX86_BUILTIN_VTESTCPS,
21924 IX86_BUILTIN_VTESTNZCPS,
21925 IX86_BUILTIN_VTESTZPD256,
21926 IX86_BUILTIN_VTESTCPD256,
21927 IX86_BUILTIN_VTESTNZCPD256,
21928 IX86_BUILTIN_VTESTZPS256,
21929 IX86_BUILTIN_VTESTCPS256,
21930 IX86_BUILTIN_VTESTNZCPS256,
21931 IX86_BUILTIN_PTESTZ256,
21932 IX86_BUILTIN_PTESTC256,
21933 IX86_BUILTIN_PTESTNZC256,
21934
21935 IX86_BUILTIN_MOVMSKPD256,
21936 IX86_BUILTIN_MOVMSKPS256,
21937
21938 /* TFmode support builtins. */
21939 IX86_BUILTIN_INFQ,
21940 IX86_BUILTIN_HUGE_VALQ,
21941 IX86_BUILTIN_FABSQ,
21942 IX86_BUILTIN_COPYSIGNQ,
21943
21944 /* Vectorizer support builtins. */
21945 IX86_BUILTIN_CPYSGNPS,
21946 IX86_BUILTIN_CPYSGNPD,
21947
21948 IX86_BUILTIN_CVTUDQ2PS,
21949
21950 IX86_BUILTIN_VEC_PERM_V2DF,
21951 IX86_BUILTIN_VEC_PERM_V4SF,
21952 IX86_BUILTIN_VEC_PERM_V2DI,
21953 IX86_BUILTIN_VEC_PERM_V4SI,
21954 IX86_BUILTIN_VEC_PERM_V8HI,
21955 IX86_BUILTIN_VEC_PERM_V16QI,
21956 IX86_BUILTIN_VEC_PERM_V2DI_U,
21957 IX86_BUILTIN_VEC_PERM_V4SI_U,
21958 IX86_BUILTIN_VEC_PERM_V8HI_U,
21959 IX86_BUILTIN_VEC_PERM_V16QI_U,
21960 IX86_BUILTIN_VEC_PERM_V4DF,
21961 IX86_BUILTIN_VEC_PERM_V8SF,
21962
21963 /* FMA4 and XOP instructions. */
21964 IX86_BUILTIN_VFMADDSS,
21965 IX86_BUILTIN_VFMADDSD,
21966 IX86_BUILTIN_VFMADDPS,
21967 IX86_BUILTIN_VFMADDPD,
21968 IX86_BUILTIN_VFMSUBSS,
21969 IX86_BUILTIN_VFMSUBSD,
21970 IX86_BUILTIN_VFMSUBPS,
21971 IX86_BUILTIN_VFMSUBPD,
21972 IX86_BUILTIN_VFMADDSUBPS,
21973 IX86_BUILTIN_VFMADDSUBPD,
21974 IX86_BUILTIN_VFMSUBADDPS,
21975 IX86_BUILTIN_VFMSUBADDPD,
21976 IX86_BUILTIN_VFNMADDSS,
21977 IX86_BUILTIN_VFNMADDSD,
21978 IX86_BUILTIN_VFNMADDPS,
21979 IX86_BUILTIN_VFNMADDPD,
21980 IX86_BUILTIN_VFNMSUBSS,
21981 IX86_BUILTIN_VFNMSUBSD,
21982 IX86_BUILTIN_VFNMSUBPS,
21983 IX86_BUILTIN_VFNMSUBPD,
21984 IX86_BUILTIN_VFMADDPS256,
21985 IX86_BUILTIN_VFMADDPD256,
21986 IX86_BUILTIN_VFMSUBPS256,
21987 IX86_BUILTIN_VFMSUBPD256,
21988 IX86_BUILTIN_VFMADDSUBPS256,
21989 IX86_BUILTIN_VFMADDSUBPD256,
21990 IX86_BUILTIN_VFMSUBADDPS256,
21991 IX86_BUILTIN_VFMSUBADDPD256,
21992 IX86_BUILTIN_VFNMADDPS256,
21993 IX86_BUILTIN_VFNMADDPD256,
21994 IX86_BUILTIN_VFNMSUBPS256,
21995 IX86_BUILTIN_VFNMSUBPD256,
21996
21997 IX86_BUILTIN_VPCMOV,
21998 IX86_BUILTIN_VPCMOV_V2DI,
21999 IX86_BUILTIN_VPCMOV_V4SI,
22000 IX86_BUILTIN_VPCMOV_V8HI,
22001 IX86_BUILTIN_VPCMOV_V16QI,
22002 IX86_BUILTIN_VPCMOV_V4SF,
22003 IX86_BUILTIN_VPCMOV_V2DF,
22004 IX86_BUILTIN_VPCMOV256,
22005 IX86_BUILTIN_VPCMOV_V4DI256,
22006 IX86_BUILTIN_VPCMOV_V8SI256,
22007 IX86_BUILTIN_VPCMOV_V16HI256,
22008 IX86_BUILTIN_VPCMOV_V32QI256,
22009 IX86_BUILTIN_VPCMOV_V8SF256,
22010 IX86_BUILTIN_VPCMOV_V4DF256,
22011
22012 IX86_BUILTIN_VPPERM,
22013
22014 IX86_BUILTIN_VPMACSSWW,
22015 IX86_BUILTIN_VPMACSWW,
22016 IX86_BUILTIN_VPMACSSWD,
22017 IX86_BUILTIN_VPMACSWD,
22018 IX86_BUILTIN_VPMACSSDD,
22019 IX86_BUILTIN_VPMACSDD,
22020 IX86_BUILTIN_VPMACSSDQL,
22021 IX86_BUILTIN_VPMACSSDQH,
22022 IX86_BUILTIN_VPMACSDQL,
22023 IX86_BUILTIN_VPMACSDQH,
22024 IX86_BUILTIN_VPMADCSSWD,
22025 IX86_BUILTIN_VPMADCSWD,
22026
22027 IX86_BUILTIN_VPHADDBW,
22028 IX86_BUILTIN_VPHADDBD,
22029 IX86_BUILTIN_VPHADDBQ,
22030 IX86_BUILTIN_VPHADDWD,
22031 IX86_BUILTIN_VPHADDWQ,
22032 IX86_BUILTIN_VPHADDDQ,
22033 IX86_BUILTIN_VPHADDUBW,
22034 IX86_BUILTIN_VPHADDUBD,
22035 IX86_BUILTIN_VPHADDUBQ,
22036 IX86_BUILTIN_VPHADDUWD,
22037 IX86_BUILTIN_VPHADDUWQ,
22038 IX86_BUILTIN_VPHADDUDQ,
22039 IX86_BUILTIN_VPHSUBBW,
22040 IX86_BUILTIN_VPHSUBWD,
22041 IX86_BUILTIN_VPHSUBDQ,
22042
22043 IX86_BUILTIN_VPROTB,
22044 IX86_BUILTIN_VPROTW,
22045 IX86_BUILTIN_VPROTD,
22046 IX86_BUILTIN_VPROTQ,
22047 IX86_BUILTIN_VPROTB_IMM,
22048 IX86_BUILTIN_VPROTW_IMM,
22049 IX86_BUILTIN_VPROTD_IMM,
22050 IX86_BUILTIN_VPROTQ_IMM,
22051
22052 IX86_BUILTIN_VPSHLB,
22053 IX86_BUILTIN_VPSHLW,
22054 IX86_BUILTIN_VPSHLD,
22055 IX86_BUILTIN_VPSHLQ,
22056 IX86_BUILTIN_VPSHAB,
22057 IX86_BUILTIN_VPSHAW,
22058 IX86_BUILTIN_VPSHAD,
22059 IX86_BUILTIN_VPSHAQ,
22060
22061 IX86_BUILTIN_VFRCZSS,
22062 IX86_BUILTIN_VFRCZSD,
22063 IX86_BUILTIN_VFRCZPS,
22064 IX86_BUILTIN_VFRCZPD,
22065 IX86_BUILTIN_VFRCZPS256,
22066 IX86_BUILTIN_VFRCZPD256,
22067
22068 IX86_BUILTIN_VPCOMEQUB,
22069 IX86_BUILTIN_VPCOMNEUB,
22070 IX86_BUILTIN_VPCOMLTUB,
22071 IX86_BUILTIN_VPCOMLEUB,
22072 IX86_BUILTIN_VPCOMGTUB,
22073 IX86_BUILTIN_VPCOMGEUB,
22074 IX86_BUILTIN_VPCOMFALSEUB,
22075 IX86_BUILTIN_VPCOMTRUEUB,
22076
22077 IX86_BUILTIN_VPCOMEQUW,
22078 IX86_BUILTIN_VPCOMNEUW,
22079 IX86_BUILTIN_VPCOMLTUW,
22080 IX86_BUILTIN_VPCOMLEUW,
22081 IX86_BUILTIN_VPCOMGTUW,
22082 IX86_BUILTIN_VPCOMGEUW,
22083 IX86_BUILTIN_VPCOMFALSEUW,
22084 IX86_BUILTIN_VPCOMTRUEUW,
22085
22086 IX86_BUILTIN_VPCOMEQUD,
22087 IX86_BUILTIN_VPCOMNEUD,
22088 IX86_BUILTIN_VPCOMLTUD,
22089 IX86_BUILTIN_VPCOMLEUD,
22090 IX86_BUILTIN_VPCOMGTUD,
22091 IX86_BUILTIN_VPCOMGEUD,
22092 IX86_BUILTIN_VPCOMFALSEUD,
22093 IX86_BUILTIN_VPCOMTRUEUD,
22094
22095 IX86_BUILTIN_VPCOMEQUQ,
22096 IX86_BUILTIN_VPCOMNEUQ,
22097 IX86_BUILTIN_VPCOMLTUQ,
22098 IX86_BUILTIN_VPCOMLEUQ,
22099 IX86_BUILTIN_VPCOMGTUQ,
22100 IX86_BUILTIN_VPCOMGEUQ,
22101 IX86_BUILTIN_VPCOMFALSEUQ,
22102 IX86_BUILTIN_VPCOMTRUEUQ,
22103
22104 IX86_BUILTIN_VPCOMEQB,
22105 IX86_BUILTIN_VPCOMNEB,
22106 IX86_BUILTIN_VPCOMLTB,
22107 IX86_BUILTIN_VPCOMLEB,
22108 IX86_BUILTIN_VPCOMGTB,
22109 IX86_BUILTIN_VPCOMGEB,
22110 IX86_BUILTIN_VPCOMFALSEB,
22111 IX86_BUILTIN_VPCOMTRUEB,
22112
22113 IX86_BUILTIN_VPCOMEQW,
22114 IX86_BUILTIN_VPCOMNEW,
22115 IX86_BUILTIN_VPCOMLTW,
22116 IX86_BUILTIN_VPCOMLEW,
22117 IX86_BUILTIN_VPCOMGTW,
22118 IX86_BUILTIN_VPCOMGEW,
22119 IX86_BUILTIN_VPCOMFALSEW,
22120 IX86_BUILTIN_VPCOMTRUEW,
22121
22122 IX86_BUILTIN_VPCOMEQD,
22123 IX86_BUILTIN_VPCOMNED,
22124 IX86_BUILTIN_VPCOMLTD,
22125 IX86_BUILTIN_VPCOMLED,
22126 IX86_BUILTIN_VPCOMGTD,
22127 IX86_BUILTIN_VPCOMGED,
22128 IX86_BUILTIN_VPCOMFALSED,
22129 IX86_BUILTIN_VPCOMTRUED,
22130
22131 IX86_BUILTIN_VPCOMEQQ,
22132 IX86_BUILTIN_VPCOMNEQ,
22133 IX86_BUILTIN_VPCOMLTQ,
22134 IX86_BUILTIN_VPCOMLEQ,
22135 IX86_BUILTIN_VPCOMGTQ,
22136 IX86_BUILTIN_VPCOMGEQ,
22137 IX86_BUILTIN_VPCOMFALSEQ,
22138 IX86_BUILTIN_VPCOMTRUEQ,
22139
22140 /* LWP instructions. */
22141 IX86_BUILTIN_LLWPCB,
22142 IX86_BUILTIN_SLWPCB,
22143 IX86_BUILTIN_LWPVAL32,
22144 IX86_BUILTIN_LWPVAL64,
22145 IX86_BUILTIN_LWPINS32,
22146 IX86_BUILTIN_LWPINS64,
22147
22148 IX86_BUILTIN_CLZS,
22149
22150 /* FSGSBASE instructions. */
22151 IX86_BUILTIN_RDFSBASE32,
22152 IX86_BUILTIN_RDFSBASE64,
22153 IX86_BUILTIN_RDGSBASE32,
22154 IX86_BUILTIN_RDGSBASE64,
22155 IX86_BUILTIN_WRFSBASE32,
22156 IX86_BUILTIN_WRFSBASE64,
22157 IX86_BUILTIN_WRGSBASE32,
22158 IX86_BUILTIN_WRGSBASE64,
22159
22160 /* RDRND instructions. */
22161 IX86_BUILTIN_RDRAND16,
22162 IX86_BUILTIN_RDRAND32,
22163 IX86_BUILTIN_RDRAND64,
22164
22165 /* F16C instructions. */
22166 IX86_BUILTIN_CVTPH2PS,
22167 IX86_BUILTIN_CVTPH2PS256,
22168 IX86_BUILTIN_CVTPS2PH,
22169 IX86_BUILTIN_CVTPS2PH256,
22170
22171 IX86_BUILTIN_MAX
22172 };
22173
22174 /* Table for the ix86 builtin decls. */
22175 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
22176
22177 /* Table of all of the builtin functions that are possible with different ISAs
22178 but are waiting to be built until a function is declared to use that
22179 ISA. */
22180 struct builtin_isa {
22181 const char *name; /* function name */
22182 enum ix86_builtin_func_type tcode; /* type to use in the declaration */
22183 int isa; /* isa_flags this builtin is defined for */
22184 bool const_p; /* true if the declaration is constant */
22185 bool set_and_not_built_p; /* true if recorded here but the decl has not been built yet */
22186 };
22187
22188 static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
22189
22190
22191 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
22192 of isa_flags this builtin requires in the ix86_builtins_isa array. Stores the
22193 function decl in the ix86_builtins array. Returns the function decl or
22194 NULL_TREE if the builtin was not added.
22195
22196 If the front end has a special hook for builtin functions, delay adding
22197 builtin functions that aren't in the current ISA until the ISA is changed
22198 with function specific optimization. Doing so can save about 300K for the
22199 default compiler. When the builtin is expanded, check at that time whether
22200 it is valid.
22201
22202 If the front end doesn't have a special hook, record all builtins, even
22203 those whose instruction set isn't in the current ISA, in case the user uses
22204 function specific options for a different ISA; that way we don't get scope
22205 errors if a builtin is added in the middle of a function scope. */
22206
22207 static inline tree
22208 def_builtin (int mask, const char *name, enum ix86_builtin_func_type tcode,
22209 enum ix86_builtins code)
22210 {
22211 tree decl = NULL_TREE;
22212
22213 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
22214 {
22215 ix86_builtins_isa[(int) code].isa = mask;
22216
22217 mask &= ~OPTION_MASK_ISA_64BIT;
22218 if (mask == 0
22219 || (mask & ix86_isa_flags) != 0
22220 || (lang_hooks.builtin_function
22221 == lang_hooks.builtin_function_ext_scope))
22222
22223 {
22224 tree type = ix86_get_builtin_func_type (tcode);
22225 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
22226 NULL, NULL_TREE);
22227 ix86_builtins[(int) code] = decl;
22228 ix86_builtins_isa[(int) code].set_and_not_built_p = false;
22229 }
22230 else
22231 {
22232 ix86_builtins[(int) code] = NULL_TREE;
22233 ix86_builtins_isa[(int) code].tcode = tcode;
22234 ix86_builtins_isa[(int) code].name = name;
22235 ix86_builtins_isa[(int) code].const_p = false;
22236 ix86_builtins_isa[(int) code].set_and_not_built_p = true;
22237 }
22238 }
22239
22240 return decl;
22241 }
22242
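/* A minimal usage sketch (illustrative only -- the real registrations are
   driven from the bdesc_* tables and the ix86_init_*_builtins routines):

     def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_addpd",
                  V2DF_FTYPE_V2DF_V2DF, IX86_BUILTIN_ADDPD);

   When the requested ISA is already enabled, the decl is created right away
   and stored in ix86_builtins[].  Otherwise only the name, type and ISA mask
   are stashed in ix86_builtins_isa[] (with set_and_not_built_p set) and the
   decl is materialized later by ix86_add_new_builtins once the ISA becomes
   active.  */
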
22243 /* Like def_builtin, but also marks the function decl "const". */
22244
22245 static inline tree
22246 def_builtin_const (int mask, const char *name,
22247 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
22248 {
22249 tree decl = def_builtin (mask, name, tcode, code);
22250 if (decl)
22251 TREE_READONLY (decl) = 1;
22252 else
22253 ix86_builtins_isa[(int) code].const_p = true;
22254
22255 return decl;
22256 }
22257
22258 /* Add any new builtin functions for a given ISA that may not have been
22259 declared. This saves a bit of space compared to adding all of the
22260 declarations to the tree up front, even those that are never used. */
22261
22262 static void
22263 ix86_add_new_builtins (int isa)
22264 {
22265 int i;
22266
22267 for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
22268 {
22269 if ((ix86_builtins_isa[i].isa & isa) != 0
22270 && ix86_builtins_isa[i].set_and_not_built_p)
22271 {
22272 tree decl, type;
22273
22274 /* Don't define the builtin again. */
22275 ix86_builtins_isa[i].set_and_not_built_p = false;
22276
22277 type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
22278 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
22279 type, i, BUILT_IN_MD, NULL,
22280 NULL_TREE);
22281
22282 ix86_builtins[i] = decl;
22283 if (ix86_builtins_isa[i].const_p)
22284 TREE_READONLY (decl) = 1;
22285 }
22286 }
22287 }
22288
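/* A minimal sketch of the user-visible effect, assuming a front end that
   defers builtins via def_builtin above (the exact caller of
   ix86_add_new_builtins is not shown here):

     __attribute__((target ("avx"))) void
     use_avx (void)
     {
       __builtin_ia32_vzeroupper ();
     }

   Even if the translation unit is compiled without -mavx, activating AVX for
   this function's body causes ix86_add_new_builtins to walk
   ix86_builtins_isa[] and build the deferred AVX builtin decls, so the call
   above resolves without a scope error.  */
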
22289 /* Bits for builtin_description.flag. */
22290
22291 /* Set when we don't support the comparison natively, and should
22292 swap the comparison operands in order to support it. */
22293 #define BUILTIN_DESC_SWAP_OPERANDS 1
22294
22295 struct builtin_description
22296 {
22297 const unsigned int mask;
22298 const enum insn_code icode;
22299 const char *const name;
22300 const enum ix86_builtins code;
22301 const enum rtx_code comparison;
22302 const int flag;
22303 };
22304
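/* For example, the bdesc_comi entry below

     { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt",
       IX86_BUILTIN_COMILTSS, UNLT, 0 },

   describes a builtin that is available under -msse, expands through the
   sse_comi insn pattern and tests the UNLT comparison; a flag of
   BUILTIN_DESC_SWAP_OPERANDS instead of 0 would ask the expander to swap
   the two operands first.  */
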
22305 static const struct builtin_description bdesc_comi[] =
22306 {
22307 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
22308 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
22309 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
22310 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
22311 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
22312 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
22313 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
22314 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
22315 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
22316 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
22317 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
22318 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
22319 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
22320 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
22321 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
22322 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
22323 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
22324 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
22325 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
22326 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
22327 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
22328 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
22329 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
22330 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
22331 };
22332
22333 static const struct builtin_description bdesc_pcmpestr[] =
22334 {
22335 /* SSE4.2 */
22336 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
22337 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
22338 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
22339 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
22340 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
22341 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
22342 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
22343 };
22344
22345 static const struct builtin_description bdesc_pcmpistr[] =
22346 {
22347 /* SSE4.2 */
22348 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
22349 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
22350 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
22351 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
22352 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
22353 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
22354 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
22355 };
22356
22357 /* Special builtins with variable number of arguments. */
22358 static const struct builtin_description bdesc_special_args[] =
22359 {
22360 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtsc, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
22361 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtscp, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
22362
22363 /* MMX */
22364 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
22365
22366 /* 3DNow! */
22367 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
22368
22369 /* SSE */
22370 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
22371 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
22372 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
22373
22374 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
22375 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
22376 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
22377 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
22378
22379 /* SSE or 3DNow!A */
22380 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
22381 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntdi, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },
22382
22383 /* SSE2 */
22384 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
22385 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
22386 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
22387 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
22388 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
22389 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
22390 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntsi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
22391 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
22392 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
22393
22394 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
22395 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
22396
22397 /* SSE3 */
22398 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
22399
22400 /* SSE4.1 */
22401 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
22402
22403 /* SSE4A */
22404 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
22405 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
22406
22407 /* AVX */
22408 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
22409 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
22410
22411 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
22412 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
22413 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
22414 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
22415 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
22416
22417 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
22418 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
22419 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
22420 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
22421 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
22422 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
22423 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
22424
22425 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
22426 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
22427 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
22428
22429 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF },
22430 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF },
22431 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF },
22432 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF },
22433 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_V2DF },
22434 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_V4SF },
22435 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_V4DF },
22436 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_V8SF },
22437
22438 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
22439 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
22440 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
22441 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
22442 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
22443 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },
22444
22445 /* FSGSBASE */
22446 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
22447 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
22448 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
22449 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
22450 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
22451 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
22452 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
22453 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
22454
22455 /* RDRND */
22456 { OPTION_MASK_ISA_RDRND, CODE_FOR_rdrandhi, "__builtin_ia32_rdrand16", IX86_BUILTIN_RDRAND16, UNKNOWN, (int) UINT16_FTYPE_VOID },
22457 { OPTION_MASK_ISA_RDRND, CODE_FOR_rdrandsi, "__builtin_ia32_rdrand32", IX86_BUILTIN_RDRAND32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
22458 { OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT, CODE_FOR_rdranddi, "__builtin_ia32_rdrand64", IX86_BUILTIN_RDRAND64, UNKNOWN, (int) UINT64_FTYPE_VOID },
22459 };
22460
22461 /* Builtins with variable number of arguments. */
22462 static const struct builtin_description bdesc_args[] =
22463 {
22464 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
22465 { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
22466 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdpmc, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
22467 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
22468 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
22469 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
22470 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
22471
22472 /* MMX */
22473 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
22474 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22475 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
22476 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
22477 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22478 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
22479
22480 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
22481 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22482 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
22483 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22484 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
22485 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22486 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
22487 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22488
22489 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22490 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22491
22492 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
22493 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
22494 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
22495 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
22496
22497 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
22498 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22499 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
22500 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
22501 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22502 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
22503
22504 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
22505 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22506 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
22507 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
22508 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22509 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
22510
22511 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
22512 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
22513 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
22514
22515 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
22516
22517 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
22518 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
22519 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
22520 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
22521 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
22522 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
22523
22524 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
22525 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
22526 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
22527 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
22528 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
22529 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
22530
22531 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
22532 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
22533 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
22534 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
22535
22536 /* 3DNow! */
22537 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
22538 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
22539 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
22540 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
22541
22542 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
22543 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
22544 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
22545 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
22546 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
22547 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
22548 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
22549 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
22550 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
22551 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
22552 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
22553 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
22554 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
22555 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
22556 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22557
22558 /* 3DNow!A */
22559 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
22560 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
22561 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
22562 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
22563 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
22564 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
22565
22566 /* SSE */
22567 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
22568 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
22569 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
22570 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
22571 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
22572 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
22573 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
22574 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
22575 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
22576 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
22577 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
22578 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
22579
22580 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
22581
22582 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22583 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22584 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22585 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22586 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22587 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22588 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22589 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22590
22591 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
22592 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
22593 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
22594 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
22595 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
22596 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
22597 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
22598 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
22599 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
22600 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
22601 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP},
22602 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
22603 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
22604 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
22605 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
22606 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
22607 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
22608 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
22609 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
22610 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
22611 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
22612 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
22613
22614 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22615 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22616 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22617 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22618
22619 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22620 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22621 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22622 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22623
22624 { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22625
22626 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22627 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22628 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22629 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22630 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22631
22632 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
22633 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
22634 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },
22635
22636 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
22637
22638 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
22639 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
22640 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
22641
22642 /* SSE MMX or 3DNow!A */
22643 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
22644 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22645 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22646
22647 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
22648 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22649 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
22650 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22651
22652 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
22653 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
22654
22655 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
22656
22657 /* SSE2 */
22658 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
22659
22660 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2df", IX86_BUILTIN_VEC_PERM_V2DF, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI },
22661 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4sf", IX86_BUILTIN_VEC_PERM_V4SF, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI },
22662 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2di", IX86_BUILTIN_VEC_PERM_V2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI },
22663 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4si", IX86_BUILTIN_VEC_PERM_V4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
22664 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8hi", IX86_BUILTIN_VEC_PERM_V8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI },
22665 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v16qi", IX86_BUILTIN_VEC_PERM_V16QI, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
22666 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2di_u", IX86_BUILTIN_VEC_PERM_V2DI_U, UNKNOWN, (int) V2UDI_FTYPE_V2UDI_V2UDI_V2UDI },
22667 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4si_u", IX86_BUILTIN_VEC_PERM_V4SI_U, UNKNOWN, (int) V4USI_FTYPE_V4USI_V4USI_V4USI },
22668 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8hi_u", IX86_BUILTIN_VEC_PERM_V8HI_U, UNKNOWN, (int) V8UHI_FTYPE_V8UHI_V8UHI_V8UHI },
22669 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v16qi_u", IX86_BUILTIN_VEC_PERM_V16QI_U, UNKNOWN, (int) V16UQI_FTYPE_V16UQI_V16UQI_V16UQI },
22670 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4df", IX86_BUILTIN_VEC_PERM_V4DF, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI },
22671 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8sf", IX86_BUILTIN_VEC_PERM_V8SF, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI },
22672
22673 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
22674 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
22675 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
22676 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
22677 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
22678 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtudq2ps, "__builtin_ia32_cvtudq2ps", IX86_BUILTIN_CVTUDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
22679
22680 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
22681 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
22682 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
22683 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
22684 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
22685
22686 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
22687
22688 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
22689 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
22690 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
22691 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
22692
22693 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
22694 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
22695 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
22696
22697 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
22698 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
22699 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
22700 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
22701 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
22702 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
22703 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
22704 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
22705
22706 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
22707 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
22708 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
22709 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
22710 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP},
22711 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
22712 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
22713 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
22714 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
22715 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
22716 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
22717 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
22718 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
22719 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
22720 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
22721 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
22722 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
22723 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
22724 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
22725 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
22726
22727 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
22728 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
22729 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
22730 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
22731
22732 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
22733 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
22734 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
22735 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
22736
22737 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
22738
22739 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
22740 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
22741 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
22742
22743 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
22744
22745 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
22746 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
22747 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
22748 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
22749 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
22750 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
22751 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
22752 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
22753
22754 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
22755 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
22756 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
22757 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
22758 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
22759 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
22760 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
22761 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
22762
22763 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
22764 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
22765
22766 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
22767 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
22768 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
22769 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
22770
22771 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
22772 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
22773
22774 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
22775 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
22776 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
22777 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
22778 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
22779 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
22780
22781 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
22782 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
22783 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
22784 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
22785
22786 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
22787 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
22788 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
22789 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
22790 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
22791 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
22792 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
22793 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
22794
22795 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
22796 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
22797 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
22798
22799 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
22800 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
22801
22802 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
22803 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
22804
22805 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
22806
22807 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
22808 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
22809 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
22810 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
22811
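/* Shift entries: types ending in _COUNT mark the final operand as a shift
   count (an SImode value for the *i128 immediate forms, a vector register
   for the others).  The _INT_CONVERT types flag entries whose operands must
   be converted to the insn's mode; the whole-register byte shifts
   pslldqi128/psrldqi128 are implemented on the V1TImode shift patterns and
   take their count in bits rather than bytes.  */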
22812 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
22813 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
22814 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
22815 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
22816 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
22817 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
22818 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
22819
22820 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
22821 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
22822 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
22823 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
22824 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
22825 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
22826 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
22827
22828 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
22829 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
22830 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
22831 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
22832
22833 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
22834 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
22835 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
22836
22837 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
22838
22839 { OPTION_MASK_ISA_SSE2, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
22840 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
22841
22842 { OPTION_MASK_ISA_SSE, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
22843
22844 /* SSE2 MMX */
22845 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
22846 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
22847
22848 /* SSE3 */
22849 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
22850 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
22851
22852 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22853 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
22854 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22855 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
22856 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22857 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
22858
22859 /* SSSE3 */
22860 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
22861 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
22862 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
22863 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
22864 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
22865 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
22866
22867 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
22868 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22869 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
22870 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
22871 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
22872 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22873 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
22874 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22875 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
22876 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
22877 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
22878 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22879 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
22880 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
22881 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
22882 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22883 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
22884 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
22885 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
22886 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
22887 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
22888 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22889 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
22890 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
22891
22892 /* SSSE3. */
22893 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
22894 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
22895
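/* Many of the following entries end in _INT: the trailing argument is an
   immediate control byte.  The expander checks it against the insn's
   operand predicate and reports an error when the value is out of range.  */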
22896 /* SSE4.1 */
22897 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
22898 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
22899 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
22900 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
22901 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
22902 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
22903 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
22904 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
22905 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
22906 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
22907
22908 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
22909 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
22910 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
22911 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
22912 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
22913 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
22914 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
22915 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
22916 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
22917 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
22918 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
22919 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
22920 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
22921
22922 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
22923 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
22924 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
22925 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
22926 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
22927 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
22928 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
22929 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
22930 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
22931 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
22932 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
22933 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
22934
22935 /* SSE4.1 */
22936 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
22937 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
22938 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
22939 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
22940
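/* The ptest entries share one pattern and use the rtx_code field to pick
   the result: EQ tests ZF (ptestz), LTU tests CF (ptestc) and GTU tests
   that neither flag is set (ptestnzc).  The AVX vtest and ptest256 entries
   further down follow the same convention.  */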
22941 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
22942 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
22943 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
22944
22945 /* SSE4.2 */
22946 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
22947 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
22948 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
22949 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
22950 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
22951
22952 /* SSE4A */
22953 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
22954 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
22955 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
22956 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
22957
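/* The AES and PCLMUL entries (and the __float128 fabs/copysign entries
   above) have a null name: this table only supplies their expansion data,
   while the user-visible builtins are declared separately elsewhere in
   this file under the proper -maes / -mpclmul ISA masks.  */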
22958 /* AES */
22959 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
22960 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
22961
22962 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
22963 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
22964 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
22965 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
22966
22967 /* PCLMUL */
22968 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
22969
22970 /* AVX */
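/* The AVX entries follow the same scheme as the SSE ones; the intrinsic
   header wraps them, e.g. avxintrin.h implements _mm256_add_pd roughly as
     (__m256d) __builtin_ia32_addpd256 ((__v4df) a, (__v4df) b).  */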
22971 { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22972 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22973 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22974 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22975 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22976 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22977 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22978 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22979 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22980 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22981 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22982 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22983 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22984 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22985 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22986 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22987 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22988 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22989 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22990 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22991 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22992 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22993 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22994 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22995 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22996 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22997
22998 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
22999 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
23000 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
23001 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
23002
23003 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
23004 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
23005 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
23006 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
23007 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
23008 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
23009 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
23010 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpsdv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
23011 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpssv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
23012 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
23013 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
23014 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
23015 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
23016 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
23017 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
23018 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
23019 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2pd256, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
23020 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2ps256, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
23021 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
23022 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
23023 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
23024 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttpd2dq256, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
23025 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
23026 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttps2dq256, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
23027 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
23028 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
23029 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
23030 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
23031 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
23032 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
23033 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
23034 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
23035 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
23036 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
23037
23038 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
23039 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
23040 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
23041
23042 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
23043 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
23044 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
23045 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
23046 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
23047
23048 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
23049
23050 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
23051 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
23052
23053 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
23054 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
23055 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
23056 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
23057
23058 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
23059 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
23060 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
23061 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
23062 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
23063 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
23064
23065 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
23066 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
23067 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
23068 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
23069 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
23070 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
23071 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
23072 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
23073 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
23074 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
23075 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
23076 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
23077 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
23078 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
23079 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
23080
23081 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
23082 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
23083
23084 { OPTION_MASK_ISA_ABM, CODE_FOR_clzhi2_abm, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
23085
23086 /* F16C */
23087 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
23088 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
23089 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
23090 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
23091 };
23092
23093 /* FMA4 and XOP. */
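/* The MULTI_ARG_* macros below are shorthand for the V..._FTYPE_...
   function-type codes used in bdesc_multi_arg; the multi-argument expander
   (ix86_expand_multi_arg_builtin) uses them to decide how many operands to
   emit and how immediates and condition codes are handled.  */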
23094 #define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
23095 #define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
23096 #define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
23097 #define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
23098 #define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
23099 #define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
23100 #define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
23101 #define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
23102 #define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
23103 #define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
23104 #define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
23105 #define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
23106 #define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
23107 #define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
23108 #define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
23109 #define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
23110 #define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
23111 #define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
23112 #define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
23113 #define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
23114 #define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
23115 #define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
23116 #define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
23117 #define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
23118 #define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
23119 #define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
23120 #define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
23121 #define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
23122 #define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
23123 #define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
23124 #define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
23125 #define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
23126 #define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
23127 #define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
23128 #define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
23129 #define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
23130 #define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
23131 #define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
23132 #define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
23133 #define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
23134 #define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
23135 #define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
23136 #define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
23137 #define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
23138 #define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
23139 #define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
23140 #define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
23141 #define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
23142 #define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
23143 #define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
23144 #define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
23145 #define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
23146
23147 static const struct builtin_description bdesc_multi_arg[] =
23148 {
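/* FMA4: fma4intrin.h wraps these builtins, e.g. _mm_macc_ps is defined
   roughly as (__m128) __builtin_ia32_vfmaddps ((__v4sf) a, (__v4sf) b,
   (__v4sf) c).  The vm* patterns are again the scalar low-element forms.  */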
23149 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmaddv4sf4, "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS, UNKNOWN, (int)MULTI_ARG_3_SF },
23150 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmaddv2df4, "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD, UNKNOWN, (int)MULTI_ARG_3_DF },
23151 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddv4sf4, "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS, UNKNOWN, (int)MULTI_ARG_3_SF },
23152 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddv2df4, "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD, UNKNOWN, (int)MULTI_ARG_3_DF },
23153 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmsubv4sf4, "__builtin_ia32_vfmsubss", IX86_BUILTIN_VFMSUBSS, UNKNOWN, (int)MULTI_ARG_3_SF },
23154 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmsubv2df4, "__builtin_ia32_vfmsubsd", IX86_BUILTIN_VFMSUBSD, UNKNOWN, (int)MULTI_ARG_3_DF },
23155 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubv4sf4, "__builtin_ia32_vfmsubps", IX86_BUILTIN_VFMSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF },
23156 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubv2df4, "__builtin_ia32_vfmsubpd", IX86_BUILTIN_VFMSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF },
23157
23158 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfnmaddv4sf4, "__builtin_ia32_vfnmaddss", IX86_BUILTIN_VFNMADDSS, UNKNOWN, (int)MULTI_ARG_3_SF },
23159 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfnmaddv2df4, "__builtin_ia32_vfnmaddsd", IX86_BUILTIN_VFNMADDSD, UNKNOWN, (int)MULTI_ARG_3_DF },
23160 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmaddv4sf4, "__builtin_ia32_vfnmaddps", IX86_BUILTIN_VFNMADDPS, UNKNOWN, (int)MULTI_ARG_3_SF },
23161 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmaddv2df4, "__builtin_ia32_vfnmaddpd", IX86_BUILTIN_VFNMADDPD, UNKNOWN, (int)MULTI_ARG_3_DF },
23162 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfnmsubv4sf4, "__builtin_ia32_vfnmsubss", IX86_BUILTIN_VFNMSUBSS, UNKNOWN, (int)MULTI_ARG_3_SF },
23163 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfnmsubv2df4, "__builtin_ia32_vfnmsubsd", IX86_BUILTIN_VFNMSUBSD, UNKNOWN, (int)MULTI_ARG_3_DF },
23164 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmsubv4sf4, "__builtin_ia32_vfnmsubps", IX86_BUILTIN_VFNMSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF },
23165 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmsubv2df4, "__builtin_ia32_vfnmsubpd", IX86_BUILTIN_VFNMSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF },
23166
23167 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsubv4sf4, "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF },
23168 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsubv2df4, "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF },
23169 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubaddv4sf4, "__builtin_ia32_vfmsubaddps", IX86_BUILTIN_VFMSUBADDPS, UNKNOWN, (int)MULTI_ARG_3_SF },
23170 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubaddv2df4, "__builtin_ia32_vfmsubaddpd", IX86_BUILTIN_VFMSUBADDPD, UNKNOWN, (int)MULTI_ARG_3_DF },
23171
23172 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddv8sf4256, "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
23173 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddv4df4256, "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
23174 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubv8sf4256, "__builtin_ia32_vfmsubps256", IX86_BUILTIN_VFMSUBPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
23175 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubv4df4256, "__builtin_ia32_vfmsubpd256", IX86_BUILTIN_VFMSUBPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
23176
23177 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmaddv8sf4256, "__builtin_ia32_vfnmaddps256", IX86_BUILTIN_VFNMADDPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
23178 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmaddv4df4256, "__builtin_ia32_vfnmaddpd256", IX86_BUILTIN_VFNMADDPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
23179 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmsubv8sf4256, "__builtin_ia32_vfnmsubps256", IX86_BUILTIN_VFNMSUBPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
23180 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmsubv4df4256, "__builtin_ia32_vfnmsubpd256", IX86_BUILTIN_VFNMSUBPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
23181
23182 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsubv8sf4, "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
23183 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsubv4df4, "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
23184 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubaddv8sf4, "__builtin_ia32_vfmsubaddps256", IX86_BUILTIN_VFMSUBADDPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
23185 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubaddv4df4, "__builtin_ia32_vfmsubaddpd256", IX86_BUILTIN_VFMSUBADDPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
23186
23187 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
23188 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
23189 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
23190 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
23191 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi", IX86_BUILTIN_VPCMOV_V16QI, UNKNOWN, (int)MULTI_ARG_3_QI },
23192 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
23193 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },
23194
23195 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
23196 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
23197 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
23198 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
23199 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
23200 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
23201 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
23202
23203 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },
23204
23205 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
23206 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
23207 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
23208 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
23209 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
23210 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
23211 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
23212 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
23213 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
23214 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
23215 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
23216 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
23217
23218 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
23219 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
23220 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
23221 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
23222 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
23223 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
23224 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
23225 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
23226 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
23227 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
23228 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
23229 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
23230 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
23231 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
23232 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
23233 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
23234
23235 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_2_SF },
23236 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_2_DF },
23237 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
23238 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
23239 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2256, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
23240 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2256, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },
23241
23242 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
23243 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
23244 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
23245 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
23246 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
23247 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
23248 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
23249 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
23250 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
23251 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
23252 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
23253 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
23254 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
23255 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
23256 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
23257
23258 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
23259 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
23260 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
23261 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
23262 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
23263 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
23264 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
23265
23266 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
23267 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
23268 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
23269 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
23270 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
23271 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
23272 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
23273
23274 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
23275 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
23276 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
23277 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
23278 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
23279 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
23280 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
23281
23282 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
23283 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
23284 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
23285 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
23286 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
23287 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
23288 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
23289
23290 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
23291 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
23292 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
23293 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
23294 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
23295 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
23296 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
23297
23298 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
23299 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
23300 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
23301 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
23302 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
23303 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
23304 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
23305
23306 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
23307 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
23308 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
23309 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
23310 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
23311 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
23312 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
23313
23314 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
23315 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
23316 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
23317 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
23318 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
23319 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
23320 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
23321
23322 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
23323 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
23324 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
23325 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
23326 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub",IX86_BUILTIN_VPCOMFALSEUB,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
23327 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw",IX86_BUILTIN_VPCOMFALSEUW,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
23328 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud",IX86_BUILTIN_VPCOMFALSEUD,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
23329 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq",IX86_BUILTIN_VPCOMFALSEUQ,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
23330
23331 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
23332 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
23333 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
23334 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
23335 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
23336 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
23337 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
23338 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
23339
23340 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
23341 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
23342 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
23343 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
23344
23345 };
23346
23347 /* Set up all the MMX/SSE builtins, even builtins for instructions that are
23348    not in the current target ISA, so that the user can compile particular
23349    modules with target-specific options that differ from the command-line
23350    options.  */
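/* Illustrative sketch, not part of the original sources: because the
   builtins are recorded even when their ISA is not enabled on the
   command line, a file compiled without -mxop can still provide

     typedef short v8hi __attribute__ ((vector_size (16)));

     __attribute__ ((target ("xop")))
     v8hi madd (v8hi a, v8hi b, v8hi c)
     {
       return __builtin_ia32_vpmacsww (a, b, c);
     }

   where the target attribute enables XOP for that one function and the
   builtin registered in the tables above becomes usable inside it.  */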
23351 static void
23352 ix86_init_mmx_sse_builtins (void)
23353 {
23354 const struct builtin_description * d;
23355 enum ix86_builtin_func_type ftype;
23356 size_t i;
23357
23358 /* Add all special builtins with variable number of operands. */
23359 for (i = 0, d = bdesc_special_args;
23360 i < ARRAY_SIZE (bdesc_special_args);
23361 i++, d++)
23362 {
23363 if (d->name == 0)
23364 continue;
23365
23366 ftype = (enum ix86_builtin_func_type) d->flag;
23367 def_builtin (d->mask, d->name, ftype, d->code);
23368 }
23369
23370 /* Add all builtins with variable number of operands. */
23371 for (i = 0, d = bdesc_args;
23372 i < ARRAY_SIZE (bdesc_args);
23373 i++, d++)
23374 {
23375 if (d->name == 0)
23376 continue;
23377
23378 ftype = (enum ix86_builtin_func_type) d->flag;
23379 def_builtin_const (d->mask, d->name, ftype, d->code);
23380 }
23381
23382 /* pcmpestr[im] insns. */
23383 for (i = 0, d = bdesc_pcmpestr;
23384 i < ARRAY_SIZE (bdesc_pcmpestr);
23385 i++, d++)
23386 {
23387 if (d->code == IX86_BUILTIN_PCMPESTRM128)
23388 ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
23389 else
23390 ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
23391 def_builtin_const (d->mask, d->name, ftype, d->code);
23392 }
23393
23394 /* pcmpistr[im] insns. */
23395 for (i = 0, d = bdesc_pcmpistr;
23396 i < ARRAY_SIZE (bdesc_pcmpistr);
23397 i++, d++)
23398 {
23399 if (d->code == IX86_BUILTIN_PCMPISTRM128)
23400 ftype = V16QI_FTYPE_V16QI_V16QI_INT;
23401 else
23402 ftype = INT_FTYPE_V16QI_V16QI_INT;
23403 def_builtin_const (d->mask, d->name, ftype, d->code);
23404 }
23405
23406 /* comi/ucomi insns. */
23407 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
23408 {
23409 if (d->mask == OPTION_MASK_ISA_SSE2)
23410 ftype = INT_FTYPE_V2DF_V2DF;
23411 else
23412 ftype = INT_FTYPE_V4SF_V4SF;
23413 def_builtin_const (d->mask, d->name, ftype, d->code);
23414 }
23415
23416 /* SSE */
23417 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
23418 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
23419 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
23420 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
23421
23422 /* SSE or 3DNow!A */
23423 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
23424 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
23425 IX86_BUILTIN_MASKMOVQ);
23426
23427 /* SSE2 */
23428 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
23429 VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
23430
23431 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
23432 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
23433 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
23434 VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
23435
23436 /* SSE3. */
23437 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
23438 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
23439 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
23440 VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
23441
23442 /* AES */
23443 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
23444 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
23445 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
23446 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
23447 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
23448 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
23449 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
23450 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
23451 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
23452 V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
23453 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
23454 V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
23455
23456 /* PCLMUL */
23457 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
23458 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
23459
23460 /* MMX access to the vec_init patterns. */
23461 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
23462 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
23463
23464 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
23465 V4HI_FTYPE_HI_HI_HI_HI,
23466 IX86_BUILTIN_VEC_INIT_V4HI);
23467
23468 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
23469 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
23470 IX86_BUILTIN_VEC_INIT_V8QI);
23471
23472 /* Access to the vec_extract patterns. */
23473 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
23474 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
23475 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
23476 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
23477 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
23478 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
23479 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
23480 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
23481 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
23482 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
23483
23484 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
23485 "__builtin_ia32_vec_ext_v4hi",
23486 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
23487
23488 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
23489 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
23490
23491 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
23492 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
23493
23494 /* Access to the vec_set patterns. */
23495 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
23496 "__builtin_ia32_vec_set_v2di",
23497 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
23498
23499 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
23500 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
23501
23502 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
23503 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
23504
23505 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
23506 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
23507
23508 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
23509 "__builtin_ia32_vec_set_v4hi",
23510 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
23511
23512 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
23513 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
23514
23515 /* Add FMA4/XOP multi-arg builtins.  */
23516 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
23517 {
23518 if (d->name == 0)
23519 continue;
23520
23521 ftype = (enum ix86_builtin_func_type) d->flag;
23522 def_builtin_const (d->mask, d->name, ftype, d->code);
23523 }
23524 }
23525
23526 /* Internal method for ix86_init_builtins. */
23527
23528 static void
23529 ix86_init_builtins_va_builtins_abi (void)
23530 {
23531 tree ms_va_ref, sysv_va_ref;
23532 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
23533 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
23534 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
23535 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
23536
23537 if (!TARGET_64BIT)
23538 return;
23539 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
23540 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
23541 ms_va_ref = build_reference_type (ms_va_list_type_node);
23542 sysv_va_ref =
23543 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
23544
23545 fnvoid_va_end_ms =
23546 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
23547 fnvoid_va_start_ms =
23548 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
23549 fnvoid_va_end_sysv =
23550 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
23551 fnvoid_va_start_sysv =
23552 build_varargs_function_type_list (void_type_node, sysv_va_ref,
23553 NULL_TREE);
23554 fnvoid_va_copy_ms =
23555 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
23556 NULL_TREE);
23557 fnvoid_va_copy_sysv =
23558 build_function_type_list (void_type_node, sysv_va_ref,
23559 sysv_va_ref, NULL_TREE);
23560
23561 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
23562 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
23563 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
23564 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
23565 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
23566 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
23567 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
23568 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
23569 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
23570 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
23571 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
23572 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
23573 }
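/* Usage sketch (illustrative only; assumes the MS-ABI va_list type is
   exposed as __builtin_ms_va_list): on x86-64 the builtins registered
   above let code walk the arguments of an ms_abi variadic function,

     int __attribute__ ((ms_abi)) sum_ints (int n, ...)
     {
       int s = 0;
       __builtin_ms_va_list ap;
       __builtin_ms_va_start (ap, n);
       while (n-- > 0)
         s += __builtin_va_arg (ap, int);
       __builtin_ms_va_end (ap);
       return s;
     }

   and the __builtin_sysv_va_* counterparts do the same for sysv_abi
   functions.  */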
23574
23575 static void
23576 ix86_init_builtin_types (void)
23577 {
23578 tree float128_type_node, float80_type_node;
23579
23580 /* The __float80 type. */
23581 float80_type_node = long_double_type_node;
23582 if (TYPE_MODE (float80_type_node) != XFmode)
23583 {
23584 /* long double does not use XFmode; create a distinct 80-bit type.  */
23585 float80_type_node = make_node (REAL_TYPE);
23586
23587 TYPE_PRECISION (float80_type_node) = 80;
23588 layout_type (float80_type_node);
23589 }
23590 lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
23591
23592 /* The __float128 type. */
23593 float128_type_node = make_node (REAL_TYPE);
23594 TYPE_PRECISION (float128_type_node) = 128;
23595 layout_type (float128_type_node);
23596 lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
23597
23598 /* This macro is built by i386-builtin-types.awk. */
23599 DEFINE_BUILTIN_PRIMITIVE_TYPES;
23600 }
23601
23602 static void
23603 ix86_init_builtins (void)
23604 {
23605 tree t;
23606
23607 ix86_init_builtin_types ();
23608
23609 /* TFmode support builtins. */
23610 def_builtin_const (0, "__builtin_infq",
23611 FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
23612 def_builtin_const (0, "__builtin_huge_valq",
23613 FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
23614
23615 /* We will expand them to normal calls if SSE2 isn't available, since
23616    they are used by libgcc.  */
23617 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
23618 t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
23619 BUILT_IN_MD, "__fabstf2", NULL_TREE);
23620 TREE_READONLY (t) = 1;
23621 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;
23622
23623 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
23624 t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
23625 BUILT_IN_MD, "__copysigntf3", NULL_TREE);
23626 TREE_READONLY (t) = 1;
23627 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
23628
23629 ix86_init_mmx_sse_builtins ();
23630
23631 if (TARGET_64BIT)
23632 ix86_init_builtins_va_builtins_abi ();
23633 }
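/* Illustrative sketch (not part of this file) of the TFmode builtins
   registered above: user code can write

     __float128 x = -__builtin_infq ();
     __float128 y = __builtin_fabsq (x);
     __float128 z = __builtin_copysignq (y, x);

   and, when SSE2 is not available, the last two expand to calls to the
   libgcc routines __fabstf2 and __copysigntf3 named above.  */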
23634
23635 /* Return the ix86 builtin for CODE. */
23636
23637 static tree
23638 ix86_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
23639 {
23640 if (code >= IX86_BUILTIN_MAX)
23641 return error_mark_node;
23642
23643 return ix86_builtins[code];
23644 }
23645
23646 /* Errors in the source file can cause expand_expr to return const0_rtx
23647 where we expect a vector. To avoid crashing, use one of the vector
23648 clear instructions. */
23649 static rtx
23650 safe_vector_operand (rtx x, enum machine_mode mode)
23651 {
23652 if (x == const0_rtx)
23653 x = CONST0_RTX (mode);
23654 return x;
23655 }
23656
23657 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
23658
23659 static rtx
23660 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
23661 {
23662 rtx pat;
23663 tree arg0 = CALL_EXPR_ARG (exp, 0);
23664 tree arg1 = CALL_EXPR_ARG (exp, 1);
23665 rtx op0 = expand_normal (arg0);
23666 rtx op1 = expand_normal (arg1);
23667 enum machine_mode tmode = insn_data[icode].operand[0].mode;
23668 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
23669 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
23670
23671 if (VECTOR_MODE_P (mode0))
23672 op0 = safe_vector_operand (op0, mode0);
23673 if (VECTOR_MODE_P (mode1))
23674 op1 = safe_vector_operand (op1, mode1);
23675
23676 if (optimize || !target
23677 || GET_MODE (target) != tmode
23678 || !insn_data[icode].operand[0].predicate (target, tmode))
23679 target = gen_reg_rtx (tmode);
23680
23681 if (GET_MODE (op1) == SImode && mode1 == TImode)
23682 {
23683 rtx x = gen_reg_rtx (V4SImode);
23684 emit_insn (gen_sse2_loadd (x, op1));
23685 op1 = gen_lowpart (TImode, x);
23686 }
23687
23688 if (!insn_data[icode].operand[1].predicate (op0, mode0))
23689 op0 = copy_to_mode_reg (mode0, op0);
23690 if (!insn_data[icode].operand[2].predicate (op1, mode1))
23691 op1 = copy_to_mode_reg (mode1, op1);
23692
23693 pat = GEN_FCN (icode) (target, op0, op1);
23694 if (! pat)
23695 return 0;
23696
23697 emit_insn (pat);
23698
23699 return target;
23700 }
23701
23702 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
23703
23704 static rtx
23705 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
23706 enum ix86_builtin_func_type m_type,
23707 enum rtx_code sub_code)
23708 {
23709 rtx pat;
23710 int i;
23711 int nargs;
23712 bool comparison_p = false;
23713 bool tf_p = false;
23714 bool last_arg_constant = false;
23715 int num_memory = 0;
23716 struct {
23717 rtx op;
23718 enum machine_mode mode;
23719 } args[4];
23720
23721 enum machine_mode tmode = insn_data[icode].operand[0].mode;
23722
23723 switch (m_type)
23724 {
23725 case MULTI_ARG_4_DF2_DI_I:
23726 case MULTI_ARG_4_DF2_DI_I1:
23727 case MULTI_ARG_4_SF2_SI_I:
23728 case MULTI_ARG_4_SF2_SI_I1:
23729 nargs = 4;
23730 last_arg_constant = true;
23731 break;
23732
23733 case MULTI_ARG_3_SF:
23734 case MULTI_ARG_3_DF:
23735 case MULTI_ARG_3_SF2:
23736 case MULTI_ARG_3_DF2:
23737 case MULTI_ARG_3_DI:
23738 case MULTI_ARG_3_SI:
23739 case MULTI_ARG_3_SI_DI:
23740 case MULTI_ARG_3_HI:
23741 case MULTI_ARG_3_HI_SI:
23742 case MULTI_ARG_3_QI:
23743 case MULTI_ARG_3_DI2:
23744 case MULTI_ARG_3_SI2:
23745 case MULTI_ARG_3_HI2:
23746 case MULTI_ARG_3_QI2:
23747 nargs = 3;
23748 break;
23749
23750 case MULTI_ARG_2_SF:
23751 case MULTI_ARG_2_DF:
23752 case MULTI_ARG_2_DI:
23753 case MULTI_ARG_2_SI:
23754 case MULTI_ARG_2_HI:
23755 case MULTI_ARG_2_QI:
23756 nargs = 2;
23757 break;
23758
23759 case MULTI_ARG_2_DI_IMM:
23760 case MULTI_ARG_2_SI_IMM:
23761 case MULTI_ARG_2_HI_IMM:
23762 case MULTI_ARG_2_QI_IMM:
23763 nargs = 2;
23764 last_arg_constant = true;
23765 break;
23766
23767 case MULTI_ARG_1_SF:
23768 case MULTI_ARG_1_DF:
23769 case MULTI_ARG_1_SF2:
23770 case MULTI_ARG_1_DF2:
23771 case MULTI_ARG_1_DI:
23772 case MULTI_ARG_1_SI:
23773 case MULTI_ARG_1_HI:
23774 case MULTI_ARG_1_QI:
23775 case MULTI_ARG_1_SI_DI:
23776 case MULTI_ARG_1_HI_DI:
23777 case MULTI_ARG_1_HI_SI:
23778 case MULTI_ARG_1_QI_DI:
23779 case MULTI_ARG_1_QI_SI:
23780 case MULTI_ARG_1_QI_HI:
23781 nargs = 1;
23782 break;
23783
23784 case MULTI_ARG_2_DI_CMP:
23785 case MULTI_ARG_2_SI_CMP:
23786 case MULTI_ARG_2_HI_CMP:
23787 case MULTI_ARG_2_QI_CMP:
23788 nargs = 2;
23789 comparison_p = true;
23790 break;
23791
23792 case MULTI_ARG_2_SF_TF:
23793 case MULTI_ARG_2_DF_TF:
23794 case MULTI_ARG_2_DI_TF:
23795 case MULTI_ARG_2_SI_TF:
23796 case MULTI_ARG_2_HI_TF:
23797 case MULTI_ARG_2_QI_TF:
23798 nargs = 2;
23799 tf_p = true;
23800 break;
23801
23802 default:
23803 gcc_unreachable ();
23804 }
23805
23806 if (optimize || !target
23807 || GET_MODE (target) != tmode
23808 || !insn_data[icode].operand[0].predicate (target, tmode))
23809 target = gen_reg_rtx (tmode);
23810
23811 gcc_assert (nargs <= 4);
23812
23813 for (i = 0; i < nargs; i++)
23814 {
23815 tree arg = CALL_EXPR_ARG (exp, i);
23816 rtx op = expand_normal (arg);
23817 int adjust = (comparison_p) ? 1 : 0;
23818 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
23819
23820 if (last_arg_constant && i == nargs-1)
23821 {
23822 if (!CONST_INT_P (op))
23823 {
23824 error ("the last argument must be an immediate");
23825 return gen_reg_rtx (tmode);
23826 }
23827 }
23828 else
23829 {
23830 if (VECTOR_MODE_P (mode))
23831 op = safe_vector_operand (op, mode);
23832
23833 /* If we aren't optimizing, only allow one memory operand to be
23834 generated. */
23835 if (memory_operand (op, mode))
23836 num_memory++;
23837
23838 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
23839
23840 if (optimize
23841 || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
23842 || num_memory > 1)
23843 op = force_reg (mode, op);
23844 }
23845
23846 args[i].op = op;
23847 args[i].mode = mode;
23848 }
23849
23850 switch (nargs)
23851 {
23852 case 1:
23853 pat = GEN_FCN (icode) (target, args[0].op);
23854 break;
23855
23856 case 2:
23857 if (tf_p)
23858 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
23859 GEN_INT ((int)sub_code));
23860 else if (! comparison_p)
23861 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
23862 else
23863 {
23864 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
23865 args[0].op,
23866 args[1].op);
23867
23868 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
23869 }
23870 break;
23871
23872 case 3:
23873 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
23874 break;
23875
23876 case 4:
23877 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
23878 break;
23879
23880 default:
23881 gcc_unreachable ();
23882 }
23883
23884 if (! pat)
23885 return 0;
23886
23887 emit_insn (pat);
23888 return target;
23889 }
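/* Illustrative sketch (not part of this file): for a two-operand "TF"
   descriptor from the tables above, e.g.

     __v16qi t = __builtin_ia32_vpcomtrueb (a, b);

   the routine passes PCOM_TRUE through as an immediate third operand,
   whereas for a "_CMP" descriptor such as

     __v16qi e = __builtin_ia32_vpcomeqb (a, b);

   it first wraps the two vector operands in an EQ rtx and hands that,
   together with the operands, to the insn pattern.  */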
23890
23891 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
23892 insns with vec_merge. */
23893
23894 static rtx
23895 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
23896 rtx target)
23897 {
23898 rtx pat;
23899 tree arg0 = CALL_EXPR_ARG (exp, 0);
23900 rtx op1, op0 = expand_normal (arg0);
23901 enum machine_mode tmode = insn_data[icode].operand[0].mode;
23902 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
23903
23904 if (optimize || !target
23905 || GET_MODE (target) != tmode
23906 || !insn_data[icode].operand[0].predicate (target, tmode))
23907 target = gen_reg_rtx (tmode);
23908
23909 if (VECTOR_MODE_P (mode0))
23910 op0 = safe_vector_operand (op0, mode0);
23911
23912 if ((optimize && !register_operand (op0, mode0))
23913 || !insn_data[icode].operand[1].predicate (op0, mode0))
23914 op0 = copy_to_mode_reg (mode0, op0);
23915
23916 op1 = op0;
23917 if (!insn_data[icode].operand[2].predicate (op1, mode0))
23918 op1 = copy_to_mode_reg (mode0, op1);
23919
23920 pat = GEN_FCN (icode) (target, op0, op1);
23921 if (! pat)
23922 return 0;
23923 emit_insn (pat);
23924 return target;
23925 }
23926
23927 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
23928
23929 static rtx
23930 ix86_expand_sse_compare (const struct builtin_description *d,
23931 tree exp, rtx target, bool swap)
23932 {
23933 rtx pat;
23934 tree arg0 = CALL_EXPR_ARG (exp, 0);
23935 tree arg1 = CALL_EXPR_ARG (exp, 1);
23936 rtx op0 = expand_normal (arg0);
23937 rtx op1 = expand_normal (arg1);
23938 rtx op2;
23939 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
23940 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
23941 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
23942 enum rtx_code comparison = d->comparison;
23943
23944 if (VECTOR_MODE_P (mode0))
23945 op0 = safe_vector_operand (op0, mode0);
23946 if (VECTOR_MODE_P (mode1))
23947 op1 = safe_vector_operand (op1, mode1);
23948
23949 /* Swap operands if we have a comparison that isn't available in
23950 hardware. */
23951 if (swap)
23952 {
23953 rtx tmp = gen_reg_rtx (mode1);
23954 emit_move_insn (tmp, op1);
23955 op1 = op0;
23956 op0 = tmp;
23957 }
23958
23959 if (optimize || !target
23960 || GET_MODE (target) != tmode
23961 || !insn_data[d->icode].operand[0].predicate (target, tmode))
23962 target = gen_reg_rtx (tmode);
23963
23964 if ((optimize && !register_operand (op0, mode0))
23965 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
23966 op0 = copy_to_mode_reg (mode0, op0);
23967 if ((optimize && !register_operand (op1, mode1))
23968 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
23969 op1 = copy_to_mode_reg (mode1, op1);
23970
23971 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
23972 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
23973 if (! pat)
23974 return 0;
23975 emit_insn (pat);
23976 return target;
23977 }
23978
23979 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
23980
23981 static rtx
23982 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
23983 rtx target)
23984 {
23985 rtx pat;
23986 tree arg0 = CALL_EXPR_ARG (exp, 0);
23987 tree arg1 = CALL_EXPR_ARG (exp, 1);
23988 rtx op0 = expand_normal (arg0);
23989 rtx op1 = expand_normal (arg1);
23990 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
23991 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
23992 enum rtx_code comparison = d->comparison;
23993
23994 if (VECTOR_MODE_P (mode0))
23995 op0 = safe_vector_operand (op0, mode0);
23996 if (VECTOR_MODE_P (mode1))
23997 op1 = safe_vector_operand (op1, mode1);
23998
23999 /* Swap operands if we have a comparison that isn't available in
24000 hardware. */
24001 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
24002 {
24003 rtx tmp = op1;
24004 op1 = op0;
24005 op0 = tmp;
24006 }
24007
24008 target = gen_reg_rtx (SImode);
24009 emit_move_insn (target, const0_rtx);
24010 target = gen_rtx_SUBREG (QImode, target, 0);
24011
24012 if ((optimize && !register_operand (op0, mode0))
24013 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
24014 op0 = copy_to_mode_reg (mode0, op0);
24015 if ((optimize && !register_operand (op1, mode1))
24016 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
24017 op1 = copy_to_mode_reg (mode1, op1);
24018
24019 pat = GEN_FCN (d->icode) (op0, op1);
24020 if (! pat)
24021 return 0;
24022 emit_insn (pat);
24023 emit_insn (gen_rtx_SET (VOIDmode,
24024 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24025 gen_rtx_fmt_ee (comparison, QImode,
24026 SET_DEST (pat),
24027 const0_rtx)));
24028
24029 return SUBREG_REG (target);
24030 }
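/* Sketch of what the routine above produces (illustrative, not part of
   this file): for the SSE builtin

     int r = __builtin_ia32_comieq (a, b);

   with a and b of type __v4sf, the comi pattern sets the flags register,
   the descriptor's comparison code against zero is written into the low
   byte of a zero-initialized SImode pseudo through STRICT_LOW_PART, and
   that SImode register is what the builtin call returns.  */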
24031
24032 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
24033
24034 static rtx
24035 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
24036 rtx target)
24037 {
24038 rtx pat;
24039 tree arg0 = CALL_EXPR_ARG (exp, 0);
24040 tree arg1 = CALL_EXPR_ARG (exp, 1);
24041 rtx op0 = expand_normal (arg0);
24042 rtx op1 = expand_normal (arg1);
24043 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
24044 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
24045 enum rtx_code comparison = d->comparison;
24046
24047 if (VECTOR_MODE_P (mode0))
24048 op0 = safe_vector_operand (op0, mode0);
24049 if (VECTOR_MODE_P (mode1))
24050 op1 = safe_vector_operand (op1, mode1);
24051
24052 target = gen_reg_rtx (SImode);
24053 emit_move_insn (target, const0_rtx);
24054 target = gen_rtx_SUBREG (QImode, target, 0);
24055
24056 if ((optimize && !register_operand (op0, mode0))
24057 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
24058 op0 = copy_to_mode_reg (mode0, op0);
24059 if ((optimize && !register_operand (op1, mode1))
24060 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
24061 op1 = copy_to_mode_reg (mode1, op1);
24062
24063 pat = GEN_FCN (d->icode) (op0, op1);
24064 if (! pat)
24065 return 0;
24066 emit_insn (pat);
24067 emit_insn (gen_rtx_SET (VOIDmode,
24068 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24069 gen_rtx_fmt_ee (comparison, QImode,
24070 SET_DEST (pat),
24071 const0_rtx)));
24072
24073 return SUBREG_REG (target);
24074 }
24075
24076 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
24077
24078 static rtx
24079 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
24080 tree exp, rtx target)
24081 {
24082 rtx pat;
24083 tree arg0 = CALL_EXPR_ARG (exp, 0);
24084 tree arg1 = CALL_EXPR_ARG (exp, 1);
24085 tree arg2 = CALL_EXPR_ARG (exp, 2);
24086 tree arg3 = CALL_EXPR_ARG (exp, 3);
24087 tree arg4 = CALL_EXPR_ARG (exp, 4);
24088 rtx scratch0, scratch1;
24089 rtx op0 = expand_normal (arg0);
24090 rtx op1 = expand_normal (arg1);
24091 rtx op2 = expand_normal (arg2);
24092 rtx op3 = expand_normal (arg3);
24093 rtx op4 = expand_normal (arg4);
24094 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
24095
24096 tmode0 = insn_data[d->icode].operand[0].mode;
24097 tmode1 = insn_data[d->icode].operand[1].mode;
24098 modev2 = insn_data[d->icode].operand[2].mode;
24099 modei3 = insn_data[d->icode].operand[3].mode;
24100 modev4 = insn_data[d->icode].operand[4].mode;
24101 modei5 = insn_data[d->icode].operand[5].mode;
24102 modeimm = insn_data[d->icode].operand[6].mode;
24103
24104 if (VECTOR_MODE_P (modev2))
24105 op0 = safe_vector_operand (op0, modev2);
24106 if (VECTOR_MODE_P (modev4))
24107 op2 = safe_vector_operand (op2, modev4);
24108
24109 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
24110 op0 = copy_to_mode_reg (modev2, op0);
24111 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
24112 op1 = copy_to_mode_reg (modei3, op1);
24113 if ((optimize && !register_operand (op2, modev4))
24114 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
24115 op2 = copy_to_mode_reg (modev4, op2);
24116 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
24117 op3 = copy_to_mode_reg (modei5, op3);
24118
24119 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
24120 {
24121 error ("the fifth argument must be an 8-bit immediate");
24122 return const0_rtx;
24123 }
24124
24125 if (d->code == IX86_BUILTIN_PCMPESTRI128)
24126 {
24127 if (optimize || !target
24128 || GET_MODE (target) != tmode0
24129 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
24130 target = gen_reg_rtx (tmode0);
24131
24132 scratch1 = gen_reg_rtx (tmode1);
24133
24134 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
24135 }
24136 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
24137 {
24138 if (optimize || !target
24139 || GET_MODE (target) != tmode1
24140 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
24141 target = gen_reg_rtx (tmode1);
24142
24143 scratch0 = gen_reg_rtx (tmode0);
24144
24145 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
24146 }
24147 else
24148 {
24149 gcc_assert (d->flag);
24150
24151 scratch0 = gen_reg_rtx (tmode0);
24152 scratch1 = gen_reg_rtx (tmode1);
24153
24154 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
24155 }
24156
24157 if (! pat)
24158 return 0;
24159
24160 emit_insn (pat);
24161
24162 if (d->flag)
24163 {
24164 target = gen_reg_rtx (SImode);
24165 emit_move_insn (target, const0_rtx);
24166 target = gen_rtx_SUBREG (QImode, target, 0);
24167
24168 emit_insn
24169 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24170 gen_rtx_fmt_ee (EQ, QImode,
24171 gen_rtx_REG ((enum machine_mode) d->flag,
24172 FLAGS_REG),
24173 const0_rtx)));
24174 return SUBREG_REG (target);
24175 }
24176 else
24177 return target;
24178 }
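/* Usage sketch (illustrative only): the SSE4.2 builtin handled above,

     int idx = __builtin_ia32_pcmpestri128 (a, la, b, lb, 0x0c);

   takes two __v16qi operands with explicit lengths plus an 8-bit
   immediate mode; a non-constant mode argument is rejected by the
   immediate check above instead of being forced into a register.  */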
24179
24180
24181 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
24182
24183 static rtx
24184 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
24185 tree exp, rtx target)
24186 {
24187 rtx pat;
24188 tree arg0 = CALL_EXPR_ARG (exp, 0);
24189 tree arg1 = CALL_EXPR_ARG (exp, 1);
24190 tree arg2 = CALL_EXPR_ARG (exp, 2);
24191 rtx scratch0, scratch1;
24192 rtx op0 = expand_normal (arg0);
24193 rtx op1 = expand_normal (arg1);
24194 rtx op2 = expand_normal (arg2);
24195 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
24196
24197 tmode0 = insn_data[d->icode].operand[0].mode;
24198 tmode1 = insn_data[d->icode].operand[1].mode;
24199 modev2 = insn_data[d->icode].operand[2].mode;
24200 modev3 = insn_data[d->icode].operand[3].mode;
24201 modeimm = insn_data[d->icode].operand[4].mode;
24202
24203 if (VECTOR_MODE_P (modev2))
24204 op0 = safe_vector_operand (op0, modev2);
24205 if (VECTOR_MODE_P (modev3))
24206 op1 = safe_vector_operand (op1, modev3);
24207
24208 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
24209 op0 = copy_to_mode_reg (modev2, op0);
24210 if ((optimize && !register_operand (op1, modev3))
24211 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
24212 op1 = copy_to_mode_reg (modev3, op1);
24213
24214 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
24215 {
24216 error ("the third argument must be an 8-bit immediate");
24217 return const0_rtx;
24218 }
24219
24220 if (d->code == IX86_BUILTIN_PCMPISTRI128)
24221 {
24222 if (optimize || !target
24223 || GET_MODE (target) != tmode0
24224 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
24225 target = gen_reg_rtx (tmode0);
24226
24227 scratch1 = gen_reg_rtx (tmode1);
24228
24229 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
24230 }
24231 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
24232 {
24233 if (optimize || !target
24234 || GET_MODE (target) != tmode1
24235 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
24236 target = gen_reg_rtx (tmode1);
24237
24238 scratch0 = gen_reg_rtx (tmode0);
24239
24240 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
24241 }
24242 else
24243 {
24244 gcc_assert (d->flag);
24245
24246 scratch0 = gen_reg_rtx (tmode0);
24247 scratch1 = gen_reg_rtx (tmode1);
24248
24249 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
24250 }
24251
24252 if (! pat)
24253 return 0;
24254
24255 emit_insn (pat);
24256
24257 if (d->flag)
24258 {
24259 target = gen_reg_rtx (SImode);
24260 emit_move_insn (target, const0_rtx);
24261 target = gen_rtx_SUBREG (QImode, target, 0);
24262
24263 emit_insn
24264 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24265 gen_rtx_fmt_ee (EQ, QImode,
24266 gen_rtx_REG ((enum machine_mode) d->flag,
24267 FLAGS_REG),
24268 const0_rtx)));
24269 return SUBREG_REG (target);
24270 }
24271 else
24272 return target;
24273 }
24274
24275 /* Subroutine of ix86_expand_builtin to take care of insns with
24276 variable number of operands. */
24277
24278 static rtx
24279 ix86_expand_args_builtin (const struct builtin_description *d,
24280 tree exp, rtx target)
24281 {
24282 rtx pat, real_target;
24283 unsigned int i, nargs;
24284 unsigned int nargs_constant = 0;
24285 int num_memory = 0;
24286 struct
24287 {
24288 rtx op;
24289 enum machine_mode mode;
24290 } args[4];
24291 bool last_arg_count = false;
24292 enum insn_code icode = d->icode;
24293 const struct insn_data_d *insn_p = &insn_data[icode];
24294 enum machine_mode tmode = insn_p->operand[0].mode;
24295 enum machine_mode rmode = VOIDmode;
24296 bool swap = false;
24297 enum rtx_code comparison = d->comparison;
24298
24299 switch ((enum ix86_builtin_func_type) d->flag)
24300 {
24301 case INT_FTYPE_V8SF_V8SF_PTEST:
24302 case INT_FTYPE_V4DI_V4DI_PTEST:
24303 case INT_FTYPE_V4DF_V4DF_PTEST:
24304 case INT_FTYPE_V4SF_V4SF_PTEST:
24305 case INT_FTYPE_V2DI_V2DI_PTEST:
24306 case INT_FTYPE_V2DF_V2DF_PTEST:
24307 return ix86_expand_sse_ptest (d, exp, target);
24308 case FLOAT128_FTYPE_FLOAT128:
24309 case FLOAT_FTYPE_FLOAT:
24310 case INT_FTYPE_INT:
24311 case UINT64_FTYPE_INT:
24312 case UINT16_FTYPE_UINT16:
24313 case INT64_FTYPE_INT64:
24314 case INT64_FTYPE_V4SF:
24315 case INT64_FTYPE_V2DF:
24316 case INT_FTYPE_V16QI:
24317 case INT_FTYPE_V8QI:
24318 case INT_FTYPE_V8SF:
24319 case INT_FTYPE_V4DF:
24320 case INT_FTYPE_V4SF:
24321 case INT_FTYPE_V2DF:
24322 case V16QI_FTYPE_V16QI:
24323 case V8SI_FTYPE_V8SF:
24324 case V8SI_FTYPE_V4SI:
24325 case V8HI_FTYPE_V8HI:
24326 case V8HI_FTYPE_V16QI:
24327 case V8QI_FTYPE_V8QI:
24328 case V8SF_FTYPE_V8SF:
24329 case V8SF_FTYPE_V8SI:
24330 case V8SF_FTYPE_V4SF:
24331 case V8SF_FTYPE_V8HI:
24332 case V4SI_FTYPE_V4SI:
24333 case V4SI_FTYPE_V16QI:
24334 case V4SI_FTYPE_V4SF:
24335 case V4SI_FTYPE_V8SI:
24336 case V4SI_FTYPE_V8HI:
24337 case V4SI_FTYPE_V4DF:
24338 case V4SI_FTYPE_V2DF:
24339 case V4HI_FTYPE_V4HI:
24340 case V4DF_FTYPE_V4DF:
24341 case V4DF_FTYPE_V4SI:
24342 case V4DF_FTYPE_V4SF:
24343 case V4DF_FTYPE_V2DF:
24344 case V4SF_FTYPE_V4SF:
24345 case V4SF_FTYPE_V4SI:
24346 case V4SF_FTYPE_V8SF:
24347 case V4SF_FTYPE_V4DF:
24348 case V4SF_FTYPE_V8HI:
24349 case V4SF_FTYPE_V2DF:
24350 case V2DI_FTYPE_V2DI:
24351 case V2DI_FTYPE_V16QI:
24352 case V2DI_FTYPE_V8HI:
24353 case V2DI_FTYPE_V4SI:
24354 case V2DF_FTYPE_V2DF:
24355 case V2DF_FTYPE_V4SI:
24356 case V2DF_FTYPE_V4DF:
24357 case V2DF_FTYPE_V4SF:
24358 case V2DF_FTYPE_V2SI:
24359 case V2SI_FTYPE_V2SI:
24360 case V2SI_FTYPE_V4SF:
24361 case V2SI_FTYPE_V2SF:
24362 case V2SI_FTYPE_V2DF:
24363 case V2SF_FTYPE_V2SF:
24364 case V2SF_FTYPE_V2SI:
24365 nargs = 1;
24366 break;
24367 case V4SF_FTYPE_V4SF_VEC_MERGE:
24368 case V2DF_FTYPE_V2DF_VEC_MERGE:
24369 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
24370 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
24371 case V16QI_FTYPE_V16QI_V16QI:
24372 case V16QI_FTYPE_V8HI_V8HI:
24373 case V8QI_FTYPE_V8QI_V8QI:
24374 case V8QI_FTYPE_V4HI_V4HI:
24375 case V8HI_FTYPE_V8HI_V8HI:
24376 case V8HI_FTYPE_V16QI_V16QI:
24377 case V8HI_FTYPE_V4SI_V4SI:
24378 case V8SF_FTYPE_V8SF_V8SF:
24379 case V8SF_FTYPE_V8SF_V8SI:
24380 case V4SI_FTYPE_V4SI_V4SI:
24381 case V4SI_FTYPE_V8HI_V8HI:
24382 case V4SI_FTYPE_V4SF_V4SF:
24383 case V4SI_FTYPE_V2DF_V2DF:
24384 case V4HI_FTYPE_V4HI_V4HI:
24385 case V4HI_FTYPE_V8QI_V8QI:
24386 case V4HI_FTYPE_V2SI_V2SI:
24387 case V4DF_FTYPE_V4DF_V4DF:
24388 case V4DF_FTYPE_V4DF_V4DI:
24389 case V4SF_FTYPE_V4SF_V4SF:
24390 case V4SF_FTYPE_V4SF_V4SI:
24391 case V4SF_FTYPE_V4SF_V2SI:
24392 case V4SF_FTYPE_V4SF_V2DF:
24393 case V4SF_FTYPE_V4SF_DI:
24394 case V4SF_FTYPE_V4SF_SI:
24395 case V2DI_FTYPE_V2DI_V2DI:
24396 case V2DI_FTYPE_V16QI_V16QI:
24397 case V2DI_FTYPE_V4SI_V4SI:
24398 case V2DI_FTYPE_V2DI_V16QI:
24399 case V2DI_FTYPE_V2DF_V2DF:
24400 case V2SI_FTYPE_V2SI_V2SI:
24401 case V2SI_FTYPE_V4HI_V4HI:
24402 case V2SI_FTYPE_V2SF_V2SF:
24403 case V2DF_FTYPE_V2DF_V2DF:
24404 case V2DF_FTYPE_V2DF_V4SF:
24405 case V2DF_FTYPE_V2DF_V2DI:
24406 case V2DF_FTYPE_V2DF_DI:
24407 case V2DF_FTYPE_V2DF_SI:
24408 case V2SF_FTYPE_V2SF_V2SF:
24409 case V1DI_FTYPE_V1DI_V1DI:
24410 case V1DI_FTYPE_V8QI_V8QI:
24411 case V1DI_FTYPE_V2SI_V2SI:
24412 if (comparison == UNKNOWN)
24413 return ix86_expand_binop_builtin (icode, exp, target);
24414 nargs = 2;
24415 break;
24416 case V4SF_FTYPE_V4SF_V4SF_SWAP:
24417 case V2DF_FTYPE_V2DF_V2DF_SWAP:
24418 gcc_assert (comparison != UNKNOWN);
24419 nargs = 2;
24420 swap = true;
24421 break;
24422 case V8HI_FTYPE_V8HI_V8HI_COUNT:
24423 case V8HI_FTYPE_V8HI_SI_COUNT:
24424 case V4SI_FTYPE_V4SI_V4SI_COUNT:
24425 case V4SI_FTYPE_V4SI_SI_COUNT:
24426 case V4HI_FTYPE_V4HI_V4HI_COUNT:
24427 case V4HI_FTYPE_V4HI_SI_COUNT:
24428 case V2DI_FTYPE_V2DI_V2DI_COUNT:
24429 case V2DI_FTYPE_V2DI_SI_COUNT:
24430 case V2SI_FTYPE_V2SI_V2SI_COUNT:
24431 case V2SI_FTYPE_V2SI_SI_COUNT:
24432 case V1DI_FTYPE_V1DI_V1DI_COUNT:
24433 case V1DI_FTYPE_V1DI_SI_COUNT:
24434 nargs = 2;
24435 last_arg_count = true;
24436 break;
24437 case UINT64_FTYPE_UINT64_UINT64:
24438 case UINT_FTYPE_UINT_UINT:
24439 case UINT_FTYPE_UINT_USHORT:
24440 case UINT_FTYPE_UINT_UCHAR:
24441 case UINT16_FTYPE_UINT16_INT:
24442 case UINT8_FTYPE_UINT8_INT:
24443 nargs = 2;
24444 break;
24445 case V2DI_FTYPE_V2DI_INT_CONVERT:
24446 nargs = 2;
24447 rmode = V1TImode;
24448 nargs_constant = 1;
24449 break;
24450 case V8HI_FTYPE_V8HI_INT:
24451 case V8HI_FTYPE_V8SF_INT:
24452 case V8HI_FTYPE_V4SF_INT:
24453 case V8SF_FTYPE_V8SF_INT:
24454 case V4SI_FTYPE_V4SI_INT:
24455 case V4SI_FTYPE_V8SI_INT:
24456 case V4HI_FTYPE_V4HI_INT:
24457 case V4DF_FTYPE_V4DF_INT:
24458 case V4SF_FTYPE_V4SF_INT:
24459 case V4SF_FTYPE_V8SF_INT:
24460 case V2DI_FTYPE_V2DI_INT:
24461 case V2DF_FTYPE_V2DF_INT:
24462 case V2DF_FTYPE_V4DF_INT:
24463 nargs = 2;
24464 nargs_constant = 1;
24465 break;
24466 case V16QI_FTYPE_V16QI_V16QI_V16QI:
24467 case V8SF_FTYPE_V8SF_V8SF_V8SF:
24468 case V4DF_FTYPE_V4DF_V4DF_V4DF:
24469 case V4SF_FTYPE_V4SF_V4SF_V4SF:
24470 case V2DF_FTYPE_V2DF_V2DF_V2DF:
24471 nargs = 3;
24472 break;
24473 case V16QI_FTYPE_V16QI_V16QI_INT:
24474 case V8HI_FTYPE_V8HI_V8HI_INT:
24475 case V8SI_FTYPE_V8SI_V8SI_INT:
24476 case V8SI_FTYPE_V8SI_V4SI_INT:
24477 case V8SF_FTYPE_V8SF_V8SF_INT:
24478 case V8SF_FTYPE_V8SF_V4SF_INT:
24479 case V4SI_FTYPE_V4SI_V4SI_INT:
24480 case V4DF_FTYPE_V4DF_V4DF_INT:
24481 case V4DF_FTYPE_V4DF_V2DF_INT:
24482 case V4SF_FTYPE_V4SF_V4SF_INT:
24483 case V2DI_FTYPE_V2DI_V2DI_INT:
24484 case V2DF_FTYPE_V2DF_V2DF_INT:
24485 nargs = 3;
24486 nargs_constant = 1;
24487 break;
24488 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
24489 nargs = 3;
24490 rmode = V2DImode;
24491 nargs_constant = 1;
24492 break;
24493 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
24494 nargs = 3;
24495 rmode = DImode;
24496 nargs_constant = 1;
24497 break;
24498 case V2DI_FTYPE_V2DI_UINT_UINT:
24499 nargs = 3;
24500 nargs_constant = 2;
24501 break;
24502 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
24503 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
24504 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
24505 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
24506 nargs = 4;
24507 nargs_constant = 1;
24508 break;
24509 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
24510 nargs = 4;
24511 nargs_constant = 2;
24512 break;
24513 default:
24514 gcc_unreachable ();
24515 }
24516
24517 gcc_assert (nargs <= ARRAY_SIZE (args));
24518
24519 if (comparison != UNKNOWN)
24520 {
24521 gcc_assert (nargs == 2);
24522 return ix86_expand_sse_compare (d, exp, target, swap);
24523 }
24524
24525 if (rmode == VOIDmode || rmode == tmode)
24526 {
24527 if (optimize
24528 || target == 0
24529 || GET_MODE (target) != tmode
24530 || !insn_p->operand[0].predicate (target, tmode))
24531 target = gen_reg_rtx (tmode);
24532 real_target = target;
24533 }
24534 else
24535 {
24536 target = gen_reg_rtx (rmode);
24537 real_target = simplify_gen_subreg (tmode, target, rmode, 0);
24538 }
24539
24540 for (i = 0; i < nargs; i++)
24541 {
24542 tree arg = CALL_EXPR_ARG (exp, i);
24543 rtx op = expand_normal (arg);
24544 enum machine_mode mode = insn_p->operand[i + 1].mode;
24545 bool match = insn_p->operand[i + 1].predicate (op, mode);
24546
24547 if (last_arg_count && (i + 1) == nargs)
24548 {
24549 /* SIMD shift insns take either an 8-bit immediate or a
24550    register as the count, but the builtin functions take an
24551    int.  If the count does not match, put it in a register.  */
24552 if (!match)
24553 {
24554 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
24555 if (!insn_p->operand[i + 1].predicate (op, mode))
24556 op = copy_to_reg (op);
24557 }
24558 }
24559 else if ((nargs - i) <= nargs_constant)
24560 {
24561 if (!match)
24562 switch (icode)
24563 {
24564 case CODE_FOR_sse4_1_roundpd:
24565 case CODE_FOR_sse4_1_roundps:
24566 case CODE_FOR_sse4_1_roundsd:
24567 case CODE_FOR_sse4_1_roundss:
24568 case CODE_FOR_sse4_1_blendps:
24569 case CODE_FOR_avx_blendpd256:
24570 case CODE_FOR_avx_vpermilv4df:
24571 case CODE_FOR_avx_roundpd256:
24572 case CODE_FOR_avx_roundps256:
24573 error ("the last argument must be a 4-bit immediate");
24574 return const0_rtx;
24575
24576 case CODE_FOR_sse4_1_blendpd:
24577 case CODE_FOR_avx_vpermilv2df:
24578 case CODE_FOR_xop_vpermil2v2df3:
24579 case CODE_FOR_xop_vpermil2v4sf3:
24580 case CODE_FOR_xop_vpermil2v4df3:
24581 case CODE_FOR_xop_vpermil2v8sf3:
24582 error ("the last argument must be a 2-bit immediate");
24583 return const0_rtx;
24584
24585 case CODE_FOR_avx_vextractf128v4df:
24586 case CODE_FOR_avx_vextractf128v8sf:
24587 case CODE_FOR_avx_vextractf128v8si:
24588 case CODE_FOR_avx_vinsertf128v4df:
24589 case CODE_FOR_avx_vinsertf128v8sf:
24590 case CODE_FOR_avx_vinsertf128v8si:
24591 error ("the last argument must be a 1-bit immediate");
24592 return const0_rtx;
24593
24594 case CODE_FOR_avx_cmpsdv2df3:
24595 case CODE_FOR_avx_cmpssv4sf3:
24596 case CODE_FOR_avx_cmppdv2df3:
24597 case CODE_FOR_avx_cmppsv4sf3:
24598 case CODE_FOR_avx_cmppdv4df3:
24599 case CODE_FOR_avx_cmppsv8sf3:
24600 error ("the last argument must be a 5-bit immediate");
24601 return const0_rtx;
24602
24603 default:
24604 switch (nargs_constant)
24605 {
24606 case 2:
24607 if ((nargs - i) == nargs_constant)
24608 {
24609 error ("the next to last argument must be an 8-bit immediate");
24610 break;
24611 }
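/* FALLTHRU */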
24612 case 1:
24613 error ("the last argument must be an 8-bit immediate");
24614 break;
24615 default:
24616 gcc_unreachable ();
24617 }
24618 return const0_rtx;
24619 }
24620 }
24621 else
24622 {
24623 if (VECTOR_MODE_P (mode))
24624 op = safe_vector_operand (op, mode);
24625
24626 /* If we aren't optimizing, only allow one memory operand to
24627 be generated. */
24628 if (memory_operand (op, mode))
24629 num_memory++;
24630
24631 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
24632 {
24633 if (optimize || !match || num_memory > 1)
24634 op = copy_to_mode_reg (mode, op);
24635 }
24636 else
24637 {
24638 op = copy_to_reg (op);
24639 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
24640 }
24641 }
24642
24643 args[i].op = op;
24644 args[i].mode = mode;
24645 }
24646
24647 switch (nargs)
24648 {
24649 case 1:
24650 pat = GEN_FCN (icode) (real_target, args[0].op);
24651 break;
24652 case 2:
24653 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
24654 break;
24655 case 3:
24656 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
24657 args[2].op);
24658 break;
24659 case 4:
24660 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
24661 args[2].op, args[3].op);
24662 break;
24663 default:
24664 gcc_unreachable ();
24665 }
24666
24667 if (! pat)
24668 return 0;
24669
24670 emit_insn (pat);
24671 return target;
24672 }
24673
24674 /* Subroutine of ix86_expand_builtin to take care of special insns
24675 with variable number of operands. */
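/* For builtins classified as "store" below, the first call argument is
   the destination address: it becomes operand 0 of the insn and the
   remaining arguments are shifted by ARG_ADJUST. For "load" builtins,
   operand 0 is the value returned in TARGET. */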
24676
24677 static rtx
24678 ix86_expand_special_args_builtin (const struct builtin_description *d,
24679 tree exp, rtx target)
24680 {
24681 tree arg;
24682 rtx pat, op;
24683 unsigned int i, nargs, arg_adjust, memory;
24684 struct
24685 {
24686 rtx op;
24687 enum machine_mode mode;
24688 } args[3];
24689 enum insn_code icode = d->icode;
24690 bool last_arg_constant = false;
24691 const struct insn_data_d *insn_p = &insn_data[icode];
24692 enum machine_mode tmode = insn_p->operand[0].mode;
24693 enum { load, store } klass;
24694
24695 switch ((enum ix86_builtin_func_type) d->flag)
24696 {
24697 case VOID_FTYPE_VOID:
24698 emit_insn (GEN_FCN (icode) (target));
24699 return 0;
24700 case VOID_FTYPE_UINT64:
24701 case VOID_FTYPE_UNSIGNED:
24702 nargs = 0;
24703 klass = store;
24704 memory = 0;
24705 break;
24707 case UINT64_FTYPE_VOID:
24708 case UNSIGNED_FTYPE_VOID:
24709 case UINT16_FTYPE_VOID:
24710 nargs = 0;
24711 klass = load;
24712 memory = 0;
24713 break;
24714 case UINT64_FTYPE_PUNSIGNED:
24715 case V2DI_FTYPE_PV2DI:
24716 case V32QI_FTYPE_PCCHAR:
24717 case V16QI_FTYPE_PCCHAR:
24718 case V8SF_FTYPE_PCV4SF:
24719 case V8SF_FTYPE_PCFLOAT:
24720 case V4SF_FTYPE_PCFLOAT:
24721 case V4DF_FTYPE_PCV2DF:
24722 case V4DF_FTYPE_PCDOUBLE:
24723 case V2DF_FTYPE_PCDOUBLE:
24724 case VOID_FTYPE_PVOID:
24725 nargs = 1;
24726 klass = load;
24727 memory = 0;
24728 break;
24729 case VOID_FTYPE_PV2SF_V4SF:
24730 case VOID_FTYPE_PV4DI_V4DI:
24731 case VOID_FTYPE_PV2DI_V2DI:
24732 case VOID_FTYPE_PCHAR_V32QI:
24733 case VOID_FTYPE_PCHAR_V16QI:
24734 case VOID_FTYPE_PFLOAT_V8SF:
24735 case VOID_FTYPE_PFLOAT_V4SF:
24736 case VOID_FTYPE_PDOUBLE_V4DF:
24737 case VOID_FTYPE_PDOUBLE_V2DF:
24738 case VOID_FTYPE_PULONGLONG_ULONGLONG:
24739 case VOID_FTYPE_PINT_INT:
24740 nargs = 1;
24741 klass = store;
24742 /* Reserve memory operand for target. */
24743 memory = ARRAY_SIZE (args);
24744 break;
24745 case V4SF_FTYPE_V4SF_PCV2SF:
24746 case V2DF_FTYPE_V2DF_PCDOUBLE:
24747 nargs = 2;
24748 klass = load;
24749 memory = 1;
24750 break;
24751 case V8SF_FTYPE_PCV8SF_V8SF:
24752 case V4DF_FTYPE_PCV4DF_V4DF:
24753 case V4SF_FTYPE_PCV4SF_V4SF:
24754 case V2DF_FTYPE_PCV2DF_V2DF:
24755 nargs = 2;
24756 klass = load;
24757 memory = 0;
24758 break;
24759 case VOID_FTYPE_PV8SF_V8SF_V8SF:
24760 case VOID_FTYPE_PV4DF_V4DF_V4DF:
24761 case VOID_FTYPE_PV4SF_V4SF_V4SF:
24762 case VOID_FTYPE_PV2DF_V2DF_V2DF:
24763 nargs = 2;
24764 klass = store;
24765 /* Reserve memory operand for target. */
24766 memory = ARRAY_SIZE (args);
24767 break;
24768 case VOID_FTYPE_UINT_UINT_UINT:
24769 case VOID_FTYPE_UINT64_UINT_UINT:
24770 case UCHAR_FTYPE_UINT_UINT_UINT:
24771 case UCHAR_FTYPE_UINT64_UINT_UINT:
24772 nargs = 3;
24773 klass = load;
24774 memory = ARRAY_SIZE (args);
24775 last_arg_constant = true;
24776 break;
24777 default:
24778 gcc_unreachable ();
24779 }
24780
24781 gcc_assert (nargs <= ARRAY_SIZE (args));
24782
24783 if (klass == store)
24784 {
24785 arg = CALL_EXPR_ARG (exp, 0);
24786 op = expand_normal (arg);
24787 gcc_assert (target == 0);
24788 if (memory)
24789 target = gen_rtx_MEM (tmode, copy_to_mode_reg (Pmode, op));
24790 else
24791 target = force_reg (tmode, op);
24792 arg_adjust = 1;
24793 }
24794 else
24795 {
24796 arg_adjust = 0;
24797 if (optimize
24798 || target == 0
24799 || GET_MODE (target) != tmode
24800 || !insn_p->operand[0].predicate (target, tmode))
24801 target = gen_reg_rtx (tmode);
24802 }
24803
24804 for (i = 0; i < nargs; i++)
24805 {
24806 enum machine_mode mode = insn_p->operand[i + 1].mode;
24807 bool match;
24808
24809 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
24810 op = expand_normal (arg);
24811 match = insn_p->operand[i + 1].predicate (op, mode);
24812
24813 if (last_arg_constant && (i + 1) == nargs)
24814 {
24815 if (!match)
24816 {
24817 if (icode == CODE_FOR_lwp_lwpvalsi3
24818 || icode == CODE_FOR_lwp_lwpinssi3
24819 || icode == CODE_FOR_lwp_lwpvaldi3
24820 || icode == CODE_FOR_lwp_lwpinsdi3)
24821 error ("the last argument must be a 32-bit immediate");
24822 else
24823 error ("the last argument must be an 8-bit immediate");
24824 return const0_rtx;
24825 }
24826 }
24827 else
24828 {
24829 if (i == memory)
24830 {
24831 /* This must be the memory operand. */
24832 op = gen_rtx_MEM (mode, copy_to_mode_reg (Pmode, op));
24833 gcc_assert (GET_MODE (op) == mode
24834 || GET_MODE (op) == VOIDmode);
24835 }
24836 else
24837 {
24838 /* This must be a register. */
24839 if (VECTOR_MODE_P (mode))
24840 op = safe_vector_operand (op, mode);
24841
24842 gcc_assert (GET_MODE (op) == mode
24843 || GET_MODE (op) == VOIDmode);
24844 op = copy_to_mode_reg (mode, op);
24845 }
24846 }
24847
24848 args[i].op = op;
24849 args[i].mode = mode;
24850 }
24851
24852 switch (nargs)
24853 {
24854 case 0:
24855 pat = GEN_FCN (icode) (target);
24856 break;
24857 case 1:
24858 pat = GEN_FCN (icode) (target, args[0].op);
24859 break;
24860 case 2:
24861 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
24862 break;
24863 case 3:
24864 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
24865 break;
24866 default:
24867 gcc_unreachable ();
24868 }
24869
24870 if (! pat)
24871 return 0;
24872 emit_insn (pat);
24873 return klass == store ? 0 : target;
24874 }
24875
24876 /* Return the integer constant in ARG. Constrain it to be in the range
24877 of the subparts of VEC_TYPE; issue an error if not. */
24878
24879 static int
24880 get_element_number (tree vec_type, tree arg)
24881 {
24882 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
24883
24884 if (!host_integerp (arg, 1)
24885 || (elt = tree_low_cst (arg, 1), elt > max))
24886 {
24887 error ("selector must be an integer constant in the range 0..%wi", max);
24888 return 0;
24889 }
24890
24891 return elt;
24892 }
24893
24894 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
24895 ix86_expand_vector_init. We DO have language-level syntax for this, in
24896 the form of (type){ init-list }. Except that since we can't place emms
24897 instructions from inside the compiler, we can't allow the use of MMX
24898 registers unless the user explicitly asks for it. So we do *not* define
24899 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
24900 we have builtins invoked by mmintrin.h that give us license to emit
24901 these sorts of instructions. */
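/* For example, a call like __builtin_ia32_vec_init_v2si (a, b), as emitted
   by mmintrin.h, is dispatched here and expanded through
   ix86_expand_vector_init below. */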
24902
24903 static rtx
24904 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
24905 {
24906 enum machine_mode tmode = TYPE_MODE (type);
24907 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
24908 int i, n_elt = GET_MODE_NUNITS (tmode);
24909 rtvec v = rtvec_alloc (n_elt);
24910
24911 gcc_assert (VECTOR_MODE_P (tmode));
24912 gcc_assert (call_expr_nargs (exp) == n_elt);
24913
24914 for (i = 0; i < n_elt; ++i)
24915 {
24916 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
24917 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
24918 }
24919
24920 if (!target || !register_operand (target, tmode))
24921 target = gen_reg_rtx (tmode);
24922
24923 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
24924 return target;
24925 }
24926
24927 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
24928 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
24929 had a language-level syntax for referencing vector elements. */
24930
24931 static rtx
24932 ix86_expand_vec_ext_builtin (tree exp, rtx target)
24933 {
24934 enum machine_mode tmode, mode0;
24935 tree arg0, arg1;
24936 int elt;
24937 rtx op0;
24938
24939 arg0 = CALL_EXPR_ARG (exp, 0);
24940 arg1 = CALL_EXPR_ARG (exp, 1);
24941
24942 op0 = expand_normal (arg0);
24943 elt = get_element_number (TREE_TYPE (arg0), arg1);
24944
24945 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
24946 mode0 = TYPE_MODE (TREE_TYPE (arg0));
24947 gcc_assert (VECTOR_MODE_P (mode0));
24948
24949 op0 = force_reg (mode0, op0);
24950
24951 if (optimize || !target || !register_operand (target, tmode))
24952 target = gen_reg_rtx (tmode);
24953
24954 ix86_expand_vector_extract (true, target, op0, elt);
24955
24956 return target;
24957 }
24958
24959 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
24960 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
24961 a language-level syntax for referencing vector elements. */
24962
24963 static rtx
24964 ix86_expand_vec_set_builtin (tree exp)
24965 {
24966 enum machine_mode tmode, mode1;
24967 tree arg0, arg1, arg2;
24968 int elt;
24969 rtx op0, op1, target;
24970
24971 arg0 = CALL_EXPR_ARG (exp, 0);
24972 arg1 = CALL_EXPR_ARG (exp, 1);
24973 arg2 = CALL_EXPR_ARG (exp, 2);
24974
24975 tmode = TYPE_MODE (TREE_TYPE (arg0));
24976 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
24977 gcc_assert (VECTOR_MODE_P (tmode));
24978
24979 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
24980 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
24981 elt = get_element_number (TREE_TYPE (arg0), arg2);
24982
24983 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
24984 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
24985
24986 op0 = force_reg (tmode, op0);
24987 op1 = force_reg (mode1, op1);
24988
24989 /* OP0 is the source of these builtin functions and shouldn't be
24990 modified. Create a copy, use it and return it as target. */
24991 target = gen_reg_rtx (tmode);
24992 emit_move_insn (target, op0);
24993 ix86_expand_vector_set (true, target, op1, elt);
24994
24995 return target;
24996 }
24997
24998 /* Expand an expression EXP that calls a built-in function,
24999 with result going to TARGET if that's convenient
25000 (and in mode MODE if that's convenient).
25001 SUBTARGET may be used as the target for computing one of EXP's operands.
25002 IGNORE is nonzero if the value is to be ignored. */
25003
25004 static rtx
25005 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
25006 enum machine_mode mode ATTRIBUTE_UNUSED,
25007 int ignore ATTRIBUTE_UNUSED)
25008 {
25009 const struct builtin_description *d;
25010 size_t i;
25011 enum insn_code icode;
25012 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
25013 tree arg0, arg1, arg2;
25014 rtx op0, op1, op2, pat;
25015 enum machine_mode mode0, mode1, mode2;
25016 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
25017
25018 /* Determine whether the builtin function is available under the current ISA.
25019 Originally the builtin was not created if it wasn't applicable to the
25020 current ISA based on the command line switches. With function specific
25021 options, we need to check in the context of the function making the call
25022 whether it is supported. */
25023 if (ix86_builtins_isa[fcode].isa
25024 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
25025 {
25026 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
25027 NULL, NULL, false);
25028
25029 if (!opts)
25030 error ("%qE needs unknown isa option", fndecl);
25031 else
25032 {
25033 gcc_assert (opts != NULL);
25034 error ("%qE needs isa option %s", fndecl, opts);
25035 free (opts);
25036 }
25037 return const0_rtx;
25038 }
25039
25040 switch (fcode)
25041 {
25042 case IX86_BUILTIN_MASKMOVQ:
25043 case IX86_BUILTIN_MASKMOVDQU:
25044 icode = (fcode == IX86_BUILTIN_MASKMOVQ
25045 ? CODE_FOR_mmx_maskmovq
25046 : CODE_FOR_sse2_maskmovdqu);
25047 /* Note the arg order is different from the operand order. */
25048 arg1 = CALL_EXPR_ARG (exp, 0);
25049 arg2 = CALL_EXPR_ARG (exp, 1);
25050 arg0 = CALL_EXPR_ARG (exp, 2);
25051 op0 = expand_normal (arg0);
25052 op1 = expand_normal (arg1);
25053 op2 = expand_normal (arg2);
25054 mode0 = insn_data[icode].operand[0].mode;
25055 mode1 = insn_data[icode].operand[1].mode;
25056 mode2 = insn_data[icode].operand[2].mode;
25057
25058 op0 = force_reg (Pmode, op0);
25059 op0 = gen_rtx_MEM (mode1, op0);
25060
25061 if (!insn_data[icode].operand[0].predicate (op0, mode0))
25062 op0 = copy_to_mode_reg (mode0, op0);
25063 if (!insn_data[icode].operand[1].predicate (op1, mode1))
25064 op1 = copy_to_mode_reg (mode1, op1);
25065 if (!insn_data[icode].operand[2].predicate (op2, mode2))
25066 op2 = copy_to_mode_reg (mode2, op2);
25067 pat = GEN_FCN (icode) (op0, op1, op2);
25068 if (! pat)
25069 return 0;
25070 emit_insn (pat);
25071 return 0;
25072
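    /* The ldmxcsr and stmxcsr instructions take only a memory operand, so
       in the two cases below the MXCSR value is passed through a stack
       slot obtained from assign_386_stack_local. */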
25073 case IX86_BUILTIN_LDMXCSR:
25074 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
25075 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
25076 emit_move_insn (target, op0);
25077 emit_insn (gen_sse_ldmxcsr (target));
25078 return 0;
25079
25080 case IX86_BUILTIN_STMXCSR:
25081 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
25082 emit_insn (gen_sse_stmxcsr (target));
25083 return copy_to_mode_reg (SImode, target);
25084
25085 case IX86_BUILTIN_CLFLUSH:
25086 arg0 = CALL_EXPR_ARG (exp, 0);
25087 op0 = expand_normal (arg0);
25088 icode = CODE_FOR_sse2_clflush;
25089 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
25090 op0 = copy_to_mode_reg (Pmode, op0);
25091
25092 emit_insn (gen_sse2_clflush (op0));
25093 return 0;
25094
25095 case IX86_BUILTIN_MONITOR:
25096 arg0 = CALL_EXPR_ARG (exp, 0);
25097 arg1 = CALL_EXPR_ARG (exp, 1);
25098 arg2 = CALL_EXPR_ARG (exp, 2);
25099 op0 = expand_normal (arg0);
25100 op1 = expand_normal (arg1);
25101 op2 = expand_normal (arg2);
25102 if (!REG_P (op0))
25103 op0 = copy_to_mode_reg (Pmode, op0);
25104 if (!REG_P (op1))
25105 op1 = copy_to_mode_reg (SImode, op1);
25106 if (!REG_P (op2))
25107 op2 = copy_to_mode_reg (SImode, op2);
25108 emit_insn (ix86_gen_monitor (op0, op1, op2));
25109 return 0;
25110
25111 case IX86_BUILTIN_MWAIT:
25112 arg0 = CALL_EXPR_ARG (exp, 0);
25113 arg1 = CALL_EXPR_ARG (exp, 1);
25114 op0 = expand_normal (arg0);
25115 op1 = expand_normal (arg1);
25116 if (!REG_P (op0))
25117 op0 = copy_to_mode_reg (SImode, op0);
25118 if (!REG_P (op1))
25119 op1 = copy_to_mode_reg (SImode, op1);
25120 emit_insn (gen_sse3_mwait (op0, op1));
25121 return 0;
25122
25123 case IX86_BUILTIN_VEC_INIT_V2SI:
25124 case IX86_BUILTIN_VEC_INIT_V4HI:
25125 case IX86_BUILTIN_VEC_INIT_V8QI:
25126 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
25127
25128 case IX86_BUILTIN_VEC_EXT_V2DF:
25129 case IX86_BUILTIN_VEC_EXT_V2DI:
25130 case IX86_BUILTIN_VEC_EXT_V4SF:
25131 case IX86_BUILTIN_VEC_EXT_V4SI:
25132 case IX86_BUILTIN_VEC_EXT_V8HI:
25133 case IX86_BUILTIN_VEC_EXT_V2SI:
25134 case IX86_BUILTIN_VEC_EXT_V4HI:
25135 case IX86_BUILTIN_VEC_EXT_V16QI:
25136 return ix86_expand_vec_ext_builtin (exp, target);
25137
25138 case IX86_BUILTIN_VEC_SET_V2DI:
25139 case IX86_BUILTIN_VEC_SET_V4SF:
25140 case IX86_BUILTIN_VEC_SET_V4SI:
25141 case IX86_BUILTIN_VEC_SET_V8HI:
25142 case IX86_BUILTIN_VEC_SET_V4HI:
25143 case IX86_BUILTIN_VEC_SET_V16QI:
25144 return ix86_expand_vec_set_builtin (exp);
25145
25146 case IX86_BUILTIN_VEC_PERM_V2DF:
25147 case IX86_BUILTIN_VEC_PERM_V4SF:
25148 case IX86_BUILTIN_VEC_PERM_V2DI:
25149 case IX86_BUILTIN_VEC_PERM_V4SI:
25150 case IX86_BUILTIN_VEC_PERM_V8HI:
25151 case IX86_BUILTIN_VEC_PERM_V16QI:
25152 case IX86_BUILTIN_VEC_PERM_V2DI_U:
25153 case IX86_BUILTIN_VEC_PERM_V4SI_U:
25154 case IX86_BUILTIN_VEC_PERM_V8HI_U:
25155 case IX86_BUILTIN_VEC_PERM_V16QI_U:
25156 case IX86_BUILTIN_VEC_PERM_V4DF:
25157 case IX86_BUILTIN_VEC_PERM_V8SF:
25158 return ix86_expand_vec_perm_builtin (exp);
25159
25160 case IX86_BUILTIN_INFQ:
25161 case IX86_BUILTIN_HUGE_VALQ:
25162 {
25163 REAL_VALUE_TYPE inf;
25164 rtx tmp;
25165
25166 real_inf (&inf);
25167 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
25168
25169 tmp = validize_mem (force_const_mem (mode, tmp));
25170
25171 if (target == 0)
25172 target = gen_reg_rtx (mode);
25173
25174 emit_move_insn (target, tmp);
25175 return target;
25176 }
25177
25178 case IX86_BUILTIN_LLWPCB:
25179 arg0 = CALL_EXPR_ARG (exp, 0);
25180 op0 = expand_normal (arg0);
25181 icode = CODE_FOR_lwp_llwpcb;
25182 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
25183 op0 = copy_to_mode_reg (Pmode, op0);
25184 emit_insn (gen_lwp_llwpcb (op0));
25185 return 0;
25186
25187 case IX86_BUILTIN_SLWPCB:
25188 icode = CODE_FOR_lwp_slwpcb;
25189 if (!target
25190 || !insn_data[icode].operand[0].predicate (target, Pmode))
25191 target = gen_reg_rtx (Pmode);
25192 emit_insn (gen_lwp_slwpcb (target));
25193 return target;
25194
25195 default:
25196 break;
25197 }
25198
25199 for (i = 0, d = bdesc_special_args;
25200 i < ARRAY_SIZE (bdesc_special_args);
25201 i++, d++)
25202 if (d->code == fcode)
25203 return ix86_expand_special_args_builtin (d, exp, target);
25204
25205 for (i = 0, d = bdesc_args;
25206 i < ARRAY_SIZE (bdesc_args);
25207 i++, d++)
25208 if (d->code == fcode)
25209 switch (fcode)
25210 {
25211 case IX86_BUILTIN_FABSQ:
25212 case IX86_BUILTIN_COPYSIGNQ:
25213 if (!TARGET_SSE2)
25214 /* Emit a normal call if SSE2 isn't available. */
25215 return expand_call (exp, target, ignore);
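/* FALLTHRU */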
25216 default:
25217 return ix86_expand_args_builtin (d, exp, target);
25218 }
25219
25220 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
25221 if (d->code == fcode)
25222 return ix86_expand_sse_comi (d, exp, target);
25223
25224 for (i = 0, d = bdesc_pcmpestr;
25225 i < ARRAY_SIZE (bdesc_pcmpestr);
25226 i++, d++)
25227 if (d->code == fcode)
25228 return ix86_expand_sse_pcmpestr (d, exp, target);
25229
25230 for (i = 0, d = bdesc_pcmpistr;
25231 i < ARRAY_SIZE (bdesc_pcmpistr);
25232 i++, d++)
25233 if (d->code == fcode)
25234 return ix86_expand_sse_pcmpistr (d, exp, target);
25235
25236 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
25237 if (d->code == fcode)
25238 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
25239 (enum ix86_builtin_func_type)
25240 d->flag, d->comparison);
25241
25242 gcc_unreachable ();
25243 }
25244
25245 /* Returns a function decl for a vectorized version of the builtin function
25246 FNDECL, with output vector type TYPE_OUT and input vector type TYPE_IN,
25247 or NULL_TREE if it is not available. */
25248
25249 static tree
25250 ix86_builtin_vectorized_function (tree fndecl, tree type_out,
25251 tree type_in)
25252 {
25253 enum machine_mode in_mode, out_mode;
25254 int in_n, out_n;
25255 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
25256
25257 if (TREE_CODE (type_out) != VECTOR_TYPE
25258 || TREE_CODE (type_in) != VECTOR_TYPE
25259 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
25260 return NULL_TREE;
25261
25262 out_mode = TYPE_MODE (TREE_TYPE (type_out));
25263 out_n = TYPE_VECTOR_SUBPARTS (type_out);
25264 in_mode = TYPE_MODE (TREE_TYPE (type_in));
25265 in_n = TYPE_VECTOR_SUBPARTS (type_in);
25266
25267 switch (fn)
25268 {
25269 case BUILT_IN_SQRT:
25270 if (out_mode == DFmode && out_n == 2
25271 && in_mode == DFmode && in_n == 2)
25272 return ix86_builtins[IX86_BUILTIN_SQRTPD];
25273 break;
25274
25275 case BUILT_IN_SQRTF:
25276 if (out_mode == SFmode && out_n == 4
25277 && in_mode == SFmode && in_n == 4)
25278 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
25279 break;
25280
25281 case BUILT_IN_LRINT:
25282 if (out_mode == SImode && out_n == 4
25283 && in_mode == DFmode && in_n == 2)
25284 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
25285 break;
25286
25287 case BUILT_IN_LRINTF:
25288 if (out_mode == SImode && out_n == 4
25289 && in_mode == SFmode && in_n == 4)
25290 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
25291 break;
25292
25293 case BUILT_IN_COPYSIGN:
25294 if (out_mode == DFmode && out_n == 2
25295 && in_mode == DFmode && in_n == 2)
25296 return ix86_builtins[IX86_BUILTIN_CPYSGNPD];
25297 break;
25298
25299 case BUILT_IN_COPYSIGNF:
25300 if (out_mode == SFmode && out_n == 4
25301 && in_mode == SFmode && in_n == 4)
25302 return ix86_builtins[IX86_BUILTIN_CPYSGNPS];
25303 break;
25304
25305 default:
25306 ;
25307 }
25308
25309 /* Dispatch to a handler for a vectorization library. */
25310 if (ix86_veclib_handler)
25311 return ix86_veclib_handler ((enum built_in_function) fn, type_out,
25312 type_in);
25313
25314 return NULL_TREE;
25315 }
25316
25317 /* Handler for an SVML-style interface to
25318 a library with vectorized intrinsics. */
25319
25320 static tree
25321 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
25322 {
25323 char name[20];
25324 tree fntype, new_fndecl, args;
25325 unsigned arity;
25326 const char *bname;
25327 enum machine_mode el_mode, in_mode;
25328 int n, in_n;
25329
25330 /* The SVML is suitable for unsafe math only. */
25331 if (!flag_unsafe_math_optimizations)
25332 return NULL_TREE;
25333
25334 el_mode = TYPE_MODE (TREE_TYPE (type_out));
25335 n = TYPE_VECTOR_SUBPARTS (type_out);
25336 in_mode = TYPE_MODE (TREE_TYPE (type_in));
25337 in_n = TYPE_VECTOR_SUBPARTS (type_in);
25338 if (el_mode != in_mode
25339 || n != in_n)
25340 return NULL_TREE;
25341
25342 switch (fn)
25343 {
25344 case BUILT_IN_EXP:
25345 case BUILT_IN_LOG:
25346 case BUILT_IN_LOG10:
25347 case BUILT_IN_POW:
25348 case BUILT_IN_TANH:
25349 case BUILT_IN_TAN:
25350 case BUILT_IN_ATAN:
25351 case BUILT_IN_ATAN2:
25352 case BUILT_IN_ATANH:
25353 case BUILT_IN_CBRT:
25354 case BUILT_IN_SINH:
25355 case BUILT_IN_SIN:
25356 case BUILT_IN_ASINH:
25357 case BUILT_IN_ASIN:
25358 case BUILT_IN_COSH:
25359 case BUILT_IN_COS:
25360 case BUILT_IN_ACOSH:
25361 case BUILT_IN_ACOS:
25362 if (el_mode != DFmode || n != 2)
25363 return NULL_TREE;
25364 break;
25365
25366 case BUILT_IN_EXPF:
25367 case BUILT_IN_LOGF:
25368 case BUILT_IN_LOG10F:
25369 case BUILT_IN_POWF:
25370 case BUILT_IN_TANHF:
25371 case BUILT_IN_TANF:
25372 case BUILT_IN_ATANF:
25373 case BUILT_IN_ATAN2F:
25374 case BUILT_IN_ATANHF:
25375 case BUILT_IN_CBRTF:
25376 case BUILT_IN_SINHF:
25377 case BUILT_IN_SINF:
25378 case BUILT_IN_ASINHF:
25379 case BUILT_IN_ASINF:
25380 case BUILT_IN_COSHF:
25381 case BUILT_IN_COSF:
25382 case BUILT_IN_ACOSHF:
25383 case BUILT_IN_ACOSF:
25384 if (el_mode != SFmode || n != 4)
25385 return NULL_TREE;
25386 break;
25387
25388 default:
25389 return NULL_TREE;
25390 }
25391
25392 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
25393
25394 if (fn == BUILT_IN_LOGF)
25395 strcpy (name, "vmlsLn4");
25396 else if (fn == BUILT_IN_LOG)
25397 strcpy (name, "vmldLn2");
25398 else if (n == 4)
25399 {
25400 sprintf (name, "vmls%s", bname+10);
25401 name[strlen (name)-1] = '4';
25402 }
25403 else
25404 sprintf (name, "vmld%s2", bname+10);
25405
25406 /* Convert to uppercase. */
25407 name[4] &= ~0x20;
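/* For example, BUILT_IN_SINF has bname "__builtin_sinf" and ends up as
   "vmlsSin4", while BUILT_IN_SIN ends up as "vmldSin2". */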
25408
25409 arity = 0;
25410 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
25411 args = TREE_CHAIN (args))
25412 arity++;
25413
25414 if (arity == 1)
25415 fntype = build_function_type_list (type_out, type_in, NULL);
25416 else
25417 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
25418
25419 /* Build a function declaration for the vectorized function. */
25420 new_fndecl = build_decl (BUILTINS_LOCATION,
25421 FUNCTION_DECL, get_identifier (name), fntype);
25422 TREE_PUBLIC (new_fndecl) = 1;
25423 DECL_EXTERNAL (new_fndecl) = 1;
25424 DECL_IS_NOVOPS (new_fndecl) = 1;
25425 TREE_READONLY (new_fndecl) = 1;
25426
25427 return new_fndecl;
25428 }
25429
25430 /* Handler for an ACML-style interface to
25431 a library with vectorized intrinsics. */
25432
25433 static tree
25434 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
25435 {
25436 char name[20] = "__vr.._";
25437 tree fntype, new_fndecl, args;
25438 unsigned arity;
25439 const char *bname;
25440 enum machine_mode el_mode, in_mode;
25441 int n, in_n;
25442
25443 /* The ACML is 64-bit only and suitable for unsafe math only, as
25444 it does not correctly support parts of IEEE arithmetic with the
25445 required precision, such as denormals. */
25446 if (!TARGET_64BIT
25447 || !flag_unsafe_math_optimizations)
25448 return NULL_TREE;
25449
25450 el_mode = TYPE_MODE (TREE_TYPE (type_out));
25451 n = TYPE_VECTOR_SUBPARTS (type_out);
25452 in_mode = TYPE_MODE (TREE_TYPE (type_in));
25453 in_n = TYPE_VECTOR_SUBPARTS (type_in);
25454 if (el_mode != in_mode
25455 || n != in_n)
25456 return NULL_TREE;
25457
25458 switch (fn)
25459 {
25460 case BUILT_IN_SIN:
25461 case BUILT_IN_COS:
25462 case BUILT_IN_EXP:
25463 case BUILT_IN_LOG:
25464 case BUILT_IN_LOG2:
25465 case BUILT_IN_LOG10:
25466 name[4] = 'd';
25467 name[5] = '2';
25468 if (el_mode != DFmode
25469 || n != 2)
25470 return NULL_TREE;
25471 break;
25472
25473 case BUILT_IN_SINF:
25474 case BUILT_IN_COSF:
25475 case BUILT_IN_EXPF:
25476 case BUILT_IN_POWF:
25477 case BUILT_IN_LOGF:
25478 case BUILT_IN_LOG2F:
25479 case BUILT_IN_LOG10F:
25480 name[4] = 's';
25481 name[5] = '4';
25482 if (el_mode != SFmode
25483 || n != 4)
25484 return NULL_TREE;
25485 break;
25486
25487 default:
25488 return NULL_TREE;
25489 }
25490
25491 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
25492 sprintf (name + 7, "%s", bname+10);
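/* For example, BUILT_IN_SIN produces the name "__vrd2_sin" and
   BUILT_IN_SINF produces "__vrs4_sinf". */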
25493
25494 arity = 0;
25495 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
25496 args = TREE_CHAIN (args))
25497 arity++;
25498
25499 if (arity == 1)
25500 fntype = build_function_type_list (type_out, type_in, NULL);
25501 else
25502 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
25503
25504 /* Build a function declaration for the vectorized function. */
25505 new_fndecl = build_decl (BUILTINS_LOCATION,
25506 FUNCTION_DECL, get_identifier (name), fntype);
25507 TREE_PUBLIC (new_fndecl) = 1;
25508 DECL_EXTERNAL (new_fndecl) = 1;
25509 DECL_IS_NOVOPS (new_fndecl) = 1;
25510 TREE_READONLY (new_fndecl) = 1;
25511
25512 return new_fndecl;
25513 }
25514
25515
25516 /* Returns a decl of a function that implements conversion of an integer vector
25517 into a floating-point vector, or vice-versa. DEST_TYPE and SRC_TYPE
25518 are the types involved when converting according to CODE.
25519 Return NULL_TREE if it is not available. */
25520
25521 static tree
25522 ix86_vectorize_builtin_conversion (unsigned int code,
25523 tree dest_type, tree src_type)
25524 {
25525 if (! TARGET_SSE2)
25526 return NULL_TREE;
25527
25528 switch (code)
25529 {
25530 case FLOAT_EXPR:
25531 switch (TYPE_MODE (src_type))
25532 {
25533 case V4SImode:
25534 switch (TYPE_MODE (dest_type))
25535 {
25536 case V4SFmode:
25537 return (TYPE_UNSIGNED (src_type)
25538 ? ix86_builtins[IX86_BUILTIN_CVTUDQ2PS]
25539 : ix86_builtins[IX86_BUILTIN_CVTDQ2PS]);
25540 case V4DFmode:
25541 return (TYPE_UNSIGNED (src_type)
25542 ? NULL_TREE
25543 : ix86_builtins[IX86_BUILTIN_CVTDQ2PD256]);
25544 default:
25545 return NULL_TREE;
25546 }
25547 break;
25548 case V8SImode:
25549 switch (TYPE_MODE (dest_type))
25550 {
25551 case V8SFmode:
25552 return (TYPE_UNSIGNED (src_type)
25553 ? NULL_TREE
25554 : ix86_builtins[IX86_BUILTIN_CVTDQ2PS]);
25555 default:
25556 return NULL_TREE;
25557 }
25558 break;
25559 default:
25560 return NULL_TREE;
25561 }
25562
25563 case FIX_TRUNC_EXPR:
25564 switch (TYPE_MODE (dest_type))
25565 {
25566 case V4SImode:
25567 switch (TYPE_MODE (src_type))
25568 {
25569 case V4SFmode:
25570 return (TYPE_UNSIGNED (dest_type)
25571 ? NULL_TREE
25572 : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ]);
25573 case V4DFmode:
25574 return (TYPE_UNSIGNED (dest_type)
25575 ? NULL_TREE
25576 : ix86_builtins[IX86_BUILTIN_CVTTPD2DQ256]);
25577 default:
25578 return NULL_TREE;
25579 }
25580 break;
25581
25582 case V8SImode:
25583 switch (TYPE_MODE (src_type))
25584 {
25585 case V8SFmode:
25586 return (TYPE_UNSIGNED (dest_type)
25587 ? NULL_TREE
25588 : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ256]);
25589 default:
25590 return NULL_TREE;
25591 }
25592 break;
25593
25594 default:
25595 return NULL_TREE;
25596 }
25597
25598 default:
25599 return NULL_TREE;
25600 }
25601
25602 return NULL_TREE;
25603 }
25604
25605 /* Returns a decl of a target-specific builtin that implements the
25606 reciprocal of the function FN, or NULL_TREE if not available. */
25607
25608 static tree
25609 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
25610 bool sqrt ATTRIBUTE_UNUSED)
25611 {
25612 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
25613 && flag_finite_math_only && !flag_trapping_math
25614 && flag_unsafe_math_optimizations))
25615 return NULL_TREE;
25616
25617 if (md_fn)
25618 /* Machine dependent builtins. */
25619 switch (fn)
25620 {
25621 /* Vectorized version of sqrt to rsqrt conversion. */
25622 case IX86_BUILTIN_SQRTPS_NR:
25623 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
25624
25625 default:
25626 return NULL_TREE;
25627 }
25628 else
25629 /* Normal builtins. */
25630 switch (fn)
25631 {
25632 /* Sqrt to rsqrt conversion. */
25633 case BUILT_IN_SQRTF:
25634 return ix86_builtins[IX86_BUILTIN_RSQRTF];
25635
25636 default:
25637 return NULL_TREE;
25638 }
25639 }
25640 \f
25641 /* Helper for avx_vpermilps256_operand et al. This is also used by
25642 the expansion functions to turn the parallel back into a mask.
25643 The return value is 0 for no match and the imm8+1 for a match. */
25644
25645 int
25646 avx_vpermilp_parallel (rtx par, enum machine_mode mode)
25647 {
25648 unsigned i, nelt = GET_MODE_NUNITS (mode);
25649 unsigned mask = 0;
25650 unsigned char ipar[8];
25651
25652 if (XVECLEN (par, 0) != (int) nelt)
25653 return 0;
25654
25655 /* Validate that all of the elements are constants, and not totally
25656 out of range. Copy the data into an integral array to make the
25657 subsequent checks easier. */
25658 for (i = 0; i < nelt; ++i)
25659 {
25660 rtx er = XVECEXP (par, 0, i);
25661 unsigned HOST_WIDE_INT ei;
25662
25663 if (!CONST_INT_P (er))
25664 return 0;
25665 ei = INTVAL (er);
25666 if (ei >= nelt)
25667 return 0;
25668 ipar[i] = ei;
25669 }
25670
25671 switch (mode)
25672 {
25673 case V4DFmode:
25674 /* In the 256-bit DFmode case, we can only move elements within
25675 a 128-bit lane. */
25676 for (i = 0; i < 2; ++i)
25677 {
25678 if (ipar[i] >= 2)
25679 return 0;
25680 mask |= ipar[i] << i;
25681 }
25682 for (i = 2; i < 4; ++i)
25683 {
25684 if (ipar[i] < 2)
25685 return 0;
25686 mask |= (ipar[i] - 2) << i;
25687 }
25688 break;
25689
25690 case V8SFmode:
25691 /* In the 256-bit SFmode case, we have full freedom of movement
25692 within the low 128-bit lane, but the high 128-bit lane must
25693 mirror the exact same pattern. */
25694 for (i = 0; i < 4; ++i)
25695 if (ipar[i] + 4 != ipar[i + 4])
25696 return 0;
25697 nelt = 4;
25698 /* FALLTHRU */
25699
25700 case V2DFmode:
25701 case V4SFmode:
25702 /* In the 128-bit case, we have full freedom in the placement of
25703 the elements from the source operand. */
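/* Each selector therefore uses nelt/2 bits of the immediate: one bit per
   element for V2DFmode and two bits per element for V4SFmode (and for
   each lane of V8SFmode). */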
25704 for (i = 0; i < nelt; ++i)
25705 mask |= ipar[i] << (i * (nelt / 2));
25706 break;
25707
25708 default:
25709 gcc_unreachable ();
25710 }
25711
25712 /* Make sure success has a non-zero value by adding one. */
25713 return mask + 1;
25714 }
25715
25716 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
25717 the expansion functions to turn the parallel back into a mask.
25718 The return value is 0 for no match and the imm8+1 for a match. */
25719
25720 int
25721 avx_vperm2f128_parallel (rtx par, enum machine_mode mode)
25722 {
25723 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
25724 unsigned mask = 0;
25725 unsigned char ipar[8];
25726
25727 if (XVECLEN (par, 0) != (int) nelt)
25728 return 0;
25729
25730 /* Validate that all of the elements are constants, and not totally
25731 out of range. Copy the data into an integral array to make the
25732 subsequent checks easier. */
25733 for (i = 0; i < nelt; ++i)
25734 {
25735 rtx er = XVECEXP (par, 0, i);
25736 unsigned HOST_WIDE_INT ei;
25737
25738 if (!CONST_INT_P (er))
25739 return 0;
25740 ei = INTVAL (er);
25741 if (ei >= 2 * nelt)
25742 return 0;
25743 ipar[i] = ei;
25744 }
25745
25746 /* Validate that each half of the permute selects consecutive elements (lane alignment is checked below when reconstructing the mask). */
25747 for (i = 0; i < nelt2 - 1; ++i)
25748 if (ipar[i] + 1 != ipar[i + 1])
25749 return 0;
25750 for (i = nelt2; i < nelt - 1; ++i)
25751 if (ipar[i] + 1 != ipar[i + 1])
25752 return 0;
25753
25754 /* Reconstruct the mask. */
25755 for (i = 0; i < 2; ++i)
25756 {
25757 unsigned e = ipar[i * nelt2];
25758 if (e % nelt2)
25759 return 0;
25760 e /= nelt2;
25761 mask |= e << (i * 4);
25762 }
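/* The reconstructed immediate thus carries the source lane index for the
   low half in bits 0-1 and for the high half in bits 4-5, matching the
   vperm2f128 encoding. */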
25763
25764 /* Make sure success has a non-zero value by adding one. */
25765 return mask + 1;
25766 }
25767 \f
25768
25769 /* Store OPERAND to memory after reload is completed. This means
25770 that we can't easily use assign_stack_local. */
25771 rtx
25772 ix86_force_to_memory (enum machine_mode mode, rtx operand)
25773 {
25774 rtx result;
25775
25776 gcc_assert (reload_completed);
25777 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE)
25778 {
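/* The 64-bit SysV ABI provides a red zone of RED_ZONE_SIZE bytes below
   the stack pointer, so we can store there without adjusting the stack
   pointer first. */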
25779 result = gen_rtx_MEM (mode,
25780 gen_rtx_PLUS (Pmode,
25781 stack_pointer_rtx,
25782 GEN_INT (-RED_ZONE_SIZE)));
25783 emit_move_insn (result, operand);
25784 }
25785 else if ((TARGET_64BIT_MS_ABI || !TARGET_RED_ZONE) && TARGET_64BIT)
25786 {
25787 switch (mode)
25788 {
25789 case HImode:
25790 case SImode:
25791 operand = gen_lowpart (DImode, operand);
25792 /* FALLTHRU */
25793 case DImode:
25794 emit_insn (
25795 gen_rtx_SET (VOIDmode,
25796 gen_rtx_MEM (DImode,
25797 gen_rtx_PRE_DEC (DImode,
25798 stack_pointer_rtx)),
25799 operand));
25800 break;
25801 default:
25802 gcc_unreachable ();
25803 }
25804 result = gen_rtx_MEM (mode, stack_pointer_rtx);
25805 }
25806 else
25807 {
25808 switch (mode)
25809 {
25810 case DImode:
25811 {
25812 rtx operands[2];
25813 split_di (&operand, 1, operands, operands + 1);
25814 emit_insn (
25815 gen_rtx_SET (VOIDmode,
25816 gen_rtx_MEM (SImode,
25817 gen_rtx_PRE_DEC (Pmode,
25818 stack_pointer_rtx)),
25819 operands[1]));
25820 emit_insn (
25821 gen_rtx_SET (VOIDmode,
25822 gen_rtx_MEM (SImode,
25823 gen_rtx_PRE_DEC (Pmode,
25824 stack_pointer_rtx)),
25825 operands[0]));
25826 }
25827 break;
25828 case HImode:
25829 /* Store HImodes as SImodes. */
25830 operand = gen_lowpart (SImode, operand);
25831 /* FALLTHRU */
25832 case SImode:
25833 emit_insn (
25834 gen_rtx_SET (VOIDmode,
25835 gen_rtx_MEM (GET_MODE (operand),
25836 gen_rtx_PRE_DEC (SImode,
25837 stack_pointer_rtx)),
25838 operand));
25839 break;
25840 default:
25841 gcc_unreachable ();
25842 }
25843 result = gen_rtx_MEM (mode, stack_pointer_rtx);
25844 }
25845 return result;
25846 }
25847
25848 /* Free operand from the memory. */
25849 void
25850 ix86_free_from_memory (enum machine_mode mode)
25851 {
25852 if (!TARGET_RED_ZONE || TARGET_64BIT_MS_ABI)
25853 {
25854 int size;
25855
25856 if (mode == DImode || TARGET_64BIT)
25857 size = 8;
25858 else
25859 size = 4;
25860 /* Use LEA to deallocate stack space. In peephole2 it will be converted
25861 to a pop or add instruction if registers are available. */
25862 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
25863 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
25864 GEN_INT (size))));
25865 }
25866 }
25867
25868 /* Implement TARGET_IRA_COVER_CLASSES. If -mfpmath=sse, we prefer
25869 SSE_REGS to FLOAT_REGS if their costs for a pseudo are the
25870 same. */
25871 static const reg_class_t *
25872 i386_ira_cover_classes (void)
25873 {
25874 static const reg_class_t sse_fpmath_classes[] = {
25875 GENERAL_REGS, SSE_REGS, MMX_REGS, FLOAT_REGS, LIM_REG_CLASSES
25876 };
25877 static const reg_class_t no_sse_fpmath_classes[] = {
25878 GENERAL_REGS, FLOAT_REGS, MMX_REGS, SSE_REGS, LIM_REG_CLASSES
25879 };
25880
25881 return TARGET_SSE_MATH ? sse_fpmath_classes : no_sse_fpmath_classes;
25882 }
25883
25884 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
25885 QImode must go into class Q_REGS.
25886 Narrow ALL_REGS to GENERAL_REGS. This allows movsf and
25887 movdf to do mem-to-mem moves through integer regs. */
25888 enum reg_class
25889 ix86_preferred_reload_class (rtx x, enum reg_class regclass)
25890 {
25891 enum machine_mode mode = GET_MODE (x);
25892
25893 /* We're only allowed to return a subclass of CLASS. Many of the
25894 following checks fail for NO_REGS, so eliminate that early. */
25895 if (regclass == NO_REGS)
25896 return NO_REGS;
25897
25898 /* All classes can load zeros. */
25899 if (x == CONST0_RTX (mode))
25900 return regclass;
25901
25902 /* Force constants into memory if we are loading a (nonzero) constant into
25903 an MMX or SSE register. This is because there are no MMX/SSE instructions
25904 to load from a constant. */
25905 if (CONSTANT_P (x)
25906 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
25907 return NO_REGS;
25908
25909 /* Prefer SSE regs only, if we can use them for math. */
25910 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
25911 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
25912
25913 /* Floating-point constants need more complex checks. */
25914 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
25915 {
25916 /* General regs can load everything. */
25917 if (reg_class_subset_p (regclass, GENERAL_REGS))
25918 return regclass;
25919
25920 /* Floats can load 0 and 1 plus some others. Note that we eliminated
25921 zero above. We only want to wind up preferring 80387 registers if
25922 we plan on doing computation with them. */
25923 if (TARGET_80387
25924 && standard_80387_constant_p (x))
25925 {
25926 /* Limit class to non-sse. */
25927 if (regclass == FLOAT_SSE_REGS)
25928 return FLOAT_REGS;
25929 if (regclass == FP_TOP_SSE_REGS)
25930 return FP_TOP_REG;
25931 if (regclass == FP_SECOND_SSE_REGS)
25932 return FP_SECOND_REG;
25933 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
25934 return regclass;
25935 }
25936
25937 return NO_REGS;
25938 }
25939
25940 /* Generally when we see PLUS here, it's the function invariant
25941 (plus soft-fp const_int), which can only be computed into general
25942 regs. */
25943 if (GET_CODE (x) == PLUS)
25944 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
25945
25946 /* QImode constants are easy to load, but non-constant QImode data
25947 must go into Q_REGS. */
25948 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
25949 {
25950 if (reg_class_subset_p (regclass, Q_REGS))
25951 return regclass;
25952 if (reg_class_subset_p (Q_REGS, regclass))
25953 return Q_REGS;
25954 return NO_REGS;
25955 }
25956
25957 return regclass;
25958 }
25959
25960 /* Discourage putting floating-point values in SSE registers unless
25961 SSE math is being used, and likewise for the 387 registers. */
25962 enum reg_class
25963 ix86_preferred_output_reload_class (rtx x, enum reg_class regclass)
25964 {
25965 enum machine_mode mode = GET_MODE (x);
25966
25967 /* Restrict the output reload class to the register bank that we are doing
25968 math on. If we would like not to return a subset of CLASS, reject this
25969 alternative: if reload cannot do this, it will still use its choice. */
25971 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
25972 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
25973
25974 if (X87_FLOAT_MODE_P (mode))
25975 {
25976 if (regclass == FP_TOP_SSE_REGS)
25977 return FP_TOP_REG;
25978 else if (regclass == FP_SECOND_SSE_REGS)
25979 return FP_SECOND_REG;
25980 else
25981 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
25982 }
25983
25984 return regclass;
25985 }
25986
25987 static reg_class_t
25988 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
25989 enum machine_mode mode,
25990 secondary_reload_info *sri ATTRIBUTE_UNUSED)
25991 {
25992 /* QImode spills from non-QI registers require an
25993 intermediate register on 32-bit targets. */
25994 if (!in_p && mode == QImode && !TARGET_64BIT
25995 && (rclass == GENERAL_REGS
25996 || rclass == LEGACY_REGS
25997 || rclass == INDEX_REGS))
25998 {
25999 int regno;
26000
26001 if (REG_P (x))
26002 regno = REGNO (x);
26003 else
26004 regno = -1;
26005
26006 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
26007 regno = true_regnum (x);
26008
26009 /* Return Q_REGS if the operand is in memory. */
26010 if (regno == -1)
26011 return Q_REGS;
26012 }
26013
26014 return NO_REGS;
26015 }
26016
26017 /* If we are copying between general and FP registers, we need a memory
26018 location. The same is true for SSE and MMX registers.
26019
26020 To optimize register_move_cost performance, allow an inline variant.
26021 
26022 The macro can't work reliably when one of the CLASSES is a class containing
26023 registers from multiple units (SSE, MMX, integer). We avoid this by never
26024 combining those units in a single alternative in the machine description.
26025 Ensure that this constraint holds to avoid unexpected surprises.
26026
26027 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
26028 enforce these sanity checks. */
26029
26030 static inline int
26031 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
26032 enum machine_mode mode, int strict)
26033 {
26034 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
26035 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
26036 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
26037 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
26038 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
26039 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
26040 {
26041 gcc_assert (!strict);
26042 return true;
26043 }
26044
26045 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
26046 return true;
26047
26048 /* ??? This is a lie. We do have moves between mmx/general, and between
26049 mmx/sse2. But by saying we need secondary memory we discourage the
26050 register allocator from using the mmx registers unless needed. */
26051 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
26052 return true;
26053
26054 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
26055 {
26056 /* SSE1 doesn't have any direct moves from other classes. */
26057 if (!TARGET_SSE2)
26058 return true;
26059
26060 /* If the target says that inter-unit moves are more expensive
26061 than moving through memory, then don't generate them. */
26062 if (!TARGET_INTER_UNIT_MOVES)
26063 return true;
26064
26065 /* Between SSE and general, we have moves no larger than word size. */
26066 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
26067 return true;
26068 }
26069
26070 return false;
26071 }
26072
26073 int
26074 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
26075 enum machine_mode mode, int strict)
26076 {
26077 return inline_secondary_memory_needed (class1, class2, mode, strict);
26078 }
26079
26080 /* Return true if the registers in CLASS cannot represent the change from
26081 modes FROM to TO. */
26082
26083 bool
26084 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
26085 enum reg_class regclass)
26086 {
26087 if (from == to)
26088 return false;
26089
26090 /* x87 registers can't do subreg at all, as all values are reformatted
26091 to extended precision. */
26092 if (MAYBE_FLOAT_CLASS_P (regclass))
26093 return true;
26094
26095 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
26096 {
26097 /* Vector registers do not support QI or HImode loads. If we don't
26098 disallow a change to these modes, reload will assume it's ok to
26099 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
26100 the vec_dupv4hi pattern. */
26101 if (GET_MODE_SIZE (from) < 4)
26102 return true;
26103
26104 /* Vector registers do not support subreg with nonzero offsets, which
26105 are otherwise valid for integer registers. Since we can't see
26106 whether we have a nonzero offset from here, prohibit all
26107 nonparadoxical subregs changing size. */
26108 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
26109 return true;
26110 }
26111
26112 return false;
26113 }
26114
26115 /* Return the cost of moving data of mode M between a
26116 register and memory. A value of 2 is the default; this cost is
26117 relative to those in `REGISTER_MOVE_COST'.
26118
26119 This function is used extensively by register_move_cost, which is used to
26120 build tables at startup. Make it inline in this case.
26121 When IN is 2, return the maximum of the in and out move costs.
26122
26123 If moving between registers and memory is more expensive than
26124 between two registers, you should define this macro to express the
26125 relative cost.
26126
26127 Also model the increased cost of moving QImode registers in non
26128 Q_REGS classes.
26129 */
26130 static inline int
26131 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
26132 int in)
26133 {
26134 int cost;
26135 if (FLOAT_CLASS_P (regclass))
26136 {
26137 int index;
26138 switch (mode)
26139 {
26140 case SFmode:
26141 index = 0;
26142 break;
26143 case DFmode:
26144 index = 1;
26145 break;
26146 case XFmode:
26147 index = 2;
26148 break;
26149 default:
26150 return 100;
26151 }
26152 if (in == 2)
26153 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
26154 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
26155 }
26156 if (SSE_CLASS_P (regclass))
26157 {
26158 int index;
26159 switch (GET_MODE_SIZE (mode))
26160 {
26161 case 4:
26162 index = 0;
26163 break;
26164 case 8:
26165 index = 1;
26166 break;
26167 case 16:
26168 index = 2;
26169 break;
26170 default:
26171 return 100;
26172 }
26173 if (in == 2)
26174 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
26175 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
26176 }
26177 if (MMX_CLASS_P (regclass))
26178 {
26179 int index;
26180 switch (GET_MODE_SIZE (mode))
26181 {
26182 case 4:
26183 index = 0;
26184 break;
26185 case 8:
26186 index = 1;
26187 break;
26188 default:
26189 return 100;
26190 }
26191 if (in == 2)
26192 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
26193 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
26194 }
26195 switch (GET_MODE_SIZE (mode))
26196 {
26197 case 1:
26198 if (Q_CLASS_P (regclass) || TARGET_64BIT)
26199 {
26200 if (!in)
26201 return ix86_cost->int_store[0];
26202 if (TARGET_PARTIAL_REG_DEPENDENCY
26203 && optimize_function_for_speed_p (cfun))
26204 cost = ix86_cost->movzbl_load;
26205 else
26206 cost = ix86_cost->int_load[0];
26207 if (in == 2)
26208 return MAX (cost, ix86_cost->int_store[0]);
26209 return cost;
26210 }
26211 else
26212 {
26213 if (in == 2)
26214 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
26215 if (in)
26216 return ix86_cost->movzbl_load;
26217 else
26218 return ix86_cost->int_store[0] + 4;
26219 }
26220 break;
26221 case 2:
26222 if (in == 2)
26223 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
26224 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
26225 default:
26226 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
26227 if (mode == TFmode)
26228 mode = XFmode;
26229 if (in == 2)
26230 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
26231 else if (in)
26232 cost = ix86_cost->int_load[2];
26233 else
26234 cost = ix86_cost->int_store[2];
26235 return (cost * (((int) GET_MODE_SIZE (mode)
26236 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
26237 }
26238 }
26239
26240 static int
26241 ix86_memory_move_cost (enum machine_mode mode, reg_class_t regclass,
26242 bool in)
26243 {
26244 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
26245 }
26246
26247
26248 /* Return the cost of moving data from a register in class CLASS1 to
26249 one in class CLASS2.
26250
26251 It is not required that the cost always equal 2 when FROM is the same as TO;
26252 on some machines it is expensive to move between registers if they are not
26253 general registers. */
26254
26255 static int
26256 ix86_register_move_cost (enum machine_mode mode, reg_class_t class1_i,
26257 reg_class_t class2_i)
26258 {
26259 enum reg_class class1 = (enum reg_class) class1_i;
26260 enum reg_class class2 = (enum reg_class) class2_i;
26261
26262 /* In case we require secondary memory, compute cost of the store followed
26263 by load. In order to avoid bad register allocation choices, we need
26264 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
26265
26266 if (inline_secondary_memory_needed (class1, class2, mode, 0))
26267 {
26268 int cost = 1;
26269
26270 cost += inline_memory_move_cost (mode, class1, 2);
26271 cost += inline_memory_move_cost (mode, class2, 2);
26272
26273 /* In case of copying from general_purpose_register we may emit multiple
26274 stores followed by single load causing memory size mismatch stall.
26275 Count this as arbitrarily high cost of 20. */
26276 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
26277 cost += 20;
26278
26279 /* In the case of FP/MMX moves, the registers actually overlap, and we
26280 have to switch modes in order to treat them differently. */
26281 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
26282 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
26283 cost += 20;
26284
26285 return cost;
26286 }
26287
26288 /* Moves between SSE/MMX and integer unit are expensive. */
26289 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
26290 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
26291
26292 /* ??? By keeping the returned value relatively high, we limit the number
26293 of moves between integer and MMX/SSE registers for all targets.
26294 Additionally, a high value prevents problems with ix86_modes_tieable_p(),
26295 where integer modes in MMX/SSE registers are not tieable
26296 because of missing QImode and HImode moves to, from, or between
26297 MMX/SSE registers. */
26298 return MAX (8, ix86_cost->mmxsse_to_integer);
26299
26300 if (MAYBE_FLOAT_CLASS_P (class1))
26301 return ix86_cost->fp_move;
26302 if (MAYBE_SSE_CLASS_P (class1))
26303 return ix86_cost->sse_move;
26304 if (MAYBE_MMX_CLASS_P (class1))
26305 return ix86_cost->mmx_move;
26306 return 2;
26307 }
26308
26309 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
26310
26311 bool
26312 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
26313 {
26314 /* Only the flags registers can hold CCmode values, and they can hold only CCmode values. */
26315 if (CC_REGNO_P (regno))
26316 return GET_MODE_CLASS (mode) == MODE_CC;
26317 if (GET_MODE_CLASS (mode) == MODE_CC
26318 || GET_MODE_CLASS (mode) == MODE_RANDOM
26319 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
26320 return 0;
26321 if (FP_REGNO_P (regno))
26322 return VALID_FP_MODE_P (mode);
26323 if (SSE_REGNO_P (regno))
26324 {
26325 /* We implement the move patterns for all vector modes into and
26326 out of SSE registers, even when no operation instructions
26327 are available. OImode move is available only when AVX is
26328 enabled. */
26329 return ((TARGET_AVX && mode == OImode)
26330 || VALID_AVX256_REG_MODE (mode)
26331 || VALID_SSE_REG_MODE (mode)
26332 || VALID_SSE2_REG_MODE (mode)
26333 || VALID_MMX_REG_MODE (mode)
26334 || VALID_MMX_REG_MODE_3DNOW (mode));
26335 }
26336 if (MMX_REGNO_P (regno))
26337 {
26338 /* We implement the move patterns for 3DNOW modes even in MMX mode,
26339 so if the register is available at all, then we can move data of
26340 the given mode into or out of it. */
26341 return (VALID_MMX_REG_MODE (mode)
26342 || VALID_MMX_REG_MODE_3DNOW (mode));
26343 }
26344
26345 if (mode == QImode)
26346 {
26347 /* Take care with QImode values - they can be in non-QI regs,
26348 but then they do cause partial register stalls. */
26349 if (regno <= BX_REG || TARGET_64BIT)
26350 return 1;
26351 if (!TARGET_PARTIAL_REG_STALL)
26352 return 1;
26353 return reload_in_progress || reload_completed;
26354 }
26355 /* We handle both integer and floats in the general purpose registers. */
26356 else if (VALID_INT_MODE_P (mode))
26357 return 1;
26358 else if (VALID_FP_MODE_P (mode))
26359 return 1;
26360 else if (VALID_DFP_MODE_P (mode))
26361 return 1;
26362 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
26363 on to use that value in smaller contexts, this can easily force a
26364 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
26365 supporting DImode, allow it. */
26366 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
26367 return 1;
26368
26369 return 0;
26370 }
26371
26372 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
26373 tieable integer mode. */
26374
26375 static bool
26376 ix86_tieable_integer_mode_p (enum machine_mode mode)
26377 {
26378 switch (mode)
26379 {
26380 case HImode:
26381 case SImode:
26382 return true;
26383
26384 case QImode:
26385 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
26386
26387 case DImode:
26388 return TARGET_64BIT;
26389
26390 default:
26391 return false;
26392 }
26393 }
26394
26395 /* Return true if MODE1 is accessible in a register that can hold MODE2
26396 without copying. That is, all register classes that can hold MODE2
26397 can also hold MODE1. */
26398
26399 bool
26400 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
26401 {
26402 if (mode1 == mode2)
26403 return true;
26404
26405 if (ix86_tieable_integer_mode_p (mode1)
26406 && ix86_tieable_integer_mode_p (mode2))
26407 return true;
26408
26409 /* MODE2 being XFmode implies fp stack or general regs, which means we
26410 can tie any smaller floating point modes to it. Note that we do not
26411 tie this with TFmode. */
26412 if (mode2 == XFmode)
26413 return mode1 == SFmode || mode1 == DFmode;
26414
26415 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
26416 that we can tie it with SFmode. */
26417 if (mode2 == DFmode)
26418 return mode1 == SFmode;
26419
26420 /* If MODE2 is only appropriate for an SSE register, then tie with
26421 any other mode acceptable to SSE registers. */
26422 if (GET_MODE_SIZE (mode2) == 16
26423 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
26424 return (GET_MODE_SIZE (mode1) == 16
26425 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
26426
26427 /* If MODE2 is appropriate for an MMX register, then tie
26428 with any other mode acceptable to MMX registers. */
26429 if (GET_MODE_SIZE (mode2) == 8
26430 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
26431 return (GET_MODE_SIZE (mode1) == 8
26432 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
26433
26434 return false;
26435 }
26436
26437 /* Compute a (partial) cost for rtx X. Return true if the complete
26438 cost has been computed, and false if subexpressions should be
26439 scanned. In either case, *TOTAL contains the cost result. */
26440
26441 static bool
26442 ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total, bool speed)
26443 {
26444 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
26445 enum machine_mode mode = GET_MODE (x);
26446 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
26447
26448 switch (code)
26449 {
26450 case CONST_INT:
26451 case CONST:
26452 case LABEL_REF:
26453 case SYMBOL_REF:
26454 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
26455 *total = 3;
26456 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
26457 *total = 2;
26458 else if (flag_pic && SYMBOLIC_CONST (x)
26459 && (!TARGET_64BIT
26460 || (GET_CODE (x) != LABEL_REF
26461 && (GET_CODE (x) != SYMBOL_REF
26462 || !SYMBOL_REF_LOCAL_P (x)))))
26463 *total = 1;
26464 else
26465 *total = 0;
26466 return true;
26467
26468 case CONST_DOUBLE:
26469 if (mode == VOIDmode)
26470 *total = 0;
26471 else
26472 switch (standard_80387_constant_p (x))
26473 {
26474 case 1: /* 0.0 */
26475 *total = 1;
26476 break;
26477 default: /* Other constants */
26478 *total = 2;
26479 break;
26480 case 0:
26481 case -1:
26482 /* Start with (MEM (SYMBOL_REF)), since that's where
26483 it'll probably end up. Add a penalty for size. */
26484 *total = (COSTS_N_INSNS (1)
26485 + (flag_pic != 0 && !TARGET_64BIT)
26486 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
26487 break;
26488 }
26489 return true;
26490
26491 case ZERO_EXTEND:
26492 /* The zero extension is often completely free on x86_64, so make
26493 it as cheap as possible. */
26494 if (TARGET_64BIT && mode == DImode
26495 && GET_MODE (XEXP (x, 0)) == SImode)
26496 *total = 1;
26497 else if (TARGET_ZERO_EXTEND_WITH_AND)
26498 *total = cost->add;
26499 else
26500 *total = cost->movzx;
26501 return false;
26502
26503 case SIGN_EXTEND:
26504 *total = cost->movsx;
26505 return false;
26506
26507 case ASHIFT:
26508 if (CONST_INT_P (XEXP (x, 1))
26509 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
26510 {
26511 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
26512 if (value == 1)
26513 {
26514 *total = cost->add;
26515 return false;
26516 }
26517 if ((value == 2 || value == 3)
26518 && cost->lea <= cost->shift_const)
26519 {
26520 *total = cost->lea;
26521 return false;
26522 }
26523 }
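      /* Added illustration: a left shift by 2 or 3 is a multiply by 4 or 8,
         which an address calculation can perform directly, e.g.
           leal 0(,%eax,4), %eax
         so such shifts are costed as an lea whenever that is no more
         expensive than a constant shift.  */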
26524 /* FALLTHRU */
26525
26526 case ROTATE:
26527 case ASHIFTRT:
26528 case LSHIFTRT:
26529 case ROTATERT:
26530 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
26531 {
26532 if (CONST_INT_P (XEXP (x, 1)))
26533 {
26534 if (INTVAL (XEXP (x, 1)) > 32)
26535 *total = cost->shift_const + COSTS_N_INSNS (2);
26536 else
26537 *total = cost->shift_const * 2;
26538 }
26539 else
26540 {
26541 if (GET_CODE (XEXP (x, 1)) == AND)
26542 *total = cost->shift_var * 2;
26543 else
26544 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
26545 }
26546 }
26547 else
26548 {
26549 if (CONST_INT_P (XEXP (x, 1)))
26550 *total = cost->shift_const;
26551 else
26552 *total = cost->shift_var;
26553 }
26554 return false;
26555
26556 case MULT:
26557 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26558 {
26559 /* ??? SSE scalar cost should be used here. */
26560 *total = cost->fmul;
26561 return false;
26562 }
26563 else if (X87_FLOAT_MODE_P (mode))
26564 {
26565 *total = cost->fmul;
26566 return false;
26567 }
26568 else if (FLOAT_MODE_P (mode))
26569 {
26570 /* ??? SSE vector cost should be used here. */
26571 *total = cost->fmul;
26572 return false;
26573 }
26574 else
26575 {
26576 rtx op0 = XEXP (x, 0);
26577 rtx op1 = XEXP (x, 1);
26578 int nbits;
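          /* Added note: for a constant multiplier we charge mult_bit per set
             bit, modelling synthesis of the multiply from shifts and adds;
             e.g. a multiply by 10 (binary 1010) counts two set bits.  */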
26579 if (CONST_INT_P (XEXP (x, 1)))
26580 {
26581 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
26582 for (nbits = 0; value != 0; value &= value - 1)
26583 nbits++;
26584 }
26585 else
26586 /* This is arbitrary. */
26587 nbits = 7;
26588
26589 /* Compute costs correctly for widening multiplication. */
26590 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
26591 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
26592 == GET_MODE_SIZE (mode))
26593 {
26594 int is_mulwiden = 0;
26595 enum machine_mode inner_mode = GET_MODE (op0);
26596
26597 if (GET_CODE (op0) == GET_CODE (op1))
26598 is_mulwiden = 1, op1 = XEXP (op1, 0);
26599 else if (CONST_INT_P (op1))
26600 {
26601 if (GET_CODE (op0) == SIGN_EXTEND)
26602 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
26603 == INTVAL (op1);
26604 else
26605 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
26606 }
26607
26608 if (is_mulwiden)
26609 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
26610 }
26611
26612 *total = (cost->mult_init[MODE_INDEX (mode)]
26613 + nbits * cost->mult_bit
26614 + rtx_cost (op0, outer_code, speed) + rtx_cost (op1, outer_code, speed));
26615
26616 return true;
26617 }
26618
26619 case DIV:
26620 case UDIV:
26621 case MOD:
26622 case UMOD:
26623 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26624 /* ??? SSE cost should be used here. */
26625 *total = cost->fdiv;
26626 else if (X87_FLOAT_MODE_P (mode))
26627 *total = cost->fdiv;
26628 else if (FLOAT_MODE_P (mode))
26629 /* ??? SSE vector cost should be used here. */
26630 *total = cost->fdiv;
26631 else
26632 *total = cost->divide[MODE_INDEX (mode)];
26633 return false;
26634
26635 case PLUS:
26636 if (GET_MODE_CLASS (mode) == MODE_INT
26637 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
26638 {
26639 if (GET_CODE (XEXP (x, 0)) == PLUS
26640 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
26641 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
26642 && CONSTANT_P (XEXP (x, 1)))
26643 {
26644 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
26645 if (val == 2 || val == 4 || val == 8)
26646 {
26647 *total = cost->lea;
26648 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
26649 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
26650 outer_code, speed);
26651 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
26652 return true;
26653 }
26654 }
26655 else if (GET_CODE (XEXP (x, 0)) == MULT
26656 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
26657 {
26658 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
26659 if (val == 2 || val == 4 || val == 8)
26660 {
26661 *total = cost->lea;
26662 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
26663 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
26664 return true;
26665 }
26666 }
26667 else if (GET_CODE (XEXP (x, 0)) == PLUS)
26668 {
26669 *total = cost->lea;
26670 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
26671 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
26672 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
26673 return true;
26674 }
26675 }
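      /* Added note: the patterns above correspond to what a single lea can
         compute, e.g. (plus (plus (mult reg 4) reg) const) is
           leal 12(%ebx,%eax,4), %ecx
         so only one lea cost plus the cost of the operands is charged.  */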
26676 /* FALLTHRU */
26677
26678 case MINUS:
26679 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26680 {
26681 /* ??? SSE cost should be used here. */
26682 *total = cost->fadd;
26683 return false;
26684 }
26685 else if (X87_FLOAT_MODE_P (mode))
26686 {
26687 *total = cost->fadd;
26688 return false;
26689 }
26690 else if (FLOAT_MODE_P (mode))
26691 {
26692 /* ??? SSE vector cost should be used here. */
26693 *total = cost->fadd;
26694 return false;
26695 }
26696 /* FALLTHRU */
26697
26698 case AND:
26699 case IOR:
26700 case XOR:
26701 if (!TARGET_64BIT && mode == DImode)
26702 {
26703 *total = (cost->add * 2
26704 + (rtx_cost (XEXP (x, 0), outer_code, speed)
26705 << (GET_MODE (XEXP (x, 0)) != DImode))
26706 + (rtx_cost (XEXP (x, 1), outer_code, speed)
26707 << (GET_MODE (XEXP (x, 1)) != DImode)));
26708 return true;
26709 }
26710 /* FALLTHRU */
26711
26712 case NEG:
26713 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26714 {
26715 /* ??? SSE cost should be used here. */
26716 *total = cost->fchs;
26717 return false;
26718 }
26719 else if (X87_FLOAT_MODE_P (mode))
26720 {
26721 *total = cost->fchs;
26722 return false;
26723 }
26724 else if (FLOAT_MODE_P (mode))
26725 {
26726 /* ??? SSE vector cost should be used here. */
26727 *total = cost->fchs;
26728 return false;
26729 }
26730 /* FALLTHRU */
26731
26732 case NOT:
26733 if (!TARGET_64BIT && mode == DImode)
26734 *total = cost->add * 2;
26735 else
26736 *total = cost->add;
26737 return false;
26738
26739 case COMPARE:
26740 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
26741 && XEXP (XEXP (x, 0), 1) == const1_rtx
26742 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
26743 && XEXP (x, 1) == const0_rtx)
26744 {
26745 /* This kind of construct is implemented using test[bwl].
26746 Treat it as if we had an AND. */
26747 *total = (cost->add
26748 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed)
26749 + rtx_cost (const1_rtx, outer_code, speed));
26750 return true;
26751 }
26752 return false;
26753
26754 case FLOAT_EXTEND:
26755 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
26756 *total = 0;
26757 return false;
26758
26759 case ABS:
26760 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26761 /* ??? SSE cost should be used here. */
26762 *total = cost->fabs;
26763 else if (X87_FLOAT_MODE_P (mode))
26764 *total = cost->fabs;
26765 else if (FLOAT_MODE_P (mode))
26766 /* ??? SSE vector cost should be used here. */
26767 *total = cost->fabs;
26768 return false;
26769
26770 case SQRT:
26771 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26772 /* ??? SSE cost should be used here. */
26773 *total = cost->fsqrt;
26774 else if (X87_FLOAT_MODE_P (mode))
26775 *total = cost->fsqrt;
26776 else if (FLOAT_MODE_P (mode))
26777 /* ??? SSE vector cost should be used here. */
26778 *total = cost->fsqrt;
26779 return false;
26780
26781 case UNSPEC:
26782 if (XINT (x, 1) == UNSPEC_TP)
26783 *total = 0;
26784 return false;
26785
26786 case VEC_SELECT:
26787 case VEC_CONCAT:
26788 case VEC_MERGE:
26789 case VEC_DUPLICATE:
26790 /* ??? Assume all of these vector manipulation patterns are
26791 recognizable, in which case they all pretty much have the
26792 same cost. */
26793 *total = COSTS_N_INSNS (1);
26794 return true;
26795
26796 default:
26797 return false;
26798 }
26799 }
26800
26801 #if TARGET_MACHO
26802
26803 static int current_machopic_label_num;
26804
26805 /* Given a symbol name and its associated stub, write out the
26806 definition of the stub. */
26807
26808 void
26809 machopic_output_stub (FILE *file, const char *symb, const char *stub)
26810 {
26811 unsigned int length;
26812 char *binder_name, *symbol_name, lazy_ptr_name[32];
26813 int label = ++current_machopic_label_num;
26814
26815 /* For 64-bit we shouldn't get here. */
26816 gcc_assert (!TARGET_64BIT);
26817
26818 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
26819 symb = targetm.strip_name_encoding (symb);
26820
26821 length = strlen (stub);
26822 binder_name = XALLOCAVEC (char, length + 32);
26823 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
26824
26825 length = strlen (symb);
26826 symbol_name = XALLOCAVEC (char, length + 32);
26827 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
26828
26829 sprintf (lazy_ptr_name, "L%d$lz", label);
26830
26831 if (MACHOPIC_PURE)
26832 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
26833 else
26834 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
26835
26836 fprintf (file, "%s:\n", stub);
26837 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
26838
26839 if (MACHOPIC_PURE)
26840 {
26841 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
26842 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
26843 fprintf (file, "\tjmp\t*%%edx\n");
26844 }
26845 else
26846 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
26847
26848 fprintf (file, "%s:\n", binder_name);
26849
26850 if (MACHOPIC_PURE)
26851 {
26852 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
26853 fputs ("\tpushl\t%eax\n", file);
26854 }
26855 else
26856 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
26857
26858 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
26859
26860 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
26861 fprintf (file, "%s:\n", lazy_ptr_name);
26862 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
26863 fprintf (file, ASM_LONG "%s\n", binder_name);
26864 }
26865 #endif /* TARGET_MACHO */
26866
26867 /* Order the registers for register allocator. */
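/* Added rationale (informal): call-clobbered general registers are listed
   first because using them needs no save/restore in the prologue and
   epilogue; call-saved general registers, x87, SSE and MMX registers
   follow.  */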
26868
26869 void
26870 x86_order_regs_for_local_alloc (void)
26871 {
26872 int pos = 0;
26873 int i;
26874
26875 /* First allocate the local general purpose registers. */
26876 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
26877 if (GENERAL_REGNO_P (i) && call_used_regs[i])
26878 reg_alloc_order [pos++] = i;
26879
26880 /* Global general purpose registers. */
26881 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
26882 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
26883 reg_alloc_order [pos++] = i;
26884
26885 /* x87 registers come first in case we are doing FP math
26886 using them. */
26887 if (!TARGET_SSE_MATH)
26888 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
26889 reg_alloc_order [pos++] = i;
26890
26891 /* SSE registers. */
26892 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
26893 reg_alloc_order [pos++] = i;
26894 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
26895 reg_alloc_order [pos++] = i;
26896
26897 /* x87 registers. */
26898 if (TARGET_SSE_MATH)
26899 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
26900 reg_alloc_order [pos++] = i;
26901
26902 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
26903 reg_alloc_order [pos++] = i;
26904
26905 /* Initialize the rest of the array, as we do not allocate some registers
26906 at all. */
26907 while (pos < FIRST_PSEUDO_REGISTER)
26908 reg_alloc_order [pos++] = 0;
26909 }
26910
26911 /* Handle an "ms_abi" or "sysv_abi" attribute; arguments as in
26912 struct attribute_spec.handler. */
26913 static tree
26914 ix86_handle_abi_attribute (tree *node, tree name,
26915 tree args ATTRIBUTE_UNUSED,
26916 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
26917 {
26918 if (TREE_CODE (*node) != FUNCTION_TYPE
26919 && TREE_CODE (*node) != METHOD_TYPE
26920 && TREE_CODE (*node) != FIELD_DECL
26921 && TREE_CODE (*node) != TYPE_DECL)
26922 {
26923 warning (OPT_Wattributes, "%qE attribute only applies to functions",
26924 name);
26925 *no_add_attrs = true;
26926 return NULL_TREE;
26927 }
26928 if (!TARGET_64BIT)
26929 {
26930 warning (OPT_Wattributes, "%qE attribute only available for 64-bit",
26931 name);
26932 *no_add_attrs = true;
26933 return NULL_TREE;
26934 }
26935
26936 /* The ms_abi and sysv_abi attributes are mutually exclusive. */
26937 if (is_attribute_p ("ms_abi", name))
26938 {
26939 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
26940 {
26941 error ("ms_abi and sysv_abi attributes are not compatible");
26942 }
26943
26944 return NULL_TREE;
26945 }
26946 else if (is_attribute_p ("sysv_abi", name))
26947 {
26948 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
26949 {
26950 error ("ms_abi and sysv_abi attributes are not compatible");
26951 }
26952
26953 return NULL_TREE;
26954 }
26955
26956 return NULL_TREE;
26957 }
26958
26959 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
26960 struct attribute_spec.handler. */
26961 static tree
26962 ix86_handle_struct_attribute (tree *node, tree name,
26963 tree args ATTRIBUTE_UNUSED,
26964 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
26965 {
26966 tree *type = NULL;
26967 if (DECL_P (*node))
26968 {
26969 if (TREE_CODE (*node) == TYPE_DECL)
26970 type = &TREE_TYPE (*node);
26971 }
26972 else
26973 type = node;
26974
26975 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
26976 || TREE_CODE (*type) == UNION_TYPE)))
26977 {
26978 warning (OPT_Wattributes, "%qE attribute ignored",
26979 name);
26980 *no_add_attrs = true;
26981 }
26982
26983 else if ((is_attribute_p ("ms_struct", name)
26984 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
26985 || ((is_attribute_p ("gcc_struct", name)
26986 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
26987 {
26988 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
26989 name);
26990 *no_add_attrs = true;
26991 }
26992
26993 return NULL_TREE;
26994 }
26995
26996 static tree
26997 ix86_handle_fndecl_attribute (tree *node, tree name,
26998 tree args ATTRIBUTE_UNUSED,
26999 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
27000 {
27001 if (TREE_CODE (*node) != FUNCTION_DECL)
27002 {
27003 warning (OPT_Wattributes, "%qE attribute only applies to functions",
27004 name);
27005 *no_add_attrs = true;
27006 return NULL_TREE;
27007 }
27008
27009 #ifndef HAVE_AS_IX86_SWAP
27010 if (!TARGET_64BIT)
27011 sorry ("ms_hook_prologue attribute needs assembler swap suffix support");
27012 #endif
27013
27014 return NULL_TREE;
27015 }
27016
27017 static bool
27018 ix86_ms_bitfield_layout_p (const_tree record_type)
27019 {
27020 return ((TARGET_MS_BITFIELD_LAYOUT
27021 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
27022 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
27023 }
27024
27025 /* Returns an expression indicating where the this parameter is
27026 located on entry to the FUNCTION. */
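/* Added examples (illustrative, not exhaustive): in 64-bit code `this'
   arrives in the first integer parameter register (%rdi for the SysV ABI,
   %rcx for the MS ABI); for a 32-bit fastcall method it arrives in %ecx;
   otherwise it is normally found on the stack at 4(%esp).  */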
27027
27028 static rtx
27029 x86_this_parameter (tree function)
27030 {
27031 tree type = TREE_TYPE (function);
27032 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
27033 int nregs;
27034
27035 if (TARGET_64BIT)
27036 {
27037 const int *parm_regs;
27038
27039 if (ix86_function_type_abi (type) == MS_ABI)
27040 parm_regs = x86_64_ms_abi_int_parameter_registers;
27041 else
27042 parm_regs = x86_64_int_parameter_registers;
27043 return gen_rtx_REG (DImode, parm_regs[aggr]);
27044 }
27045
27046 nregs = ix86_function_regparm (type, function);
27047
27048 if (nregs > 0 && !stdarg_p (type))
27049 {
27050 int regno;
27051
27052 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
27053 regno = aggr ? DX_REG : CX_REG;
27054 else if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type)))
27055 {
27056 regno = CX_REG;
27057 if (aggr)
27058 return gen_rtx_MEM (SImode,
27059 plus_constant (stack_pointer_rtx, 4));
27060 }
27061 else
27062 {
27063 regno = AX_REG;
27064 if (aggr)
27065 {
27066 regno = DX_REG;
27067 if (nregs == 1)
27068 return gen_rtx_MEM (SImode,
27069 plus_constant (stack_pointer_rtx, 4));
27070 }
27071 }
27072 return gen_rtx_REG (SImode, regno);
27073 }
27074
27075 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
27076 }
27077
27078 /* Determine whether x86_output_mi_thunk can succeed. */
27079
27080 static bool
27081 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
27082 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
27083 HOST_WIDE_INT vcall_offset, const_tree function)
27084 {
27085 /* 64-bit can handle anything. */
27086 if (TARGET_64BIT)
27087 return true;
27088
27089 /* For 32-bit, everything's fine if we have one free register. */
27090 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
27091 return true;
27092
27093 /* Need a free register for vcall_offset. */
27094 if (vcall_offset)
27095 return false;
27096
27097 /* Need a free register for GOT references. */
27098 if (flag_pic && !targetm.binds_local_p (function))
27099 return false;
27100
27101 /* Otherwise ok. */
27102 return true;
27103 }
27104
27105 /* Output the assembler code for a thunk function. THUNK_DECL is the
27106 declaration for the thunk function itself, FUNCTION is the decl for
27107 the target function. DELTA is an immediate constant offset to be
27108 added to THIS. If VCALL_OFFSET is nonzero, the word at
27109 *(*this + vcall_offset) should be added to THIS. */
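/* Added sketch of the emitted code (illustrative values: a 32-bit fastcall
   thunk with DELTA = 8 and VCALL_OFFSET = -16, `this' in %ecx):
        addl    $8, %ecx
        movl    (%ecx), %eax
        addl    -16(%eax), %ecx
        jmp     target
   The exact registers and instructions depend on the ABI and on -fpic.  */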
27110
27111 static void
27112 x86_output_mi_thunk (FILE *file,
27113 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
27114 HOST_WIDE_INT vcall_offset, tree function)
27115 {
27116 rtx xops[3];
27117 rtx this_param = x86_this_parameter (function);
27118 rtx this_reg, tmp;
27119
27120 /* Make sure unwind info is emitted for the thunk if needed. */
27121 final_start_function (emit_barrier (), file, 1);
27122
27123 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
27124 pull it in now and let DELTA benefit. */
27125 if (REG_P (this_param))
27126 this_reg = this_param;
27127 else if (vcall_offset)
27128 {
27129 /* Put the this parameter into %eax. */
27130 xops[0] = this_param;
27131 xops[1] = this_reg = gen_rtx_REG (Pmode, AX_REG);
27132 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
27133 }
27134 else
27135 this_reg = NULL_RTX;
27136
27137 /* Adjust the this parameter by a fixed constant. */
27138 if (delta)
27139 {
27140 xops[0] = GEN_INT (delta);
27141 xops[1] = this_reg ? this_reg : this_param;
27142 if (TARGET_64BIT)
27143 {
27144 if (!x86_64_general_operand (xops[0], DImode))
27145 {
27146 tmp = gen_rtx_REG (DImode, R10_REG);
27147 xops[1] = tmp;
27148 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
27149 xops[0] = tmp;
27150 xops[1] = this_param;
27151 }
27152 if (x86_maybe_negate_const_int (&xops[0], DImode))
27153 output_asm_insn ("sub{q}\t{%0, %1|%1, %0}", xops);
27154 else
27155 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
27156 }
27157 else if (x86_maybe_negate_const_int (&xops[0], SImode))
27158 output_asm_insn ("sub{l}\t{%0, %1|%1, %0}", xops);
27159 else
27160 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
27161 }
27162
27163 /* Adjust the this parameter by a value stored in the vtable. */
27164 if (vcall_offset)
27165 {
27166 if (TARGET_64BIT)
27167 tmp = gen_rtx_REG (DImode, R10_REG);
27168 else
27169 {
27170 int tmp_regno = CX_REG;
27171 if (lookup_attribute ("fastcall",
27172 TYPE_ATTRIBUTES (TREE_TYPE (function)))
27173 || lookup_attribute ("thiscall",
27174 TYPE_ATTRIBUTES (TREE_TYPE (function))))
27175 tmp_regno = AX_REG;
27176 tmp = gen_rtx_REG (SImode, tmp_regno);
27177 }
27178
27179 xops[0] = gen_rtx_MEM (Pmode, this_reg);
27180 xops[1] = tmp;
27181 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
27182
27183 /* Adjust the this parameter. */
27184 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
27185 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
27186 {
27187 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
27188 xops[0] = GEN_INT (vcall_offset);
27189 xops[1] = tmp2;
27190 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
27191 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
27192 }
27193 xops[1] = this_reg;
27194 output_asm_insn ("add%z1\t{%0, %1|%1, %0}", xops);
27195 }
27196
27197 /* If necessary, drop THIS back to its stack slot. */
27198 if (this_reg && this_reg != this_param)
27199 {
27200 xops[0] = this_reg;
27201 xops[1] = this_param;
27202 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
27203 }
27204
27205 xops[0] = XEXP (DECL_RTL (function), 0);
27206 if (TARGET_64BIT)
27207 {
27208 if (!flag_pic || targetm.binds_local_p (function))
27209 output_asm_insn ("jmp\t%P0", xops);
27210 /* All thunks should be in the same object as their target,
27211 and thus binds_local_p should be true. */
27212 else if (TARGET_64BIT && cfun->machine->call_abi == MS_ABI)
27213 gcc_unreachable ();
27214 else
27215 {
27216 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
27217 tmp = gen_rtx_CONST (Pmode, tmp);
27218 tmp = gen_rtx_MEM (QImode, tmp);
27219 xops[0] = tmp;
27220 output_asm_insn ("jmp\t%A0", xops);
27221 }
27222 }
27223 else
27224 {
27225 if (!flag_pic || targetm.binds_local_p (function))
27226 output_asm_insn ("jmp\t%P0", xops);
27227 else
27228 #if TARGET_MACHO
27229 if (TARGET_MACHO)
27230 {
27231 rtx sym_ref = XEXP (DECL_RTL (function), 0);
27232 tmp = (gen_rtx_SYMBOL_REF
27233 (Pmode,
27234 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
27235 tmp = gen_rtx_MEM (QImode, tmp);
27236 xops[0] = tmp;
27237 output_asm_insn ("jmp\t%0", xops);
27238 }
27239 else
27240 #endif /* TARGET_MACHO */
27241 {
27242 tmp = gen_rtx_REG (SImode, CX_REG);
27243 output_set_got (tmp, NULL_RTX);
27244
27245 xops[1] = tmp;
27246 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
27247 output_asm_insn ("jmp\t{*}%1", xops);
27248 }
27249 }
27250 final_end_function ();
27251 }
27252
27253 static void
27254 x86_file_start (void)
27255 {
27256 default_file_start ();
27257 #if TARGET_MACHO
27258 darwin_file_start ();
27259 #endif
27260 if (X86_FILE_START_VERSION_DIRECTIVE)
27261 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
27262 if (X86_FILE_START_FLTUSED)
27263 fputs ("\t.global\t__fltused\n", asm_out_file);
27264 if (ix86_asm_dialect == ASM_INTEL)
27265 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
27266 }
27267
27268 int
27269 x86_field_alignment (tree field, int computed)
27270 {
27271 enum machine_mode mode;
27272 tree type = TREE_TYPE (field);
27273
27274 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
27275 return computed;
27276 mode = TYPE_MODE (strip_array_types (type));
27277 if (mode == DFmode || mode == DCmode
27278 || GET_MODE_CLASS (mode) == MODE_INT
27279 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
27280 return MIN (32, computed);
27281 return computed;
27282 }
27283
27284 /* Output assembler code to FILE to increment profiler label # LABELNO
27285 for profiling a function entry. */
27286 void
27287 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
27288 {
27289 if (TARGET_64BIT)
27290 {
27291 #ifndef NO_PROFILE_COUNTERS
27292 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
27293 #endif
27294
27295 if (DEFAULT_ABI == SYSV_ABI && flag_pic)
27296 fputs ("\tcall\t*" MCOUNT_NAME "@GOTPCREL(%rip)\n", file);
27297 else
27298 fputs ("\tcall\t" MCOUNT_NAME "\n", file);
27299 }
27300 else if (flag_pic)
27301 {
27302 #ifndef NO_PROFILE_COUNTERS
27303 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
27304 LPREFIX, labelno);
27305 #endif
27306 fputs ("\tcall\t*" MCOUNT_NAME "@GOT(%ebx)\n", file);
27307 }
27308 else
27309 {
27310 #ifndef NO_PROFILE_COUNTERS
27311 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
27312 LPREFIX, labelno);
27313 #endif
27314 fputs ("\tcall\t" MCOUNT_NAME "\n", file);
27315 }
27316 }
27317
27318 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
27319 /* We don't have exact information about the insn sizes, but we may assume
27320 quite safely that we are informed about all 1 byte insns and memory
27321 address sizes. This is enough to eliminate unnecessary padding in
27322 99% of cases. */
27323
27324 static int
27325 min_insn_size (rtx insn)
27326 {
27327 int l = 0, len;
27328
27329 if (!INSN_P (insn) || !active_insn_p (insn))
27330 return 0;
27331
27332 /* Discard alignments we've emitted, as well as jump table data. */
27333 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
27334 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
27335 return 0;
27336 if (JUMP_TABLE_DATA_P (insn))
27337 return 0;
27338
27339 /* Important case - calls are always 5 bytes.
27340 It is common to have many calls in a row. */
27341 if (CALL_P (insn)
27342 && symbolic_reference_mentioned_p (PATTERN (insn))
27343 && !SIBLING_CALL_P (insn))
27344 return 5;
27345 len = get_attr_length (insn);
27346 if (len <= 1)
27347 return 1;
27348
27349 /* For normal instructions we rely on get_attr_length being exact,
27350 with a few exceptions. */
27351 if (!JUMP_P (insn))
27352 {
27353 enum attr_type type = get_attr_type (insn);
27354
27355 switch (type)
27356 {
27357 case TYPE_MULTI:
27358 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
27359 || asm_noperands (PATTERN (insn)) >= 0)
27360 return 0;
27361 break;
27362 case TYPE_OTHER:
27363 case TYPE_FCMP:
27364 break;
27365 default:
27366 /* Otherwise trust get_attr_length. */
27367 return len;
27368 }
27369
27370 l = get_attr_length_address (insn);
27371 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
27372 l = 4;
27373 }
27374 if (l)
27375 return 1+l;
27376 else
27377 return 2;
27378 }
27379
27380 /* The AMD K8 core mispredicts jumps when there are more than 3 jumps in a
27381 16 byte window. */
27382
27383 static void
27384 ix86_avoid_jump_mispredicts (void)
27385 {
27386 rtx insn, start = get_insns ();
27387 int nbytes = 0, njumps = 0;
27388 int isjump = 0;
27389
27390 /* Look for all minimal intervals of instructions containing 4 jumps.
27391 The intervals are bounded by START and INSN. NBYTES is the total
27392 size of instructions in the interval including INSN and not including
27393 START.  When NBYTES is smaller than 16 bytes, it is possible
27394 that the end of START and INSN ends up in the same 16 byte page.
27395
27396 The smallest offset in the page at which INSN can start is the case where
27397 START ends at offset 0.  The offset of INSN is then NBYTES - sizeof (INSN).
27398 We add a p2align to the 16 byte window with max skip 15 - NBYTES + sizeof (INSN).
27399 */
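/* Added worked example (informal): if the instructions since START occupy
   NBYTES = 12 bytes and INSN itself is 2 bytes, the pad emitted below is
   15 - 12 + 2 = 5 bytes, which is intended to keep INSN out of the 16 byte
   window holding the three preceding jumps.  */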
27400 for (insn = start; insn; insn = NEXT_INSN (insn))
27401 {
27402 int min_size;
27403
27404 if (LABEL_P (insn))
27405 {
27406 int align = label_to_alignment (insn);
27407 int max_skip = label_to_max_skip (insn);
27408
27409 if (max_skip > 15)
27410 max_skip = 15;
27411 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
27412 already in the current 16 byte page, because otherwise
27413 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
27414 bytes to reach 16 byte boundary. */
27415 if (align <= 0
27416 || (align <= 3 && max_skip != (1 << align) - 1))
27417 max_skip = 0;
27418 if (dump_file)
27419 fprintf (dump_file, "Label %i with max_skip %i\n",
27420 INSN_UID (insn), max_skip);
27421 if (max_skip)
27422 {
27423 while (nbytes + max_skip >= 16)
27424 {
27425 start = NEXT_INSN (start);
27426 if ((JUMP_P (start)
27427 && GET_CODE (PATTERN (start)) != ADDR_VEC
27428 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
27429 || CALL_P (start))
27430 njumps--, isjump = 1;
27431 else
27432 isjump = 0;
27433 nbytes -= min_insn_size (start);
27434 }
27435 }
27436 continue;
27437 }
27438
27439 min_size = min_insn_size (insn);
27440 nbytes += min_size;
27441 if (dump_file)
27442 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
27443 INSN_UID (insn), min_size);
27444 if ((JUMP_P (insn)
27445 && GET_CODE (PATTERN (insn)) != ADDR_VEC
27446 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
27447 || CALL_P (insn))
27448 njumps++;
27449 else
27450 continue;
27451
27452 while (njumps > 3)
27453 {
27454 start = NEXT_INSN (start);
27455 if ((JUMP_P (start)
27456 && GET_CODE (PATTERN (start)) != ADDR_VEC
27457 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
27458 || CALL_P (start))
27459 njumps--, isjump = 1;
27460 else
27461 isjump = 0;
27462 nbytes -= min_insn_size (start);
27463 }
27464 gcc_assert (njumps >= 0);
27465 if (dump_file)
27466 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
27467 INSN_UID (start), INSN_UID (insn), nbytes);
27468
27469 if (njumps == 3 && isjump && nbytes < 16)
27470 {
27471 int padsize = 15 - nbytes + min_insn_size (insn);
27472
27473 if (dump_file)
27474 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
27475 INSN_UID (insn), padsize);
27476 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
27477 }
27478 }
27479 }
27480 #endif
27481
27482 /* AMD Athlon works faster
27483 when RET is not the destination of a conditional jump or directly preceded
27484 by another jump instruction. We avoid the penalty by inserting a NOP just
27485 before the RET instruction in such cases. */
27486 static void
27487 ix86_pad_returns (void)
27488 {
27489 edge e;
27490 edge_iterator ei;
27491
27492 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
27493 {
27494 basic_block bb = e->src;
27495 rtx ret = BB_END (bb);
27496 rtx prev;
27497 bool replace = false;
27498
27499 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
27500 || optimize_bb_for_size_p (bb))
27501 continue;
27502 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
27503 if (active_insn_p (prev) || LABEL_P (prev))
27504 break;
27505 if (prev && LABEL_P (prev))
27506 {
27507 edge e;
27508 edge_iterator ei;
27509
27510 FOR_EACH_EDGE (e, ei, bb->preds)
27511 if (EDGE_FREQUENCY (e) && e->src->index >= 0
27512 && !(e->flags & EDGE_FALLTHRU))
27513 replace = true;
27514 }
27515 if (!replace)
27516 {
27517 prev = prev_active_insn (ret);
27518 if (prev
27519 && ((JUMP_P (prev) && any_condjump_p (prev))
27520 || CALL_P (prev)))
27521 replace = true;
27522 /* Empty functions get a branch mispredict even when the jump destination
27523 is not visible to us. */
27524 if (!prev && !optimize_function_for_size_p (cfun))
27525 replace = true;
27526 }
27527 if (replace)
27528 {
27529 emit_jump_insn_before (gen_return_internal_long (), ret);
27530 delete_insn (ret);
27531 }
27532 }
27533 }
27534
27535 /* Implement machine specific optimizations. We implement padding of returns
27536 for K8 CPUs and a pass to avoid 4 jumps in a single 16 byte window. */
27537 static void
27538 ix86_reorg (void)
27539 {
27540 if (optimize && optimize_function_for_speed_p (cfun))
27541 {
27542 if (TARGET_PAD_RETURNS)
27543 ix86_pad_returns ();
27544 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
27545 if (TARGET_FOUR_JUMP_LIMIT)
27546 ix86_avoid_jump_mispredicts ();
27547 #endif
27548 }
27549 }
27550
27551 /* Return nonzero when a QImode register that must be represented via a REX
27552 prefix is used. */
27553 bool
27554 x86_extended_QIreg_mentioned_p (rtx insn)
27555 {
27556 int i;
27557 extract_insn_cached (insn);
27558 for (i = 0; i < recog_data.n_operands; i++)
27559 if (REG_P (recog_data.operand[i])
27560 && REGNO (recog_data.operand[i]) > BX_REG)
27561 return true;
27562 return false;
27563 }
27564
27565 /* Return nonzero when P points to a register encoded via a REX prefix.
27566 Called via for_each_rtx. */
27567 static int
27568 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
27569 {
27570 unsigned int regno;
27571 if (!REG_P (*p))
27572 return 0;
27573 regno = REGNO (*p);
27574 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
27575 }
27576
27577 /* Return true when INSN mentions a register that must be encoded using a
27578 REX prefix. */
27579 bool
27580 x86_extended_reg_mentioned_p (rtx insn)
27581 {
27582 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
27583 extended_reg_mentioned_1, NULL);
27584 }
27585
27586 /* If profitable, negate (without causing overflow) integer constant
27587 of mode MODE at location LOC. Return true in this case. */
27588 bool
27589 x86_maybe_negate_const_int (rtx *loc, enum machine_mode mode)
27590 {
27591 HOST_WIDE_INT val;
27592
27593 if (!CONST_INT_P (*loc))
27594 return false;
27595
27596 switch (mode)
27597 {
27598 case DImode:
27599 /* DImode x86_64 constants must fit in 32 bits. */
27600 gcc_assert (x86_64_immediate_operand (*loc, mode));
27601
27602 mode = SImode;
27603 break;
27604
27605 case SImode:
27606 case HImode:
27607 case QImode:
27608 break;
27609
27610 default:
27611 gcc_unreachable ();
27612 }
27613
27614 /* Avoid overflows. */
27615 if (mode_signbit_p (mode, *loc))
27616 return false;
27617
27618 val = INTVAL (*loc);
27619
27620 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
27621 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
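  /* Added explanation: the short imm8 encoding is sign-extended and covers
     -128..127, so `add $128' needs a 4-byte immediate while `sub $-128'
     fits in one byte; conversely -128 itself is never negated because +128
     would not fit in the short form.  */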
27622 if ((val < 0 && val != -128)
27623 || val == 128)
27624 {
27625 *loc = GEN_INT (-val);
27626 return true;
27627 }
27628
27629 return false;
27630 }
27631
27632 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
27633 optabs would emit if we didn't have TFmode patterns. */
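/* Added outline (informal): an input with its top bit set would be mis-read
   as negative by the signed conversion, so the code below halves it with the
   low bit folded back in, (in >> 1) | (in & 1), converts that value, and then
   doubles the result; folding in the low bit keeps the rounding correct.  */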
27634
27635 void
27636 x86_emit_floatuns (rtx operands[2])
27637 {
27638 rtx neglab, donelab, i0, i1, f0, in, out;
27639 enum machine_mode mode, inmode;
27640
27641 inmode = GET_MODE (operands[1]);
27642 gcc_assert (inmode == SImode || inmode == DImode);
27643
27644 out = operands[0];
27645 in = force_reg (inmode, operands[1]);
27646 mode = GET_MODE (out);
27647 neglab = gen_label_rtx ();
27648 donelab = gen_label_rtx ();
27649 f0 = gen_reg_rtx (mode);
27650
27651 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
27652
27653 expand_float (out, in, 0);
27654
27655 emit_jump_insn (gen_jump (donelab));
27656 emit_barrier ();
27657
27658 emit_label (neglab);
27659
27660 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
27661 1, OPTAB_DIRECT);
27662 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
27663 1, OPTAB_DIRECT);
27664 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
27665
27666 expand_float (f0, i0, 0);
27667
27668 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
27669
27670 emit_label (donelab);
27671 }
27672 \f
27673 /* AVX does not support 32-byte integer vector operations,
27674 thus the longest vector we are faced with is V16QImode. */
27675 #define MAX_VECT_LEN 16
27676
27677 struct expand_vec_perm_d
27678 {
27679 rtx target, op0, op1;
27680 unsigned char perm[MAX_VECT_LEN];
27681 enum machine_mode vmode;
27682 unsigned char nelt;
27683 bool testing_p;
27684 };
27685
27686 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
27687 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
27688
27689 /* Get a vector mode of the same size as the original but with elements
27690 twice as wide. This is only guaranteed to apply to integral vectors. */
27691
27692 static inline enum machine_mode
27693 get_mode_wider_vector (enum machine_mode o)
27694 {
27695 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
27696 enum machine_mode n = GET_MODE_WIDER_MODE (o);
27697 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
27698 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
27699 return n;
27700 }
27701
27702 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
27703 with all elements equal to VAR. Return true if successful. */
27704
27705 static bool
27706 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
27707 rtx target, rtx val)
27708 {
27709 bool ok;
27710
27711 switch (mode)
27712 {
27713 case V2SImode:
27714 case V2SFmode:
27715 if (!mmx_ok)
27716 return false;
27717 /* FALLTHRU */
27718
27719 case V4DFmode:
27720 case V4DImode:
27721 case V8SFmode:
27722 case V8SImode:
27723 case V2DFmode:
27724 case V2DImode:
27725 case V4SFmode:
27726 case V4SImode:
27727 {
27728 rtx insn, dup;
27729
27730 /* First attempt to recognize VAL as-is. */
27731 dup = gen_rtx_VEC_DUPLICATE (mode, val);
27732 insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
27733 if (recog_memoized (insn) < 0)
27734 {
27735 rtx seq;
27736 /* If that fails, force VAL into a register. */
27737
27738 start_sequence ();
27739 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
27740 seq = get_insns ();
27741 end_sequence ();
27742 if (seq)
27743 emit_insn_before (seq, insn);
27744
27745 ok = recog_memoized (insn) >= 0;
27746 gcc_assert (ok);
27747 }
27748 }
27749 return true;
27750
27751 case V4HImode:
27752 if (!mmx_ok)
27753 return false;
27754 if (TARGET_SSE || TARGET_3DNOW_A)
27755 {
27756 rtx x;
27757
27758 val = gen_lowpart (SImode, val);
27759 x = gen_rtx_TRUNCATE (HImode, val);
27760 x = gen_rtx_VEC_DUPLICATE (mode, x);
27761 emit_insn (gen_rtx_SET (VOIDmode, target, x));
27762 return true;
27763 }
27764 goto widen;
27765
27766 case V8QImode:
27767 if (!mmx_ok)
27768 return false;
27769 goto widen;
27770
27771 case V8HImode:
27772 if (TARGET_SSE2)
27773 {
27774 struct expand_vec_perm_d dperm;
27775 rtx tmp1, tmp2;
27776
27777 permute:
27778 memset (&dperm, 0, sizeof (dperm));
27779 dperm.target = target;
27780 dperm.vmode = mode;
27781 dperm.nelt = GET_MODE_NUNITS (mode);
27782 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
27783
27784 /* Extend to SImode using a paradoxical SUBREG. */
27785 tmp1 = gen_reg_rtx (SImode);
27786 emit_move_insn (tmp1, gen_lowpart (SImode, val));
27787
27788 /* Insert the SImode value as low element of a V4SImode vector. */
27789 tmp2 = gen_lowpart (V4SImode, dperm.op0);
27790 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
27791
27792 ok = (expand_vec_perm_1 (&dperm)
27793 || expand_vec_perm_broadcast_1 (&dperm));
27794 gcc_assert (ok);
27795 return ok;
27796 }
27797 goto widen;
27798
27799 case V16QImode:
27800 if (TARGET_SSE2)
27801 goto permute;
27802 goto widen;
27803
27804 widen:
27805 /* Replicate the value once into the next wider mode and recurse. */
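      /* Added example: a QImode byte B becomes the HImode value
         (B << 8) | B, so a V16QImode broadcast reduces to a V8HImode
         broadcast of that value, and so on until a directly supported
         mode is reached.  */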
27806 {
27807 enum machine_mode smode, wsmode, wvmode;
27808 rtx x;
27809
27810 smode = GET_MODE_INNER (mode);
27811 wvmode = get_mode_wider_vector (mode);
27812 wsmode = GET_MODE_INNER (wvmode);
27813
27814 val = convert_modes (wsmode, smode, val, true);
27815 x = expand_simple_binop (wsmode, ASHIFT, val,
27816 GEN_INT (GET_MODE_BITSIZE (smode)),
27817 NULL_RTX, 1, OPTAB_LIB_WIDEN);
27818 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
27819
27820 x = gen_lowpart (wvmode, target);
27821 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
27822 gcc_assert (ok);
27823 return ok;
27824 }
27825
27826 case V16HImode:
27827 case V32QImode:
27828 {
27829 enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
27830 rtx x = gen_reg_rtx (hvmode);
27831
27832 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
27833 gcc_assert (ok);
27834
27835 x = gen_rtx_VEC_CONCAT (mode, x, x);
27836 emit_insn (gen_rtx_SET (VOIDmode, target, x));
27837 }
27838 return true;
27839
27840 default:
27841 return false;
27842 }
27843 }
27844
27845 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
27846 whose ONE_VAR element is VAR, and other elements are zero. Return true
27847 if successful. */
27848
27849 static bool
27850 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
27851 rtx target, rtx var, int one_var)
27852 {
27853 enum machine_mode vsimode;
27854 rtx new_target;
27855 rtx x, tmp;
27856 bool use_vector_set = false;
27857
27858 switch (mode)
27859 {
27860 case V2DImode:
27861 /* For SSE4.1, we normally use vector set. But if the second
27862 element is zero and inter-unit moves are OK, we use movq
27863 instead. */
27864 use_vector_set = (TARGET_64BIT
27865 && TARGET_SSE4_1
27866 && !(TARGET_INTER_UNIT_MOVES
27867 && one_var == 0));
27868 break;
27869 case V16QImode:
27870 case V4SImode:
27871 case V4SFmode:
27872 use_vector_set = TARGET_SSE4_1;
27873 break;
27874 case V8HImode:
27875 use_vector_set = TARGET_SSE2;
27876 break;
27877 case V4HImode:
27878 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
27879 break;
27880 case V32QImode:
27881 case V16HImode:
27882 case V8SImode:
27883 case V8SFmode:
27884 case V4DFmode:
27885 use_vector_set = TARGET_AVX;
27886 break;
27887 case V4DImode:
27888 /* Use ix86_expand_vector_set in 64bit mode only. */
27889 use_vector_set = TARGET_AVX && TARGET_64BIT;
27890 break;
27891 default:
27892 break;
27893 }
27894
27895 if (use_vector_set)
27896 {
27897 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
27898 var = force_reg (GET_MODE_INNER (mode), var);
27899 ix86_expand_vector_set (mmx_ok, target, var, one_var);
27900 return true;
27901 }
27902
27903 switch (mode)
27904 {
27905 case V2SFmode:
27906 case V2SImode:
27907 if (!mmx_ok)
27908 return false;
27909 /* FALLTHRU */
27910
27911 case V2DFmode:
27912 case V2DImode:
27913 if (one_var != 0)
27914 return false;
27915 var = force_reg (GET_MODE_INNER (mode), var);
27916 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
27917 emit_insn (gen_rtx_SET (VOIDmode, target, x));
27918 return true;
27919
27920 case V4SFmode:
27921 case V4SImode:
27922 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
27923 new_target = gen_reg_rtx (mode);
27924 else
27925 new_target = target;
27926 var = force_reg (GET_MODE_INNER (mode), var);
27927 x = gen_rtx_VEC_DUPLICATE (mode, var);
27928 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
27929 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
27930 if (one_var != 0)
27931 {
27932 /* We need to shuffle the value to the correct position, so
27933 create a new pseudo to store the intermediate result. */
27934
27935 /* With SSE2, we can use the integer shuffle insns. */
27936 if (mode != V4SFmode && TARGET_SSE2)
27937 {
27938 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
27939 const1_rtx,
27940 GEN_INT (one_var == 1 ? 0 : 1),
27941 GEN_INT (one_var == 2 ? 0 : 1),
27942 GEN_INT (one_var == 3 ? 0 : 1)));
27943 if (target != new_target)
27944 emit_move_insn (target, new_target);
27945 return true;
27946 }
27947
27948 /* Otherwise convert the intermediate result to V4SFmode and
27949 use the SSE1 shuffle instructions. */
27950 if (mode != V4SFmode)
27951 {
27952 tmp = gen_reg_rtx (V4SFmode);
27953 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
27954 }
27955 else
27956 tmp = new_target;
27957
27958 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
27959 const1_rtx,
27960 GEN_INT (one_var == 1 ? 0 : 1),
27961 GEN_INT (one_var == 2 ? 0+4 : 1+4),
27962 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
27963
27964 if (mode != V4SFmode)
27965 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
27966 else if (tmp != target)
27967 emit_move_insn (target, tmp);
27968 }
27969 else if (target != new_target)
27970 emit_move_insn (target, new_target);
27971 return true;
27972
27973 case V8HImode:
27974 case V16QImode:
27975 vsimode = V4SImode;
27976 goto widen;
27977 case V4HImode:
27978 case V8QImode:
27979 if (!mmx_ok)
27980 return false;
27981 vsimode = V2SImode;
27982 goto widen;
27983 widen:
27984 if (one_var != 0)
27985 return false;
27986
27987 /* Zero extend the variable element to SImode and recurse. */
27988 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
27989
27990 x = gen_reg_rtx (vsimode);
27991 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
27992 var, one_var))
27993 gcc_unreachable ();
27994
27995 emit_move_insn (target, gen_lowpart (mode, x));
27996 return true;
27997
27998 default:
27999 return false;
28000 }
28001 }
28002
28003 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
28004 consisting of the values in VALS. It is known that all elements
28005 except ONE_VAR are constants. Return true if successful. */
28006
28007 static bool
28008 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
28009 rtx target, rtx vals, int one_var)
28010 {
28011 rtx var = XVECEXP (vals, 0, one_var);
28012 enum machine_mode wmode;
28013 rtx const_vec, x;
28014
28015 const_vec = copy_rtx (vals);
28016 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
28017 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
28018
28019 switch (mode)
28020 {
28021 case V2DFmode:
28022 case V2DImode:
28023 case V2SFmode:
28024 case V2SImode:
28025 /* For the two element vectors, it's just as easy to use
28026 the general case. */
28027 return false;
28028
28029 case V4DImode:
28030 /* Use ix86_expand_vector_set in 64bit mode only. */
28031 if (!TARGET_64BIT)
28032 return false;
28033 case V4DFmode:
28034 case V8SFmode:
28035 case V8SImode:
28036 case V16HImode:
28037 case V32QImode:
28038 case V4SFmode:
28039 case V4SImode:
28040 case V8HImode:
28041 case V4HImode:
28042 break;
28043
28044 case V16QImode:
28045 if (TARGET_SSE4_1)
28046 break;
28047 wmode = V8HImode;
28048 goto widen;
28049 case V8QImode:
28050 wmode = V4HImode;
28051 goto widen;
28052 widen:
28053 /* There's no way to set one QImode entry easily. Combine
28054 the variable value with its adjacent constant value, and
28055 promote to an HImode set. */
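      /* Added example: to set element 5 of a V16QImode vector, the variable
         byte is combined with the constant byte at element 4 into one HImode
         value, which is then inserted as element 2 of the V8HImode view of
         the vector.  */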
28056 x = XVECEXP (vals, 0, one_var ^ 1);
28057 if (one_var & 1)
28058 {
28059 var = convert_modes (HImode, QImode, var, true);
28060 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
28061 NULL_RTX, 1, OPTAB_LIB_WIDEN);
28062 x = GEN_INT (INTVAL (x) & 0xff);
28063 }
28064 else
28065 {
28066 var = convert_modes (HImode, QImode, var, true);
28067 x = gen_int_mode (INTVAL (x) << 8, HImode);
28068 }
28069 if (x != const0_rtx)
28070 var = expand_simple_binop (HImode, IOR, var, x, var,
28071 1, OPTAB_LIB_WIDEN);
28072
28073 x = gen_reg_rtx (wmode);
28074 emit_move_insn (x, gen_lowpart (wmode, const_vec));
28075 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
28076
28077 emit_move_insn (target, gen_lowpart (mode, x));
28078 return true;
28079
28080 default:
28081 return false;
28082 }
28083
28084 emit_move_insn (target, const_vec);
28085 ix86_expand_vector_set (mmx_ok, target, var, one_var);
28086 return true;
28087 }
28088
28089 /* A subroutine of ix86_expand_vector_init_general. Use vector
28090 concatenate to handle the most general case: all values variable,
28091 and none identical. */
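/* Added illustration: an 8-element V8SFmode build first forms four V2SFmode
   pairs, concatenates those into two V4SFmode halves, and finally
   concatenates the halves into the V8SFmode target.  */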
28092
28093 static void
28094 ix86_expand_vector_init_concat (enum machine_mode mode,
28095 rtx target, rtx *ops, int n)
28096 {
28097 enum machine_mode cmode, hmode = VOIDmode;
28098 rtx first[8], second[4];
28099 rtvec v;
28100 int i, j;
28101
28102 switch (n)
28103 {
28104 case 2:
28105 switch (mode)
28106 {
28107 case V8SImode:
28108 cmode = V4SImode;
28109 break;
28110 case V8SFmode:
28111 cmode = V4SFmode;
28112 break;
28113 case V4DImode:
28114 cmode = V2DImode;
28115 break;
28116 case V4DFmode:
28117 cmode = V2DFmode;
28118 break;
28119 case V4SImode:
28120 cmode = V2SImode;
28121 break;
28122 case V4SFmode:
28123 cmode = V2SFmode;
28124 break;
28125 case V2DImode:
28126 cmode = DImode;
28127 break;
28128 case V2SImode:
28129 cmode = SImode;
28130 break;
28131 case V2DFmode:
28132 cmode = DFmode;
28133 break;
28134 case V2SFmode:
28135 cmode = SFmode;
28136 break;
28137 default:
28138 gcc_unreachable ();
28139 }
28140
28141 if (!register_operand (ops[1], cmode))
28142 ops[1] = force_reg (cmode, ops[1]);
28143 if (!register_operand (ops[0], cmode))
28144 ops[0] = force_reg (cmode, ops[0]);
28145 emit_insn (gen_rtx_SET (VOIDmode, target,
28146 gen_rtx_VEC_CONCAT (mode, ops[0],
28147 ops[1])));
28148 break;
28149
28150 case 4:
28151 switch (mode)
28152 {
28153 case V4DImode:
28154 cmode = V2DImode;
28155 break;
28156 case V4DFmode:
28157 cmode = V2DFmode;
28158 break;
28159 case V4SImode:
28160 cmode = V2SImode;
28161 break;
28162 case V4SFmode:
28163 cmode = V2SFmode;
28164 break;
28165 default:
28166 gcc_unreachable ();
28167 }
28168 goto half;
28169
28170 case 8:
28171 switch (mode)
28172 {
28173 case V8SImode:
28174 cmode = V2SImode;
28175 hmode = V4SImode;
28176 break;
28177 case V8SFmode:
28178 cmode = V2SFmode;
28179 hmode = V4SFmode;
28180 break;
28181 default:
28182 gcc_unreachable ();
28183 }
28184 goto half;
28185
28186 half:
28187 /* FIXME: We process inputs backward to help RA. PR 36222. */
28188 i = n - 1;
28189 j = (n >> 1) - 1;
28190 for (; i > 0; i -= 2, j--)
28191 {
28192 first[j] = gen_reg_rtx (cmode);
28193 v = gen_rtvec (2, ops[i - 1], ops[i]);
28194 ix86_expand_vector_init (false, first[j],
28195 gen_rtx_PARALLEL (cmode, v));
28196 }
28197
28198 n >>= 1;
28199 if (n > 2)
28200 {
28201 gcc_assert (hmode != VOIDmode);
28202 for (i = j = 0; i < n; i += 2, j++)
28203 {
28204 second[j] = gen_reg_rtx (hmode);
28205 ix86_expand_vector_init_concat (hmode, second [j],
28206 &first [i], 2);
28207 }
28208 n >>= 1;
28209 ix86_expand_vector_init_concat (mode, target, second, n);
28210 }
28211 else
28212 ix86_expand_vector_init_concat (mode, target, first, n);
28213 break;
28214
28215 default:
28216 gcc_unreachable ();
28217 }
28218 }
28219
28220 /* A subroutine of ix86_expand_vector_init_general. Use vector
28221 interleave to handle the most general case: all values variable,
28222 and none identical. */
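/* Added overview (informal): scalars are loaded two at a time into the low
   part of a vector register, then the partial vectors are repeatedly
   interleaved (punpckl-style) with their neighbours until the full V8HImode
   or V16QImode result is assembled.  */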
28223
28224 static void
28225 ix86_expand_vector_init_interleave (enum machine_mode mode,
28226 rtx target, rtx *ops, int n)
28227 {
28228 enum machine_mode first_imode, second_imode, third_imode, inner_mode;
28229 int i, j;
28230 rtx op0, op1;
28231 rtx (*gen_load_even) (rtx, rtx, rtx);
28232 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
28233 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
28234
28235 switch (mode)
28236 {
28237 case V8HImode:
28238 gen_load_even = gen_vec_setv8hi;
28239 gen_interleave_first_low = gen_vec_interleave_lowv4si;
28240 gen_interleave_second_low = gen_vec_interleave_lowv2di;
28241 inner_mode = HImode;
28242 first_imode = V4SImode;
28243 second_imode = V2DImode;
28244 third_imode = VOIDmode;
28245 break;
28246 case V16QImode:
28247 gen_load_even = gen_vec_setv16qi;
28248 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
28249 gen_interleave_second_low = gen_vec_interleave_lowv4si;
28250 inner_mode = QImode;
28251 first_imode = V8HImode;
28252 second_imode = V4SImode;
28253 third_imode = V2DImode;
28254 break;
28255 default:
28256 gcc_unreachable ();
28257 }
28258
28259 for (i = 0; i < n; i++)
28260 {
28261 /* Extend the odd element to SImode using a paradoxical SUBREG. */

28262 op0 = gen_reg_rtx (SImode);
28263 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
28264
28265 /* Insert the SImode value as low element of V4SImode vector. */
28266 op1 = gen_reg_rtx (V4SImode);
28267 op0 = gen_rtx_VEC_MERGE (V4SImode,
28268 gen_rtx_VEC_DUPLICATE (V4SImode,
28269 op0),
28270 CONST0_RTX (V4SImode),
28271 const1_rtx);
28272 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
28273
28274 /* Cast the V4SImode vector back to a vector in the original mode. */
28275 op0 = gen_reg_rtx (mode);
28276 emit_move_insn (op0, gen_lowpart (mode, op1));
28277
28278 /* Load even elements into the second position. */
28279 emit_insn (gen_load_even (op0,
28280 force_reg (inner_mode,
28281 ops [i + i + 1]),
28282 const1_rtx));
28283
28284 /* Cast vector to FIRST_IMODE vector. */
28285 ops[i] = gen_reg_rtx (first_imode);
28286 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
28287 }
28288
28289 /* Interleave low FIRST_IMODE vectors. */
28290 for (i = j = 0; i < n; i += 2, j++)
28291 {
28292 op0 = gen_reg_rtx (first_imode);
28293 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
28294
28295 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
28296 ops[j] = gen_reg_rtx (second_imode);
28297 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
28298 }
28299
28300 /* Interleave low SECOND_IMODE vectors. */
28301 switch (second_imode)
28302 {
28303 case V4SImode:
28304 for (i = j = 0; i < n / 2; i += 2, j++)
28305 {
28306 op0 = gen_reg_rtx (second_imode);
28307 emit_insn (gen_interleave_second_low (op0, ops[i],
28308 ops[i + 1]));
28309
28310 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
28311 vector. */
28312 ops[j] = gen_reg_rtx (third_imode);
28313 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
28314 }
28315 second_imode = V2DImode;
28316 gen_interleave_second_low = gen_vec_interleave_lowv2di;
28317 /* FALLTHRU */
28318
28319 case V2DImode:
28320 op0 = gen_reg_rtx (second_imode);
28321 emit_insn (gen_interleave_second_low (op0, ops[0],
28322 ops[1]));
28323
28324 /* Cast the SECOND_IMODE vector back to a vector of the original
28325 mode. */
28326 emit_insn (gen_rtx_SET (VOIDmode, target,
28327 gen_lowpart (mode, op0)));
28328 break;
28329
28330 default:
28331 gcc_unreachable ();
28332 }
28333 }
28334
28335 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
28336 all values variable, and none identical. */
28337
28338 static void
28339 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
28340 rtx target, rtx vals)
28341 {
28342 rtx ops[32], op0, op1;
28343 enum machine_mode half_mode = VOIDmode;
28344 int n, i;
28345
28346 switch (mode)
28347 {
28348 case V2SFmode:
28349 case V2SImode:
28350 if (!mmx_ok && !TARGET_SSE)
28351 break;
28352 /* FALLTHRU */
28353
28354 case V8SFmode:
28355 case V8SImode:
28356 case V4DFmode:
28357 case V4DImode:
28358 case V4SFmode:
28359 case V4SImode:
28360 case V2DFmode:
28361 case V2DImode:
28362 n = GET_MODE_NUNITS (mode);
28363 for (i = 0; i < n; i++)
28364 ops[i] = XVECEXP (vals, 0, i);
28365 ix86_expand_vector_init_concat (mode, target, ops, n);
28366 return;
28367
28368 case V32QImode:
28369 half_mode = V16QImode;
28370 goto half;
28371
28372 case V16HImode:
28373 half_mode = V8HImode;
28374 goto half;
28375
28376 half:
28377 n = GET_MODE_NUNITS (mode);
28378 for (i = 0; i < n; i++)
28379 ops[i] = XVECEXP (vals, 0, i);
28380 op0 = gen_reg_rtx (half_mode);
28381 op1 = gen_reg_rtx (half_mode);
28382 ix86_expand_vector_init_interleave (half_mode, op0, ops,
28383 n >> 2);
28384 ix86_expand_vector_init_interleave (half_mode, op1,
28385 &ops [n >> 1], n >> 2);
28386 emit_insn (gen_rtx_SET (VOIDmode, target,
28387 gen_rtx_VEC_CONCAT (mode, op0, op1)));
28388 return;
28389
28390 case V16QImode:
28391 if (!TARGET_SSE4_1)
28392 break;
28393 /* FALLTHRU */
28394
28395 case V8HImode:
28396 if (!TARGET_SSE2)
28397 break;
28398
28399 /* Don't use ix86_expand_vector_init_interleave if we can't
28400 move from GPR to SSE register directly. */
28401 if (!TARGET_INTER_UNIT_MOVES)
28402 break;
28403
28404 n = GET_MODE_NUNITS (mode);
28405 for (i = 0; i < n; i++)
28406 ops[i] = XVECEXP (vals, 0, i);
28407 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
28408 return;
28409
28410 case V4HImode:
28411 case V8QImode:
28412 break;
28413
28414 default:
28415 gcc_unreachable ();
28416 }
28417
28418 {
28419 int i, j, n_elts, n_words, n_elt_per_word;
28420 enum machine_mode inner_mode;
28421 rtx words[4], shift;
28422
28423 inner_mode = GET_MODE_INNER (mode);
28424 n_elts = GET_MODE_NUNITS (mode);
28425 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
28426 n_elt_per_word = n_elts / n_words;
28427 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
28428
28429 for (i = 0; i < n_words; ++i)
28430 {
28431 rtx word = NULL_RTX;
28432
28433 for (j = 0; j < n_elt_per_word; ++j)
28434 {
28435 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
28436 elt = convert_modes (word_mode, inner_mode, elt, true);
28437
28438 if (j == 0)
28439 word = elt;
28440 else
28441 {
28442 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
28443 word, 1, OPTAB_LIB_WIDEN);
28444 word = expand_simple_binop (word_mode, IOR, word, elt,
28445 word, 1, OPTAB_LIB_WIDEN);
28446 }
28447 }
28448
28449 words[i] = word;
28450 }
28451
28452 if (n_words == 1)
28453 emit_move_insn (target, gen_lowpart (mode, words[0]));
28454 else if (n_words == 2)
28455 {
28456 rtx tmp = gen_reg_rtx (mode);
28457 emit_clobber (tmp);
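/* The clobber marks TMP as written in full before the two word-sized
   partial stores below, so dataflow does not treat those subreg stores
   as partial updates of an uninitialized pseudo.  */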
28458 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
28459 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
28460 emit_move_insn (target, tmp);
28461 }
28462 else if (n_words == 4)
28463 {
28464 rtx tmp = gen_reg_rtx (V4SImode);
28465 gcc_assert (word_mode == SImode);
28466 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
28467 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
28468 emit_move_insn (target, gen_lowpart (mode, tmp));
28469 }
28470 else
28471 gcc_unreachable ();
28472 }
28473 }
28474
28475 /* Initialize vector TARGET via VALS. Suppress the use of MMX
28476 instructions unless MMX_OK is true. */
28477
28478 void
28479 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
28480 {
28481 enum machine_mode mode = GET_MODE (target);
28482 enum machine_mode inner_mode = GET_MODE_INNER (mode);
28483 int n_elts = GET_MODE_NUNITS (mode);
28484 int n_var = 0, one_var = -1;
28485 bool all_same = true, all_const_zero = true;
28486 int i;
28487 rtx x;
28488
28489 for (i = 0; i < n_elts; ++i)
28490 {
28491 x = XVECEXP (vals, 0, i);
28492 if (!(CONST_INT_P (x)
28493 || GET_CODE (x) == CONST_DOUBLE
28494 || GET_CODE (x) == CONST_FIXED))
28495 n_var++, one_var = i;
28496 else if (x != CONST0_RTX (inner_mode))
28497 all_const_zero = false;
28498 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
28499 all_same = false;
28500 }
28501
28502 /* Constants are best loaded from the constant pool. */
28503 if (n_var == 0)
28504 {
28505 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
28506 return;
28507 }
28508
28509 /* If all values are identical, broadcast the value. */
28510 if (all_same
28511 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
28512 XVECEXP (vals, 0, 0)))
28513 return;
28514
28515 /* Values where only one field is non-constant are best loaded from
28516 the pool and overwritten via move later. */
28517 if (n_var == 1)
28518 {
28519 if (all_const_zero
28520 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
28521 XVECEXP (vals, 0, one_var),
28522 one_var))
28523 return;
28524
28525 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
28526 return;
28527 }
28528
28529 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
28530 }
28531
28532 void
28533 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
28534 {
28535 enum machine_mode mode = GET_MODE (target);
28536 enum machine_mode inner_mode = GET_MODE_INNER (mode);
28537 enum machine_mode half_mode;
28538 bool use_vec_merge = false;
28539 rtx tmp;
28540 static rtx (*gen_extract[6][2]) (rtx, rtx)
28541 = {
28542 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
28543 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
28544 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
28545 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
28546 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
28547 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
28548 };
28549 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
28550 = {
28551 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
28552 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
28553 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
28554 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
28555 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
28556 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
28557 };
28558 int i, j, n;
28559
28560 switch (mode)
28561 {
28562 case V2SFmode:
28563 case V2SImode:
28564 if (mmx_ok)
28565 {
28566 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
28567 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
28568 if (elt == 0)
28569 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
28570 else
28571 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
28572 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
28573 return;
28574 }
28575 break;
28576
28577 case V2DImode:
28578 use_vec_merge = TARGET_SSE4_1;
28579 if (use_vec_merge)
28580 break;
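/* FALLTHRU */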
28581
28582 case V2DFmode:
28583 {
28584 rtx op0, op1;
28585
28586 /* For the two element vectors, we implement a VEC_CONCAT with
28587 the extraction of the other element. */
28588
28589 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
28590 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
28591
28592 if (elt == 0)
28593 op0 = val, op1 = tmp;
28594 else
28595 op0 = tmp, op1 = val;
28596
28597 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
28598 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
28599 }
28600 return;
28601
28602 case V4SFmode:
28603 use_vec_merge = TARGET_SSE4_1;
28604 if (use_vec_merge)
28605 break;
28606
28607 switch (elt)
28608 {
28609 case 0:
28610 use_vec_merge = true;
28611 break;
28612
28613 case 1:
28614 /* tmp = target = A B C D */
28615 tmp = copy_to_reg (target);
28616 /* target = A A B B */
28617 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
28618 /* target = X A B B */
28619 ix86_expand_vector_set (false, target, val, 0);
28620 /* target = A X C D */
28621 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
28622 const1_rtx, const0_rtx,
28623 GEN_INT (2+4), GEN_INT (3+4)));
28624 return;
28625
28626 case 2:
28627 /* tmp = target = A B C D */
28628 tmp = copy_to_reg (target);
28629 /* tmp = X B C D */
28630 ix86_expand_vector_set (false, tmp, val, 0);
28631 /* target = A B X D */
28632 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
28633 const0_rtx, const1_rtx,
28634 GEN_INT (0+4), GEN_INT (3+4)));
28635 return;
28636
28637 case 3:
28638 /* tmp = target = A B C D */
28639 tmp = copy_to_reg (target);
28640 /* tmp = X B C D */
28641 ix86_expand_vector_set (false, tmp, val, 0);
28642 /* target = A B C X */
28643 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
28644 const0_rtx, const1_rtx,
28645 GEN_INT (2+4), GEN_INT (0+4)));
28646 return;
28647
28648 default:
28649 gcc_unreachable ();
28650 }
28651 break;
28652
28653 case V4SImode:
28654 use_vec_merge = TARGET_SSE4_1;
28655 if (use_vec_merge)
28656 break;
28657
28658 /* Element 0 handled by vec_merge below. */
28659 if (elt == 0)
28660 {
28661 use_vec_merge = true;
28662 break;
28663 }
28664
28665 if (TARGET_SSE2)
28666 {
28667 /* With SSE2, use integer shuffles to swap element 0 and ELT,
28668 store into element 0, then shuffle them back. */
28669
28670 rtx order[4];
28671
28672 order[0] = GEN_INT (elt);
28673 order[1] = const1_rtx;
28674 order[2] = const2_rtx;
28675 order[3] = GEN_INT (3);
28676 order[elt] = const0_rtx;
28677
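/* ORDER swaps lanes 0 and ELT (e.g. {2, 1, 0, 3} for ELT == 2) and is
   its own inverse, so the first PSHUFD brings element ELT into lane 0,
   the vector_set stores VAL there, and the second PSHUFD moves it back
   into lane ELT.  */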
28678 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
28679 order[1], order[2], order[3]));
28680
28681 ix86_expand_vector_set (false, target, val, 0);
28682
28683 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
28684 order[1], order[2], order[3]));
28685 }
28686 else
28687 {
28688 /* For SSE1, we have to reuse the V4SF code. */
28689 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
28690 gen_lowpart (SFmode, val), elt);
28691 }
28692 return;
28693
28694 case V8HImode:
28695 use_vec_merge = TARGET_SSE2;
28696 break;
28697 case V4HImode:
28698 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
28699 break;
28700
28701 case V16QImode:
28702 use_vec_merge = TARGET_SSE4_1;
28703 break;
28704
28705 case V8QImode:
28706 break;
28707
28708 case V32QImode:
28709 half_mode = V16QImode;
28710 j = 0;
28711 n = 16;
28712 goto half;
28713
28714 case V16HImode:
28715 half_mode = V8HImode;
28716 j = 1;
28717 n = 8;
28718 goto half;
28719
28720 case V8SImode:
28721 half_mode = V4SImode;
28722 j = 2;
28723 n = 4;
28724 goto half;
28725
28726 case V4DImode:
28727 half_mode = V2DImode;
28728 j = 3;
28729 n = 2;
28730 goto half;
28731
28732 case V8SFmode:
28733 half_mode = V4SFmode;
28734 j = 4;
28735 n = 4;
28736 goto half;
28737
28738 case V4DFmode:
28739 half_mode = V2DFmode;
28740 j = 5;
28741 n = 2;
28742 goto half;
28743
28744 half:
28745 /* Compute offset. */
28746 i = elt / n;
28747 elt %= n;
28748
28749 gcc_assert (i <= 1);
28750
28751 /* Extract the half. */
28752 tmp = gen_reg_rtx (half_mode);
28753 emit_insn (gen_extract[j][i] (tmp, target));
28754
28755 /* Put val in tmp at elt. */
28756 ix86_expand_vector_set (false, tmp, val, elt);
28757
28758 /* Put it back. */
28759 emit_insn (gen_insert[j][i] (target, target, tmp));
28760 return;
28761
28762 default:
28763 break;
28764 }
28765
28766 if (use_vec_merge)
28767 {
28768 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
28769 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
28770 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
28771 }
28772 else
28773 {
28774 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
28775
28776 emit_move_insn (mem, target);
28777
28778 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
28779 emit_move_insn (tmp, val);
28780
28781 emit_move_insn (target, mem);
28782 }
28783 }
28784
28785 void
28786 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
28787 {
28788 enum machine_mode mode = GET_MODE (vec);
28789 enum machine_mode inner_mode = GET_MODE_INNER (mode);
28790 bool use_vec_extr = false;
28791 rtx tmp;
28792
28793 switch (mode)
28794 {
28795 case V2SImode:
28796 case V2SFmode:
28797 if (!mmx_ok)
28798 break;
28799 /* FALLTHRU */
28800
28801 case V2DFmode:
28802 case V2DImode:
28803 use_vec_extr = true;
28804 break;
28805
28806 case V4SFmode:
28807 use_vec_extr = TARGET_SSE4_1;
28808 if (use_vec_extr)
28809 break;
28810
28811 switch (elt)
28812 {
28813 case 0:
28814 tmp = vec;
28815 break;
28816
28817 case 1:
28818 case 3:
28819 tmp = gen_reg_rtx (mode);
28820 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
28821 GEN_INT (elt), GEN_INT (elt),
28822 GEN_INT (elt+4), GEN_INT (elt+4)));
28823 break;
28824
28825 case 2:
28826 tmp = gen_reg_rtx (mode);
28827 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
28828 break;
28829
28830 default:
28831 gcc_unreachable ();
28832 }
28833 vec = tmp;
28834 use_vec_extr = true;
28835 elt = 0;
28836 break;
28837
28838 case V4SImode:
28839 use_vec_extr = TARGET_SSE4_1;
28840 if (use_vec_extr)
28841 break;
28842
28843 if (TARGET_SSE2)
28844 {
28845 switch (elt)
28846 {
28847 case 0:
28848 tmp = vec;
28849 break;
28850
28851 case 1:
28852 case 3:
28853 tmp = gen_reg_rtx (mode);
28854 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
28855 GEN_INT (elt), GEN_INT (elt),
28856 GEN_INT (elt), GEN_INT (elt)));
28857 break;
28858
28859 case 2:
28860 tmp = gen_reg_rtx (mode);
28861 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
28862 break;
28863
28864 default:
28865 gcc_unreachable ();
28866 }
28867 vec = tmp;
28868 use_vec_extr = true;
28869 elt = 0;
28870 }
28871 else
28872 {
28873 /* For SSE1, we have to reuse the V4SF code. */
28874 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
28875 gen_lowpart (V4SFmode, vec), elt);
28876 return;
28877 }
28878 break;
28879
28880 case V8HImode:
28881 use_vec_extr = TARGET_SSE2;
28882 break;
28883 case V4HImode:
28884 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
28885 break;
28886
28887 case V16QImode:
28888 use_vec_extr = TARGET_SSE4_1;
28889 break;
28890
28891 case V8QImode:
28892 /* ??? Could extract the appropriate HImode element and shift. */
28893 default:
28894 break;
28895 }
28896
28897 if (use_vec_extr)
28898 {
28899 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
28900 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
28901
28902 /* Let the rtl optimizers know about the zero extension performed. */
28903 if (inner_mode == QImode || inner_mode == HImode)
28904 {
28905 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
28906 target = gen_lowpart (SImode, target);
28907 }
28908
28909 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
28910 }
28911 else
28912 {
28913 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
28914
28915 emit_move_insn (mem, vec);
28916
28917 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
28918 emit_move_insn (target, tmp);
28919 }
28920 }
28921
28922 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
28923 pattern to reduce; DEST is the destination; IN is the input vector. */
28924
28925 void
28926 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
28927 {
28928 rtx tmp1, tmp2, tmp3;
28929
28930 tmp1 = gen_reg_rtx (V4SFmode);
28931 tmp2 = gen_reg_rtx (V4SFmode);
28932 tmp3 = gen_reg_rtx (V4SFmode);
28933
28934 emit_insn (gen_sse_movhlps (tmp1, in, in));
28935 emit_insn (fn (tmp2, tmp1, in));
28936
28937 emit_insn (gen_sse_shufps_v4sf (tmp3, tmp2, tmp2,
28938 const1_rtx, const1_rtx,
28939 GEN_INT (1+4), GEN_INT (1+4)));
28940 emit_insn (fn (dest, tmp2, tmp3));
28941 }
28942 \f
28943 /* Target hook for scalar_mode_supported_p. */
28944 static bool
28945 ix86_scalar_mode_supported_p (enum machine_mode mode)
28946 {
28947 if (DECIMAL_FLOAT_MODE_P (mode))
28948 return default_decimal_float_supported_p ();
28949 else if (mode == TFmode)
28950 return true;
28951 else
28952 return default_scalar_mode_supported_p (mode);
28953 }
28954
28955 /* Implements target hook vector_mode_supported_p. */
28956 static bool
28957 ix86_vector_mode_supported_p (enum machine_mode mode)
28958 {
28959 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
28960 return true;
28961 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
28962 return true;
28963 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
28964 return true;
28965 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
28966 return true;
28967 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
28968 return true;
28969 return false;
28970 }
28971
28972 /* Target hook for c_mode_for_suffix. */
28973 static enum machine_mode
28974 ix86_c_mode_for_suffix (char suffix)
28975 {
28976 if (suffix == 'q')
28977 return TFmode;
28978 if (suffix == 'w')
28979 return XFmode;
28980
28981 return VOIDmode;
28982 }
28983
28984 /* Worker function for TARGET_MD_ASM_CLOBBERS.
28985
28986 We do this in the new i386 backend to maintain source compatibility
28987 with the old cc0-based compiler. */
28988
28989 static tree
28990 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
28991 tree inputs ATTRIBUTE_UNUSED,
28992 tree clobbers)
28993 {
28994 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
28995 clobbers);
28996 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
28997 clobbers);
28998 return clobbers;
28999 }
29000
29001 /* Implements target vector targetm.asm.encode_section_info. This
29002 is not used by NetWare. */
29003
29004 static void ATTRIBUTE_UNUSED
29005 ix86_encode_section_info (tree decl, rtx rtl, int first)
29006 {
29007 default_encode_section_info (decl, rtl, first);
29008
29009 if (TREE_CODE (decl) == VAR_DECL
29010 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
29011 && ix86_in_large_data_p (decl))
29012 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
29013 }
29014
29015 /* Worker function for REVERSE_CONDITION. */
29016
29017 enum rtx_code
29018 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
29019 {
29020 return (mode != CCFPmode && mode != CCFPUmode
29021 ? reverse_condition (code)
29022 : reverse_condition_maybe_unordered (code));
29023 }
29024
29025 /* Output code to perform an x87 FP register move, from OPERANDS[1]
29026 to OPERANDS[0]. */
29027
29028 const char *
29029 output_387_reg_move (rtx insn, rtx *operands)
29030 {
29031 if (REG_P (operands[0]))
29032 {
29033 if (REG_P (operands[1])
29034 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
29035 {
29036 if (REGNO (operands[0]) == FIRST_STACK_REG)
29037 return output_387_ffreep (operands, 0);
29038 return "fstp\t%y0";
29039 }
29040 if (STACK_TOP_P (operands[0]))
29041 return "fld%Z1\t%y1";
29042 return "fst\t%y0";
29043 }
29044 else if (MEM_P (operands[0]))
29045 {
29046 gcc_assert (REG_P (operands[1]));
29047 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
29048 return "fstp%Z0\t%y0";
29049 else
29050 {
29051 /* There is no non-popping store to memory for XFmode.
29052 So if we need one, follow the store with a load. */
29053 if (GET_MODE (operands[0]) == XFmode)
29054 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
29055 else
29056 return "fst%Z0\t%y0";
29057 }
29058 }
29059 else
29060 gcc_unreachable();
29061 }
29062
29063 /* Output code to perform a conditional jump to LABEL, if C2 flag in
29064 FP status register is set. */
29065
29066 void
29067 ix86_emit_fp_unordered_jump (rtx label)
29068 {
29069 rtx reg = gen_reg_rtx (HImode);
29070 rtx temp;
29071
29072 emit_insn (gen_x86_fnstsw_1 (reg));
29073
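/* C2 is bit 10 of the FP status word, i.e. bit 2 of the high byte
   stored by FNSTSW.  With SAHF that bit ends up in PF, which the
   UNORDERED test below checks; without SAHF, test the bit directly.  */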
29074 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
29075 {
29076 emit_insn (gen_x86_sahf_1 (reg));
29077
29078 temp = gen_rtx_REG (CCmode, FLAGS_REG);
29079 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
29080 }
29081 else
29082 {
29083 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
29084
29085 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
29086 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
29087 }
29088
29089 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
29090 gen_rtx_LABEL_REF (VOIDmode, label),
29091 pc_rtx);
29092 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
29093
29094 emit_jump_insn (temp);
29095 predict_jump (REG_BR_PROB_BASE * 10 / 100);
29096 }
29097
29098 /* Output code to perform a log1p XFmode calculation. */
29099
29100 void ix86_emit_i387_log1p (rtx op0, rtx op1)
29101 {
29102 rtx label1 = gen_label_rtx ();
29103 rtx label2 = gen_label_rtx ();
29104
29105 rtx tmp = gen_reg_rtx (XFmode);
29106 rtx tmp2 = gen_reg_rtx (XFmode);
29107 rtx test;
29108
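/* 0.29289321881... is 1 - sqrt(2)/2, the bound the x87 documentation
   gives for fyl2xp1's argument; within it, fyl2xp1 (op1, ln2) computes
   ln (1 + op1) accurately, otherwise fall back to fyl2x (1 + op1, ln2).  */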
29109 emit_insn (gen_absxf2 (tmp, op1));
29110 test = gen_rtx_GE (VOIDmode, tmp,
29111 CONST_DOUBLE_FROM_REAL_VALUE (
29112 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
29113 XFmode));
29114 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
29115
29116 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
29117 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
29118 emit_jump (label2);
29119
29120 emit_label (label1);
29121 emit_move_insn (tmp, CONST1_RTX (XFmode));
29122 emit_insn (gen_addxf3 (tmp, op1, tmp));
29123 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
29124 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
29125
29126 emit_label (label2);
29127 }
29128
29129 /* Output code to perform a Newton-Raphson approximation of a single precision
29130 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
29131
29132 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
29133 {
29134 rtx x0, x1, e0, e1, two;
29135
29136 x0 = gen_reg_rtx (mode);
29137 e0 = gen_reg_rtx (mode);
29138 e1 = gen_reg_rtx (mode);
29139 x1 = gen_reg_rtx (mode);
29140
29141 two = CONST_DOUBLE_FROM_REAL_VALUE (dconst2, SFmode);
29142
29143 if (VECTOR_MODE_P (mode))
29144 two = ix86_build_const_vector (SFmode, true, two);
29145
29146 two = force_reg (mode, two);
29147
29148 /* a / b = a * rcp(b) * (2.0 - b * rcp(b)) */
29149
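/* x0 * (2 - b * x0) is one Newton-Raphson step for 1/b; starting from
   the ~12-bit RCPPS estimate it roughly doubles the number of correct
   bits.  The multiply by a is folded in by forming e0 = a * x0 first.  */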
29150 /* x0 = rcp(b) estimate */
29151 emit_insn (gen_rtx_SET (VOIDmode, x0,
29152 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
29153 UNSPEC_RCP)));
29154 /* e0 = x0 * a */
29155 emit_insn (gen_rtx_SET (VOIDmode, e0,
29156 gen_rtx_MULT (mode, x0, a)));
29157 /* e1 = x0 * b */
29158 emit_insn (gen_rtx_SET (VOIDmode, e1,
29159 gen_rtx_MULT (mode, x0, b)));
29160 /* x1 = 2. - e1 */
29161 emit_insn (gen_rtx_SET (VOIDmode, x1,
29162 gen_rtx_MINUS (mode, two, e1)));
29163 /* res = e0 * x1 */
29164 emit_insn (gen_rtx_SET (VOIDmode, res,
29165 gen_rtx_MULT (mode, e0, x1)));
29166 }
29167
29168 /* Output code to perform a Newton-Raphson approximation of a
29169 single precision floating point [reciprocal] square root. */
29170
29171 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
29172 bool recip)
29173 {
29174 rtx x0, e0, e1, e2, e3, mthree, mhalf;
29175 REAL_VALUE_TYPE r;
29176
29177 x0 = gen_reg_rtx (mode);
29178 e0 = gen_reg_rtx (mode);
29179 e1 = gen_reg_rtx (mode);
29180 e2 = gen_reg_rtx (mode);
29181 e3 = gen_reg_rtx (mode);
29182
29183 real_from_integer (&r, VOIDmode, -3, -1, 0);
29184 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
29185
29186 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
29187 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
29188
29189 if (VECTOR_MODE_P (mode))
29190 {
29191 mthree = ix86_build_const_vector (SFmode, true, mthree);
29192 mhalf = ix86_build_const_vector (SFmode, true, mhalf);
29193 }
29194
29195 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
29196 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
29197
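/* This is one Newton-Raphson step for 1/sqrt(a) starting from the
   ~12-bit RSQRTPS estimate x0: x1 = -0.5 * x0 * (a * x0 * x0 - 3.0).
   Multiplying by a first (E0 below) yields the sqrt variant instead.  */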
29198 /* x0 = rsqrt(a) estimate */
29199 emit_insn (gen_rtx_SET (VOIDmode, x0,
29200 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
29201 UNSPEC_RSQRT)));
29202
29203 /* If a == 0.0, the rsqrt estimate is +Inf; mask x0 to zero in that case so sqrt (0.0) does not get computed as 0 * Inf = NaN. */
29204 if (!recip)
29205 {
29206 rtx zero, mask;
29207
29208 zero = gen_reg_rtx (mode);
29209 mask = gen_reg_rtx (mode);
29210
29211 zero = force_reg (mode, CONST0_RTX(mode));
29212 emit_insn (gen_rtx_SET (VOIDmode, mask,
29213 gen_rtx_NE (mode, zero, a)));
29214
29215 emit_insn (gen_rtx_SET (VOIDmode, x0,
29216 gen_rtx_AND (mode, x0, mask)));
29217 }
29218
29219 /* e0 = x0 * a */
29220 emit_insn (gen_rtx_SET (VOIDmode, e0,
29221 gen_rtx_MULT (mode, x0, a)));
29222 /* e1 = e0 * x0 */
29223 emit_insn (gen_rtx_SET (VOIDmode, e1,
29224 gen_rtx_MULT (mode, e0, x0)));
29225
29226 /* e2 = e1 - 3. */
29227 mthree = force_reg (mode, mthree);
29228 emit_insn (gen_rtx_SET (VOIDmode, e2,
29229 gen_rtx_PLUS (mode, e1, mthree)));
29230
29231 mhalf = force_reg (mode, mhalf);
29232 if (recip)
29233 /* e3 = -.5 * x0 */
29234 emit_insn (gen_rtx_SET (VOIDmode, e3,
29235 gen_rtx_MULT (mode, x0, mhalf)));
29236 else
29237 /* e3 = -.5 * e0 */
29238 emit_insn (gen_rtx_SET (VOIDmode, e3,
29239 gen_rtx_MULT (mode, e0, mhalf)));
29240 /* ret = e2 * e3 */
29241 emit_insn (gen_rtx_SET (VOIDmode, res,
29242 gen_rtx_MULT (mode, e2, e3)));
29243 }
29244
29245 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
29246
29247 static void ATTRIBUTE_UNUSED
29248 i386_solaris_elf_named_section (const char *name, unsigned int flags,
29249 tree decl)
29250 {
29251 /* With Binutils 2.15, the "@unwind" marker must be specified on
29252 every occurrence of the ".eh_frame" section, not just the first
29253 one. */
29254 if (TARGET_64BIT
29255 && strcmp (name, ".eh_frame") == 0)
29256 {
29257 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
29258 flags & SECTION_WRITE ? "aw" : "a");
29259 return;
29260 }
29261 default_elf_asm_named_section (name, flags, decl);
29262 }
29263
29264 /* Return the mangling of TYPE if it is an extended fundamental type. */
29265
29266 static const char *
29267 ix86_mangle_type (const_tree type)
29268 {
29269 type = TYPE_MAIN_VARIANT (type);
29270
29271 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
29272 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
29273 return NULL;
29274
29275 switch (TYPE_MODE (type))
29276 {
29277 case TFmode:
29278 /* __float128 is "g". */
29279 return "g";
29280 case XFmode:
29281 /* "long double" or __float80 is "e". */
29282 return "e";
29283 default:
29284 return NULL;
29285 }
29286 }
29287
29288 /* For 32-bit code we can save PIC register setup by using
29289 __stack_chk_fail_local hidden function instead of calling
29290 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
29291 register, so it is better to call __stack_chk_fail directly. */
29292
29293 static tree
29294 ix86_stack_protect_fail (void)
29295 {
29296 return TARGET_64BIT
29297 ? default_external_stack_protect_fail ()
29298 : default_hidden_stack_protect_fail ();
29299 }
29300
29301 /* Select a format to encode pointers in exception handling data. CODE
29302 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
29303 true if the symbol may be affected by dynamic relocations.
29304
29305 ??? All x86 object file formats are capable of representing this.
29306 After all, the relocation needed is the same as for the call insn.
29307 Whether or not a particular assembler allows us to enter such, I
29308 guess we'll have to see. */
29309 int
29310 asm_preferred_eh_data_format (int code, int global)
29311 {
29312 if (flag_pic)
29313 {
29314 int type = DW_EH_PE_sdata8;
29315 if (!TARGET_64BIT
29316 || ix86_cmodel == CM_SMALL_PIC
29317 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
29318 type = DW_EH_PE_sdata4;
29319 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
29320 }
29321 if (ix86_cmodel == CM_SMALL
29322 || (ix86_cmodel == CM_MEDIUM && code))
29323 return DW_EH_PE_udata4;
29324 return DW_EH_PE_absptr;
29325 }
29326 \f
29327 /* Expand copysign from SIGN to the positive value ABS_VALUE
29328 storing in RESULT. If MASK is non-null, it is the inverted sign-bit mask
29329 (all bits set except the sign bit), e.g. as produced by ix86_expand_sse_fabs. */
29330 static void
29331 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
29332 {
29333 enum machine_mode mode = GET_MODE (sign);
29334 rtx sgn = gen_reg_rtx (mode);
29335 if (mask == NULL_RTX)
29336 {
29337 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
29338 if (!VECTOR_MODE_P (mode))
29339 {
29340 /* We need to generate a scalar mode mask in this case. */
29341 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
29342 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
29343 mask = gen_reg_rtx (mode);
29344 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
29345 }
29346 }
29347 else
29348 mask = gen_rtx_NOT (mode, mask);
29349 emit_insn (gen_rtx_SET (VOIDmode, sgn,
29350 gen_rtx_AND (mode, mask, sign)));
29351 emit_insn (gen_rtx_SET (VOIDmode, result,
29352 gen_rtx_IOR (mode, abs_value, sgn)));
29353 }
29354
29355 /* Expand fabs (OP0) and return a new rtx that holds the result. The
29356 mask for masking out the sign-bit is stored in *SMASK, if that is
29357 non-null. */
29358 static rtx
29359 ix86_expand_sse_fabs (rtx op0, rtx *smask)
29360 {
29361 enum machine_mode mode = GET_MODE (op0);
29362 rtx xa, mask;
29363
29364 xa = gen_reg_rtx (mode);
29365 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
29366 if (!VECTOR_MODE_P (mode))
29367 {
29368 /* We need to generate a scalar mode mask in this case. */
29369 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
29370 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
29371 mask = gen_reg_rtx (mode);
29372 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
29373 }
29374 emit_insn (gen_rtx_SET (VOIDmode, xa,
29375 gen_rtx_AND (mode, op0, mask)));
29376
29377 if (smask)
29378 *smask = mask;
29379
29380 return xa;
29381 }
29382
29383 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
29384 swapping the operands if SWAP_OPERANDS is true. The expanded
29385 code is a forward jump to a newly created label in case the
29386 comparison is true. The generated label rtx is returned. */
29387 static rtx
29388 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
29389 bool swap_operands)
29390 {
29391 rtx label, tmp;
29392
29393 if (swap_operands)
29394 {
29395 tmp = op0;
29396 op0 = op1;
29397 op1 = tmp;
29398 }
29399
29400 label = gen_label_rtx ();
29401 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
29402 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29403 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
29404 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
29405 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
29406 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
29407 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
29408 JUMP_LABEL (tmp) = label;
29409
29410 return label;
29411 }
29412
29413 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
29414 using comparison code CODE. Operands are swapped for the comparison if
29415 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
29416 static rtx
29417 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
29418 bool swap_operands)
29419 {
29420 enum machine_mode mode = GET_MODE (op0);
29421 rtx mask = gen_reg_rtx (mode);
29422
29423 if (swap_operands)
29424 {
29425 rtx tmp = op0;
29426 op0 = op1;
29427 op1 = tmp;
29428 }
29429
29430 if (mode == DFmode)
29431 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
29432 gen_rtx_fmt_ee (code, mode, op0, op1)));
29433 else
29434 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
29435 gen_rtx_fmt_ee (code, mode, op0, op1)));
29436
29437 return mask;
29438 }
29439
29440 /* Generate and return a rtx of mode MODE for 2**n where n is the number
29441 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
29442 static rtx
29443 ix86_gen_TWO52 (enum machine_mode mode)
29444 {
29445 REAL_VALUE_TYPE TWO52r;
29446 rtx TWO52;
29447
29448 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
29449 TWO52 = const_double_from_real_value (TWO52r, mode);
29450 TWO52 = force_reg (mode, TWO52);
29451
29452 return TWO52;
29453 }
29454
29455 /* Expand SSE sequence for computing lround from OP1 storing
29456 into OP0. */
29457 void
29458 ix86_expand_lround (rtx op0, rtx op1)
29459 {
29460 /* C code for the stuff we're doing below:
29461 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
29462 return (long)tmp;
29463 */
29464 enum machine_mode mode = GET_MODE (op1);
29465 const struct real_format *fmt;
29466 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
29467 rtx adj;
29468
29469 /* load nextafter (0.5, 0.0) */
29470 fmt = REAL_MODE_FORMAT (mode);
29471 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
29472 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
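/* Using the largest representable value below 0.5, rather than 0.5
   itself, keeps inputs just under one half (e.g. the predecessor of
   0.5) from being rounded up to the next integer by the addition.  */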
29473
29474 /* adj = copysign (0.5, op1) */
29475 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
29476 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
29477
29478 /* adj = op1 + adj */
29479 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
29480
29481 /* op0 = (imode)adj */
29482 expand_fix (op0, adj, 0);
29483 }
29484
29485 /* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1 storing
29486 into OPERAND0. */
29487 void
29488 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
29489 {
29490 /* C code for the stuff we're doing below (for do_floor):
29491 xi = (long)op1;
29492 xi -= (double)xi > op1 ? 1 : 0;
29493 return xi;
29494 */
29495 enum machine_mode fmode = GET_MODE (op1);
29496 enum machine_mode imode = GET_MODE (op0);
29497 rtx ireg, freg, label, tmp;
29498
29499 /* reg = (long)op1 */
29500 ireg = gen_reg_rtx (imode);
29501 expand_fix (ireg, op1, 0);
29502
29503 /* freg = (double)reg */
29504 freg = gen_reg_rtx (fmode);
29505 expand_float (freg, ireg, 0);
29506
29507 /* ireg = (freg > op1) ? ireg - 1 : ireg */
29508 label = ix86_expand_sse_compare_and_jump (UNLE,
29509 freg, op1, !do_floor);
29510 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
29511 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
29512 emit_move_insn (ireg, tmp);
29513
29514 emit_label (label);
29515 LABEL_NUSES (label) = 1;
29516
29517 emit_move_insn (op0, ireg);
29518 }
29519
29520 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
29521 result in OPERAND0. */
29522 void
29523 ix86_expand_rint (rtx operand0, rtx operand1)
29524 {
29525 /* C code for the stuff we're doing below:
29526 xa = fabs (operand1);
29527 if (!isless (xa, 2**52))
29528 return operand1;
29529 xa = xa + 2**52 - 2**52;
29530 return copysign (xa, operand1);
29531 */
29532 enum machine_mode mode = GET_MODE (operand0);
29533 rtx res, xa, label, TWO52, mask;
29534
29535 res = gen_reg_rtx (mode);
29536 emit_move_insn (res, operand1);
29537
29538 /* xa = abs (operand1) */
29539 xa = ix86_expand_sse_fabs (res, &mask);
29540
29541 /* if (!isless (xa, TWO52)) goto label; */
29542 TWO52 = ix86_gen_TWO52 (mode);
29543 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29544
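/* For |xa| < 2**52, adding TWO52 pushes the fraction bits out of the
   mantissa (they are rounded away in the current rounding mode) and
   subtracting TWO52 again leaves the rounded integer value; doubles at
   or above 2**52 are already integral and were handled above.  */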
29545 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29546 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
29547
29548 ix86_sse_copysign_to_positive (res, xa, res, mask);
29549
29550 emit_label (label);
29551 LABEL_NUSES (label) = 1;
29552
29553 emit_move_insn (operand0, res);
29554 }
29555
29556 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
29557 into OPERAND0. */
29558 void
29559 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
29560 {
29561 /* C code for the stuff we expand below.
29562 double xa = fabs (x), x2;
29563 if (!isless (xa, TWO52))
29564 return x;
29565 xa = xa + TWO52 - TWO52;
29566 x2 = copysign (xa, x);
29567 Compensate. Floor:
29568 if (x2 > x)
29569 x2 -= 1;
29570 Compensate. Ceil:
29571 if (x2 < x)
29572 x2 -= -1;
29573 return x2;
29574 */
29575 enum machine_mode mode = GET_MODE (operand0);
29576 rtx xa, TWO52, tmp, label, one, res, mask;
29577
29578 TWO52 = ix86_gen_TWO52 (mode);
29579
29580 /* Temporary for holding the result, initialized to the input
29581 operand to ease control flow. */
29582 res = gen_reg_rtx (mode);
29583 emit_move_insn (res, operand1);
29584
29585 /* xa = abs (operand1) */
29586 xa = ix86_expand_sse_fabs (res, &mask);
29587
29588 /* if (!isless (xa, TWO52)) goto label; */
29589 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29590
29591 /* xa = xa + TWO52 - TWO52; */
29592 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29593 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
29594
29595 /* xa = copysign (xa, operand1) */
29596 ix86_sse_copysign_to_positive (xa, xa, res, mask);
29597
29598 /* generate 1.0 or -1.0 */
29599 one = force_reg (mode,
29600 const_double_from_real_value (do_floor
29601 ? dconst1 : dconstm1, mode));
29602
29603 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
29604 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
29605 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29606 gen_rtx_AND (mode, one, tmp)));
29607 /* We always need to subtract here to preserve signed zero. */
29608 tmp = expand_simple_binop (mode, MINUS,
29609 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29610 emit_move_insn (res, tmp);
29611
29612 emit_label (label);
29613 LABEL_NUSES (label) = 1;
29614
29615 emit_move_insn (operand0, res);
29616 }
29617
29618 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
29619 into OPERAND0. */
29620 void
29621 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
29622 {
29623 /* C code for the stuff we expand below.
29624 double xa = fabs (x), x2;
29625 if (!isless (xa, TWO52))
29626 return x;
29627 x2 = (double)(long)x;
29628 Compensate. Floor:
29629 if (x2 > x)
29630 x2 -= 1;
29631 Compensate. Ceil:
29632 if (x2 < x)
29633 x2 += 1;
29634 if (HONOR_SIGNED_ZEROS (mode))
29635 return copysign (x2, x);
29636 return x2;
29637 */
29638 enum machine_mode mode = GET_MODE (operand0);
29639 rtx xa, xi, TWO52, tmp, label, one, res, mask;
29640
29641 TWO52 = ix86_gen_TWO52 (mode);
29642
29643 /* Temporary for holding the result, initialized to the input
29644 operand to ease control flow. */
29645 res = gen_reg_rtx (mode);
29646 emit_move_insn (res, operand1);
29647
29648 /* xa = abs (operand1) */
29649 xa = ix86_expand_sse_fabs (res, &mask);
29650
29651 /* if (!isless (xa, TWO52)) goto label; */
29652 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29653
29654 /* xa = (double)(long)x */
29655 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
29656 expand_fix (xi, res, 0);
29657 expand_float (xa, xi, 0);
29658
29659 /* generate 1.0 */
29660 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
29661
29662 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
29663 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
29664 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29665 gen_rtx_AND (mode, one, tmp)));
29666 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
29667 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29668 emit_move_insn (res, tmp);
29669
29670 if (HONOR_SIGNED_ZEROS (mode))
29671 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
29672
29673 emit_label (label);
29674 LABEL_NUSES (label) = 1;
29675
29676 emit_move_insn (operand0, res);
29677 }
29678
29679 /* Expand SSE sequence for computing round from OPERAND1 storing
29680 into OPERAND0. Sequence that works without relying on DImode truncation
29681 via cvttsd2siq, which is only available on 64-bit targets. */
29682 void
29683 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
29684 {
29685 /* C code for the stuff we expand below.
29686 double xa = fabs (x), xa2, x2;
29687 if (!isless (xa, TWO52))
29688 return x;
29689 Using the absolute value and copying back sign makes
29690 -0.0 -> -0.0 correct.
29691 xa2 = xa + TWO52 - TWO52;
29692 Compensate.
29693 dxa = xa2 - xa;
29694 if (dxa <= -0.5)
29695 xa2 += 1;
29696 else if (dxa > 0.5)
29697 xa2 -= 1;
29698 x2 = copysign (xa2, x);
29699 return x2;
29700 */
29701 enum machine_mode mode = GET_MODE (operand0);
29702 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
29703
29704 TWO52 = ix86_gen_TWO52 (mode);
29705
29706 /* Temporary for holding the result, initialized to the input
29707 operand to ease control flow. */
29708 res = gen_reg_rtx (mode);
29709 emit_move_insn (res, operand1);
29710
29711 /* xa = abs (operand1) */
29712 xa = ix86_expand_sse_fabs (res, &mask);
29713
29714 /* if (!isless (xa, TWO52)) goto label; */
29715 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29716
29717 /* xa2 = xa + TWO52 - TWO52; */
29718 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29719 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
29720
29721 /* dxa = xa2 - xa; */
29722 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
29723
29724 /* generate 0.5, 1.0 and -0.5 */
29725 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
29726 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
29727 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
29728 0, OPTAB_DIRECT);
29729
29730 /* Compensate. */
29731 tmp = gen_reg_rtx (mode);
29732 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
29733 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
29734 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29735 gen_rtx_AND (mode, one, tmp)));
29736 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29737 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
29738 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
29739 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29740 gen_rtx_AND (mode, one, tmp)));
29741 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29742
29743 /* res = copysign (xa2, operand1) */
29744 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
29745
29746 emit_label (label);
29747 LABEL_NUSES (label) = 1;
29748
29749 emit_move_insn (operand0, res);
29750 }
29751
29752 /* Expand SSE sequence for computing trunc from OPERAND1 storing
29753 into OPERAND0. */
29754 void
29755 ix86_expand_trunc (rtx operand0, rtx operand1)
29756 {
29757 /* C code for SSE variant we expand below.
29758 double xa = fabs (x), x2;
29759 if (!isless (xa, TWO52))
29760 return x;
29761 x2 = (double)(long)x;
29762 if (HONOR_SIGNED_ZEROS (mode))
29763 return copysign (x2, x);
29764 return x2;
29765 */
29766 enum machine_mode mode = GET_MODE (operand0);
29767 rtx xa, xi, TWO52, label, res, mask;
29768
29769 TWO52 = ix86_gen_TWO52 (mode);
29770
29771 /* Temporary for holding the result, initialized to the input
29772 operand to ease control flow. */
29773 res = gen_reg_rtx (mode);
29774 emit_move_insn (res, operand1);
29775
29776 /* xa = abs (operand1) */
29777 xa = ix86_expand_sse_fabs (res, &mask);
29778
29779 /* if (!isless (xa, TWO52)) goto label; */
29780 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29781
29782 /* x = (double)(long)x */
29783 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
29784 expand_fix (xi, res, 0);
29785 expand_float (res, xi, 0);
29786
29787 if (HONOR_SIGNED_ZEROS (mode))
29788 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
29789
29790 emit_label (label);
29791 LABEL_NUSES (label) = 1;
29792
29793 emit_move_insn (operand0, res);
29794 }
29795
29796 /* Expand SSE sequence for computing trunc from OPERAND1 storing
29797 into OPERAND0. */
29798 void
29799 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
29800 {
29801 enum machine_mode mode = GET_MODE (operand0);
29802 rtx xa, mask, TWO52, label, one, res, smask, tmp;
29803
29804 /* C code for SSE variant we expand below.
29805 double xa = fabs (x), xa2, x2;
29806 if (!isless (xa, TWO52))
29807 return x;
29808 xa2 = xa + TWO52 - TWO52;
29809 Compensate:
29810 if (xa2 > xa)
29811 xa2 -= 1.0;
29812 x2 = copysign (xa2, x);
29813 return x2;
29814 */
29815
29816 TWO52 = ix86_gen_TWO52 (mode);
29817
29818 /* Temporary for holding the result, initialized to the input
29819 operand to ease control flow. */
29820 res = gen_reg_rtx (mode);
29821 emit_move_insn (res, operand1);
29822
29823 /* xa = abs (operand1) */
29824 xa = ix86_expand_sse_fabs (res, &smask);
29825
29826 /* if (!isless (xa, TWO52)) goto label; */
29827 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29828
29829 /* res = xa + TWO52 - TWO52; */
29830 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29831 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
29832 emit_move_insn (res, tmp);
29833
29834 /* generate 1.0 */
29835 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
29836
29837 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
29838 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
29839 emit_insn (gen_rtx_SET (VOIDmode, mask,
29840 gen_rtx_AND (mode, mask, one)));
29841 tmp = expand_simple_binop (mode, MINUS,
29842 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
29843 emit_move_insn (res, tmp);
29844
29845 /* res = copysign (res, operand1) */
29846 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
29847
29848 emit_label (label);
29849 LABEL_NUSES (label) = 1;
29850
29851 emit_move_insn (operand0, res);
29852 }
29853
29854 /* Expand SSE sequence for computing round from OPERAND1 storing
29855 into OPERAND0. */
29856 void
29857 ix86_expand_round (rtx operand0, rtx operand1)
29858 {
29859 /* C code for the stuff we're doing below:
29860 double xa = fabs (x);
29861 if (!isless (xa, TWO52))
29862 return x;
29863 xa = (double)(long)(xa + nextafter (0.5, 0.0));
29864 return copysign (xa, x);
29865 */
29866 enum machine_mode mode = GET_MODE (operand0);
29867 rtx res, TWO52, xa, label, xi, half, mask;
29868 const struct real_format *fmt;
29869 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
29870
29871 /* Temporary for holding the result, initialized to the input
29872 operand to ease control flow. */
29873 res = gen_reg_rtx (mode);
29874 emit_move_insn (res, operand1);
29875
29876 TWO52 = ix86_gen_TWO52 (mode);
29877 xa = ix86_expand_sse_fabs (res, &mask);
29878 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29879
29880 /* load nextafter (0.5, 0.0) */
29881 fmt = REAL_MODE_FORMAT (mode);
29882 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
29883 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
29884
29885 /* xa = xa + 0.5 */
29886 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
29887 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
29888
29889 /* xa = (double)(int64_t)xa */
29890 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
29891 expand_fix (xi, xa, 0);
29892 expand_float (xa, xi, 0);
29893
29894 /* res = copysign (xa, operand1) */
29895 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
29896
29897 emit_label (label);
29898 LABEL_NUSES (label) = 1;
29899
29900 emit_move_insn (operand0, res);
29901 }
29902 \f
29903
29904 /* Table of valid machine attributes. */
29905 static const struct attribute_spec ix86_attribute_table[] =
29906 {
29907 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
29908 /* Stdcall attribute says callee is responsible for popping arguments
29909 if they are not variable. */
29910 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
29911 /* Fastcall attribute says callee is responsible for popping arguments
29912 if they are not variable. */
29913 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
29914 /* Thiscall attribute says callee is responsible for popping arguments
29915 if they are not variable. */
29916 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
29917 /* Cdecl attribute says the callee is a normal C declaration */
29918 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
29919 /* Regparm attribute specifies how many integer arguments are to be
29920 passed in registers. */
29921 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
29922 /* Sseregparm attribute says we are using x86_64 calling conventions
29923 for FP arguments. */
29924 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
29925 /* force_align_arg_pointer says this function realigns the stack at entry. */
29926 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
29927 false, true, true, ix86_handle_cconv_attribute },
29928 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
29929 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
29930 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
29931 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
29932 #endif
29933 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
29934 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
29935 #ifdef SUBTARGET_ATTRIBUTE_TABLE
29936 SUBTARGET_ATTRIBUTE_TABLE,
29937 #endif
29938 /* ms_abi and sysv_abi calling convention function attributes. */
29939 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
29940 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
29941 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute },
29942 /* End element. */
29943 { NULL, 0, 0, false, false, false, NULL }
29944 };
29945
29946 /* Implement targetm.vectorize.builtin_vectorization_cost. */
29947 static int
29948 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
29949 tree vectype ATTRIBUTE_UNUSED,
29950 int misalign ATTRIBUTE_UNUSED)
29951 {
29952 switch (type_of_cost)
29953 {
29954 case scalar_stmt:
29955 return ix86_cost->scalar_stmt_cost;
29956
29957 case scalar_load:
29958 return ix86_cost->scalar_load_cost;
29959
29960 case scalar_store:
29961 return ix86_cost->scalar_store_cost;
29962
29963 case vector_stmt:
29964 return ix86_cost->vec_stmt_cost;
29965
29966 case vector_load:
29967 return ix86_cost->vec_align_load_cost;
29968
29969 case vector_store:
29970 return ix86_cost->vec_store_cost;
29971
29972 case vec_to_scalar:
29973 return ix86_cost->vec_to_scalar_cost;
29974
29975 case scalar_to_vec:
29976 return ix86_cost->scalar_to_vec_cost;
29977
29978 case unaligned_load:
29979 case unaligned_store:
29980 return ix86_cost->vec_unalign_load_cost;
29981
29982 case cond_branch_taken:
29983 return ix86_cost->cond_taken_branch_cost;
29984
29985 case cond_branch_not_taken:
29986 return ix86_cost->cond_not_taken_branch_cost;
29987
29988 case vec_perm:
29989 return 1;
29990
29991 default:
29992 gcc_unreachable ();
29993 }
29994 }
29995
29996
29997 /* Implement targetm.vectorize.builtin_vec_perm. */
29998
29999 static tree
30000 ix86_vectorize_builtin_vec_perm (tree vec_type, tree *mask_type)
30001 {
30002 tree itype = TREE_TYPE (vec_type);
30003 bool u = TYPE_UNSIGNED (itype);
30004 enum machine_mode vmode = TYPE_MODE (vec_type);
30005 enum ix86_builtins fcode = fcode; /* Silence bogus warning. */
30006 bool ok = TARGET_SSE2;
30007
30008 switch (vmode)
30009 {
30010 case V4DFmode:
30011 ok = TARGET_AVX;
30012 fcode = IX86_BUILTIN_VEC_PERM_V4DF;
30013 goto get_di;
30014 case V2DFmode:
30015 fcode = IX86_BUILTIN_VEC_PERM_V2DF;
30016 get_di:
30017 itype = ix86_get_builtin_type (IX86_BT_DI);
30018 break;
30019
30020 case V8SFmode:
30021 ok = TARGET_AVX;
30022 fcode = IX86_BUILTIN_VEC_PERM_V8SF;
30023 goto get_si;
30024 case V4SFmode:
30025 ok = TARGET_SSE;
30026 fcode = IX86_BUILTIN_VEC_PERM_V4SF;
30027 get_si:
30028 itype = ix86_get_builtin_type (IX86_BT_SI);
30029 break;
30030
30031 case V2DImode:
30032 fcode = u ? IX86_BUILTIN_VEC_PERM_V2DI_U : IX86_BUILTIN_VEC_PERM_V2DI;
30033 break;
30034 case V4SImode:
30035 fcode = u ? IX86_BUILTIN_VEC_PERM_V4SI_U : IX86_BUILTIN_VEC_PERM_V4SI;
30036 break;
30037 case V8HImode:
30038 fcode = u ? IX86_BUILTIN_VEC_PERM_V8HI_U : IX86_BUILTIN_VEC_PERM_V8HI;
30039 break;
30040 case V16QImode:
30041 fcode = u ? IX86_BUILTIN_VEC_PERM_V16QI_U : IX86_BUILTIN_VEC_PERM_V16QI;
30042 break;
30043 default:
30044 ok = false;
30045 break;
30046 }
30047
30048 if (!ok)
30049 return NULL_TREE;
30050
30051 *mask_type = itype;
30052 return ix86_builtins[(int) fcode];
30053 }
30054
30055 /* Return a vector mode with twice as many elements as VMODE. */
30056 /* ??? Consider moving this to a table generated by genmodes.c. */
30057
30058 static enum machine_mode
30059 doublesize_vector_mode (enum machine_mode vmode)
30060 {
30061 switch (vmode)
30062 {
30063 case V2SFmode: return V4SFmode;
30064 case V1DImode: return V2DImode;
30065 case V2SImode: return V4SImode;
30066 case V4HImode: return V8HImode;
30067 case V8QImode: return V16QImode;
30068
30069 case V2DFmode: return V4DFmode;
30070 case V4SFmode: return V8SFmode;
30071 case V2DImode: return V4DImode;
30072 case V4SImode: return V8SImode;
30073 case V8HImode: return V16HImode;
30074 case V16QImode: return V32QImode;
30075
30076 case V4DFmode: return V8DFmode;
30077 case V8SFmode: return V16SFmode;
30078 case V4DImode: return V8DImode;
30079 case V8SImode: return V16SImode;
30080 case V16HImode: return V32HImode;
30081 case V32QImode: return V64QImode;
30082
30083 default:
30084 gcc_unreachable ();
30085 }
30086 }
30087
30088 /* Construct (set target (vec_select op0 (parallel perm))) and
30089 return true if that's a valid instruction in the active ISA. */
30090
30091 static bool
30092 expand_vselect (rtx target, rtx op0, const unsigned char *perm, unsigned nelt)
30093 {
30094 rtx rperm[MAX_VECT_LEN], x;
30095 unsigned i;
30096
30097 for (i = 0; i < nelt; ++i)
30098 rperm[i] = GEN_INT (perm[i]);
30099
30100 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, rperm));
30101 x = gen_rtx_VEC_SELECT (GET_MODE (target), op0, x);
30102 x = gen_rtx_SET (VOIDmode, target, x);
30103
30104 x = emit_insn (x);
30105 if (recog_memoized (x) < 0)
30106 {
30107 remove_insn (x);
30108 return false;
30109 }
30110 return true;
30111 }
30112
30113 /* Similar, but generate a vec_concat from op0 and op1 as well. */
30114
30115 static bool
30116 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
30117 const unsigned char *perm, unsigned nelt)
30118 {
30119 enum machine_mode v2mode;
30120 rtx x;
30121
30122 v2mode = doublesize_vector_mode (GET_MODE (op0));
30123 x = gen_rtx_VEC_CONCAT (v2mode, op0, op1);
30124 return expand_vselect (target, x, perm, nelt);
30125 }
30126
30127 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
30128 in terms of blendp[sd] / pblendw / pblendvb. */
30129
30130 static bool
30131 expand_vec_perm_blend (struct expand_vec_perm_d *d)
30132 {
30133 enum machine_mode vmode = d->vmode;
30134 unsigned i, mask, nelt = d->nelt;
30135 rtx target, op0, op1, x;
30136
30137 if (!TARGET_SSE4_1 || d->op0 == d->op1)
30138 return false;
30139 if (!(GET_MODE_SIZE (vmode) == 16 || vmode == V4DFmode || vmode == V8SFmode))
30140 return false;
30141
30142 /* This is a blend, not a permute. Elements must stay in their
30143 respective lanes. */
30144 for (i = 0; i < nelt; ++i)
30145 {
30146 unsigned e = d->perm[i];
30147 if (!(e == i || e == i + nelt))
30148 return false;
30149 }
30150
30151 if (d->testing_p)
30152 return true;
30153
30154 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
30155 decision should be extracted elsewhere, so that we only try that
30156 sequence once all budget==3 options have been tried. */
30157
30158 /* For bytes, see if bytes move in pairs so we can use pblendw with
30159 an immediate argument, rather than pblendvb with a vector argument. */
30160 if (vmode == V16QImode)
30161 {
30162 bool pblendw_ok = true;
30163 for (i = 0; i < 16 && pblendw_ok; i += 2)
30164 pblendw_ok = (d->perm[i] + 1 == d->perm[i + 1]);
30165
30166 if (!pblendw_ok)
30167 {
30168 rtx rperm[16], vperm;
30169
30170 for (i = 0; i < nelt; ++i)
30171 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
30172
30173 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm));
30174 vperm = force_reg (V16QImode, vperm);
30175
30176 emit_insn (gen_sse4_1_pblendvb (d->target, d->op0, d->op1, vperm));
30177 return true;
30178 }
30179 }
30180
30181 target = d->target;
30182 op0 = d->op0;
30183 op1 = d->op1;
30184 mask = 0;
30185
30186 switch (vmode)
30187 {
30188 case V4DFmode:
30189 case V8SFmode:
30190 case V2DFmode:
30191 case V4SFmode:
30192 case V8HImode:
30193 for (i = 0; i < nelt; ++i)
30194 mask |= (d->perm[i] >= nelt) << i;
30195 break;
30196
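/* The integer blends below are emitted as a V8HImode pblendw, so each
   element's "take from op1" bit must be replicated across the 16-bit
   words that element spans: 4 words (0xf) per DImode element, 2 words
   (0x3) per SImode element, and one bit per byte pair for V16QImode,
   which only reaches here when bytes move in pairs.  */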
30197 case V2DImode:
30198 for (i = 0; i < 2; ++i)
30199 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
30200 goto do_subreg;
30201
30202 case V4SImode:
30203 for (i = 0; i < 4; ++i)
30204 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
30205 goto do_subreg;
30206
30207 case V16QImode:
30208 for (i = 0; i < 8; ++i)
30209 mask |= (d->perm[i * 2] >= 16) << i;
30210
30211 do_subreg:
30212 vmode = V8HImode;
30213 target = gen_lowpart (vmode, target);
30214 op0 = gen_lowpart (vmode, op0);
30215 op1 = gen_lowpart (vmode, op1);
30216 break;
30217
30218 default:
30219 gcc_unreachable ();
30220 }
30221
30222 /* This matches five different patterns with the different modes. */
30223 x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
30224 x = gen_rtx_SET (VOIDmode, target, x);
30225 emit_insn (x);
30226
30227 return true;
30228 }
30229
30230 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
30231 in terms of the variable form of vpermilps.
30232
30233 Note that we will have already failed the immediate input vpermilps,
30234 which requires that the high and low part shuffle be identical; the
30235 variable form doesn't require that. */
30236
30237 static bool
30238 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
30239 {
30240 rtx rperm[8], vperm;
30241 unsigned i;
30242
30243 if (!TARGET_AVX || d->vmode != V8SFmode || d->op0 != d->op1)
30244 return false;
30245
30246 /* We can only permute within the 128-bit lane. */
30247 for (i = 0; i < 8; ++i)
30248 {
30249 unsigned e = d->perm[i];
30250 if (i < 4 ? e >= 4 : e < 4)
30251 return false;
30252 }
30253
30254 if (d->testing_p)
30255 return true;
30256
30257 for (i = 0; i < 8; ++i)
30258 {
30259 unsigned e = d->perm[i];
30260
30261 /* Within each 128-bit lane, the elements of op0 are numbered
30262 from 0 and the elements of op1 are numbered from 4. */
30263 if (e >= 8 + 4)
30264 e -= 8;
30265 else if (e >= 4)
30266 e -= 4;
30267
30268 rperm[i] = GEN_INT (e);
30269 }
30270
30271 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
30272 vperm = force_reg (V8SImode, vperm);
30273 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
30274
30275 return true;
30276 }
30277
30278 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
30279 in terms of pshufb or vpperm. */
30280
30281 static bool
30282 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
30283 {
30284 unsigned i, nelt, eltsz;
30285 rtx rperm[16], vperm, target, op0, op1;
30286
30287 if (!(d->op0 == d->op1 ? TARGET_SSSE3 : TARGET_XOP))
30288 return false;
30289 if (GET_MODE_SIZE (d->vmode) != 16)
30290 return false;
30291
30292 if (d->testing_p)
30293 return true;
30294
30295 nelt = d->nelt;
30296 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
30297
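/* Expand each element index of the permutation into the byte indices
   the byte shuffle needs: e.g. a V4SImode element index E becomes the
   four selectors 4*E .. 4*E + 3.  */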
30298 for (i = 0; i < nelt; ++i)
30299 {
30300 unsigned j, e = d->perm[i];
30301 for (j = 0; j < eltsz; ++j)
30302 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
30303 }
30304
30305 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm));
30306 vperm = force_reg (V16QImode, vperm);
30307
30308 target = gen_lowpart (V16QImode, d->target);
30309 op0 = gen_lowpart (V16QImode, d->op0);
30310 if (d->op0 == d->op1)
30311 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
30312 else
30313 {
30314 op1 = gen_lowpart (V16QImode, d->op1);
30315 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
30316 }
30317
30318 return true;
30319 }
30320
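/* The control vector built above works at byte granularity regardless of
   the element size: each selected element E expands into eltsz consecutive
   byte indices.  For example, a V8HImode permutation beginning { 1, 0, ... }
   (eltsz == 2) yields control bytes { 2, 3, 0, 1, ... }.  With two distinct
   operands the XOP vpperm form is used instead, whose selector indexes the
   32-byte concatenation of the two sources.  */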
30321 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
30322 in a single instruction. */
30323
30324 static bool
30325 expand_vec_perm_1 (struct expand_vec_perm_d *d)
30326 {
30327 unsigned i, nelt = d->nelt;
30328 unsigned char perm2[MAX_VECT_LEN];
30329
30330 /* Check plain VEC_SELECT first, because AVX has instructions that could
30331 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
30332 input where SEL+CONCAT may not. */
30333 if (d->op0 == d->op1)
30334 {
30335 int mask = nelt - 1;
30336
30337 for (i = 0; i < nelt; i++)
30338 perm2[i] = d->perm[i] & mask;
30339
30340 if (expand_vselect (d->target, d->op0, perm2, nelt))
30341 return true;
30342
30343 /* There are plenty of patterns in sse.md that are written for
30344 SEL+CONCAT and are not replicated for a single op. Perhaps
30345 that should be changed, to avoid the nastiness here. */
30346
30347 /* Recognize interleave style patterns, which means incrementing
30348 every other permutation operand. */
30349 for (i = 0; i < nelt; i += 2)
30350 {
30351 perm2[i] = d->perm[i] & mask;
30352 perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
30353 }
30354 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
30355 return true;
30356
30357 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
30358 if (nelt >= 4)
30359 {
30360 for (i = 0; i < nelt; i += 4)
30361 {
30362 perm2[i + 0] = d->perm[i + 0] & mask;
30363 perm2[i + 1] = d->perm[i + 1] & mask;
30364 perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
30365 perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
30366 }
30367
30368 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
30369 return true;
30370 }
30371 }
30372
30373 /* Finally, try the fully general two operand permute. */
30374 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt))
30375 return true;
30376
30377 /* Recognize interleave style patterns with reversed operands. */
30378 if (d->op0 != d->op1)
30379 {
30380 for (i = 0; i < nelt; ++i)
30381 {
30382 unsigned e = d->perm[i];
30383 if (e >= nelt)
30384 e -= nelt;
30385 else
30386 e += nelt;
30387 perm2[i] = e;
30388 }
30389
30390 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt))
30391 return true;
30392 }
30393
30394 /* Try the SSE4.1 blend variable merge instructions. */
30395 if (expand_vec_perm_blend (d))
30396 return true;
30397
30398 /* Try one of the AVX vpermil variable permutations. */
30399 if (expand_vec_perm_vpermil (d))
30400 return true;
30401
30402 /* Try the SSSE3 pshufb or XOP vpperm variable permutation. */
30403 if (expand_vec_perm_pshufb (d))
30404 return true;
30405
30406 return false;
30407 }
30408
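/* Example of the SEL+CONCAT trick above: a single-operand V16QImode
   permutation { 0, 0, 1, 1, ..., 7, 7 } has no direct one-operand byte
   shuffle before SSSE3, but rewriting it as { 0, 16, 1, 17, ..., 7, 23 }
   turns it into a vec_select of (vec_concat op0 op0), which matches the
   punpcklbw pattern.  Likewise the shufps-style rewrite sends the third
   and fourth index of every group of four to the second copy of op0.  */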
30409 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
30410 in terms of a pair of pshuflw + pshufhw instructions. */
30411
30412 static bool
30413 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
30414 {
30415 unsigned char perm2[MAX_VECT_LEN];
30416 unsigned i;
30417 bool ok;
30418
30419 if (d->vmode != V8HImode || d->op0 != d->op1)
30420 return false;
30421
30422 /* The two permutations only operate in 64-bit lanes. */
30423 for (i = 0; i < 4; ++i)
30424 if (d->perm[i] >= 4)
30425 return false;
30426 for (i = 4; i < 8; ++i)
30427 if (d->perm[i] < 4)
30428 return false;
30429
30430 if (d->testing_p)
30431 return true;
30432
30433 /* Emit the pshuflw. */
30434 memcpy (perm2, d->perm, 4);
30435 for (i = 4; i < 8; ++i)
30436 perm2[i] = i;
30437 ok = expand_vselect (d->target, d->op0, perm2, 8);
30438 gcc_assert (ok);
30439
30440 /* Emit the pshufhw. */
30441 memcpy (perm2 + 4, d->perm + 4, 4);
30442 for (i = 0; i < 4; ++i)
30443 perm2[i] = i;
30444 ok = expand_vselect (d->target, d->target, perm2, 8);
30445 gcc_assert (ok);
30446
30447 return true;
30448 }
30449
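/* Worked example: for the V8HImode permutation { 2, 1, 0, 3, 5, 7, 4, 6 }
   the low four indices stay below 4 and the high four stay at or above 4,
   so the lane test above passes.  The pshuflw step then uses
   { 2, 1, 0, 3, 4, 5, 6, 7 } (high quadword untouched) and the pshufhw
   step uses { 0, 1, 2, 3, 5, 7, 4, 6 } (low quadword untouched), applied
   to the result of the first step.  */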
30450 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
30451 the permutation using the SSSE3 palignr instruction. This succeeds
30452 when all of the elements in PERM fit within one vector and we merely
30453 need to shift them down so that a single vector permutation has a
30454 chance to succeed. */
30455
30456 static bool
30457 expand_vec_perm_palignr (struct expand_vec_perm_d *d)
30458 {
30459 unsigned i, nelt = d->nelt;
30460 unsigned min, max;
30461 bool in_order, ok;
30462 rtx shift;
30463
30464 /* Even with AVX, palignr only operates on 128-bit vectors. */
30465 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
30466 return false;
30467
30468 min = nelt, max = 0;
30469 for (i = 0; i < nelt; ++i)
30470 {
30471 unsigned e = d->perm[i];
30472 if (e < min)
30473 min = e;
30474 if (e > max)
30475 max = e;
30476 }
30477 if (min == 0 || max - min >= nelt)
30478 return false;
30479
30480 /* Given that we have SSSE3, we know we'll be able to implement the
30481 single operand permutation after the palignr with pshufb. */
30482 if (d->testing_p)
30483 return true;
30484
30485 shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
30486 emit_insn (gen_ssse3_palignrti (gen_lowpart (TImode, d->target),
30487 gen_lowpart (TImode, d->op1),
30488 gen_lowpart (TImode, d->op0), shift));
30489
30490 d->op0 = d->op1 = d->target;
30491
30492 in_order = true;
30493 for (i = 0; i < nelt; ++i)
30494 {
30495 unsigned e = d->perm[i] - min;
30496 if (e != i)
30497 in_order = false;
30498 d->perm[i] = e;
30499 }
30500
30501 /* Test for the degenerate case where the alignment by itself
30502 produces the desired permutation. */
30503 if (in_order)
30504 return true;
30505
30506 ok = expand_vec_perm_1 (d);
30507 gcc_assert (ok);
30508
30509 return ok;
30510 }
30511
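/* Worked example: a V4SImode permutation { 3, 4, 5, 6 } has min == 3 and
   max == 6, so max - min < nelt and the whole selection fits in one
   shifted vector.  The palignr above shifts the concatenation of the two
   inputs (op0 in the low half) right by 3 * 32 bits, after which the
   indices reduce to { 0, 1, 2, 3 } and the in_order test succeeds with
   no further shuffle needed.  */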
30512 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
30513 a two vector permutation into a single vector permutation by using
30514 an interleave operation to merge the vectors. */
30515
30516 static bool
30517 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
30518 {
30519 struct expand_vec_perm_d dremap, dfinal;
30520 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
30521 unsigned contents, h1, h2, h3, h4;
30522 unsigned char remap[2 * MAX_VECT_LEN];
30523 rtx seq;
30524 bool ok;
30525
30526 if (d->op0 == d->op1)
30527 return false;
30528
30529 /* The 256-bit unpck[lh]p[sd] instructions only operate within the 128-bit
30530 lanes. We can use similar techniques with the vperm2f128 instruction,
30531 but it requires slightly different logic. */
30532 if (GET_MODE_SIZE (d->vmode) != 16)
30533 return false;
30534
30535 /* Examine from whence the elements come. */
30536 contents = 0;
30537 for (i = 0; i < nelt; ++i)
30538 contents |= 1u << d->perm[i];
30539
30540 /* Split the two input vectors into 4 halves. */
30541 h1 = (1u << nelt2) - 1;
30542 h2 = h1 << nelt2;
30543 h3 = h2 << nelt2;
30544 h4 = h3 << nelt2;
30545
30546 memset (remap, 0xff, sizeof (remap));
30547 dremap = *d;
30548
30549   /* If the elements all come from the low halves of both inputs, use interleave
30550      low; similarly, use interleave high for the high halves.  If the elements come
30551      from mismatched halves, we can use shufps for V4SF/V4SI or do a DImode shuffle.  */
30552 if ((contents & (h1 | h3)) == contents)
30553 {
30554 for (i = 0; i < nelt2; ++i)
30555 {
30556 remap[i] = i * 2;
30557 remap[i + nelt] = i * 2 + 1;
30558 dremap.perm[i * 2] = i;
30559 dremap.perm[i * 2 + 1] = i + nelt;
30560 }
30561 }
30562 else if ((contents & (h2 | h4)) == contents)
30563 {
30564 for (i = 0; i < nelt2; ++i)
30565 {
30566 remap[i + nelt2] = i * 2;
30567 remap[i + nelt + nelt2] = i * 2 + 1;
30568 dremap.perm[i * 2] = i + nelt2;
30569 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
30570 }
30571 }
30572 else if ((contents & (h1 | h4)) == contents)
30573 {
30574 for (i = 0; i < nelt2; ++i)
30575 {
30576 remap[i] = i;
30577 remap[i + nelt + nelt2] = i + nelt2;
30578 dremap.perm[i] = i;
30579 dremap.perm[i + nelt2] = i + nelt + nelt2;
30580 }
30581 if (nelt != 4)
30582 {
30583 dremap.vmode = V2DImode;
30584 dremap.nelt = 2;
30585 dremap.perm[0] = 0;
30586 dremap.perm[1] = 3;
30587 }
30588 }
30589 else if ((contents & (h2 | h3)) == contents)
30590 {
30591 for (i = 0; i < nelt2; ++i)
30592 {
30593 remap[i + nelt2] = i;
30594 remap[i + nelt] = i + nelt2;
30595 dremap.perm[i] = i + nelt2;
30596 dremap.perm[i + nelt2] = i + nelt;
30597 }
30598 if (nelt != 4)
30599 {
30600 dremap.vmode = V2DImode;
30601 dremap.nelt = 2;
30602 dremap.perm[0] = 1;
30603 dremap.perm[1] = 2;
30604 }
30605 }
30606 else
30607 return false;
30608
30609 /* Use the remapping array set up above to move the elements from their
30610 swizzled locations into their final destinations. */
30611 dfinal = *d;
30612 for (i = 0; i < nelt; ++i)
30613 {
30614 unsigned e = remap[d->perm[i]];
30615 gcc_assert (e < nelt);
30616 dfinal.perm[i] = e;
30617 }
30618 dfinal.op0 = gen_reg_rtx (dfinal.vmode);
30619 dfinal.op1 = dfinal.op0;
30620 dremap.target = dfinal.op0;
30621
30622 /* Test if the final remap can be done with a single insn. For V4SFmode or
30623 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
30624 start_sequence ();
30625 ok = expand_vec_perm_1 (&dfinal);
30626 seq = get_insns ();
30627 end_sequence ();
30628
30629 if (!ok)
30630 return false;
30631
30632 if (dremap.vmode != dfinal.vmode)
30633 {
30634 dremap.target = gen_lowpart (dremap.vmode, dremap.target);
30635 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
30636 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
30637 }
30638
30639 ok = expand_vec_perm_1 (&dremap);
30640 gcc_assert (ok);
30641
30642 emit_insn (seq);
30643 return true;
30644 }
30645
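/* Worked example: for a V4SImode permutation { 1, 5, 0, 4 } every index
   falls in the low half of one of the inputs (the h1 | h3 case), so
   dremap becomes the interleave-low { 0, 4, 1, 5 } (punpckldq), remap
   sends the original indices 0, 1, 4, 5 to positions 0, 2, 1, 3 of that
   temporary, and dfinal becomes the single-operand permutation
   { 2, 3, 0, 1 }, which expand_vec_perm_1 handles with a pshufd.  */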
30646 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
30647 permutation with two pshufb insns and an ior. We should have already
30648 failed all two instruction sequences. */
30649
30650 static bool
30651 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
30652 {
30653 rtx rperm[2][16], vperm, l, h, op, m128;
30654 unsigned int i, nelt, eltsz;
30655
30656 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
30657 return false;
30658 gcc_assert (d->op0 != d->op1);
30659
30660 nelt = d->nelt;
30661 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
30662
30663 /* Generate two permutation masks. If the required element is within
30664 the given vector it is shuffled into the proper lane. If the required
30665 element is in the other vector, force a zero into the lane by setting
30666 bit 7 in the permutation mask. */
30667 m128 = GEN_INT (-128);
30668 for (i = 0; i < nelt; ++i)
30669 {
30670 unsigned j, e = d->perm[i];
30671 unsigned which = (e >= nelt);
30672 if (e >= nelt)
30673 e -= nelt;
30674
30675 for (j = 0; j < eltsz; ++j)
30676 {
30677 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
30678 rperm[1-which][i*eltsz + j] = m128;
30679 }
30680 }
30681
30682 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
30683 vperm = force_reg (V16QImode, vperm);
30684
30685 l = gen_reg_rtx (V16QImode);
30686 op = gen_lowpart (V16QImode, d->op0);
30687 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
30688
30689 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
30690 vperm = force_reg (V16QImode, vperm);
30691
30692 h = gen_reg_rtx (V16QImode);
30693 op = gen_lowpart (V16QImode, d->op1);
30694 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
30695
30696 op = gen_lowpart (V16QImode, d->target);
30697 emit_insn (gen_iorv16qi3 (op, l, h));
30698
30699 return true;
30700 }
30701
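/* Worked example of the zeroing trick: for a V16QImode permutation with
   d->perm[0] == 17 (byte 1 of op1), rperm[1][0] is 1 and rperm[0][0] is
   -128.  The pshufb of op0 therefore writes a zero into byte 0 (bit 7 of
   the control byte forces a zero), the pshufb of op1 writes op1's byte 1
   there, and the final ior merges the two partial results.  */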
30702 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
30703 and extract-odd permutations. */
30704
30705 static bool
30706 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
30707 {
30708 rtx t1, t2, t3, t4;
30709
30710 switch (d->vmode)
30711 {
30712 case V4DFmode:
30713 t1 = gen_reg_rtx (V4DFmode);
30714 t2 = gen_reg_rtx (V4DFmode);
30715
30716 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
30717 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
30718 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
30719
30720 /* Now an unpck[lh]pd will produce the result required. */
30721 if (odd)
30722 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
30723 else
30724 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
30725 emit_insn (t3);
30726 break;
30727
30728 case V8SFmode:
30729 {
30730 static const unsigned char perm1[8] = { 0, 2, 1, 3, 4, 6, 5, 7 };
30731 static const unsigned char perme[8] = { 0, 1, 8, 9, 4, 5, 12, 13 };
30732 static const unsigned char permo[8] = { 2, 3, 10, 11, 6, 7, 14, 15 };
30733
30734 t1 = gen_reg_rtx (V8SFmode);
30735 t2 = gen_reg_rtx (V8SFmode);
30736 t3 = gen_reg_rtx (V8SFmode);
30737 t4 = gen_reg_rtx (V8SFmode);
30738
30739 /* Shuffle within the 128-bit lanes to produce:
30740 { 0 2 1 3 4 6 5 7 } and { 8 a 9 b c e d f }. */
30741 expand_vselect (t1, d->op0, perm1, 8);
30742 expand_vselect (t2, d->op1, perm1, 8);
30743
30744 /* Shuffle the lanes around to produce:
30745 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
30746 emit_insn (gen_avx_vperm2f128v8sf3 (t3, t1, t2, GEN_INT (0x20)));
30747 emit_insn (gen_avx_vperm2f128v8sf3 (t4, t1, t2, GEN_INT (0x31)));
30748
30749 /* Now a vpermil2p will produce the result required. */
30750 /* ??? The vpermil2p requires a vector constant. Another option
30751 is a unpck[lh]ps to merge the two vectors to produce
30752 { 0 4 2 6 8 c a e } or { 1 5 3 7 9 d b f }. Then use another
30753 vpermilps to get the elements into the final order. */
30754 d->op0 = t3;
30755 d->op1 = t4;
30756 memcpy (d->perm, odd ? permo : perme, 8);
30757 expand_vec_perm_vpermil (d);
30758 }
30759 break;
30760
30761 case V2DFmode:
30762 case V4SFmode:
30763 case V2DImode:
30764 case V4SImode:
30765 /* These are always directly implementable by expand_vec_perm_1. */
30766 gcc_unreachable ();
30767
30768 case V8HImode:
30769 if (TARGET_SSSE3)
30770 return expand_vec_perm_pshufb2 (d);
30771 else
30772 {
30773 /* We need 2*log2(N)-1 operations to achieve odd/even
30774 with interleave. */
30775 t1 = gen_reg_rtx (V8HImode);
30776 t2 = gen_reg_rtx (V8HImode);
30777 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
30778 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
30779 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
30780 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
30781 if (odd)
30782 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
30783 else
30784 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
30785 emit_insn (t3);
30786 }
30787 break;
30788
30789 case V16QImode:
30790 if (TARGET_SSSE3)
30791 return expand_vec_perm_pshufb2 (d);
30792 else
30793 {
30794 t1 = gen_reg_rtx (V16QImode);
30795 t2 = gen_reg_rtx (V16QImode);
30796 t3 = gen_reg_rtx (V16QImode);
30797 emit_insn (gen_vec_interleave_highv16qi (t1, d->op0, d->op1));
30798 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->op0, d->op1));
30799 emit_insn (gen_vec_interleave_highv16qi (t2, d->target, t1));
30800 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t1));
30801 emit_insn (gen_vec_interleave_highv16qi (t3, d->target, t2));
30802 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t2));
30803 if (odd)
30804 t3 = gen_vec_interleave_highv16qi (d->target, d->target, t3);
30805 else
30806 t3 = gen_vec_interleave_lowv16qi (d->target, d->target, t3);
30807 emit_insn (t3);
30808 }
30809 break;
30810
30811 default:
30812 gcc_unreachable ();
30813 }
30814
30815 return true;
30816 }
30817
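/* A trace of the V4DFmode case above: the 0x20 and 0x31 lane shuffles
   give t1 = { 0 1 4 5 } and t2 = { 2 3 6 7 }; unpcklpd then selects the
   even elements { 0 2 4 6 } and unpckhpd the odd elements { 1 3 5 7 },
   since the 256-bit unpacks operate independently on each 128-bit lane.
   The V8HI/V16QI fallbacks without SSSE3 instead interleave repeatedly
   so that the wanted elements migrate into one half of the vector.  */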
30818 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
30819 extract-even and extract-odd permutations. */
30820
30821 static bool
30822 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
30823 {
30824 unsigned i, odd, nelt = d->nelt;
30825
30826 odd = d->perm[0];
30827 if (odd != 0 && odd != 1)
30828 return false;
30829
30830 for (i = 1; i < nelt; ++i)
30831 if (d->perm[i] != 2 * i + odd)
30832 return false;
30833
30834 return expand_vec_perm_even_odd_1 (d, odd);
30835 }
30836
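/* For example, an extract-even request on V4SImode is the permutation
   { 0, 2, 4, 6 } and extract-odd is { 1, 3, 5, 7 }; anything that does
   not follow the 2 * i + odd progression is left to the other
   strategies.  */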
30837 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
30838 permutations. We assume that expand_vec_perm_1 has already failed. */
30839
30840 static bool
30841 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
30842 {
30843 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
30844 enum machine_mode vmode = d->vmode;
30845 unsigned char perm2[4];
30846 rtx op0 = d->op0;
30847 bool ok;
30848
30849 switch (vmode)
30850 {
30851 case V4DFmode:
30852 case V8SFmode:
30853 /* These are special-cased in sse.md so that we can optionally
30854 use the vbroadcast instruction. They expand to two insns
30855 if the input happens to be in a register. */
30856 gcc_unreachable ();
30857
30858 case V2DFmode:
30859 case V2DImode:
30860 case V4SFmode:
30861 case V4SImode:
30862 /* These are always implementable using standard shuffle patterns. */
30863 gcc_unreachable ();
30864
30865 case V8HImode:
30866 case V16QImode:
30867 /* These can be implemented via interleave.  We save one insn by
30868 stopping once we have promoted to V4SImode and then using pshufd.  */
30869 do
30870 {
30871 optab otab = vec_interleave_low_optab;
30872
30873 if (elt >= nelt2)
30874 {
30875 otab = vec_interleave_high_optab;
30876 elt -= nelt2;
30877 }
30878 nelt2 /= 2;
30879
30880 op0 = expand_binop (vmode, otab, op0, op0, NULL, 0, OPTAB_DIRECT);
30881 vmode = get_mode_wider_vector (vmode);
30882 op0 = gen_lowpart (vmode, op0);
30883 }
30884 while (vmode != V4SImode);
30885
30886 memset (perm2, elt, 4);
30887 ok = expand_vselect (gen_lowpart (V4SImode, d->target), op0, perm2, 4);
30888 gcc_assert (ok);
30889 return true;
30890
30891 default:
30892 gcc_unreachable ();
30893 }
30894 }
30895
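/* Worked example of the interleave ladder above: to broadcast element 5
   of a V16QImode vector, the first step interleaves the low bytes
   (elt 5 < 8), giving pairs { 0 0 1 1 ... 7 7 } viewed as V8HImode; the
   second step interleaves the high halfwords (elt 5 >= 4, so elt becomes
   1), leaving four copies of byte 5 in V4SImode element 1; the final
   pshufd with { 1, 1, 1, 1 } replicates that word across the vector.  */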
30896 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
30897 broadcast permutations. */
30898
30899 static bool
30900 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
30901 {
30902 unsigned i, elt, nelt = d->nelt;
30903
30904 if (d->op0 != d->op1)
30905 return false;
30906
30907 elt = d->perm[0];
30908 for (i = 1; i < nelt; ++i)
30909 if (d->perm[i] != elt)
30910 return false;
30911
30912 return expand_vec_perm_broadcast_1 (d);
30913 }
30914
30915 /* The guts of ix86_expand_vec_perm_builtin, also used by the ok hook.
30916 With all of the interface bits taken care of, perform the expansion
30917 in D and return true on success. */
30918
30919 static bool
30920 ix86_expand_vec_perm_builtin_1 (struct expand_vec_perm_d *d)
30921 {
30922 /* Try a single instruction expansion. */
30923 if (expand_vec_perm_1 (d))
30924 return true;
30925
30926 /* Try sequences of two instructions. */
30927
30928 if (expand_vec_perm_pshuflw_pshufhw (d))
30929 return true;
30930
30931 if (expand_vec_perm_palignr (d))
30932 return true;
30933
30934 if (expand_vec_perm_interleave2 (d))
30935 return true;
30936
30937 if (expand_vec_perm_broadcast (d))
30938 return true;
30939
30940 /* Try sequences of three instructions. */
30941
30942 if (expand_vec_perm_pshufb2 (d))
30943 return true;
30944
30945 /* ??? Look for narrow permutations whose element orderings would
30946 allow the promotion to a wider mode. */
30947
30948 /* ??? Look for sequences of interleave or a wider permute that place
30949 the data into the correct lanes for a half-vector shuffle like
30950 pshuf[lh]w or vpermilps. */
30951
30952 /* ??? Look for sequences of interleave that produce the desired results.
30953 The combinatorics of punpck[lh] get pretty ugly... */
30954
30955 if (expand_vec_perm_even_odd (d))
30956 return true;
30957
30958 return false;
30959 }
30960
30961 /* Extract the values from the vector CST into the permutation array in D.
30962 Return 0 on error, 1 if all values from the permutation come from the
30963 first vector, 2 if all values from the second vector, and 3 otherwise. */
30964
30965 static int
30966 extract_vec_perm_cst (struct expand_vec_perm_d *d, tree cst)
30967 {
30968 tree list = TREE_VECTOR_CST_ELTS (cst);
30969 unsigned i, nelt = d->nelt;
30970 int ret = 0;
30971
30972 for (i = 0; i < nelt; ++i, list = TREE_CHAIN (list))
30973 {
30974 unsigned HOST_WIDE_INT e;
30975
30976 if (!host_integerp (TREE_VALUE (list), 1))
30977 return 0;
30978 e = tree_low_cst (TREE_VALUE (list), 1);
30979 if (e >= 2 * nelt)
30980 return 0;
30981
30982 ret |= (e < nelt ? 1 : 2);
30983 d->perm[i] = e;
30984 }
30985 gcc_assert (list == NULL);
30986
30987   /* If all elements come from the second vector, fold them into the first.  */
30988 if (ret == 2)
30989 for (i = 0; i < nelt; ++i)
30990 d->perm[i] -= nelt;
30991
30992 return ret;
30993 }
30994
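/* For example, a V4SImode mask constant { 0, 5, 2, 7 } yields ret == 3
   (indices reference both vectors), while { 4, 5, 6, 7 } yields ret == 2
   and is folded to { 0, 1, 2, 3 }, so callers only ever see a
   second-vector-only selection as a permutation of the first operand.  */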
30995 static rtx
30996 ix86_expand_vec_perm_builtin (tree exp)
30997 {
30998 struct expand_vec_perm_d d;
30999 tree arg0, arg1, arg2;
31000
31001 arg0 = CALL_EXPR_ARG (exp, 0);
31002 arg1 = CALL_EXPR_ARG (exp, 1);
31003 arg2 = CALL_EXPR_ARG (exp, 2);
31004
31005 d.vmode = TYPE_MODE (TREE_TYPE (arg0));
31006 d.nelt = GET_MODE_NUNITS (d.vmode);
31007 d.testing_p = false;
31008 gcc_assert (VECTOR_MODE_P (d.vmode));
31009
31010 if (TREE_CODE (arg2) != VECTOR_CST)
31011 {
31012 error_at (EXPR_LOCATION (exp),
31013 "vector permutation requires vector constant");
31014 goto exit_error;
31015 }
31016
31017 switch (extract_vec_perm_cst (&d, arg2))
31018 {
31019 default:
31020       gcc_unreachable ();
31021
31022 case 0:
31023 error_at (EXPR_LOCATION (exp), "invalid vector permutation constant");
31024 goto exit_error;
31025
31026 case 3:
31027 if (!operand_equal_p (arg0, arg1, 0))
31028 {
31029 d.op0 = expand_expr (arg0, NULL_RTX, d.vmode, EXPAND_NORMAL);
31030 d.op0 = force_reg (d.vmode, d.op0);
31031 d.op1 = expand_expr (arg1, NULL_RTX, d.vmode, EXPAND_NORMAL);
31032 d.op1 = force_reg (d.vmode, d.op1);
31033 break;
31034 }
31035
31036       /* The mask references elements of both operands, but the two operands
31037 	 are in fact identical.  Make matching the permutation easier by
31038 	 folding it into the single input vector.  */
31040 {
31041 unsigned i, nelt = d.nelt;
31042 for (i = 0; i < nelt; ++i)
31043 if (d.perm[i] >= nelt)
31044 d.perm[i] -= nelt;
31045 }
31046 /* FALLTHRU */
31047
31048 case 1:
31049 d.op0 = expand_expr (arg0, NULL_RTX, d.vmode, EXPAND_NORMAL);
31050 d.op0 = force_reg (d.vmode, d.op0);
31051 d.op1 = d.op0;
31052 break;
31053
31054 case 2:
31055 d.op0 = expand_expr (arg1, NULL_RTX, d.vmode, EXPAND_NORMAL);
31056 d.op0 = force_reg (d.vmode, d.op0);
31057 d.op1 = d.op0;
31058 break;
31059 }
31060
31061 d.target = gen_reg_rtx (d.vmode);
31062 if (ix86_expand_vec_perm_builtin_1 (&d))
31063 return d.target;
31064
31065   /* For compiler-generated permutations we should never get here, because
31066      the compiler should also be checking the ok hook.  But since this is a
31067      builtin the user has access to, don't abort.  */
31068 switch (d.nelt)
31069 {
31070 case 2:
31071 sorry ("vector permutation (%d %d)", d.perm[0], d.perm[1]);
31072 break;
31073 case 4:
31074 sorry ("vector permutation (%d %d %d %d)",
31075 d.perm[0], d.perm[1], d.perm[2], d.perm[3]);
31076 break;
31077 case 8:
31078 sorry ("vector permutation (%d %d %d %d %d %d %d %d)",
31079 d.perm[0], d.perm[1], d.perm[2], d.perm[3],
31080 d.perm[4], d.perm[5], d.perm[6], d.perm[7]);
31081 break;
31082 case 16:
31083 sorry ("vector permutation "
31084 "(%d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d)",
31085 d.perm[0], d.perm[1], d.perm[2], d.perm[3],
31086 d.perm[4], d.perm[5], d.perm[6], d.perm[7],
31087 d.perm[8], d.perm[9], d.perm[10], d.perm[11],
31088 d.perm[12], d.perm[13], d.perm[14], d.perm[15]);
31089 break;
31090 default:
31091 gcc_unreachable ();
31092 }
31093 exit_error:
31094 return CONST0_RTX (d.vmode);
31095 }
31096
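/* A note on the case 3 fold above: if both operands are the same
   expression and the mask is, say, { 0, 5, 2, 7 } for V4SImode, the
   indices that reference the second operand are reduced by nelt, leaving
   { 0, 1, 2, 3 }, which the expander can then match as a single-operand
   shuffle (here the identity).  */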
31097 /* Implement targetm.vectorize.builtin_vec_perm_ok. */
31098
31099 static bool
31100 ix86_vectorize_builtin_vec_perm_ok (tree vec_type, tree mask)
31101 {
31102 struct expand_vec_perm_d d;
31103 int vec_mask;
31104 bool ret, one_vec;
31105
31106 d.vmode = TYPE_MODE (vec_type);
31107 d.nelt = GET_MODE_NUNITS (d.vmode);
31108 d.testing_p = true;
31109
31110 /* Given sufficient ISA support we can just return true here
31111 for selected vector modes. */
31112 if (GET_MODE_SIZE (d.vmode) == 16)
31113 {
31114 /* All implementable with a single vpperm insn. */
31115 if (TARGET_XOP)
31116 return true;
31117 /* All implementable with 2 pshufb + 1 ior. */
31118 if (TARGET_SSSE3)
31119 return true;
31120 /* All implementable with shufpd or unpck[lh]pd. */
31121 if (d.nelt == 2)
31122 return true;
31123 }
31124
31125 vec_mask = extract_vec_perm_cst (&d, mask);
31126
31127   /* This hook cannot be called in response to something that the
31128      user does (unlike the builtin expander), so we should never see
31129      an error generated from the extract.  */
31130 gcc_assert (vec_mask > 0 && vec_mask <= 3);
31131 one_vec = (vec_mask != 3);
31132
31133 /* Implementable with shufps or pshufd. */
31134 if (one_vec && (d.vmode == V4SFmode || d.vmode == V4SImode))
31135 return true;
31136
31137 /* Otherwise we have to go through the motions and see if we can
31138 figure out how to generate the requested permutation. */
31139 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
31140 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
31141 if (!one_vec)
31142 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
31143
31144 start_sequence ();
31145 ret = ix86_expand_vec_perm_builtin_1 (&d);
31146 end_sequence ();
31147
31148 return ret;
31149 }
31150
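/* For example, with only SSE2 available a two-operand V4SFmode mask of
   { 0, 4, 1, 5 } is accepted because the trial expansion above matches
   it as a vec_select of a vec_concat (unpcklps); a mask that none of the
   expanders can build is rejected, and the vectorizer then avoids
   generating that permutation.  */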
31151 void
31152 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
31153 {
31154 struct expand_vec_perm_d d;
31155 unsigned i, nelt;
31156
31157 d.target = targ;
31158 d.op0 = op0;
31159 d.op1 = op1;
31160 d.vmode = GET_MODE (targ);
31161 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
31162 d.testing_p = false;
31163
31164 for (i = 0; i < nelt; ++i)
31165 d.perm[i] = i * 2 + odd;
31166
31167 /* We'll either be able to implement the permutation directly... */
31168 if (expand_vec_perm_1 (&d))
31169 return;
31170
31171 /* ... or we use the special-case patterns. */
31172 expand_vec_perm_even_odd_1 (&d, odd);
31173 }
31174 \f
31175 /* Return the va_list type node specific to the calling ABI of
31176    FNDECL.  */
31177
31178 static tree
31179 ix86_fn_abi_va_list (tree fndecl)
31180 {
31181 if (!TARGET_64BIT)
31182 return va_list_type_node;
31183 gcc_assert (fndecl != NULL_TREE);
31184
31185 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
31186 return ms_va_list_type_node;
31187 else
31188 return sysv_va_list_type_node;
31189 }
31190
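/* For example, on a 64-bit target a function declared with the ms_abi
   attribute uses the Microsoft variadic convention, so its va_list type
   is ms_va_list_type_node rather than the SysV one:

     void f (int n, ...) __attribute__ ((ms_abi));

   On 32-bit targets there is only one va_list type, returned above
   without consulting FNDECL.  */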
31191 /* Return the canonical va_list type specified by TYPE.  If TYPE is not
31192    a valid va_list type, return NULL_TREE.  */
31193
31194 static tree
31195 ix86_canonical_va_list_type (tree type)
31196 {
31197 tree wtype, htype;
31198
31199 /* Resolve references and pointers to va_list type. */
31200 if (TREE_CODE (type) == MEM_REF)
31201 type = TREE_TYPE (type);
31202   else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE (type)))
31203 type = TREE_TYPE (type);
31204 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
31205 type = TREE_TYPE (type);
31206
31207 if (TARGET_64BIT)
31208 {
31209 wtype = va_list_type_node;
31210 gcc_assert (wtype != NULL_TREE);
31211 htype = type;
31212 if (TREE_CODE (wtype) == ARRAY_TYPE)
31213 {
31214 /* If va_list is an array type, the argument may have decayed
31215 to a pointer type, e.g. by being passed to another function.
31216 In that case, unwrap both types so that we can compare the
31217 underlying records. */
31218 if (TREE_CODE (htype) == ARRAY_TYPE
31219 || POINTER_TYPE_P (htype))
31220 {
31221 wtype = TREE_TYPE (wtype);
31222 htype = TREE_TYPE (htype);
31223 }
31224 }
31225 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
31226 return va_list_type_node;
31227 wtype = sysv_va_list_type_node;
31228 gcc_assert (wtype != NULL_TREE);
31229 htype = type;
31230 if (TREE_CODE (wtype) == ARRAY_TYPE)
31231 {
31232 /* If va_list is an array type, the argument may have decayed
31233 to a pointer type, e.g. by being passed to another function.
31234 In that case, unwrap both types so that we can compare the
31235 underlying records. */
31236 if (TREE_CODE (htype) == ARRAY_TYPE
31237 || POINTER_TYPE_P (htype))
31238 {
31239 wtype = TREE_TYPE (wtype);
31240 htype = TREE_TYPE (htype);
31241 }
31242 }
31243 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
31244 return sysv_va_list_type_node;
31245 wtype = ms_va_list_type_node;
31246 gcc_assert (wtype != NULL_TREE);
31247 htype = type;
31248 if (TREE_CODE (wtype) == ARRAY_TYPE)
31249 {
31250 /* If va_list is an array type, the argument may have decayed
31251 to a pointer type, e.g. by being passed to another function.
31252 In that case, unwrap both types so that we can compare the
31253 underlying records. */
31254 if (TREE_CODE (htype) == ARRAY_TYPE
31255 || POINTER_TYPE_P (htype))
31256 {
31257 wtype = TREE_TYPE (wtype);
31258 htype = TREE_TYPE (htype);
31259 }
31260 }
31261 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
31262 return ms_va_list_type_node;
31263 return NULL_TREE;
31264 }
31265 return std_canonical_va_list_type (type);
31266 }
31267
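/* The unwrapping above matters because the SysV x86-64 va_list is a
   one-element array of a record type, so by the time a va_list argument
   reaches a callee it has usually decayed to a pointer to that record;
   comparing TYPE_MAIN_VARIANTs of the unwrapped types lets both the
   decayed and the undecayed forms canonicalize to the same node.  */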
31268 /* Iterate through the target-specific builtin types for va_list.
31269 IDX denotes the iterator, *PTREE is set to the result type of
31270 the va_list builtin, and *PNAME to its internal type.
31271 Returns zero if there is no element for this index, otherwise
31272 IDX should be increased upon the next call.
31273 Note, do not iterate a base builtin's name like __builtin_va_list.
31274 Used from c_common_nodes_and_builtins. */
31275
31276 static int
31277 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
31278 {
31279 if (TARGET_64BIT)
31280 {
31281 switch (idx)
31282 {
31283 default:
31284 break;
31285
31286 case 0:
31287 *ptree = ms_va_list_type_node;
31288 *pname = "__builtin_ms_va_list";
31289 return 1;
31290
31291 case 1:
31292 *ptree = sysv_va_list_type_node;
31293 *pname = "__builtin_sysv_va_list";
31294 return 1;
31295 }
31296 }
31297
31298 return 0;
31299 }
31300
31301 /* Initialize the GCC target structure. */
31302 #undef TARGET_RETURN_IN_MEMORY
31303 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
31304
31305 #undef TARGET_LEGITIMIZE_ADDRESS
31306 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
31307
31308 #undef TARGET_ATTRIBUTE_TABLE
31309 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
31310 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
31311 # undef TARGET_MERGE_DECL_ATTRIBUTES
31312 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
31313 #endif
31314
31315 #undef TARGET_COMP_TYPE_ATTRIBUTES
31316 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
31317
31318 #undef TARGET_INIT_BUILTINS
31319 #define TARGET_INIT_BUILTINS ix86_init_builtins
31320 #undef TARGET_BUILTIN_DECL
31321 #define TARGET_BUILTIN_DECL ix86_builtin_decl
31322 #undef TARGET_EXPAND_BUILTIN
31323 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
31324
31325 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
31326 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
31327 ix86_builtin_vectorized_function
31328
31329 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
31330 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion
31331
31332 #undef TARGET_BUILTIN_RECIPROCAL
31333 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
31334
31335 #undef TARGET_ASM_FUNCTION_EPILOGUE
31336 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
31337
31338 #undef TARGET_ENCODE_SECTION_INFO
31339 #ifndef SUBTARGET_ENCODE_SECTION_INFO
31340 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
31341 #else
31342 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
31343 #endif
31344
31345 #undef TARGET_ASM_OPEN_PAREN
31346 #define TARGET_ASM_OPEN_PAREN ""
31347 #undef TARGET_ASM_CLOSE_PAREN
31348 #define TARGET_ASM_CLOSE_PAREN ""
31349
31350 #undef TARGET_ASM_BYTE_OP
31351 #define TARGET_ASM_BYTE_OP ASM_BYTE
31352
31353 #undef TARGET_ASM_ALIGNED_HI_OP
31354 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
31355 #undef TARGET_ASM_ALIGNED_SI_OP
31356 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
31357 #ifdef ASM_QUAD
31358 #undef TARGET_ASM_ALIGNED_DI_OP
31359 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
31360 #endif
31361
31362 #undef TARGET_ASM_UNALIGNED_HI_OP
31363 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
31364 #undef TARGET_ASM_UNALIGNED_SI_OP
31365 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
31366 #undef TARGET_ASM_UNALIGNED_DI_OP
31367 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
31368
31369 #undef TARGET_PRINT_OPERAND
31370 #define TARGET_PRINT_OPERAND ix86_print_operand
31371 #undef TARGET_PRINT_OPERAND_ADDRESS
31372 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
31373 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
31374 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
31375
31376 #undef TARGET_SCHED_ADJUST_COST
31377 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
31378 #undef TARGET_SCHED_ISSUE_RATE
31379 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
31380 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
31381 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
31382 ia32_multipass_dfa_lookahead
31383
31384 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
31385 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
31386
31387 #ifdef HAVE_AS_TLS
31388 #undef TARGET_HAVE_TLS
31389 #define TARGET_HAVE_TLS true
31390 #endif
31391 #undef TARGET_CANNOT_FORCE_CONST_MEM
31392 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
31393 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
31394 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
31395
31396 #undef TARGET_DELEGITIMIZE_ADDRESS
31397 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
31398
31399 #undef TARGET_MS_BITFIELD_LAYOUT_P
31400 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
31401
31402 #if TARGET_MACHO
31403 #undef TARGET_BINDS_LOCAL_P
31404 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
31405 #endif
31406 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
31407 #undef TARGET_BINDS_LOCAL_P
31408 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
31409 #endif
31410
31411 #undef TARGET_ASM_OUTPUT_MI_THUNK
31412 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
31413 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
31414 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
31415
31416 #undef TARGET_ASM_FILE_START
31417 #define TARGET_ASM_FILE_START x86_file_start
31418
31419 #undef TARGET_DEFAULT_TARGET_FLAGS
31420 #define TARGET_DEFAULT_TARGET_FLAGS \
31421 (TARGET_DEFAULT \
31422 | TARGET_SUBTARGET_DEFAULT \
31423 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT \
31424 | MASK_FUSED_MADD)
31425
31426 #undef TARGET_HANDLE_OPTION
31427 #define TARGET_HANDLE_OPTION ix86_handle_option
31428
31429 #undef TARGET_REGISTER_MOVE_COST
31430 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
31431 #undef TARGET_MEMORY_MOVE_COST
31432 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
31433 #undef TARGET_RTX_COSTS
31434 #define TARGET_RTX_COSTS ix86_rtx_costs
31435 #undef TARGET_ADDRESS_COST
31436 #define TARGET_ADDRESS_COST ix86_address_cost
31437
31438 #undef TARGET_FIXED_CONDITION_CODE_REGS
31439 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
31440 #undef TARGET_CC_MODES_COMPATIBLE
31441 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
31442
31443 #undef TARGET_MACHINE_DEPENDENT_REORG
31444 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
31445
31446 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
31447 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
31448
31449 #undef TARGET_BUILD_BUILTIN_VA_LIST
31450 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
31451
31452 #undef TARGET_ENUM_VA_LIST_P
31453 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
31454
31455 #undef TARGET_FN_ABI_VA_LIST
31456 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
31457
31458 #undef TARGET_CANONICAL_VA_LIST_TYPE
31459 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
31460
31461 #undef TARGET_EXPAND_BUILTIN_VA_START
31462 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
31463
31464 #undef TARGET_MD_ASM_CLOBBERS
31465 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
31466
31467 #undef TARGET_PROMOTE_PROTOTYPES
31468 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
31469 #undef TARGET_STRUCT_VALUE_RTX
31470 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
31471 #undef TARGET_SETUP_INCOMING_VARARGS
31472 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
31473 #undef TARGET_MUST_PASS_IN_STACK
31474 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
31475 #undef TARGET_FUNCTION_ARG_ADVANCE
31476 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
31477 #undef TARGET_FUNCTION_ARG
31478 #define TARGET_FUNCTION_ARG ix86_function_arg
31479 #undef TARGET_PASS_BY_REFERENCE
31480 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
31481 #undef TARGET_INTERNAL_ARG_POINTER
31482 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
31483 #undef TARGET_UPDATE_STACK_BOUNDARY
31484 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
31485 #undef TARGET_GET_DRAP_RTX
31486 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
31487 #undef TARGET_STRICT_ARGUMENT_NAMING
31488 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
31489 #undef TARGET_STATIC_CHAIN
31490 #define TARGET_STATIC_CHAIN ix86_static_chain
31491 #undef TARGET_TRAMPOLINE_INIT
31492 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
31493 #undef TARGET_RETURN_POPS_ARGS
31494 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
31495
31496 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
31497 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
31498
31499 #undef TARGET_SCALAR_MODE_SUPPORTED_P
31500 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
31501
31502 #undef TARGET_VECTOR_MODE_SUPPORTED_P
31503 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
31504
31505 #undef TARGET_C_MODE_FOR_SUFFIX
31506 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
31507
31508 #ifdef HAVE_AS_TLS
31509 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
31510 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
31511 #endif
31512
31513 #ifdef SUBTARGET_INSERT_ATTRIBUTES
31514 #undef TARGET_INSERT_ATTRIBUTES
31515 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
31516 #endif
31517
31518 #undef TARGET_MANGLE_TYPE
31519 #define TARGET_MANGLE_TYPE ix86_mangle_type
31520
31521 #undef TARGET_STACK_PROTECT_FAIL
31522 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
31523
31524 #undef TARGET_FUNCTION_VALUE
31525 #define TARGET_FUNCTION_VALUE ix86_function_value
31526
31527 #undef TARGET_FUNCTION_VALUE_REGNO_P
31528 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
31529
31530 #undef TARGET_SECONDARY_RELOAD
31531 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
31532
31533 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
31534 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
31535 ix86_builtin_vectorization_cost
31536 #undef TARGET_VECTORIZE_BUILTIN_VEC_PERM
31537 #define TARGET_VECTORIZE_BUILTIN_VEC_PERM \
31538 ix86_vectorize_builtin_vec_perm
31539 #undef TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK
31540 #define TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK \
31541 ix86_vectorize_builtin_vec_perm_ok
31542
31543 #undef TARGET_SET_CURRENT_FUNCTION
31544 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
31545
31546 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
31547 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
31548
31549 #undef TARGET_OPTION_SAVE
31550 #define TARGET_OPTION_SAVE ix86_function_specific_save
31551
31552 #undef TARGET_OPTION_RESTORE
31553 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
31554
31555 #undef TARGET_OPTION_PRINT
31556 #define TARGET_OPTION_PRINT ix86_function_specific_print
31557
31558 #undef TARGET_CAN_INLINE_P
31559 #define TARGET_CAN_INLINE_P ix86_can_inline_p
31560
31561 #undef TARGET_EXPAND_TO_RTL_HOOK
31562 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
31563
31564 #undef TARGET_LEGITIMATE_ADDRESS_P
31565 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
31566
31567 #undef TARGET_IRA_COVER_CLASSES
31568 #define TARGET_IRA_COVER_CLASSES i386_ira_cover_classes
31569
31570 #undef TARGET_FRAME_POINTER_REQUIRED
31571 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
31572
31573 #undef TARGET_CAN_ELIMINATE
31574 #define TARGET_CAN_ELIMINATE ix86_can_eliminate
31575
31576 #undef TARGET_ASM_CODE_END
31577 #define TARGET_ASM_CODE_END ix86_code_end
31578
31579 struct gcc_target targetm = TARGET_INITIALIZER;
31580 \f
31581 #include "gt-i386.h"