1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
4 Free Software Foundation, Inc.
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
11 any later version.
12
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "tm_p.h"
29 #include "regs.h"
30 #include "hard-reg-set.h"
31 #include "real.h"
32 #include "insn-config.h"
33 #include "conditions.h"
34 #include "output.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
37 #include "flags.h"
38 #include "c-common.h"
39 #include "except.h"
40 #include "function.h"
41 #include "recog.h"
42 #include "expr.h"
43 #include "optabs.h"
44 #include "toplev.h"
45 #include "basic-block.h"
46 #include "ggc.h"
47 #include "target.h"
48 #include "target-def.h"
49 #include "langhooks.h"
50 #include "cgraph.h"
51 #include "gimple.h"
52 #include "dwarf2.h"
53 #include "df.h"
54 #include "tm-constrs.h"
55 #include "params.h"
56 #include "cselib.h"
57
58 static int x86_builtin_vectorization_cost (bool);
59 static rtx legitimize_dllimport_symbol (rtx, bool);
60
61 #ifndef CHECK_STACK_LIMIT
62 #define CHECK_STACK_LIMIT (-1)
63 #endif
64
    65 /* Return the index of the given mode in the multiply and division cost tables.  */
66 #define MODE_INDEX(mode) \
67 ((mode) == QImode ? 0 \
68 : (mode) == HImode ? 1 \
69 : (mode) == SImode ? 2 \
70 : (mode) == DImode ? 3 \
71 : 4)
72
73 /* Processor costs (relative to an add) */
74 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
75 #define COSTS_N_BYTES(N) ((N) * 2)
76
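/* Illustrative only: with COSTS_N_INSNS (N) expanding to (N) * 4 (as the
   comment above assumes) and an add being 2 bytes long, the two cost scales
   coincide for an add.  The *_SKETCH name below is hypothetical and not part
   of the port; it is kept under #if 0 and only shows the arithmetic.  */
#if 0
#define COSTS_N_INSNS_SKETCH(N) ((N) * 4)
/* COSTS_N_BYTES (2) == 4 == COSTS_N_INSNS_SKETCH (1), so when tuning for
   size an instruction's cost is simply its encoded length in bytes, on the
   same numeric scale as the cycle-based costs used elsewhere.  */
#endif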
77 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
78
79 const
80 struct processor_costs ix86_size_cost = {/* costs for tuning for size */
81 COSTS_N_BYTES (2), /* cost of an add instruction */
82 COSTS_N_BYTES (3), /* cost of a lea instruction */
83 COSTS_N_BYTES (2), /* variable shift costs */
84 COSTS_N_BYTES (3), /* constant shift costs */
85 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
86 COSTS_N_BYTES (3), /* HI */
87 COSTS_N_BYTES (3), /* SI */
88 COSTS_N_BYTES (3), /* DI */
89 COSTS_N_BYTES (5)}, /* other */
90 0, /* cost of multiply per each bit set */
91 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
92 COSTS_N_BYTES (3), /* HI */
93 COSTS_N_BYTES (3), /* SI */
94 COSTS_N_BYTES (3), /* DI */
95 COSTS_N_BYTES (5)}, /* other */
96 COSTS_N_BYTES (3), /* cost of movsx */
97 COSTS_N_BYTES (3), /* cost of movzx */
98 0, /* "large" insn */
99 2, /* MOVE_RATIO */
100 2, /* cost for loading QImode using movzbl */
101 {2, 2, 2}, /* cost of loading integer registers
102 in QImode, HImode and SImode.
103 Relative to reg-reg move (2). */
104 {2, 2, 2}, /* cost of storing integer registers */
105 2, /* cost of reg,reg fld/fst */
106 {2, 2, 2}, /* cost of loading fp registers
107 in SFmode, DFmode and XFmode */
108 {2, 2, 2}, /* cost of storing fp registers
109 in SFmode, DFmode and XFmode */
110 3, /* cost of moving MMX register */
111 {3, 3}, /* cost of loading MMX registers
112 in SImode and DImode */
113 {3, 3}, /* cost of storing MMX registers
114 in SImode and DImode */
115 3, /* cost of moving SSE register */
116 {3, 3, 3}, /* cost of loading SSE registers
117 in SImode, DImode and TImode */
118 {3, 3, 3}, /* cost of storing SSE registers
119 in SImode, DImode and TImode */
120 3, /* MMX or SSE register to integer */
121 0, /* size of l1 cache */
122 0, /* size of l2 cache */
123 0, /* size of prefetch block */
124 0, /* number of parallel prefetches */
125 2, /* Branch cost */
126 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
127 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
128 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
129 COSTS_N_BYTES (2), /* cost of FABS instruction. */
130 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
131 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
132 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
133 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
134 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
135 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
136 1, /* scalar_stmt_cost. */
137 1, /* scalar load_cost. */
138 1, /* scalar_store_cost. */
139 1, /* vec_stmt_cost. */
140 1, /* vec_to_scalar_cost. */
141 1, /* scalar_to_vec_cost. */
142 1, /* vec_align_load_cost. */
143 1, /* vec_unalign_load_cost. */
144 1, /* vec_store_cost. */
145 1, /* cond_taken_branch_cost. */
146 1, /* cond_not_taken_branch_cost. */
147 };
148
149 /* Processor costs (relative to an add) */
150 static const
151 struct processor_costs i386_cost = { /* 386 specific costs */
152 COSTS_N_INSNS (1), /* cost of an add instruction */
153 COSTS_N_INSNS (1), /* cost of a lea instruction */
154 COSTS_N_INSNS (3), /* variable shift costs */
155 COSTS_N_INSNS (2), /* constant shift costs */
156 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
157 COSTS_N_INSNS (6), /* HI */
158 COSTS_N_INSNS (6), /* SI */
159 COSTS_N_INSNS (6), /* DI */
160 COSTS_N_INSNS (6)}, /* other */
161 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
162 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
163 COSTS_N_INSNS (23), /* HI */
164 COSTS_N_INSNS (23), /* SI */
165 COSTS_N_INSNS (23), /* DI */
166 COSTS_N_INSNS (23)}, /* other */
167 COSTS_N_INSNS (3), /* cost of movsx */
168 COSTS_N_INSNS (2), /* cost of movzx */
169 15, /* "large" insn */
170 3, /* MOVE_RATIO */
171 4, /* cost for loading QImode using movzbl */
172 {2, 4, 2}, /* cost of loading integer registers
173 in QImode, HImode and SImode.
174 Relative to reg-reg move (2). */
175 {2, 4, 2}, /* cost of storing integer registers */
176 2, /* cost of reg,reg fld/fst */
177 {8, 8, 8}, /* cost of loading fp registers
178 in SFmode, DFmode and XFmode */
179 {8, 8, 8}, /* cost of storing fp registers
180 in SFmode, DFmode and XFmode */
181 2, /* cost of moving MMX register */
182 {4, 8}, /* cost of loading MMX registers
183 in SImode and DImode */
184 {4, 8}, /* cost of storing MMX registers
185 in SImode and DImode */
186 2, /* cost of moving SSE register */
187 {4, 8, 16}, /* cost of loading SSE registers
188 in SImode, DImode and TImode */
189 {4, 8, 16}, /* cost of storing SSE registers
190 in SImode, DImode and TImode */
191 3, /* MMX or SSE register to integer */
192 0, /* size of l1 cache */
193 0, /* size of l2 cache */
194 0, /* size of prefetch block */
195 0, /* number of parallel prefetches */
196 1, /* Branch cost */
197 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
198 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
199 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
200 COSTS_N_INSNS (22), /* cost of FABS instruction. */
201 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
202 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
203 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
204 DUMMY_STRINGOP_ALGS},
205 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
206 DUMMY_STRINGOP_ALGS},
207 1, /* scalar_stmt_cost. */
208 1, /* scalar load_cost. */
209 1, /* scalar_store_cost. */
210 1, /* vec_stmt_cost. */
211 1, /* vec_to_scalar_cost. */
212 1, /* scalar_to_vec_cost. */
213 1, /* vec_align_load_cost. */
214 2, /* vec_unalign_load_cost. */
215 1, /* vec_store_cost. */
216 3, /* cond_taken_branch_cost. */
217 1, /* cond_not_taken_branch_cost. */
218 };
219
220 static const
221 struct processor_costs i486_cost = { /* 486 specific costs */
222 COSTS_N_INSNS (1), /* cost of an add instruction */
223 COSTS_N_INSNS (1), /* cost of a lea instruction */
224 COSTS_N_INSNS (3), /* variable shift costs */
225 COSTS_N_INSNS (2), /* constant shift costs */
226 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
227 COSTS_N_INSNS (12), /* HI */
228 COSTS_N_INSNS (12), /* SI */
229 COSTS_N_INSNS (12), /* DI */
230 COSTS_N_INSNS (12)}, /* other */
231 1, /* cost of multiply per each bit set */
232 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
233 COSTS_N_INSNS (40), /* HI */
234 COSTS_N_INSNS (40), /* SI */
235 COSTS_N_INSNS (40), /* DI */
236 COSTS_N_INSNS (40)}, /* other */
237 COSTS_N_INSNS (3), /* cost of movsx */
238 COSTS_N_INSNS (2), /* cost of movzx */
239 15, /* "large" insn */
240 3, /* MOVE_RATIO */
241 4, /* cost for loading QImode using movzbl */
242 {2, 4, 2}, /* cost of loading integer registers
243 in QImode, HImode and SImode.
244 Relative to reg-reg move (2). */
245 {2, 4, 2}, /* cost of storing integer registers */
246 2, /* cost of reg,reg fld/fst */
247 {8, 8, 8}, /* cost of loading fp registers
248 in SFmode, DFmode and XFmode */
249 {8, 8, 8}, /* cost of storing fp registers
250 in SFmode, DFmode and XFmode */
251 2, /* cost of moving MMX register */
252 {4, 8}, /* cost of loading MMX registers
253 in SImode and DImode */
254 {4, 8}, /* cost of storing MMX registers
255 in SImode and DImode */
256 2, /* cost of moving SSE register */
257 {4, 8, 16}, /* cost of loading SSE registers
258 in SImode, DImode and TImode */
259 {4, 8, 16}, /* cost of storing SSE registers
260 in SImode, DImode and TImode */
261 3, /* MMX or SSE register to integer */
262 4, /* size of l1 cache. 486 has 8kB cache
263 shared for code and data, so 4kB is
264 not really precise. */
265 4, /* size of l2 cache */
266 0, /* size of prefetch block */
267 0, /* number of parallel prefetches */
268 1, /* Branch cost */
269 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
270 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
271 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
272 COSTS_N_INSNS (3), /* cost of FABS instruction. */
273 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
274 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
275 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
276 DUMMY_STRINGOP_ALGS},
277 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
278 DUMMY_STRINGOP_ALGS},
279 1, /* scalar_stmt_cost. */
280 1, /* scalar load_cost. */
281 1, /* scalar_store_cost. */
282 1, /* vec_stmt_cost. */
283 1, /* vec_to_scalar_cost. */
284 1, /* scalar_to_vec_cost. */
285 1, /* vec_align_load_cost. */
286 2, /* vec_unalign_load_cost. */
287 1, /* vec_store_cost. */
288 3, /* cond_taken_branch_cost. */
289 1, /* cond_not_taken_branch_cost. */
290 };
291
292 static const
293 struct processor_costs pentium_cost = {
294 COSTS_N_INSNS (1), /* cost of an add instruction */
295 COSTS_N_INSNS (1), /* cost of a lea instruction */
296 COSTS_N_INSNS (4), /* variable shift costs */
297 COSTS_N_INSNS (1), /* constant shift costs */
298 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
299 COSTS_N_INSNS (11), /* HI */
300 COSTS_N_INSNS (11), /* SI */
301 COSTS_N_INSNS (11), /* DI */
302 COSTS_N_INSNS (11)}, /* other */
303 0, /* cost of multiply per each bit set */
304 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
305 COSTS_N_INSNS (25), /* HI */
306 COSTS_N_INSNS (25), /* SI */
307 COSTS_N_INSNS (25), /* DI */
308 COSTS_N_INSNS (25)}, /* other */
309 COSTS_N_INSNS (3), /* cost of movsx */
310 COSTS_N_INSNS (2), /* cost of movzx */
311 8, /* "large" insn */
312 6, /* MOVE_RATIO */
313 6, /* cost for loading QImode using movzbl */
314 {2, 4, 2}, /* cost of loading integer registers
315 in QImode, HImode and SImode.
316 Relative to reg-reg move (2). */
317 {2, 4, 2}, /* cost of storing integer registers */
318 2, /* cost of reg,reg fld/fst */
319 {2, 2, 6}, /* cost of loading fp registers
320 in SFmode, DFmode and XFmode */
321 {4, 4, 6}, /* cost of storing fp registers
322 in SFmode, DFmode and XFmode */
323 8, /* cost of moving MMX register */
324 {8, 8}, /* cost of loading MMX registers
325 in SImode and DImode */
326 {8, 8}, /* cost of storing MMX registers
327 in SImode and DImode */
328 2, /* cost of moving SSE register */
329 {4, 8, 16}, /* cost of loading SSE registers
330 in SImode, DImode and TImode */
331 {4, 8, 16}, /* cost of storing SSE registers
332 in SImode, DImode and TImode */
333 3, /* MMX or SSE register to integer */
334 8, /* size of l1 cache. */
335 8, /* size of l2 cache */
336 0, /* size of prefetch block */
337 0, /* number of parallel prefetches */
338 2, /* Branch cost */
339 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
340 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
341 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
342 COSTS_N_INSNS (1), /* cost of FABS instruction. */
343 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
344 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
345 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
346 DUMMY_STRINGOP_ALGS},
347 {{libcall, {{-1, rep_prefix_4_byte}}},
348 DUMMY_STRINGOP_ALGS},
349 1, /* scalar_stmt_cost. */
350 1, /* scalar load_cost. */
351 1, /* scalar_store_cost. */
352 1, /* vec_stmt_cost. */
353 1, /* vec_to_scalar_cost. */
354 1, /* scalar_to_vec_cost. */
355 1, /* vec_align_load_cost. */
356 2, /* vec_unalign_load_cost. */
357 1, /* vec_store_cost. */
358 3, /* cond_taken_branch_cost. */
359 1, /* cond_not_taken_branch_cost. */
360 };
361
362 static const
363 struct processor_costs pentiumpro_cost = {
364 COSTS_N_INSNS (1), /* cost of an add instruction */
365 COSTS_N_INSNS (1), /* cost of a lea instruction */
366 COSTS_N_INSNS (1), /* variable shift costs */
367 COSTS_N_INSNS (1), /* constant shift costs */
368 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
369 COSTS_N_INSNS (4), /* HI */
370 COSTS_N_INSNS (4), /* SI */
371 COSTS_N_INSNS (4), /* DI */
372 COSTS_N_INSNS (4)}, /* other */
373 0, /* cost of multiply per each bit set */
374 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
375 COSTS_N_INSNS (17), /* HI */
376 COSTS_N_INSNS (17), /* SI */
377 COSTS_N_INSNS (17), /* DI */
378 COSTS_N_INSNS (17)}, /* other */
379 COSTS_N_INSNS (1), /* cost of movsx */
380 COSTS_N_INSNS (1), /* cost of movzx */
381 8, /* "large" insn */
382 6, /* MOVE_RATIO */
383 2, /* cost for loading QImode using movzbl */
384 {4, 4, 4}, /* cost of loading integer registers
385 in QImode, HImode and SImode.
386 Relative to reg-reg move (2). */
387 {2, 2, 2}, /* cost of storing integer registers */
388 2, /* cost of reg,reg fld/fst */
389 {2, 2, 6}, /* cost of loading fp registers
390 in SFmode, DFmode and XFmode */
391 {4, 4, 6}, /* cost of storing fp registers
392 in SFmode, DFmode and XFmode */
393 2, /* cost of moving MMX register */
394 {2, 2}, /* cost of loading MMX registers
395 in SImode and DImode */
396 {2, 2}, /* cost of storing MMX registers
397 in SImode and DImode */
398 2, /* cost of moving SSE register */
399 {2, 2, 8}, /* cost of loading SSE registers
400 in SImode, DImode and TImode */
401 {2, 2, 8}, /* cost of storing SSE registers
402 in SImode, DImode and TImode */
403 3, /* MMX or SSE register to integer */
404 8, /* size of l1 cache. */
405 256, /* size of l2 cache */
406 32, /* size of prefetch block */
407 6, /* number of parallel prefetches */
408 2, /* Branch cost */
409 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
410 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
411 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
412 COSTS_N_INSNS (2), /* cost of FABS instruction. */
413 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
414 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
   415   /* PentiumPro has optimized rep instructions for blocks aligned to 8 bytes (we ensure
   416      the alignment).  For small blocks an inline loop is still a noticeable win; for bigger
   417      blocks either rep movsl or rep movsb is the way to go.  Rep movsb apparently has a
   418      more expensive startup time in the CPU, but past 4K the difference is down in the noise.
   419      See the selection sketch after this cost table for how these {max, alg} entries are read.  */
420 {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
421 {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
422 DUMMY_STRINGOP_ALGS},
423 {{rep_prefix_4_byte, {{1024, unrolled_loop},
424 {8192, rep_prefix_4_byte}, {-1, libcall}}},
425 DUMMY_STRINGOP_ALGS},
426 1, /* scalar_stmt_cost. */
427 1, /* scalar load_cost. */
428 1, /* scalar_store_cost. */
429 1, /* vec_stmt_cost. */
430 1, /* vec_to_scalar_cost. */
431 1, /* scalar_to_vec_cost. */
432 1, /* vec_align_load_cost. */
433 2, /* vec_unalign_load_cost. */
434 1, /* vec_store_cost. */
435 3, /* cond_taken_branch_cost. */
436 1, /* cond_not_taken_branch_cost. */
437 };
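/* A minimal sketch (illustrative only, kept under #if 0) of how the stringop
   tables above are meant to be read, assuming the stringop_algs layout from
   i386.h: each {max, alg} entry covers block sizes up to MAX bytes, a MAX of
   -1 is a catch-all, and the leading algorithm is used when the size is not
   known at compile time.  The helper name pick_stringop_alg_sketch is
   hypothetical; the real selection logic lives in decide_alg elsewhere in
   this file.  */
#if 0
static enum stringop_alg
pick_stringop_alg_sketch (const struct stringop_algs *algs,
                          unsigned HOST_WIDE_INT size, bool size_known)
{
  unsigned int i;

  if (!size_known)
    return algs->unknown_size;
  for (i = 0; i < MAX_STRINGOP_ALGS; i++)
    if (algs->size[i].max == -1
        || size <= (unsigned HOST_WIDE_INT) algs->size[i].max)
      return algs->size[i].alg;
  /* Fall back to a library call if no entry covered the size.  */
  return libcall;
}
#endif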
438
439 static const
440 struct processor_costs geode_cost = {
441 COSTS_N_INSNS (1), /* cost of an add instruction */
442 COSTS_N_INSNS (1), /* cost of a lea instruction */
443 COSTS_N_INSNS (2), /* variable shift costs */
444 COSTS_N_INSNS (1), /* constant shift costs */
445 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
446 COSTS_N_INSNS (4), /* HI */
447 COSTS_N_INSNS (7), /* SI */
448 COSTS_N_INSNS (7), /* DI */
449 COSTS_N_INSNS (7)}, /* other */
450 0, /* cost of multiply per each bit set */
451 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
452 COSTS_N_INSNS (23), /* HI */
453 COSTS_N_INSNS (39), /* SI */
454 COSTS_N_INSNS (39), /* DI */
455 COSTS_N_INSNS (39)}, /* other */
456 COSTS_N_INSNS (1), /* cost of movsx */
457 COSTS_N_INSNS (1), /* cost of movzx */
458 8, /* "large" insn */
459 4, /* MOVE_RATIO */
460 1, /* cost for loading QImode using movzbl */
461 {1, 1, 1}, /* cost of loading integer registers
462 in QImode, HImode and SImode.
463 Relative to reg-reg move (2). */
464 {1, 1, 1}, /* cost of storing integer registers */
465 1, /* cost of reg,reg fld/fst */
466 {1, 1, 1}, /* cost of loading fp registers
467 in SFmode, DFmode and XFmode */
468 {4, 6, 6}, /* cost of storing fp registers
469 in SFmode, DFmode and XFmode */
470
471 1, /* cost of moving MMX register */
472 {1, 1}, /* cost of loading MMX registers
473 in SImode and DImode */
474 {1, 1}, /* cost of storing MMX registers
475 in SImode and DImode */
476 1, /* cost of moving SSE register */
477 {1, 1, 1}, /* cost of loading SSE registers
478 in SImode, DImode and TImode */
479 {1, 1, 1}, /* cost of storing SSE registers
480 in SImode, DImode and TImode */
481 1, /* MMX or SSE register to integer */
482 64, /* size of l1 cache. */
483 128, /* size of l2 cache. */
484 32, /* size of prefetch block */
485 1, /* number of parallel prefetches */
486 1, /* Branch cost */
487 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
488 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
489 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
490 COSTS_N_INSNS (1), /* cost of FABS instruction. */
491 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
492 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
493 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
494 DUMMY_STRINGOP_ALGS},
495 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
496 DUMMY_STRINGOP_ALGS},
497 1, /* scalar_stmt_cost. */
498 1, /* scalar load_cost. */
499 1, /* scalar_store_cost. */
500 1, /* vec_stmt_cost. */
501 1, /* vec_to_scalar_cost. */
502 1, /* scalar_to_vec_cost. */
503 1, /* vec_align_load_cost. */
504 2, /* vec_unalign_load_cost. */
505 1, /* vec_store_cost. */
506 3, /* cond_taken_branch_cost. */
507 1, /* cond_not_taken_branch_cost. */
508 };
509
510 static const
511 struct processor_costs k6_cost = {
512 COSTS_N_INSNS (1), /* cost of an add instruction */
513 COSTS_N_INSNS (2), /* cost of a lea instruction */
514 COSTS_N_INSNS (1), /* variable shift costs */
515 COSTS_N_INSNS (1), /* constant shift costs */
516 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
517 COSTS_N_INSNS (3), /* HI */
518 COSTS_N_INSNS (3), /* SI */
519 COSTS_N_INSNS (3), /* DI */
520 COSTS_N_INSNS (3)}, /* other */
521 0, /* cost of multiply per each bit set */
522 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
523 COSTS_N_INSNS (18), /* HI */
524 COSTS_N_INSNS (18), /* SI */
525 COSTS_N_INSNS (18), /* DI */
526 COSTS_N_INSNS (18)}, /* other */
527 COSTS_N_INSNS (2), /* cost of movsx */
528 COSTS_N_INSNS (2), /* cost of movzx */
529 8, /* "large" insn */
530 4, /* MOVE_RATIO */
531 3, /* cost for loading QImode using movzbl */
532 {4, 5, 4}, /* cost of loading integer registers
533 in QImode, HImode and SImode.
534 Relative to reg-reg move (2). */
535 {2, 3, 2}, /* cost of storing integer registers */
536 4, /* cost of reg,reg fld/fst */
537 {6, 6, 6}, /* cost of loading fp registers
538 in SFmode, DFmode and XFmode */
539 {4, 4, 4}, /* cost of storing fp registers
540 in SFmode, DFmode and XFmode */
541 2, /* cost of moving MMX register */
542 {2, 2}, /* cost of loading MMX registers
543 in SImode and DImode */
544 {2, 2}, /* cost of storing MMX registers
545 in SImode and DImode */
546 2, /* cost of moving SSE register */
547 {2, 2, 8}, /* cost of loading SSE registers
548 in SImode, DImode and TImode */
549 {2, 2, 8}, /* cost of storing SSE registers
550 in SImode, DImode and TImode */
551 6, /* MMX or SSE register to integer */
552 32, /* size of l1 cache. */
553 32, /* size of l2 cache. Some models
554 have integrated l2 cache, but
555 optimizing for k6 is not important
556 enough to worry about that. */
557 32, /* size of prefetch block */
558 1, /* number of parallel prefetches */
559 1, /* Branch cost */
560 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
561 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
562 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
563 COSTS_N_INSNS (2), /* cost of FABS instruction. */
564 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
565 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
566 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
567 DUMMY_STRINGOP_ALGS},
568 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
569 DUMMY_STRINGOP_ALGS},
570 1, /* scalar_stmt_cost. */
571 1, /* scalar load_cost. */
572 1, /* scalar_store_cost. */
573 1, /* vec_stmt_cost. */
574 1, /* vec_to_scalar_cost. */
575 1, /* scalar_to_vec_cost. */
576 1, /* vec_align_load_cost. */
577 2, /* vec_unalign_load_cost. */
578 1, /* vec_store_cost. */
579 3, /* cond_taken_branch_cost. */
580 1, /* cond_not_taken_branch_cost. */
581 };
582
583 static const
584 struct processor_costs athlon_cost = {
585 COSTS_N_INSNS (1), /* cost of an add instruction */
586 COSTS_N_INSNS (2), /* cost of a lea instruction */
587 COSTS_N_INSNS (1), /* variable shift costs */
588 COSTS_N_INSNS (1), /* constant shift costs */
589 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
590 COSTS_N_INSNS (5), /* HI */
591 COSTS_N_INSNS (5), /* SI */
592 COSTS_N_INSNS (5), /* DI */
593 COSTS_N_INSNS (5)}, /* other */
594 0, /* cost of multiply per each bit set */
595 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
596 COSTS_N_INSNS (26), /* HI */
597 COSTS_N_INSNS (42), /* SI */
598 COSTS_N_INSNS (74), /* DI */
599 COSTS_N_INSNS (74)}, /* other */
600 COSTS_N_INSNS (1), /* cost of movsx */
601 COSTS_N_INSNS (1), /* cost of movzx */
602 8, /* "large" insn */
603 9, /* MOVE_RATIO */
604 4, /* cost for loading QImode using movzbl */
605 {3, 4, 3}, /* cost of loading integer registers
606 in QImode, HImode and SImode.
607 Relative to reg-reg move (2). */
608 {3, 4, 3}, /* cost of storing integer registers */
609 4, /* cost of reg,reg fld/fst */
610 {4, 4, 12}, /* cost of loading fp registers
611 in SFmode, DFmode and XFmode */
612 {6, 6, 8}, /* cost of storing fp registers
613 in SFmode, DFmode and XFmode */
614 2, /* cost of moving MMX register */
615 {4, 4}, /* cost of loading MMX registers
616 in SImode and DImode */
617 {4, 4}, /* cost of storing MMX registers
618 in SImode and DImode */
619 2, /* cost of moving SSE register */
620 {4, 4, 6}, /* cost of loading SSE registers
621 in SImode, DImode and TImode */
622 {4, 4, 5}, /* cost of storing SSE registers
623 in SImode, DImode and TImode */
624 5, /* MMX or SSE register to integer */
625 64, /* size of l1 cache. */
626 256, /* size of l2 cache. */
627 64, /* size of prefetch block */
628 6, /* number of parallel prefetches */
629 5, /* Branch cost */
630 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
631 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
632 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
633 COSTS_N_INSNS (2), /* cost of FABS instruction. */
634 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
635 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
   636   /* For some reason, Athlon handles the REP prefix better (relative to loops)
   637      than K8 does.  Alignment becomes important after 8 bytes for memcpy and
   638      128 bytes for memset.  */
639 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
640 DUMMY_STRINGOP_ALGS},
641 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
642 DUMMY_STRINGOP_ALGS},
643 1, /* scalar_stmt_cost. */
644 1, /* scalar load_cost. */
645 1, /* scalar_store_cost. */
646 1, /* vec_stmt_cost. */
647 1, /* vec_to_scalar_cost. */
648 1, /* scalar_to_vec_cost. */
649 1, /* vec_align_load_cost. */
650 2, /* vec_unalign_load_cost. */
651 1, /* vec_store_cost. */
652 3, /* cond_taken_branch_cost. */
653 1, /* cond_not_taken_branch_cost. */
654 };
655
656 static const
657 struct processor_costs k8_cost = {
658 COSTS_N_INSNS (1), /* cost of an add instruction */
659 COSTS_N_INSNS (2), /* cost of a lea instruction */
660 COSTS_N_INSNS (1), /* variable shift costs */
661 COSTS_N_INSNS (1), /* constant shift costs */
662 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
663 COSTS_N_INSNS (4), /* HI */
664 COSTS_N_INSNS (3), /* SI */
665 COSTS_N_INSNS (4), /* DI */
666 COSTS_N_INSNS (5)}, /* other */
667 0, /* cost of multiply per each bit set */
668 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
669 COSTS_N_INSNS (26), /* HI */
670 COSTS_N_INSNS (42), /* SI */
671 COSTS_N_INSNS (74), /* DI */
672 COSTS_N_INSNS (74)}, /* other */
673 COSTS_N_INSNS (1), /* cost of movsx */
674 COSTS_N_INSNS (1), /* cost of movzx */
675 8, /* "large" insn */
676 9, /* MOVE_RATIO */
677 4, /* cost for loading QImode using movzbl */
678 {3, 4, 3}, /* cost of loading integer registers
679 in QImode, HImode and SImode.
680 Relative to reg-reg move (2). */
681 {3, 4, 3}, /* cost of storing integer registers */
682 4, /* cost of reg,reg fld/fst */
683 {4, 4, 12}, /* cost of loading fp registers
684 in SFmode, DFmode and XFmode */
685 {6, 6, 8}, /* cost of storing fp registers
686 in SFmode, DFmode and XFmode */
687 2, /* cost of moving MMX register */
688 {3, 3}, /* cost of loading MMX registers
689 in SImode and DImode */
690 {4, 4}, /* cost of storing MMX registers
691 in SImode and DImode */
692 2, /* cost of moving SSE register */
693 {4, 3, 6}, /* cost of loading SSE registers
694 in SImode, DImode and TImode */
695 {4, 4, 5}, /* cost of storing SSE registers
696 in SImode, DImode and TImode */
697 5, /* MMX or SSE register to integer */
698 64, /* size of l1 cache. */
699 512, /* size of l2 cache. */
700 64, /* size of prefetch block */
   701   /* New AMD processors never drop prefetches; if they cannot be performed
   702      immediately, they are queued.  We set the number of simultaneous prefetches
   703      to a large constant to reflect this (it is probably not a good idea to leave
   704      the number of prefetches completely unlimited, as their execution also takes
   705      some time).  */
706 100, /* number of parallel prefetches */
707 3, /* Branch cost */
708 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
709 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
710 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
711 COSTS_N_INSNS (2), /* cost of FABS instruction. */
712 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
713 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
   714   /* K8 has an optimized REP instruction for medium-sized blocks, but for very small
   715      blocks it is better to use a loop.  For large blocks, a libcall can do
   716      nontemporal accesses and beat inline expansion considerably.  */
717 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
718 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
719 {{libcall, {{8, loop}, {24, unrolled_loop},
720 {2048, rep_prefix_4_byte}, {-1, libcall}}},
721 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
722 4, /* scalar_stmt_cost. */
723 2, /* scalar load_cost. */
724 2, /* scalar_store_cost. */
725 5, /* vec_stmt_cost. */
726 0, /* vec_to_scalar_cost. */
727 2, /* scalar_to_vec_cost. */
728 2, /* vec_align_load_cost. */
729 3, /* vec_unalign_load_cost. */
730 3, /* vec_store_cost. */
731 3, /* cond_taken_branch_cost. */
732 2, /* cond_not_taken_branch_cost. */
733 };
734
735 struct processor_costs amdfam10_cost = {
736 COSTS_N_INSNS (1), /* cost of an add instruction */
737 COSTS_N_INSNS (2), /* cost of a lea instruction */
738 COSTS_N_INSNS (1), /* variable shift costs */
739 COSTS_N_INSNS (1), /* constant shift costs */
740 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
741 COSTS_N_INSNS (4), /* HI */
742 COSTS_N_INSNS (3), /* SI */
743 COSTS_N_INSNS (4), /* DI */
744 COSTS_N_INSNS (5)}, /* other */
745 0, /* cost of multiply per each bit set */
746 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
747 COSTS_N_INSNS (35), /* HI */
748 COSTS_N_INSNS (51), /* SI */
749 COSTS_N_INSNS (83), /* DI */
750 COSTS_N_INSNS (83)}, /* other */
751 COSTS_N_INSNS (1), /* cost of movsx */
752 COSTS_N_INSNS (1), /* cost of movzx */
753 8, /* "large" insn */
754 9, /* MOVE_RATIO */
755 4, /* cost for loading QImode using movzbl */
756 {3, 4, 3}, /* cost of loading integer registers
757 in QImode, HImode and SImode.
758 Relative to reg-reg move (2). */
759 {3, 4, 3}, /* cost of storing integer registers */
760 4, /* cost of reg,reg fld/fst */
761 {4, 4, 12}, /* cost of loading fp registers
762 in SFmode, DFmode and XFmode */
763 {6, 6, 8}, /* cost of storing fp registers
764 in SFmode, DFmode and XFmode */
765 2, /* cost of moving MMX register */
766 {3, 3}, /* cost of loading MMX registers
767 in SImode and DImode */
768 {4, 4}, /* cost of storing MMX registers
769 in SImode and DImode */
770 2, /* cost of moving SSE register */
771 {4, 4, 3}, /* cost of loading SSE registers
772 in SImode, DImode and TImode */
773 {4, 4, 5}, /* cost of storing SSE registers
774 in SImode, DImode and TImode */
775 3, /* MMX or SSE register to integer */
   776   /* On K8:
   777        MOVD reg64, xmmreg  Double  FSTORE 4
   778        MOVD reg32, xmmreg  Double  FSTORE 4
   779      On AMDFAM10:
   780        MOVD reg64, xmmreg  Double  FADD 3   1/1  1/1
   781        MOVD reg32, xmmreg  Double  FADD 3   1/1  1/1  */
784 64, /* size of l1 cache. */
785 512, /* size of l2 cache. */
786 64, /* size of prefetch block */
   787   /* New AMD processors never drop prefetches; if they cannot be performed
   788      immediately, they are queued.  We set the number of simultaneous prefetches
   789      to a large constant to reflect this (it is probably not a good idea to leave
   790      the number of prefetches completely unlimited, as their execution also takes
   791      some time).  */
792 100, /* number of parallel prefetches */
793 2, /* Branch cost */
794 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
795 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
796 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
797 COSTS_N_INSNS (2), /* cost of FABS instruction. */
798 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
799 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
800
   801   /* AMDFAM10 has an optimized REP instruction for medium-sized blocks, but for
   802      very small blocks it is better to use a loop.  For large blocks, a libcall can
   803      do nontemporal accesses and beat inline expansion considerably.  */
804 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
805 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
806 {{libcall, {{8, loop}, {24, unrolled_loop},
807 {2048, rep_prefix_4_byte}, {-1, libcall}}},
808 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
809 4, /* scalar_stmt_cost. */
810 2, /* scalar load_cost. */
811 2, /* scalar_store_cost. */
812 6, /* vec_stmt_cost. */
813 0, /* vec_to_scalar_cost. */
814 2, /* scalar_to_vec_cost. */
815 2, /* vec_align_load_cost. */
816 2, /* vec_unalign_load_cost. */
817 2, /* vec_store_cost. */
818 2, /* cond_taken_branch_cost. */
819 1, /* cond_not_taken_branch_cost. */
820 };
821
822 static const
823 struct processor_costs pentium4_cost = {
824 COSTS_N_INSNS (1), /* cost of an add instruction */
825 COSTS_N_INSNS (3), /* cost of a lea instruction */
826 COSTS_N_INSNS (4), /* variable shift costs */
827 COSTS_N_INSNS (4), /* constant shift costs */
828 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
829 COSTS_N_INSNS (15), /* HI */
830 COSTS_N_INSNS (15), /* SI */
831 COSTS_N_INSNS (15), /* DI */
832 COSTS_N_INSNS (15)}, /* other */
833 0, /* cost of multiply per each bit set */
834 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
835 COSTS_N_INSNS (56), /* HI */
836 COSTS_N_INSNS (56), /* SI */
837 COSTS_N_INSNS (56), /* DI */
838 COSTS_N_INSNS (56)}, /* other */
839 COSTS_N_INSNS (1), /* cost of movsx */
840 COSTS_N_INSNS (1), /* cost of movzx */
841 16, /* "large" insn */
842 6, /* MOVE_RATIO */
843 2, /* cost for loading QImode using movzbl */
844 {4, 5, 4}, /* cost of loading integer registers
845 in QImode, HImode and SImode.
846 Relative to reg-reg move (2). */
847 {2, 3, 2}, /* cost of storing integer registers */
848 2, /* cost of reg,reg fld/fst */
849 {2, 2, 6}, /* cost of loading fp registers
850 in SFmode, DFmode and XFmode */
851 {4, 4, 6}, /* cost of storing fp registers
852 in SFmode, DFmode and XFmode */
853 2, /* cost of moving MMX register */
854 {2, 2}, /* cost of loading MMX registers
855 in SImode and DImode */
856 {2, 2}, /* cost of storing MMX registers
857 in SImode and DImode */
858 12, /* cost of moving SSE register */
859 {12, 12, 12}, /* cost of loading SSE registers
860 in SImode, DImode and TImode */
861 {2, 2, 8}, /* cost of storing SSE registers
862 in SImode, DImode and TImode */
863 10, /* MMX or SSE register to integer */
864 8, /* size of l1 cache. */
865 256, /* size of l2 cache. */
866 64, /* size of prefetch block */
867 6, /* number of parallel prefetches */
868 2, /* Branch cost */
869 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
870 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
871 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
872 COSTS_N_INSNS (2), /* cost of FABS instruction. */
873 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
874 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
875 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
876 DUMMY_STRINGOP_ALGS},
877 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
878 {-1, libcall}}},
879 DUMMY_STRINGOP_ALGS},
880 1, /* scalar_stmt_cost. */
881 1, /* scalar load_cost. */
882 1, /* scalar_store_cost. */
883 1, /* vec_stmt_cost. */
884 1, /* vec_to_scalar_cost. */
885 1, /* scalar_to_vec_cost. */
886 1, /* vec_align_load_cost. */
887 2, /* vec_unalign_load_cost. */
888 1, /* vec_store_cost. */
889 3, /* cond_taken_branch_cost. */
890 1, /* cond_not_taken_branch_cost. */
891 };
892
893 static const
894 struct processor_costs nocona_cost = {
895 COSTS_N_INSNS (1), /* cost of an add instruction */
896 COSTS_N_INSNS (1), /* cost of a lea instruction */
897 COSTS_N_INSNS (1), /* variable shift costs */
898 COSTS_N_INSNS (1), /* constant shift costs */
899 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
900 COSTS_N_INSNS (10), /* HI */
901 COSTS_N_INSNS (10), /* SI */
902 COSTS_N_INSNS (10), /* DI */
903 COSTS_N_INSNS (10)}, /* other */
904 0, /* cost of multiply per each bit set */
905 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
906 COSTS_N_INSNS (66), /* HI */
907 COSTS_N_INSNS (66), /* SI */
908 COSTS_N_INSNS (66), /* DI */
909 COSTS_N_INSNS (66)}, /* other */
910 COSTS_N_INSNS (1), /* cost of movsx */
911 COSTS_N_INSNS (1), /* cost of movzx */
912 16, /* "large" insn */
913 17, /* MOVE_RATIO */
914 4, /* cost for loading QImode using movzbl */
915 {4, 4, 4}, /* cost of loading integer registers
916 in QImode, HImode and SImode.
917 Relative to reg-reg move (2). */
918 {4, 4, 4}, /* cost of storing integer registers */
919 3, /* cost of reg,reg fld/fst */
920 {12, 12, 12}, /* cost of loading fp registers
921 in SFmode, DFmode and XFmode */
922 {4, 4, 4}, /* cost of storing fp registers
923 in SFmode, DFmode and XFmode */
924 6, /* cost of moving MMX register */
925 {12, 12}, /* cost of loading MMX registers
926 in SImode and DImode */
927 {12, 12}, /* cost of storing MMX registers
928 in SImode and DImode */
929 6, /* cost of moving SSE register */
930 {12, 12, 12}, /* cost of loading SSE registers
931 in SImode, DImode and TImode */
932 {12, 12, 12}, /* cost of storing SSE registers
933 in SImode, DImode and TImode */
934 8, /* MMX or SSE register to integer */
935 8, /* size of l1 cache. */
936 1024, /* size of l2 cache. */
937 128, /* size of prefetch block */
938 8, /* number of parallel prefetches */
939 1, /* Branch cost */
940 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
941 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
942 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
943 COSTS_N_INSNS (3), /* cost of FABS instruction. */
944 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
945 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
946 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
947 {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
948 {100000, unrolled_loop}, {-1, libcall}}}},
949 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
950 {-1, libcall}}},
951 {libcall, {{24, loop}, {64, unrolled_loop},
952 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
953 1, /* scalar_stmt_cost. */
954 1, /* scalar load_cost. */
955 1, /* scalar_store_cost. */
956 1, /* vec_stmt_cost. */
957 1, /* vec_to_scalar_cost. */
958 1, /* scalar_to_vec_cost. */
959 1, /* vec_align_load_cost. */
960 2, /* vec_unalign_load_cost. */
961 1, /* vec_store_cost. */
962 3, /* cond_taken_branch_cost. */
963 1, /* cond_not_taken_branch_cost. */
964 };
965
966 static const
967 struct processor_costs core2_cost = {
968 COSTS_N_INSNS (1), /* cost of an add instruction */
969 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
970 COSTS_N_INSNS (1), /* variable shift costs */
971 COSTS_N_INSNS (1), /* constant shift costs */
972 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
973 COSTS_N_INSNS (3), /* HI */
974 COSTS_N_INSNS (3), /* SI */
975 COSTS_N_INSNS (3), /* DI */
976 COSTS_N_INSNS (3)}, /* other */
977 0, /* cost of multiply per each bit set */
978 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
979 COSTS_N_INSNS (22), /* HI */
980 COSTS_N_INSNS (22), /* SI */
981 COSTS_N_INSNS (22), /* DI */
982 COSTS_N_INSNS (22)}, /* other */
983 COSTS_N_INSNS (1), /* cost of movsx */
984 COSTS_N_INSNS (1), /* cost of movzx */
985 8, /* "large" insn */
986 16, /* MOVE_RATIO */
987 2, /* cost for loading QImode using movzbl */
988 {6, 6, 6}, /* cost of loading integer registers
989 in QImode, HImode and SImode.
990 Relative to reg-reg move (2). */
991 {4, 4, 4}, /* cost of storing integer registers */
992 2, /* cost of reg,reg fld/fst */
993 {6, 6, 6}, /* cost of loading fp registers
994 in SFmode, DFmode and XFmode */
995 {4, 4, 4}, /* cost of storing fp registers
996 in SFmode, DFmode and XFmode */
997 2, /* cost of moving MMX register */
998 {6, 6}, /* cost of loading MMX registers
999 in SImode and DImode */
1000 {4, 4}, /* cost of storing MMX registers
1001 in SImode and DImode */
1002 2, /* cost of moving SSE register */
1003 {6, 6, 6}, /* cost of loading SSE registers
1004 in SImode, DImode and TImode */
1005 {4, 4, 4}, /* cost of storing SSE registers
1006 in SImode, DImode and TImode */
1007 2, /* MMX or SSE register to integer */
1008 32, /* size of l1 cache. */
1009 2048, /* size of l2 cache. */
1010 128, /* size of prefetch block */
1011 8, /* number of parallel prefetches */
1012 3, /* Branch cost */
1013 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
1014 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
1015 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
1016 COSTS_N_INSNS (1), /* cost of FABS instruction. */
1017 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
1018 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
1019 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
1020 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
1021 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1022 {{libcall, {{8, loop}, {15, unrolled_loop},
1023 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1024 {libcall, {{24, loop}, {32, unrolled_loop},
1025 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1026 1, /* scalar_stmt_cost. */
1027 1, /* scalar load_cost. */
1028 1, /* scalar_store_cost. */
1029 1, /* vec_stmt_cost. */
1030 1, /* vec_to_scalar_cost. */
1031 1, /* scalar_to_vec_cost. */
1032 1, /* vec_align_load_cost. */
1033 2, /* vec_unalign_load_cost. */
1034 1, /* vec_store_cost. */
1035 3, /* cond_taken_branch_cost. */
1036 1, /* cond_not_taken_branch_cost. */
1037 };
1038
1039 /* Generic64 should produce code tuned for Nocona and K8. */
1040 static const
1041 struct processor_costs generic64_cost = {
1042 COSTS_N_INSNS (1), /* cost of an add instruction */
  1043   /* On all chips taken into consideration, lea takes 2 cycles or more.  With
  1044      that cost, however, our current implementation of synth_mult results in
  1045      the use of unnecessary temporary registers, causing regressions on several
  1046      SPECfp benchmarks.  */
1047 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1048 COSTS_N_INSNS (1), /* variable shift costs */
1049 COSTS_N_INSNS (1), /* constant shift costs */
1050 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1051 COSTS_N_INSNS (4), /* HI */
1052 COSTS_N_INSNS (3), /* SI */
1053 COSTS_N_INSNS (4), /* DI */
1054 COSTS_N_INSNS (2)}, /* other */
1055 0, /* cost of multiply per each bit set */
1056 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1057 COSTS_N_INSNS (26), /* HI */
1058 COSTS_N_INSNS (42), /* SI */
1059 COSTS_N_INSNS (74), /* DI */
1060 COSTS_N_INSNS (74)}, /* other */
1061 COSTS_N_INSNS (1), /* cost of movsx */
1062 COSTS_N_INSNS (1), /* cost of movzx */
1063 8, /* "large" insn */
1064 17, /* MOVE_RATIO */
1065 4, /* cost for loading QImode using movzbl */
1066 {4, 4, 4}, /* cost of loading integer registers
1067 in QImode, HImode and SImode.
1068 Relative to reg-reg move (2). */
1069 {4, 4, 4}, /* cost of storing integer registers */
1070 4, /* cost of reg,reg fld/fst */
1071 {12, 12, 12}, /* cost of loading fp registers
1072 in SFmode, DFmode and XFmode */
1073 {6, 6, 8}, /* cost of storing fp registers
1074 in SFmode, DFmode and XFmode */
1075 2, /* cost of moving MMX register */
1076 {8, 8}, /* cost of loading MMX registers
1077 in SImode and DImode */
1078 {8, 8}, /* cost of storing MMX registers
1079 in SImode and DImode */
1080 2, /* cost of moving SSE register */
1081 {8, 8, 8}, /* cost of loading SSE registers
1082 in SImode, DImode and TImode */
1083 {8, 8, 8}, /* cost of storing SSE registers
1084 in SImode, DImode and TImode */
1085 5, /* MMX or SSE register to integer */
1086 32, /* size of l1 cache. */
1087 512, /* size of l2 cache. */
1088 64, /* size of prefetch block */
1089 6, /* number of parallel prefetches */
  1090   /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
  1091      value is increased to the perhaps more appropriate value of 5.  */
1092 3, /* Branch cost */
1093 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1094 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1095 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1096 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1097 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1098 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1099 {DUMMY_STRINGOP_ALGS,
1100 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1101 {DUMMY_STRINGOP_ALGS,
1102 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1103 1, /* scalar_stmt_cost. */
1104 1, /* scalar load_cost. */
1105 1, /* scalar_store_cost. */
1106 1, /* vec_stmt_cost. */
1107 1, /* vec_to_scalar_cost. */
1108 1, /* scalar_to_vec_cost. */
1109 1, /* vec_align_load_cost. */
1110 2, /* vec_unalign_load_cost. */
1111 1, /* vec_store_cost. */
1112 3, /* cond_taken_branch_cost. */
1113 1, /* cond_not_taken_branch_cost. */
1114 };
1115
1116 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
1117 static const
1118 struct processor_costs generic32_cost = {
1119 COSTS_N_INSNS (1), /* cost of an add instruction */
1120 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1121 COSTS_N_INSNS (1), /* variable shift costs */
1122 COSTS_N_INSNS (1), /* constant shift costs */
1123 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1124 COSTS_N_INSNS (4), /* HI */
1125 COSTS_N_INSNS (3), /* SI */
1126 COSTS_N_INSNS (4), /* DI */
1127 COSTS_N_INSNS (2)}, /* other */
1128 0, /* cost of multiply per each bit set */
1129 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1130 COSTS_N_INSNS (26), /* HI */
1131 COSTS_N_INSNS (42), /* SI */
1132 COSTS_N_INSNS (74), /* DI */
1133 COSTS_N_INSNS (74)}, /* other */
1134 COSTS_N_INSNS (1), /* cost of movsx */
1135 COSTS_N_INSNS (1), /* cost of movzx */
1136 8, /* "large" insn */
1137 17, /* MOVE_RATIO */
1138 4, /* cost for loading QImode using movzbl */
1139 {4, 4, 4}, /* cost of loading integer registers
1140 in QImode, HImode and SImode.
1141 Relative to reg-reg move (2). */
1142 {4, 4, 4}, /* cost of storing integer registers */
1143 4, /* cost of reg,reg fld/fst */
1144 {12, 12, 12}, /* cost of loading fp registers
1145 in SFmode, DFmode and XFmode */
1146 {6, 6, 8}, /* cost of storing fp registers
1147 in SFmode, DFmode and XFmode */
1148 2, /* cost of moving MMX register */
1149 {8, 8}, /* cost of loading MMX registers
1150 in SImode and DImode */
1151 {8, 8}, /* cost of storing MMX registers
1152 in SImode and DImode */
1153 2, /* cost of moving SSE register */
1154 {8, 8, 8}, /* cost of loading SSE registers
1155 in SImode, DImode and TImode */
1156 {8, 8, 8}, /* cost of storing SSE registers
1157 in SImode, DImode and TImode */
1158 5, /* MMX or SSE register to integer */
1159 32, /* size of l1 cache. */
1160 256, /* size of l2 cache. */
1161 64, /* size of prefetch block */
1162 6, /* number of parallel prefetches */
1163 3, /* Branch cost */
1164 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1165 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1166 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1167 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1168 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1169 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1170 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1171 DUMMY_STRINGOP_ALGS},
1172 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1173 DUMMY_STRINGOP_ALGS},
1174 1, /* scalar_stmt_cost. */
1175 1, /* scalar load_cost. */
1176 1, /* scalar_store_cost. */
1177 1, /* vec_stmt_cost. */
1178 1, /* vec_to_scalar_cost. */
1179 1, /* scalar_to_vec_cost. */
1180 1, /* vec_align_load_cost. */
1181 2, /* vec_unalign_load_cost. */
1182 1, /* vec_store_cost. */
1183 3, /* cond_taken_branch_cost. */
1184 1, /* cond_not_taken_branch_cost. */
1185 };
1186
1187 const struct processor_costs *ix86_cost = &pentium_cost;
1188
1189 /* Processor feature/optimization bitmasks. */
1190 #define m_386 (1<<PROCESSOR_I386)
1191 #define m_486 (1<<PROCESSOR_I486)
1192 #define m_PENT (1<<PROCESSOR_PENTIUM)
1193 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
1194 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
1195 #define m_NOCONA (1<<PROCESSOR_NOCONA)
1196 #define m_CORE2 (1<<PROCESSOR_CORE2)
1197
1198 #define m_GEODE (1<<PROCESSOR_GEODE)
1199 #define m_K6 (1<<PROCESSOR_K6)
1200 #define m_K6_GEODE (m_K6 | m_GEODE)
1201 #define m_K8 (1<<PROCESSOR_K8)
1202 #define m_ATHLON (1<<PROCESSOR_ATHLON)
1203 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
1204 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
1205 #define m_AMD_MULTIPLE (m_K8 | m_ATHLON | m_AMDFAM10)
1206
1207 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1208 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1209
  1210 /* Generic instruction choice should be a common subset of the supported CPUs
  1211    (PPro/PENT4/NOCONA/CORE2/Athlon/K8).  */
1212 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
1213
1214 /* Feature tests against the various tunings. */
1215 unsigned char ix86_tune_features[X86_TUNE_LAST];
1216
1217 /* Feature tests against the various tunings used to create ix86_tune_features
  1218    based on the processor mask (see the illustrative sketch after this table).  */
1219 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
  1220   /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
  1221      negatively, so enabling it for Generic64 seems like a good code-size
  1222      tradeoff.  We can't enable it for 32-bit generic because it does not
  1223      work well with PPro-based chips.  */
1224 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_CORE2 | m_GENERIC64,
1225
1226 /* X86_TUNE_PUSH_MEMORY */
1227 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
1228 | m_NOCONA | m_CORE2 | m_GENERIC,
1229
1230 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1231 m_486 | m_PENT,
1232
1233 /* X86_TUNE_UNROLL_STRLEN */
1234 m_486 | m_PENT | m_PPRO | m_AMD_MULTIPLE | m_K6 | m_CORE2 | m_GENERIC,
1235
1236 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1237 m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4 | m_GENERIC,
1238
  1239   /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were added to the P4 based
  1240      on simulation results.  But after the P4 shipped, no performance benefit
  1241      was observed from branch hints, and they also increase the code size.
  1242      As a result, icc never generates branch hints.  */
1243 0,
1244
1245 /* X86_TUNE_DOUBLE_WITH_ADD */
1246 ~m_386,
1247
1248 /* X86_TUNE_USE_SAHF */
1249 m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
1250 | m_NOCONA | m_CORE2 | m_GENERIC,
1251
1252 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1253 partial dependencies. */
1254 m_AMD_MULTIPLE | m_PPRO | m_PENT4 | m_NOCONA
1255 | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1256
  1257   /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
  1258      register stalls in the Generic32 compilation setting as well.  However,
  1259      in the current implementation partial register stalls are not eliminated
  1260      very well - they can be introduced via subregs synthesized by combine
  1261      and can happen in caller/callee saving sequences.  Because this option
  1262      pays back little on PPro-based chips and conflicts with the partial-register
  1263      dependencies used by Athlon/P4-based chips, it is better to leave it off
  1264      for generic32 for now.  */
1265 m_PPRO,
1266
1267 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1268 m_CORE2 | m_GENERIC,
1269
1270 /* X86_TUNE_USE_HIMODE_FIOP */
1271 m_386 | m_486 | m_K6_GEODE,
1272
1273 /* X86_TUNE_USE_SIMODE_FIOP */
1274 ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_CORE2 | m_GENERIC),
1275
1276 /* X86_TUNE_USE_MOV0 */
1277 m_K6,
1278
1279 /* X86_TUNE_USE_CLTD */
1280 ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC),
1281
1282 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1283 m_PENT4,
1284
1285 /* X86_TUNE_SPLIT_LONG_MOVES */
1286 m_PPRO,
1287
1288 /* X86_TUNE_READ_MODIFY_WRITE */
1289 ~m_PENT,
1290
1291 /* X86_TUNE_READ_MODIFY */
1292 ~(m_PENT | m_PPRO),
1293
1294 /* X86_TUNE_PROMOTE_QIMODE */
1295 m_K6_GEODE | m_PENT | m_386 | m_486 | m_AMD_MULTIPLE | m_CORE2
1296 | m_GENERIC /* | m_PENT4 ? */,
1297
1298 /* X86_TUNE_FAST_PREFIX */
1299 ~(m_PENT | m_486 | m_386),
1300
1301 /* X86_TUNE_SINGLE_STRINGOP */
1302 m_386 | m_PENT4 | m_NOCONA,
1303
1304 /* X86_TUNE_QIMODE_MATH */
1305 ~0,
1306
  1307   /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
  1308      register stalls.  Just like X86_TUNE_PARTIAL_REG_STALL, this option
  1309      might be considered for Generic32 if our scheme for avoiding partial
  1310      stalls were more effective.  */
1311 ~m_PPRO,
1312
1313 /* X86_TUNE_PROMOTE_QI_REGS */
1314 0,
1315
1316 /* X86_TUNE_PROMOTE_HI_REGS */
1317 m_PPRO,
1318
1319 /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop. */
1320 m_AMD_MULTIPLE | m_K6_GEODE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1321
1322 /* X86_TUNE_ADD_ESP_8 */
1323 m_AMD_MULTIPLE | m_PPRO | m_K6_GEODE | m_386
1324 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1325
1326 /* X86_TUNE_SUB_ESP_4 */
1327 m_AMD_MULTIPLE | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1328
1329 /* X86_TUNE_SUB_ESP_8 */
1330 m_AMD_MULTIPLE | m_PPRO | m_386 | m_486
1331 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1332
1333 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1334 for DFmode copies */
1335 ~(m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1336 | m_GENERIC | m_GEODE),
1337
1338 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1339 m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1340
  1341   /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
  1342      conflict here between PPro/Pentium4-based chips, which treat 128-bit
  1343      SSE registers as single units, and K8-based chips, which split SSE
  1344      registers into two 64-bit halves.  This knob promotes all store destinations
  1345      to 128 bits to allow register renaming on 128-bit SSE units, but usually
  1346      results in one extra micro-op on 64-bit SSE units.  Experimental results
  1347      show that disabling this option on P4 causes an over 20% SPECfp regression,
  1348      while enabling it on K8 causes roughly a 2.4% regression that can be partly
  1349      masked by careful scheduling of moves.  */
1350 m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC | m_AMDFAM10,
1351
1352 /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
1353 m_AMDFAM10,
1354
  1355   /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where types and dependencies
  1356      are resolved on SSE register parts instead of whole registers, so we may
  1357      keep just the lower part of scalar values in the proper format, leaving the
  1358      upper part undefined.  */
1359 m_ATHLON_K8,
1360
1361 /* X86_TUNE_SSE_TYPELESS_STORES */
1362 m_AMD_MULTIPLE,
1363
1364 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1365 m_PPRO | m_PENT4 | m_NOCONA,
1366
1367 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1368 m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1369
1370 /* X86_TUNE_PROLOGUE_USING_MOVE */
1371 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1372
1373 /* X86_TUNE_EPILOGUE_USING_MOVE */
1374 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1375
1376 /* X86_TUNE_SHIFT1 */
1377 ~m_486,
1378
1379 /* X86_TUNE_USE_FFREEP */
1380 m_AMD_MULTIPLE,
1381
1382 /* X86_TUNE_INTER_UNIT_MOVES */
1383 ~(m_AMD_MULTIPLE | m_GENERIC),
1384
1385 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
1386 ~(m_AMDFAM10),
1387
1388 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1389 than 4 branch instructions in the 16 byte window. */
1390 m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1391
1392 /* X86_TUNE_SCHEDULE */
1393 m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_CORE2 | m_GENERIC,
1394
1395 /* X86_TUNE_USE_BT */
1396 m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
1397
1398 /* X86_TUNE_USE_INCDEC */
1399 ~(m_PENT4 | m_NOCONA | m_GENERIC),
1400
1401 /* X86_TUNE_PAD_RETURNS */
1402 m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
1403
1404 /* X86_TUNE_EXT_80387_CONSTANTS */
1405 m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC,
1406
1407 /* X86_TUNE_SHORTEN_X87_SSE */
1408 ~m_K8,
1409
1410 /* X86_TUNE_AVOID_VECTOR_DECODE */
1411 m_K8 | m_GENERIC64,
1412
  1413   /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have the same latency for HImode
  1414      and SImode multiplies, but the 386 and 486 do HImode multiplies faster.  */
1415 ~(m_386 | m_486),
1416
  1417   /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of a 32-bit constant and memory takes
  1418      the vector (slow) decode path on AMD machines.  */
1419 m_K8 | m_GENERIC64 | m_AMDFAM10,
1420
  1421   /* X86_TUNE_SLOW_IMUL_IMM8: Imul of an 8-bit constant takes the vector (slow)
  1422      decode path on AMD machines.  */
1423 m_K8 | m_GENERIC64 | m_AMDFAM10,
1424
  1425   /* X86_TUNE_MOVE_M1_VIA_OR: On Pentiums, it is faster to load -1 via OR
  1426      than via a MOV.  */
1427 m_PENT,
1428
  1429   /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on the Pentium, while XOR is,
  1430      though it is one byte longer.  */
1431 m_PENT,
1432
  1433   /* X86_TUNE_NOT_VECTORMODE: On the AMD K6, NOT is vector decoded with a memory
  1434      operand that cannot be represented using a modRM byte.  The XOR
  1435      replacement is long decoded, so this split helps here as well.  */
1436 m_K6,
1437
1438 /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
1439 from FP to FP. */
1440 m_AMDFAM10 | m_GENERIC,
1441
1442 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
1443 from integer to FP. */
1444 m_AMDFAM10,
1445
1446 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
1447 with a subsequent conditional jump instruction into a single
1448 compare-and-branch uop. */
1449 m_CORE2,
1450 };
1451
1452 /* Feature tests against the various architecture variations. */
1453 unsigned char ix86_arch_features[X86_ARCH_LAST];
1454
1455 /* Feature tests against the various architecture variations, used to create
1456 ix86_arch_features based on the processor mask. */
1457 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
1458 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
1459 ~(m_386 | m_486 | m_PENT | m_K6),
1460
1461 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1462 ~m_386,
1463
1464 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1465 ~(m_386 | m_486),
1466
1467 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1468 ~m_386,
1469
1470 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
1471 ~m_386,
1472 };
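/* Illustrative use (a sketch of what override_options below does with this
   table): the per-architecture bit is selected and tested against each entry,
   roughly

       ix86_arch_mask = 1u << ix86_arch;
       ix86_arch_features[X86_ARCH_CMOVE]
         = !!(initial_ix86_arch_features[X86_ARCH_CMOVE] & ix86_arch_mask);

   so, for example, -march=i486 leaves X86_ARCH_CMOVE clear (the 486 has no
   cmov) while any PentiumPro-or-later -march sets it.  */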
1473
1474 static const unsigned int x86_accumulate_outgoing_args
1475 = m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
1476
1477 static const unsigned int x86_arch_always_fancy_math_387
1478 = m_PENT | m_PPRO | m_AMD_MULTIPLE | m_PENT4
1479 | m_NOCONA | m_CORE2 | m_GENERIC;
1480
1481 static enum stringop_alg stringop_alg = no_stringop;
1482
1483 /* In case the average insn count for a single function invocation is
1484 lower than this constant, emit fast (but longer) prologue and
1485 epilogue code. */
1486 #define FAST_PROLOGUE_INSN_COUNT 20
1487
1488 /* Names for the 8-bit (low), 8-bit (high), and 16-bit registers, respectively. */
1489 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1490 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1491 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1492
1493 /* Array of the smallest class containing reg number REGNO, indexed by
1494 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1495
1496 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1497 {
1498 /* ax, dx, cx, bx */
1499 AREG, DREG, CREG, BREG,
1500 /* si, di, bp, sp */
1501 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
1502 /* FP registers */
1503 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1504 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1505 /* arg pointer */
1506 NON_Q_REGS,
1507 /* flags, fpsr, fpcr, frame */
1508 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
1509 /* SSE registers */
1510 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1511 SSE_REGS, SSE_REGS,
1512 /* MMX registers */
1513 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
1514 MMX_REGS, MMX_REGS,
1515 /* REX registers */
1516 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1517 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1518 /* SSE REX registers */
1519 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1520 SSE_REGS, SSE_REGS,
1521 };
1522
1523 /* The "default" register map used in 32bit mode. */
1524
1525 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1526 {
1527 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1528 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1529 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1530 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1531 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1532 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1533 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1534 };
1535
1536 static int const x86_64_int_parameter_registers[6] =
1537 {
1538 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
1539 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
1540 };
1541
1542 static int const x86_64_ms_abi_int_parameter_registers[4] =
1543 {
1544 2 /*RCX*/, 1 /*RDX*/,
1545 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
1546 };
1547
1548 static int const x86_64_int_return_registers[4] =
1549 {
1550 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
1551 };
1552
1553 /* The "default" register map used in 64bit mode. */
1554 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1555 {
1556 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1557 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1558 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1559 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1560 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1561 8, 9, 10, 11, 12, 13, 14, 15, /* extended integer registers */
1562 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1563 };
1564
1565 /* Define the register numbers to be used in Dwarf debugging information.
1566 The SVR4 reference port C compiler uses the following register numbers
1567 in its Dwarf output code:
1568 0 for %eax (gcc regno = 0)
1569 1 for %ecx (gcc regno = 2)
1570 2 for %edx (gcc regno = 1)
1571 3 for %ebx (gcc regno = 3)
1572 4 for %esp (gcc regno = 7)
1573 5 for %ebp (gcc regno = 6)
1574 6 for %esi (gcc regno = 4)
1575 7 for %edi (gcc regno = 5)
1576 The following three DWARF register numbers are never generated by
1577 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1578 believes these numbers have these meanings.
1579 8 for %eip (no gcc equivalent)
1580 9 for %eflags (gcc regno = 17)
1581 10 for %trapno (no gcc equivalent)
1582 It is not at all clear how we should number the FP stack registers
1583 for the x86 architecture. If the version of SDB on x86/svr4 were
1584 a bit less brain dead with respect to floating-point then we would
1585 have a precedent to follow with respect to DWARF register numbers
1586 for x86 FP registers, but the SDB on x86/svr4 is so completely
1587 broken with respect to FP registers that it is hardly worth thinking
1588 of it as something to strive for compatibility with.
1589 The version of x86/svr4 SDB I have at the moment does (partially)
1590 seem to believe that DWARF register number 11 is associated with
1591 the x86 register %st(0), but that's about all. Higher DWARF
1592 register numbers don't seem to be associated with anything in
1593 particular, and even for DWARF regno 11, SDB only seems to under-
1594 stand that it should say that a variable lives in %st(0) (when
1595 asked via an `=' command) if we said it was in DWARF regno 11,
1596 but SDB still prints garbage when asked for the value of the
1597 variable in question (via a `/' command).
1598 (Also note that the labels SDB prints for various FP stack regs
1599 when doing an `x' command are all wrong.)
1600 Note that these problems generally don't affect the native SVR4
1601 C compiler because it doesn't allow the use of -O with -g and
1602 because when it is *not* optimizing, it allocates a memory
1603 location for each floating-point variable, and the memory
1604 location is what gets described in the DWARF AT_location
1605 attribute for the variable in question.
1606 Regardless of the severe mental illness of the x86/svr4 SDB, we
1607 do something sensible here and we use the following DWARF
1608 register numbers. Note that these are all stack-top-relative
1609 numbers.
1610 11 for %st(0) (gcc regno = 8)
1611 12 for %st(1) (gcc regno = 9)
1612 13 for %st(2) (gcc regno = 10)
1613 14 for %st(3) (gcc regno = 11)
1614 15 for %st(4) (gcc regno = 12)
1615 16 for %st(5) (gcc regno = 13)
1616 17 for %st(6) (gcc regno = 14)
1617 18 for %st(7) (gcc regno = 15)
1618 */
1619 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1620 {
1621 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1622 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1623 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1624 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1625 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1626 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1627 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1628 };
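/* Worked example of the table above: it is indexed by gcc's internal hard
   register number and yields the DWARF register number, so gcc regno 2
   (%ecx) maps to DWARF register 1, regno 7 (%esp) to 4, and regno 17
   (%eflags) to 9, matching the numbering described in the comment.  */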
1629
1630 /* Test and compare insns in i386.md store the information needed to
1631 generate branch and scc insns here. */
1632
1633 rtx ix86_compare_op0 = NULL_RTX;
1634 rtx ix86_compare_op1 = NULL_RTX;
1635 rtx ix86_compare_emitted = NULL_RTX;
1636
1637 /* Define the structure for the machine field in struct function. */
1638
1639 struct stack_local_entry GTY(())
1640 {
1641 unsigned short mode;
1642 unsigned short n;
1643 rtx rtl;
1644 struct stack_local_entry *next;
1645 };
1646
1647 /* Structure describing stack frame layout.
1648 Stack grows downward:
1649
1650 [arguments]
1651 <- ARG_POINTER
1652 saved pc
1653
1654 saved frame pointer if frame_pointer_needed
1655 <- HARD_FRAME_POINTER
1656 [saved regs]
1657
1658 [padding0]
1659
1660 [saved SSE regs]
1661
1662 [padding1] \
1663 )
1664 [va_arg registers] (
1665 > to_allocate <- FRAME_POINTER
1666 [frame] (
1667 )
1668 [padding2] /
1669 */
1670 struct ix86_frame
1671 {
1672 int padding0;
1673 int nsseregs;
1674 int nregs;
1675 int padding1;
1676 int va_arg_size;
1677 HOST_WIDE_INT frame;
1678 int padding2;
1679 int outgoing_arguments_size;
1680 int red_zone_size;
1681
1682 HOST_WIDE_INT to_allocate;
1683 /* The offsets relative to ARG_POINTER. */
1684 HOST_WIDE_INT frame_pointer_offset;
1685 HOST_WIDE_INT hard_frame_pointer_offset;
1686 HOST_WIDE_INT stack_pointer_offset;
1687
1688 /* When save_regs_using_mov is set, emit prologue using
1689 move instead of push instructions. */
1690 bool save_regs_using_mov;
1691 };
1692
1693 /* Code model option. */
1694 enum cmodel ix86_cmodel;
1695 /* Asm dialect. */
1696 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1697 /* TLS dialects. */
1698 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1699
1700 /* Which unit we are generating floating point math for. */
1701 enum fpmath_unit ix86_fpmath;
1702
1703 /* Which CPU we are scheduling for. */
1704 enum attr_cpu ix86_schedule;
1705
1706 /* Which CPU we are optimizing for. */
1707 enum processor_type ix86_tune;
1708
1709 /* Which instruction set architecture to use. */
1710 enum processor_type ix86_arch;
1711
1712 /* True if the SSE prefetch instruction is not a NOP. */
1713 int x86_prefetch_sse;
1714
1715 /* ix86_regparm_string as a number */
1716 static int ix86_regparm;
1717
1718 /* -mstackrealign option */
1719 extern int ix86_force_align_arg_pointer;
1720 static const char ix86_force_align_arg_pointer_string[]
1721 = "force_align_arg_pointer";
1722
1723 static rtx (*ix86_gen_leave) (void);
1724 static rtx (*ix86_gen_pop1) (rtx);
1725 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
1726 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
1727 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx);
1728 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
1729 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
1730 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
1731
1732 /* Preferred alignment for stack boundary in bits. */
1733 unsigned int ix86_preferred_stack_boundary;
1734
1735 /* Alignment for incoming stack boundary in bits specified at
1736 command line. */
1737 static unsigned int ix86_user_incoming_stack_boundary;
1738
1739 /* Default alignment for incoming stack boundary in bits. */
1740 static unsigned int ix86_default_incoming_stack_boundary;
1741
1742 /* Alignment for incoming stack boundary in bits. */
1743 unsigned int ix86_incoming_stack_boundary;
1744
1745 /* Values 1-5: see jump.c */
1746 int ix86_branch_cost;
1747
1748 /* Calling-ABI-specific va_list type nodes. */
1749 static GTY(()) tree sysv_va_list_type_node;
1750 static GTY(()) tree ms_va_list_type_node;
1751
1752 /* Variables which are this size or smaller are put in the data/bss
1753 or ldata/lbss sections. */
1754
1755 int ix86_section_threshold = 65536;
1756
1757 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1758 char internal_label_prefix[16];
1759 int internal_label_prefix_len;
1760
1761 /* Fence to use after loop using movnt. */
1762 tree x86_mfence;
1763
1764 /* Register class used for passing a given 64bit part of the argument.
1765 These represent classes as documented by the psABI, with the exception
1766 of the SSESF and SSEDF classes, which are basically the SSE class; gcc
1767 just uses SFmode or DFmode moves instead of DImode to avoid reformatting penalties.
1768
1769 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1770 whenever possible (the upper half does contain padding). */
1771 enum x86_64_reg_class
1772 {
1773 X86_64_NO_CLASS,
1774 X86_64_INTEGER_CLASS,
1775 X86_64_INTEGERSI_CLASS,
1776 X86_64_AVX_CLASS,
1777 X86_64_SSE_CLASS,
1778 X86_64_SSESF_CLASS,
1779 X86_64_SSEDF_CLASS,
1780 X86_64_SSEUP_CLASS,
1781 X86_64_X87_CLASS,
1782 X86_64_X87UP_CLASS,
1783 X86_64_COMPLEX_X87_CLASS,
1784 X86_64_MEMORY_CLASS
1785 };
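/* Illustrative example (assuming the standard x86-64 psABI rules): a 16-byte
   aggregate such as

       struct s { double d; long l; };

   is classified one eightbyte at a time; the first eightbyte (d) gets
   X86_64_SSEDF_CLASS and the second (l) gets X86_64_INTEGER_CLASS, so the
   value is passed in one SSE register and one general-purpose register.  */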
1786 static const char * const x86_64_reg_class_name[] =
1787 {
1788 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1789 "sseup", "x87", "x87up", "cplx87", "no"
1790 };
1791
1792 #define MAX_CLASSES 4
1793
1794 /* Table of constants used by fldpi, fldln2, etc.... */
1795 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1796 static bool ext_80387_constants_init = 0;
1797
1798 \f
1799 static struct machine_function * ix86_init_machine_status (void);
1800 static rtx ix86_function_value (const_tree, const_tree, bool);
1801 static int ix86_function_regparm (const_tree, const_tree);
1802 static void ix86_compute_frame_layout (struct ix86_frame *);
1803 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
1804 rtx, rtx, int);
1805 static void ix86_add_new_builtins (int);
1806
1807 enum ix86_function_specific_strings
1808 {
1809 IX86_FUNCTION_SPECIFIC_ARCH,
1810 IX86_FUNCTION_SPECIFIC_TUNE,
1811 IX86_FUNCTION_SPECIFIC_FPMATH,
1812 IX86_FUNCTION_SPECIFIC_MAX
1813 };
1814
1815 static char *ix86_target_string (int, int, const char *, const char *,
1816 const char *, bool);
1817 static void ix86_debug_options (void) ATTRIBUTE_UNUSED;
1818 static void ix86_function_specific_save (struct cl_target_option *);
1819 static void ix86_function_specific_restore (struct cl_target_option *);
1820 static void ix86_function_specific_print (FILE *, int,
1821 struct cl_target_option *);
1822 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
1823 static bool ix86_valid_target_attribute_inner_p (tree, char *[]);
1824 static bool ix86_can_inline_p (tree, tree);
1825 static void ix86_set_current_function (tree);
1826
1827 \f
1828 /* The svr4 ABI for the i386 says that records and unions are returned
1829 in memory. */
1830 #ifndef DEFAULT_PCC_STRUCT_RETURN
1831 #define DEFAULT_PCC_STRUCT_RETURN 1
1832 #endif
1833
1834 /* Whether -mtune= or -march= were specified */
1835 static int ix86_tune_defaulted;
1836 static int ix86_arch_specified;
1837
1838 /* Bit flags that specify the ISA we are compiling for. */
1839 int ix86_isa_flags = TARGET_64BIT_DEFAULT | TARGET_SUBTARGET_ISA_DEFAULT;
1840
1841 /* A mask of ix86_isa_flags that includes bit X if X
1842 was set or cleared on the command line. */
1843 static int ix86_isa_flags_explicit;
1844
1845 /* Define a set of ISAs which are available when a given ISA is
1846 enabled. MMX and SSE ISAs are handled separately. */
1847
1848 #define OPTION_MASK_ISA_MMX_SET OPTION_MASK_ISA_MMX
1849 #define OPTION_MASK_ISA_3DNOW_SET \
1850 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_MMX_SET)
1851
1852 #define OPTION_MASK_ISA_SSE_SET OPTION_MASK_ISA_SSE
1853 #define OPTION_MASK_ISA_SSE2_SET \
1854 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE_SET)
1855 #define OPTION_MASK_ISA_SSE3_SET \
1856 (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE2_SET)
1857 #define OPTION_MASK_ISA_SSSE3_SET \
1858 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE3_SET)
1859 #define OPTION_MASK_ISA_SSE4_1_SET \
1860 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSSE3_SET)
1861 #define OPTION_MASK_ISA_SSE4_2_SET \
1862 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_1_SET)
1863 #define OPTION_MASK_ISA_AVX_SET \
1864 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_SSE4_2_SET)
1865 #define OPTION_MASK_ISA_FMA_SET \
1866 (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_AVX_SET)
1867
1868 /* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
1869 as -msse4.2. */
1870 #define OPTION_MASK_ISA_SSE4_SET OPTION_MASK_ISA_SSE4_2_SET
1871
1872 #define OPTION_MASK_ISA_SSE4A_SET \
1873 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE3_SET)
1874 #define OPTION_MASK_ISA_SSE5_SET \
1875 (OPTION_MASK_ISA_SSE5 | OPTION_MASK_ISA_SSE4A_SET)
1876
1877 /* AES and PCLMUL need SSE2 because they use xmm registers */
1878 #define OPTION_MASK_ISA_AES_SET \
1879 (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2_SET)
1880 #define OPTION_MASK_ISA_PCLMUL_SET \
1881 (OPTION_MASK_ISA_PCLMUL | OPTION_MASK_ISA_SSE2_SET)
1882
1883 #define OPTION_MASK_ISA_ABM_SET \
1884 (OPTION_MASK_ISA_ABM | OPTION_MASK_ISA_POPCNT)
1885 #define OPTION_MASK_ISA_POPCNT_SET OPTION_MASK_ISA_POPCNT
1886 #define OPTION_MASK_ISA_CX16_SET OPTION_MASK_ISA_CX16
1887 #define OPTION_MASK_ISA_SAHF_SET OPTION_MASK_ISA_SAHF
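/* Illustrative expansion, derived from the definitions above: enabling one
   ISA pulls in everything it depends on, e.g.

       OPTION_MASK_ISA_SSE4_1_SET
         == OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSSE3
          | OPTION_MASK_ISA_SSE3  | OPTION_MASK_ISA_SSE2
          | OPTION_MASK_ISA_SSE

   while the *_UNSET macros below work in the opposite direction, so that
   disabling SSE2 also disables SSE3, SSSE3, SSE4.1, SSE4.2, SSE4A, SSE5,
   AVX and FMA.  */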
1888
1889 /* Define a set of ISAs which aren't available when a given ISA is
1890 disabled. MMX and SSE ISAs are handled separately. */
1891
1892 #define OPTION_MASK_ISA_MMX_UNSET \
1893 (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_3DNOW_UNSET)
1894 #define OPTION_MASK_ISA_3DNOW_UNSET \
1895 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_A_UNSET)
1896 #define OPTION_MASK_ISA_3DNOW_A_UNSET OPTION_MASK_ISA_3DNOW_A
1897
1898 #define OPTION_MASK_ISA_SSE_UNSET \
1899 (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_SSE2_UNSET)
1900 #define OPTION_MASK_ISA_SSE2_UNSET \
1901 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE3_UNSET)
1902 #define OPTION_MASK_ISA_SSE3_UNSET \
1903 (OPTION_MASK_ISA_SSE3 \
1904 | OPTION_MASK_ISA_SSSE3_UNSET \
1905 | OPTION_MASK_ISA_SSE4A_UNSET )
1906 #define OPTION_MASK_ISA_SSSE3_UNSET \
1907 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1_UNSET)
1908 #define OPTION_MASK_ISA_SSE4_1_UNSET \
1909 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2_UNSET)
1910 #define OPTION_MASK_ISA_SSE4_2_UNSET \
1911 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_AVX_UNSET )
1912 #define OPTION_MASK_ISA_AVX_UNSET \
1913 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_FMA_UNSET)
1914 #define OPTION_MASK_ISA_FMA_UNSET OPTION_MASK_ISA_FMA
1915
1916 /* SSE4 includes both SSE4.1 and SSE4.2. -mno-sse4 should be the same
1917 as -mno-sse4.1. */
1918 #define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
1919
1920 #define OPTION_MASK_ISA_SSE4A_UNSET \
1921 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE5_UNSET)
1922 #define OPTION_MASK_ISA_SSE5_UNSET OPTION_MASK_ISA_SSE5
1923 #define OPTION_MASK_ISA_AES_UNSET OPTION_MASK_ISA_AES
1924 #define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL
1925 #define OPTION_MASK_ISA_ABM_UNSET OPTION_MASK_ISA_ABM
1926 #define OPTION_MASK_ISA_POPCNT_UNSET OPTION_MASK_ISA_POPCNT
1927 #define OPTION_MASK_ISA_CX16_UNSET OPTION_MASK_ISA_CX16
1928 #define OPTION_MASK_ISA_SAHF_UNSET OPTION_MASK_ISA_SAHF
1929
1930 /* Vectorization library interface and handlers. */
1931 tree (*ix86_veclib_handler)(enum built_in_function, tree, tree) = NULL;
1932 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
1933 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
1934
1935 /* Processor target table, indexed by processor number */
1936 struct ptt
1937 {
1938 const struct processor_costs *cost; /* Processor costs */
1939 const int align_loop; /* Default alignments. */
1940 const int align_loop_max_skip;
1941 const int align_jump;
1942 const int align_jump_max_skip;
1943 const int align_func;
1944 };
1945
1946 static const struct ptt processor_target_table[PROCESSOR_max] =
1947 {
1948 {&i386_cost, 4, 3, 4, 3, 4},
1949 {&i486_cost, 16, 15, 16, 15, 16},
1950 {&pentium_cost, 16, 7, 16, 7, 16},
1951 {&pentiumpro_cost, 16, 15, 16, 10, 16},
1952 {&geode_cost, 0, 0, 0, 0, 0},
1953 {&k6_cost, 32, 7, 32, 7, 32},
1954 {&athlon_cost, 16, 7, 16, 7, 16},
1955 {&pentium4_cost, 0, 0, 0, 0, 0},
1956 {&k8_cost, 16, 7, 16, 7, 16},
1957 {&nocona_cost, 0, 0, 0, 0, 0},
1958 {&core2_cost, 16, 10, 16, 10, 16},
1959 {&generic32_cost, 16, 7, 16, 7, 16},
1960 {&generic64_cost, 16, 10, 16, 10, 16},
1961 {&amdfam10_cost, 32, 24, 32, 7, 32}
1962 };
1963
1964 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
1965 {
1966 "generic",
1967 "i386",
1968 "i486",
1969 "pentium",
1970 "pentium-mmx",
1971 "pentiumpro",
1972 "pentium2",
1973 "pentium3",
1974 "pentium4",
1975 "pentium-m",
1976 "prescott",
1977 "nocona",
1978 "core2",
1979 "geode",
1980 "k6",
1981 "k6-2",
1982 "k6-3",
1983 "athlon",
1984 "athlon-4",
1985 "k8",
1986 "amdfam10"
1987 };
1988 \f
1989 /* Implement TARGET_HANDLE_OPTION. */
1990
1991 static bool
1992 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
1993 {
1994 switch (code)
1995 {
1996 case OPT_mmmx:
1997 if (value)
1998 {
1999 ix86_isa_flags |= OPTION_MASK_ISA_MMX_SET;
2000 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_SET;
2001 }
2002 else
2003 {
2004 ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
2005 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
2006 }
2007 return true;
2008
2009 case OPT_m3dnow:
2010 if (value)
2011 {
2012 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_SET;
2013 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_SET;
2014 }
2015 else
2016 {
2017 ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
2018 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
2019 }
2020 return true;
2021
2022 case OPT_m3dnowa:
2023 return false;
2024
2025 case OPT_msse:
2026 if (value)
2027 {
2028 ix86_isa_flags |= OPTION_MASK_ISA_SSE_SET;
2029 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_SET;
2030 }
2031 else
2032 {
2033 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
2034 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
2035 }
2036 return true;
2037
2038 case OPT_msse2:
2039 if (value)
2040 {
2041 ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET;
2042 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET;
2043 }
2044 else
2045 {
2046 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
2047 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
2048 }
2049 return true;
2050
2051 case OPT_msse3:
2052 if (value)
2053 {
2054 ix86_isa_flags |= OPTION_MASK_ISA_SSE3_SET;
2055 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_SET;
2056 }
2057 else
2058 {
2059 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
2060 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
2061 }
2062 return true;
2063
2064 case OPT_mssse3:
2065 if (value)
2066 {
2067 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3_SET;
2068 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_SET;
2069 }
2070 else
2071 {
2072 ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
2073 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
2074 }
2075 return true;
2076
2077 case OPT_msse4_1:
2078 if (value)
2079 {
2080 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1_SET;
2081 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_SET;
2082 }
2083 else
2084 {
2085 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
2086 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
2087 }
2088 return true;
2089
2090 case OPT_msse4_2:
2091 if (value)
2092 {
2093 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2_SET;
2094 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_SET;
2095 }
2096 else
2097 {
2098 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
2099 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
2100 }
2101 return true;
2102
2103 case OPT_mavx:
2104 if (value)
2105 {
2106 ix86_isa_flags |= OPTION_MASK_ISA_AVX_SET;
2107 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_SET;
2108 }
2109 else
2110 {
2111 ix86_isa_flags &= ~OPTION_MASK_ISA_AVX_UNSET;
2112 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_UNSET;
2113 }
2114 return true;
2115
2116 case OPT_mfma:
2117 if (value)
2118 {
2119 ix86_isa_flags |= OPTION_MASK_ISA_FMA_SET;
2120 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_SET;
2121 }
2122 else
2123 {
2124 ix86_isa_flags &= ~OPTION_MASK_ISA_FMA_UNSET;
2125 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_UNSET;
2126 }
2127 return true;
2128
2129 case OPT_msse4:
2130 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_SET;
2131 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_SET;
2132 return true;
2133
2134 case OPT_mno_sse4:
2135 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
2136 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
2137 return true;
2138
2139 case OPT_msse4a:
2140 if (value)
2141 {
2142 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A_SET;
2143 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_SET;
2144 }
2145 else
2146 {
2147 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
2148 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
2149 }
2150 return true;
2151
2152 case OPT_msse5:
2153 if (value)
2154 {
2155 ix86_isa_flags |= OPTION_MASK_ISA_SSE5_SET;
2156 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_SET;
2157 }
2158 else
2159 {
2160 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE5_UNSET;
2161 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_UNSET;
2162 }
2163 return true;
2164
2165 case OPT_mabm:
2166 if (value)
2167 {
2168 ix86_isa_flags |= OPTION_MASK_ISA_ABM_SET;
2169 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_SET;
2170 }
2171 else
2172 {
2173 ix86_isa_flags &= ~OPTION_MASK_ISA_ABM_UNSET;
2174 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_UNSET;
2175 }
2176 return true;
2177
2178 case OPT_mpopcnt:
2179 if (value)
2180 {
2181 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT_SET;
2182 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_SET;
2183 }
2184 else
2185 {
2186 ix86_isa_flags &= ~OPTION_MASK_ISA_POPCNT_UNSET;
2187 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_UNSET;
2188 }
2189 return true;
2190
2191 case OPT_msahf:
2192 if (value)
2193 {
2194 ix86_isa_flags |= OPTION_MASK_ISA_SAHF_SET;
2195 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_SET;
2196 }
2197 else
2198 {
2199 ix86_isa_flags &= ~OPTION_MASK_ISA_SAHF_UNSET;
2200 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_UNSET;
2201 }
2202 return true;
2203
2204 case OPT_mcx16:
2205 if (value)
2206 {
2207 ix86_isa_flags |= OPTION_MASK_ISA_CX16_SET;
2208 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_SET;
2209 }
2210 else
2211 {
2212 ix86_isa_flags &= ~OPTION_MASK_ISA_CX16_UNSET;
2213 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_UNSET;
2214 }
2215 return true;
2216
2217 case OPT_maes:
2218 if (value)
2219 {
2220 ix86_isa_flags |= OPTION_MASK_ISA_AES_SET;
2221 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_SET;
2222 }
2223 else
2224 {
2225 ix86_isa_flags &= ~OPTION_MASK_ISA_AES_UNSET;
2226 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_UNSET;
2227 }
2228 return true;
2229
2230 case OPT_mpclmul:
2231 if (value)
2232 {
2233 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL_SET;
2234 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_SET;
2235 }
2236 else
2237 {
2238 ix86_isa_flags &= ~OPTION_MASK_ISA_PCLMUL_UNSET;
2239 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_UNSET;
2240 }
2241 return true;
2242
2243 default:
2244 return true;
2245 }
2246 }
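/* Illustrative example (a sketch, using the masks defined earlier): a
   command line containing "-mssse3 -mno-sse4" reaches this handler twice.
   OPT_mssse3 ORs OPTION_MASK_ISA_SSSE3_SET (SSSE3 plus SSE3/SSE2/SSE) into
   ix86_isa_flags, and OPT_mno_sse4 clears OPTION_MASK_ISA_SSE4_UNSET
   (SSE4.1 and everything built on it).  Both masks are also recorded in
   ix86_isa_flags_explicit, so override_options will not silently re-enable
   them from the -march defaults.  */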
2247 \f
2248 /* Return a string that documents the current -m options. The caller is
2249 responsible for freeing the string. */
2250
2251 static char *
2252 ix86_target_string (int isa, int flags, const char *arch, const char *tune,
2253 const char *fpmath, bool add_nl_p)
2254 {
2255 struct ix86_target_opts
2256 {
2257 const char *option; /* option string */
2258 int mask; /* isa mask options */
2259 };
2260
2261 /* This table is ordered so that options like -msse5 or -msse4.2 that imply
2262 preceding options are matched first. */
2263 static struct ix86_target_opts isa_opts[] =
2264 {
2265 { "-m64", OPTION_MASK_ISA_64BIT },
2266 { "-msse5", OPTION_MASK_ISA_SSE5 },
2267 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2268 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2269 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2270 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2271 { "-msse3", OPTION_MASK_ISA_SSE3 },
2272 { "-msse2", OPTION_MASK_ISA_SSE2 },
2273 { "-msse", OPTION_MASK_ISA_SSE },
2274 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2275 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2276 { "-mmmx", OPTION_MASK_ISA_MMX },
2277 { "-mabm", OPTION_MASK_ISA_ABM },
2278 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2279 { "-maes", OPTION_MASK_ISA_AES },
2280 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2281 };
2282
2283 /* Flag options. */
2284 static struct ix86_target_opts flag_opts[] =
2285 {
2286 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2287 { "-m80387", MASK_80387 },
2288 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2289 { "-malign-double", MASK_ALIGN_DOUBLE },
2290 { "-mcld", MASK_CLD },
2291 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2292 { "-mieee-fp", MASK_IEEE_FP },
2293 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2294 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2295 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2296 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2297 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2298 { "-mno-fused-madd", MASK_NO_FUSED_MADD },
2299 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2300 { "-mno-red-zone", MASK_NO_RED_ZONE },
2301 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2302 { "-mrecip", MASK_RECIP },
2303 { "-mrtd", MASK_RTD },
2304 { "-msseregparm", MASK_SSEREGPARM },
2305 { "-mstack-arg-probe", MASK_STACK_PROBE },
2306 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2307 };
2308
2309 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2310
2311 char isa_other[40];
2312 char target_other[40];
2313 unsigned num = 0;
2314 unsigned i, j;
2315 char *ret;
2316 char *ptr;
2317 size_t len;
2318 size_t line_len;
2319 size_t sep_len;
2320
2321 memset (opts, '\0', sizeof (opts));
2322
2323 /* Add -march= option. */
2324 if (arch)
2325 {
2326 opts[num][0] = "-march=";
2327 opts[num++][1] = arch;
2328 }
2329
2330 /* Add -mtune= option. */
2331 if (tune)
2332 {
2333 opts[num][0] = "-mtune=";
2334 opts[num++][1] = tune;
2335 }
2336
2337 /* Pick out the options in isa options. */
2338 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2339 {
2340 if ((isa & isa_opts[i].mask) != 0)
2341 {
2342 opts[num++][0] = isa_opts[i].option;
2343 isa &= ~ isa_opts[i].mask;
2344 }
2345 }
2346
2347 if (isa && add_nl_p)
2348 {
2349 opts[num++][0] = isa_other;
2350 sprintf (isa_other, "(other isa: 0x%x)", isa);
2351 }
2352
2353 /* Add flag options. */
2354 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2355 {
2356 if ((flags & flag_opts[i].mask) != 0)
2357 {
2358 opts[num++][0] = flag_opts[i].option;
2359 flags &= ~ flag_opts[i].mask;
2360 }
2361 }
2362
2363 if (flags && add_nl_p)
2364 {
2365 opts[num++][0] = target_other;
2366 sprintf (target_other, "(other flags: 0x%x)", flags);
2367 }
2368
2369 /* Add -fpmath= option. */
2370 if (fpmath)
2371 {
2372 opts[num][0] = "-mfpmath=";
2373 opts[num++][1] = fpmath;
2374 }
2375
2376 /* Any options? */
2377 if (num == 0)
2378 return NULL;
2379
2380 gcc_assert (num < ARRAY_SIZE (opts));
2381
2382 /* Size the string. */
2383 len = 0;
2384 sep_len = (add_nl_p) ? 3 : 1;
2385 for (i = 0; i < num; i++)
2386 {
2387 len += sep_len;
2388 for (j = 0; j < 2; j++)
2389 if (opts[i][j])
2390 len += strlen (opts[i][j]);
2391 }
2392
2393 /* Build the string. */
2394 ret = ptr = (char *) xmalloc (len);
2395 line_len = 0;
2396
2397 for (i = 0; i < num; i++)
2398 {
2399 size_t len2[2];
2400
2401 for (j = 0; j < 2; j++)
2402 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2403
2404 if (i != 0)
2405 {
2406 *ptr++ = ' ';
2407 line_len++;
2408
2409 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2410 {
2411 *ptr++ = '\\';
2412 *ptr++ = '\n';
2413 line_len = 0;
2414 }
2415 }
2416
2417 for (j = 0; j < 2; j++)
2418 if (opts[i][j])
2419 {
2420 memcpy (ptr, opts[i][j], len2[j]);
2421 ptr += len2[j];
2422 line_len += len2[j];
2423 }
2424 }
2425
2426 *ptr = '\0';
2427 gcc_assert (ret + len >= ptr);
2428
2429 return ret;
2430 }
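/* Illustrative example (hypothetical input, not from the original source):
   a call such as

     ix86_target_string (OPTION_MASK_ISA_64BIT | OPTION_MASK_ISA_SSE2
                         | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX,
                         MASK_80387, "k8", "generic", "sse", true);

   returns a malloc'd string reading roughly

     "-march=k8 -mtune=generic -m64 -msse2 -msse -mmmx -m80387 -mfpmath=sse"

   with backslash-newline breaks inserted whenever a line would exceed about
   70 columns.  */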
2431
2432 /* Function that is callable from the debugger to print the current
2433 options. */
2434 void
2435 ix86_debug_options (void)
2436 {
2437 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2438 ix86_arch_string, ix86_tune_string,
2439 ix86_fpmath_string, true);
2440
2441 if (opts)
2442 {
2443 fprintf (stderr, "%s\n\n", opts);
2444 free (opts);
2445 }
2446 else
2447 fprintf (stderr, "<no options>\n\n");
2448
2449 return;
2450 }
2451 \f
2452 /* Sometimes certain combinations of command options do not make
2453 sense on a particular target machine. You can define a macro
2454 `OVERRIDE_OPTIONS' to take account of this. This macro, if
2455 defined, is executed once just after all the command options have
2456 been parsed.
2457
2458 Don't use this macro to turn on various extra optimizations for
2459 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
2460
2461 void
2462 override_options (bool main_args_p)
2463 {
2464 int i;
2465 unsigned int ix86_arch_mask, ix86_tune_mask;
2466 const char *prefix;
2467 const char *suffix;
2468 const char *sw;
2469
2470 /* Comes from final.c -- no real reason to change it. */
2471 #define MAX_CODE_ALIGN 16
2472
2473 enum pta_flags
2474 {
2475 PTA_SSE = 1 << 0,
2476 PTA_SSE2 = 1 << 1,
2477 PTA_SSE3 = 1 << 2,
2478 PTA_MMX = 1 << 3,
2479 PTA_PREFETCH_SSE = 1 << 4,
2480 PTA_3DNOW = 1 << 5,
2481 PTA_3DNOW_A = 1 << 6,
2482 PTA_64BIT = 1 << 7,
2483 PTA_SSSE3 = 1 << 8,
2484 PTA_CX16 = 1 << 9,
2485 PTA_POPCNT = 1 << 10,
2486 PTA_ABM = 1 << 11,
2487 PTA_SSE4A = 1 << 12,
2488 PTA_NO_SAHF = 1 << 13,
2489 PTA_SSE4_1 = 1 << 14,
2490 PTA_SSE4_2 = 1 << 15,
2491 PTA_SSE5 = 1 << 16,
2492 PTA_AES = 1 << 17,
2493 PTA_PCLMUL = 1 << 18,
2494 PTA_AVX = 1 << 19,
2495 PTA_FMA = 1 << 20
2496 };
2497
2498 static struct pta
2499 {
2500 const char *const name; /* processor name or nickname. */
2501 const enum processor_type processor;
2502 const enum attr_cpu schedule;
2503 const unsigned /*enum pta_flags*/ flags;
2504 }
2505 const processor_alias_table[] =
2506 {
2507 {"i386", PROCESSOR_I386, CPU_NONE, 0},
2508 {"i486", PROCESSOR_I486, CPU_NONE, 0},
2509 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2510 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2511 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
2512 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
2513 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2514 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2515 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_SSE},
2516 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2517 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2518 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX},
2519 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2520 PTA_MMX | PTA_SSE},
2521 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2522 PTA_MMX | PTA_SSE},
2523 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2524 PTA_MMX | PTA_SSE | PTA_SSE2},
2525 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
2526 PTA_MMX | PTA_SSE | PTA_SSE2},
2527 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
2528 PTA_MMX | PTA_SSE | PTA_SSE2},
2529 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
2530 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
2531 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
2532 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2533 | PTA_CX16 | PTA_NO_SAHF},
2534 {"core2", PROCESSOR_CORE2, CPU_CORE2,
2535 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2536 | PTA_SSSE3 | PTA_CX16},
2537 {"geode", PROCESSOR_GEODE, CPU_GEODE,
2538 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2539 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
2540 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2541 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2542 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
2543 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2544 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
2545 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2546 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
2547 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2548 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
2549 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2550 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
2551 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2552 {"x86-64", PROCESSOR_K8, CPU_K8,
2553 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF},
2554 {"k8", PROCESSOR_K8, CPU_K8,
2555 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2556 | PTA_SSE2 | PTA_NO_SAHF},
2557 {"k8-sse3", PROCESSOR_K8, CPU_K8,
2558 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2559 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2560 {"opteron", PROCESSOR_K8, CPU_K8,
2561 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2562 | PTA_SSE2 | PTA_NO_SAHF},
2563 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
2564 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2565 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2566 {"athlon64", PROCESSOR_K8, CPU_K8,
2567 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2568 | PTA_SSE2 | PTA_NO_SAHF},
2569 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
2570 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2571 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2572 {"athlon-fx", PROCESSOR_K8, CPU_K8,
2573 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2574 | PTA_SSE2 | PTA_NO_SAHF},
2575 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
2576 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2577 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
2578 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
2579 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2580 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
2581 {"generic32", PROCESSOR_GENERIC32, CPU_PENTIUMPRO,
2582 0 /* flags are only used for -march switch. */ },
2583 {"generic64", PROCESSOR_GENERIC64, CPU_GENERIC64,
2584 PTA_64BIT /* flags are only used for -march switch. */ },
2585 };
2586
2587 int const pta_size = ARRAY_SIZE (processor_alias_table);
2588
2589 /* Set up prefix/suffix so the error messages refer to either the command
2590 line argument, or the attribute(target). */
2591 if (main_args_p)
2592 {
2593 prefix = "-m";
2594 suffix = "";
2595 sw = "switch";
2596 }
2597 else
2598 {
2599 prefix = "option(\"";
2600 suffix = "\")";
2601 sw = "attribute";
2602 }
2603
2604 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2605 SUBTARGET_OVERRIDE_OPTIONS;
2606 #endif
2607
2608 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
2609 SUBSUBTARGET_OVERRIDE_OPTIONS;
2610 #endif
2611
2612 /* -fPIC (level 2) is the default for 64-bit Mach-O. */
2613 if (TARGET_MACHO && TARGET_64BIT)
2614 flag_pic = 2;
2615
2616 /* Set the default values for switches whose default depends on TARGET_64BIT
2617 in case they weren't overwritten by command line options. */
2618 if (TARGET_64BIT)
2619 {
2620 /* Mach-O doesn't support omitting the frame pointer for now. */
2621 if (flag_omit_frame_pointer == 2)
2622 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
2623 if (flag_asynchronous_unwind_tables == 2)
2624 flag_asynchronous_unwind_tables = 1;
2625 if (flag_pcc_struct_return == 2)
2626 flag_pcc_struct_return = 0;
2627 }
2628 else
2629 {
2630 if (flag_omit_frame_pointer == 2)
2631 flag_omit_frame_pointer = 0;
2632 if (flag_asynchronous_unwind_tables == 2)
2633 flag_asynchronous_unwind_tables = 0;
2634 if (flag_pcc_struct_return == 2)
2635 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
2636 }
2637
2638 /* Need to check -mtune=generic first. */
2639 if (ix86_tune_string)
2640 {
2641 if (!strcmp (ix86_tune_string, "generic")
2642 || !strcmp (ix86_tune_string, "i686")
2643 /* As special support for cross compilers we read -mtune=native
2644 as -mtune=generic. With native compilers we won't see the
2645 -mtune=native, as it was changed by the driver. */
2646 || !strcmp (ix86_tune_string, "native"))
2647 {
2648 if (TARGET_64BIT)
2649 ix86_tune_string = "generic64";
2650 else
2651 ix86_tune_string = "generic32";
2652 }
2653 /* If this call is for setting the option attribute, allow the
2654 generic32/generic64 that was previously set. */
2655 else if (!main_args_p
2656 && (!strcmp (ix86_tune_string, "generic32")
2657 || !strcmp (ix86_tune_string, "generic64")))
2658 ;
2659 else if (!strncmp (ix86_tune_string, "generic", 7))
2660 error ("bad value (%s) for %stune=%s %s",
2661 ix86_tune_string, prefix, suffix, sw);
2662 }
2663 else
2664 {
2665 if (ix86_arch_string)
2666 ix86_tune_string = ix86_arch_string;
2667 if (!ix86_tune_string)
2668 {
2669 ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
2670 ix86_tune_defaulted = 1;
2671 }
2672
2673 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
2674 need to use a sensible tune option. */
2675 if (!strcmp (ix86_tune_string, "generic")
2676 || !strcmp (ix86_tune_string, "x86-64")
2677 || !strcmp (ix86_tune_string, "i686"))
2678 {
2679 if (TARGET_64BIT)
2680 ix86_tune_string = "generic64";
2681 else
2682 ix86_tune_string = "generic32";
2683 }
2684 }
2685 if (ix86_stringop_string)
2686 {
2687 if (!strcmp (ix86_stringop_string, "rep_byte"))
2688 stringop_alg = rep_prefix_1_byte;
2689 else if (!strcmp (ix86_stringop_string, "libcall"))
2690 stringop_alg = libcall;
2691 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
2692 stringop_alg = rep_prefix_4_byte;
2693 else if (!strcmp (ix86_stringop_string, "rep_8byte")
2694 && TARGET_64BIT)
2695 /* rep; movq isn't available in 32-bit code. */
2696 stringop_alg = rep_prefix_8_byte;
2697 else if (!strcmp (ix86_stringop_string, "byte_loop"))
2698 stringop_alg = loop_1_byte;
2699 else if (!strcmp (ix86_stringop_string, "loop"))
2700 stringop_alg = loop;
2701 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
2702 stringop_alg = unrolled_loop;
2703 else
2704 error ("bad value (%s) for %sstringop-strategy=%s %s",
2705 ix86_stringop_string, prefix, suffix, sw);
2706 }
2707 if (!strcmp (ix86_tune_string, "x86-64"))
2708 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated. Use "
2709 "%stune=k8%s or %stune=generic%s instead as appropriate.",
2710 prefix, suffix, prefix, suffix, prefix, suffix);
2711
2712 if (!ix86_arch_string)
2713 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
2714 else
2715 ix86_arch_specified = 1;
2716
2717 if (!strcmp (ix86_arch_string, "generic"))
2718 error ("generic CPU can be used only for %stune=%s %s",
2719 prefix, suffix, sw);
2720 if (!strncmp (ix86_arch_string, "generic", 7))
2721 error ("bad value (%s) for %sarch=%s %s",
2722 ix86_arch_string, prefix, suffix, sw);
2723
2724 if (ix86_cmodel_string != 0)
2725 {
2726 if (!strcmp (ix86_cmodel_string, "small"))
2727 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2728 else if (!strcmp (ix86_cmodel_string, "medium"))
2729 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
2730 else if (!strcmp (ix86_cmodel_string, "large"))
2731 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
2732 else if (flag_pic)
2733 error ("code model %s does not support PIC mode", ix86_cmodel_string);
2734 else if (!strcmp (ix86_cmodel_string, "32"))
2735 ix86_cmodel = CM_32;
2736 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
2737 ix86_cmodel = CM_KERNEL;
2738 else
2739 error ("bad value (%s) for %scmodel=%s %s",
2740 ix86_cmodel_string, prefix, suffix, sw);
2741 }
2742 else
2743 {
2744 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
2745 use of rip-relative addressing. This eliminates fixups that
2746 would otherwise be needed if this object is to be placed in a
2747 DLL, and is essentially just as efficient as direct addressing. */
2748 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
2749 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
2750 else if (TARGET_64BIT)
2751 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2752 else
2753 ix86_cmodel = CM_32;
2754 }
2755 if (ix86_asm_string != 0)
2756 {
2757 if (! TARGET_MACHO
2758 && !strcmp (ix86_asm_string, "intel"))
2759 ix86_asm_dialect = ASM_INTEL;
2760 else if (!strcmp (ix86_asm_string, "att"))
2761 ix86_asm_dialect = ASM_ATT;
2762 else
2763 error ("bad value (%s) for %sasm=%s %s",
2764 ix86_asm_string, prefix, suffix, sw);
2765 }
2766 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
2767 error ("code model %qs not supported in the %s bit mode",
2768 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
2769 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
2770 sorry ("%i-bit mode not compiled in",
2771 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
2772
2773 for (i = 0; i < pta_size; i++)
2774 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
2775 {
2776 ix86_schedule = processor_alias_table[i].schedule;
2777 ix86_arch = processor_alias_table[i].processor;
2778 /* Default cpu tuning to the architecture. */
2779 ix86_tune = ix86_arch;
2780
2781 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2782 error ("CPU you selected does not support x86-64 "
2783 "instruction set");
2784
2785 if (processor_alias_table[i].flags & PTA_MMX
2786 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
2787 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
2788 if (processor_alias_table[i].flags & PTA_3DNOW
2789 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
2790 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
2791 if (processor_alias_table[i].flags & PTA_3DNOW_A
2792 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
2793 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
2794 if (processor_alias_table[i].flags & PTA_SSE
2795 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
2796 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
2797 if (processor_alias_table[i].flags & PTA_SSE2
2798 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
2799 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
2800 if (processor_alias_table[i].flags & PTA_SSE3
2801 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
2802 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
2803 if (processor_alias_table[i].flags & PTA_SSSE3
2804 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
2805 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
2806 if (processor_alias_table[i].flags & PTA_SSE4_1
2807 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
2808 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
2809 if (processor_alias_table[i].flags & PTA_SSE4_2
2810 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
2811 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
2812 if (processor_alias_table[i].flags & PTA_AVX
2813 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
2814 ix86_isa_flags |= OPTION_MASK_ISA_AVX;
2815 if (processor_alias_table[i].flags & PTA_FMA
2816 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
2817 ix86_isa_flags |= OPTION_MASK_ISA_FMA;
2818 if (processor_alias_table[i].flags & PTA_SSE4A
2819 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
2820 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
2821 if (processor_alias_table[i].flags & PTA_SSE5
2822 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE5))
2823 ix86_isa_flags |= OPTION_MASK_ISA_SSE5;
2824 if (processor_alias_table[i].flags & PTA_ABM
2825 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
2826 ix86_isa_flags |= OPTION_MASK_ISA_ABM;
2827 if (processor_alias_table[i].flags & PTA_CX16
2828 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
2829 ix86_isa_flags |= OPTION_MASK_ISA_CX16;
2830 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
2831 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
2832 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
2833 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
2834 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
2835 ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
2836 if (processor_alias_table[i].flags & PTA_AES
2837 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
2838 ix86_isa_flags |= OPTION_MASK_ISA_AES;
2839 if (processor_alias_table[i].flags & PTA_PCLMUL
2840 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
2841 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
2842 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
2843 x86_prefetch_sse = true;
2844
2845 break;
2846 }
2847
2848 if (i == pta_size)
2849 error ("bad value (%s) for %sarch=%s %s",
2850 ix86_arch_string, prefix, suffix, sw);
2851
2852 ix86_arch_mask = 1u << ix86_arch;
2853 for (i = 0; i < X86_ARCH_LAST; ++i)
2854 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
2855
2856 for (i = 0; i < pta_size; i++)
2857 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
2858 {
2859 ix86_schedule = processor_alias_table[i].schedule;
2860 ix86_tune = processor_alias_table[i].processor;
2861 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2862 {
2863 if (ix86_tune_defaulted)
2864 {
2865 ix86_tune_string = "x86-64";
2866 for (i = 0; i < pta_size; i++)
2867 if (! strcmp (ix86_tune_string,
2868 processor_alias_table[i].name))
2869 break;
2870 ix86_schedule = processor_alias_table[i].schedule;
2871 ix86_tune = processor_alias_table[i].processor;
2872 }
2873 else
2874 error ("CPU you selected does not support x86-64 "
2875 "instruction set");
2876 }
2877 /* Intel CPUs have always interpreted SSE prefetch instructions as
2878 NOPs; so, we can enable SSE prefetch instructions even when
2879 -mtune (rather than -march) points us to a processor that has them.
2880 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
2881 higher processors. */
2882 if (TARGET_CMOVE
2883 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
2884 x86_prefetch_sse = true;
2885 break;
2886 }
2887 if (i == pta_size)
2888 error ("bad value (%s) for %stune=%s %s",
2889 ix86_tune_string, prefix, suffix, sw);
2890
2891 ix86_tune_mask = 1u << ix86_tune;
2892 for (i = 0; i < X86_TUNE_LAST; ++i)
2893 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
2894
2895 if (optimize_size)
2896 ix86_cost = &ix86_size_cost;
2897 else
2898 ix86_cost = processor_target_table[ix86_tune].cost;
2899
2900 /* Arrange to set up i386_stack_locals for all functions. */
2901 init_machine_status = ix86_init_machine_status;
2902
2903 /* Validate -mregparm= value. */
2904 if (ix86_regparm_string)
2905 {
2906 if (TARGET_64BIT)
2907 warning (0, "%sregparm%s is ignored in 64-bit mode", prefix, suffix);
2908 i = atoi (ix86_regparm_string);
2909 if (i < 0 || i > REGPARM_MAX)
2910 error ("%sregparm=%d%s is not between 0 and %d",
2911 prefix, i, suffix, REGPARM_MAX);
2912 else
2913 ix86_regparm = i;
2914 }
2915 if (TARGET_64BIT)
2916 ix86_regparm = REGPARM_MAX;
2917
2918 /* If the user has provided any of the -malign-* options,
2919 warn and use that value only if -falign-* is not set.
2920 Remove this code in GCC 3.2 or later. */
2921 if (ix86_align_loops_string)
2922 {
2923 warning (0, "%salign-loops%s is obsolete, use %salign-loops%s",
2924 prefix, suffix, prefix, suffix);
2925 if (align_loops == 0)
2926 {
2927 i = atoi (ix86_align_loops_string);
2928 if (i < 0 || i > MAX_CODE_ALIGN)
2929 error ("%salign-loops=%d%s is not between 0 and %d",
2930 prefix, i, suffix, MAX_CODE_ALIGN);
2931 else
2932 align_loops = 1 << i;
2933 }
2934 }
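/* Worked example: the legacy -malign-loops=N option takes a log2 value, so
   -malign-loops=4 becomes align_loops = 1 << 4 = 16, equivalent to the
   newer -falign-loops=16.  */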
2935
2936 if (ix86_align_jumps_string)
2937 {
2938 warning (0, "%salign-jumps%s is obsolete, use %salign-jumps%s",
2939 prefix, suffix, prefix, suffix);
2940 if (align_jumps == 0)
2941 {
2942 i = atoi (ix86_align_jumps_string);
2943 if (i < 0 || i > MAX_CODE_ALIGN)
2944 error ("%salign-loops=%d%s is not between 0 and %d",
2945 prefix, i, suffix, MAX_CODE_ALIGN);
2946 else
2947 align_jumps = 1 << i;
2948 }
2949 }
2950
2951 if (ix86_align_funcs_string)
2952 {
2953 warning (0, "%salign-functions%s is obsolete, use %salign-functions%s",
2954 prefix, suffix, prefix, suffix);
2955 if (align_functions == 0)
2956 {
2957 i = atoi (ix86_align_funcs_string);
2958 if (i < 0 || i > MAX_CODE_ALIGN)
2959 error ("%salign-loops=%d%s is not between 0 and %d",
2960 prefix, i, suffix, MAX_CODE_ALIGN);
2961 else
2962 align_functions = 1 << i;
2963 }
2964 }
2965
2966 /* Default align_* from the processor table. */
2967 if (align_loops == 0)
2968 {
2969 align_loops = processor_target_table[ix86_tune].align_loop;
2970 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
2971 }
2972 if (align_jumps == 0)
2973 {
2974 align_jumps = processor_target_table[ix86_tune].align_jump;
2975 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
2976 }
2977 if (align_functions == 0)
2978 {
2979 align_functions = processor_target_table[ix86_tune].align_func;
2980 }
2981
2982 /* Validate -mbranch-cost= value, or provide default. */
2983 ix86_branch_cost = ix86_cost->branch_cost;
2984 if (ix86_branch_cost_string)
2985 {
2986 i = atoi (ix86_branch_cost_string);
2987 if (i < 0 || i > 5)
2988 error ("%sbranch-cost=%d%s is not between 0 and 5", prefix, i, suffix);
2989 else
2990 ix86_branch_cost = i;
2991 }
2992 if (ix86_section_threshold_string)
2993 {
2994 i = atoi (ix86_section_threshold_string);
2995 if (i < 0)
2996 error ("%slarge-data-threshold=%d%s is negative", prefix, i, suffix);
2997 else
2998 ix86_section_threshold = i;
2999 }
3000
3001 if (ix86_tls_dialect_string)
3002 {
3003 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
3004 ix86_tls_dialect = TLS_DIALECT_GNU;
3005 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
3006 ix86_tls_dialect = TLS_DIALECT_GNU2;
3007 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
3008 ix86_tls_dialect = TLS_DIALECT_SUN;
3009 else
3010 error ("bad value (%s) for %stls-dialect=%s %s",
3011 ix86_tls_dialect_string, prefix, suffix, sw);
3012 }
3013
3014 if (ix87_precision_string)
3015 {
3016 i = atoi (ix87_precision_string);
3017 if (i != 32 && i != 64 && i != 80)
3018 error ("pc%d is not valid precision setting (32, 64 or 80)", i);
3019 }
3020
3021 if (TARGET_64BIT)
3022 {
3023 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
3024
3025 /* Enable by default the SSE and MMX builtins. Do allow the user to
3026 explicitly disable any of these. In particular, disabling SSE and
3027 MMX for kernel code is extremely useful. */
3028 if (!ix86_arch_specified)
3029 ix86_isa_flags
3030 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3031 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
3032
3033 if (TARGET_RTD)
3034 warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
3035 }
3036 else
3037 {
3038 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
3039
3040 if (!ix86_arch_specified)
3041 ix86_isa_flags
3042 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
3043
3044 /* The i386 ABI does not specify a red zone. It still makes sense to use it
3045 when the programmer takes care to keep the stack from being destroyed. */
3046 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
3047 target_flags |= MASK_NO_RED_ZONE;
3048 }
3049
3050 /* Keep nonleaf frame pointers. */
3051 if (flag_omit_frame_pointer)
3052 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3053 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
3054 flag_omit_frame_pointer = 1;
3055
3056 /* If we're doing fast math, we don't care about comparison order
3057 wrt NaNs. This lets us use a shorter comparison sequence. */
3058 if (flag_finite_math_only)
3059 target_flags &= ~MASK_IEEE_FP;
3060
3061 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3062 since the insns won't need emulation. */
3063 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
3064 target_flags &= ~MASK_NO_FANCY_MATH_387;
3065
3066 /* Likewise, if the target doesn't have a 387, or we've specified
3067 software floating point, don't use 387 inline intrinsics. */
3068 if (!TARGET_80387)
3069 target_flags |= MASK_NO_FANCY_MATH_387;
3070
3071 /* Turn on MMX builtins for -msse. */
3072 if (TARGET_SSE)
3073 {
3074 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
3075 x86_prefetch_sse = true;
3076 }
3077
3078 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
3079 if (TARGET_SSE4_2 || TARGET_ABM)
3080 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;
3081
3082 /* Validate -mpreferred-stack-boundary= value or default it to
3083 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3084 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3085 if (ix86_preferred_stack_boundary_string)
3086 {
3087 i = atoi (ix86_preferred_stack_boundary_string);
3088 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3089 error ("%spreferred-stack-boundary=%d%s is not between %d and 12",
3090 prefix, i, suffix, TARGET_64BIT ? 4 : 2);
3091 else
3092 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
3093 }
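/* Worked example: -mpreferred-stack-boundary=4 gives i = 4, so
   ix86_preferred_stack_boundary = (1 << 4) * BITS_PER_UNIT = 16 * 8
   = 128 bits, i.e. a 16-byte-aligned stack (the usual 64-bit default).  */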
3094
3095 /* Set the default value for -mstackrealign. */
3096 if (ix86_force_align_arg_pointer == -1)
3097 ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3098
3099 /* Validate -mincoming-stack-boundary= value or default it to
3100 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3101 if (ix86_force_align_arg_pointer)
3102 ix86_default_incoming_stack_boundary = MIN_STACK_BOUNDARY;
3103 else
3104 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3105 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3106 if (ix86_incoming_stack_boundary_string)
3107 {
3108 i = atoi (ix86_incoming_stack_boundary_string);
3109 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3110 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3111 i, TARGET_64BIT ? 4 : 2);
3112 else
3113 {
3114 ix86_user_incoming_stack_boundary = (1 << i) * BITS_PER_UNIT;
3115 ix86_incoming_stack_boundary
3116 = ix86_user_incoming_stack_boundary;
3117 }
3118 }
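/* Illustrative note (added for clarity, not in the original sources): both
   boundary options take an exponent, not a byte count.  With
   BITS_PER_UNIT == 8:

     -mpreferred-stack-boundary=4  ->  (1 << 4) * 8 == 128 bits == 16 bytes
     -mincoming-stack-boundary=2   ->  (1 << 2) * 8 ==  32 bits ==  4 bytes

   which is why the lower limit is 4 in 64-bit mode (the psABI requires
   16-byte alignment) but may be as low as 2 for 32-bit code.  */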
3119
3120 /* Accept -msseregparm only if at least SSE support is enabled. */
3121 if (TARGET_SSEREGPARM
3122 && ! TARGET_SSE)
3123 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
3124
3125 ix86_fpmath = TARGET_FPMATH_DEFAULT;
3126 if (ix86_fpmath_string != 0)
3127 {
3128 if (! strcmp (ix86_fpmath_string, "387"))
3129 ix86_fpmath = FPMATH_387;
3130 else if (! strcmp (ix86_fpmath_string, "sse"))
3131 {
3132 if (!TARGET_SSE)
3133 {
3134 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3135 ix86_fpmath = FPMATH_387;
3136 }
3137 else
3138 ix86_fpmath = FPMATH_SSE;
3139 }
3140 else if (! strcmp (ix86_fpmath_string, "387,sse")
3141 || ! strcmp (ix86_fpmath_string, "387+sse")
3142 || ! strcmp (ix86_fpmath_string, "sse,387")
3143 || ! strcmp (ix86_fpmath_string, "sse+387")
3144 || ! strcmp (ix86_fpmath_string, "both"))
3145 {
3146 if (!TARGET_SSE)
3147 {
3148 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3149 ix86_fpmath = FPMATH_387;
3150 }
3151 else if (!TARGET_80387)
3152 {
3153 warning (0, "387 instruction set disabled, using SSE arithmetics");
3154 ix86_fpmath = FPMATH_SSE;
3155 }
3156 else
3157 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
3158 }
3159 else
3160 error ("bad value (%s) for %sfpmath=%s %s",
3161 ix86_fpmath_string, prefix, suffix, sw);
3162 }
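/* For reference, a sketch of the spellings accepted above:

     -mfpmath=387        use the i387 unit only
     -mfpmath=sse        use SSE (falls back to 387 with a warning if SSE
                         is disabled)
     -mfpmath=sse,387    also "sse+387", "387,sse", "387+sse" or "both":
                         use both units when both are available.  */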
3163
3164 /* If the i387 is disabled, then do not return values in it. */
3165 if (!TARGET_80387)
3166 target_flags &= ~MASK_FLOAT_RETURNS;
3167
3168 /* Use external vectorized library in vectorizing intrinsics. */
3169 if (ix86_veclibabi_string)
3170 {
3171 if (strcmp (ix86_veclibabi_string, "svml") == 0)
3172 ix86_veclib_handler = ix86_veclibabi_svml;
3173 else if (strcmp (ix86_veclibabi_string, "acml") == 0)
3174 ix86_veclib_handler = ix86_veclibabi_acml;
3175 else
3176 error ("unknown vectorization library ABI type (%s) for "
3177 "%sveclibabi=%s %s", ix86_veclibabi_string,
3178 prefix, suffix, sw);
3179 }
3180
3181 if ((x86_accumulate_outgoing_args & ix86_tune_mask)
3182 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3183 && !optimize_size)
3184 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3185
3186 /* ??? Unwind info is not correct around the CFG unless either a frame
3187 pointer is present or M_A_O_A is set. Fixing this requires rewriting
3188 unwind info generation to be aware of the CFG and propagating states
3189 around edges. */
3190 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
3191 || flag_exceptions || flag_non_call_exceptions)
3192 && flag_omit_frame_pointer
3193 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3194 {
3195 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3196 warning (0, "unwind tables currently require either a frame pointer "
3197 "or %saccumulate-outgoing-args%s for correctness",
3198 prefix, suffix);
3199 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3200 }
3201
3202 /* If stack probes are required, the space used for large function
3203 arguments on the stack must also be probed, so enable
3204 -maccumulate-outgoing-args so this happens in the prologue. */
3205 if (TARGET_STACK_PROBE
3206 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3207 {
3208 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3209 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
3210 "for correctness", prefix, suffix);
3211 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3212 }
3213
3214 /* For sane SSE instruction set generation we need the fcomi instruction.
3215 It is safe to enable all CMOVE instructions. */
3216 if (TARGET_SSE)
3217 TARGET_CMOVE = 1;
3218
3219 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
3220 {
3221 char *p;
3222 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
3223 p = strchr (internal_label_prefix, 'X');
3224 internal_label_prefix_len = p - internal_label_prefix;
3225 *p = '\0';
3226 }
3227
3228 /* When the scheduling description is not available, disable the scheduler
3229 pass so it won't slow down the compilation and make x87 code slower. */
3230 if (!TARGET_SCHEDULE)
3231 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
3232
3233 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
3234 set_param_value ("simultaneous-prefetches",
3235 ix86_cost->simultaneous_prefetches);
3236 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
3237 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
3238 if (!PARAM_SET_P (PARAM_L1_CACHE_SIZE))
3239 set_param_value ("l1-cache-size", ix86_cost->l1_cache_size);
3240 if (!PARAM_SET_P (PARAM_L2_CACHE_SIZE))
3241 set_param_value ("l2-cache-size", ix86_cost->l2_cache_size);
3242
3243 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
3244 can be optimized to ap = __builtin_next_arg (0). */
3245 if (!TARGET_64BIT)
3246 targetm.expand_builtin_va_start = NULL;
3247
3248 if (TARGET_64BIT)
3249 {
3250 ix86_gen_leave = gen_leave_rex64;
3251 ix86_gen_pop1 = gen_popdi1;
3252 ix86_gen_add3 = gen_adddi3;
3253 ix86_gen_sub3 = gen_subdi3;
3254 ix86_gen_sub3_carry = gen_subdi3_carry_rex64;
3255 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
3256 ix86_gen_monitor = gen_sse3_monitor64;
3257 ix86_gen_andsp = gen_anddi3;
3258 }
3259 else
3260 {
3261 ix86_gen_leave = gen_leave;
3262 ix86_gen_pop1 = gen_popsi1;
3263 ix86_gen_add3 = gen_addsi3;
3264 ix86_gen_sub3 = gen_subsi3;
3265 ix86_gen_sub3_carry = gen_subsi3_carry;
3266 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
3267 ix86_gen_monitor = gen_sse3_monitor;
3268 ix86_gen_andsp = gen_andsi3;
3269 }
3270
3271 #ifdef USE_IX86_CLD
3272 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
3273 if (!TARGET_64BIT)
3274 target_flags |= MASK_CLD & ~target_flags_explicit;
3275 #endif
3276
3277 /* Save the initial options in case the user does function specific options */
3278 if (main_args_p)
3279 target_option_default_node = target_option_current_node
3280 = build_target_option_node ();
3281 }
3282 \f
3283 /* Save the current options */
3284
3285 static void
3286 ix86_function_specific_save (struct cl_target_option *ptr)
3287 {
3288 gcc_assert (IN_RANGE (ix86_arch, 0, 255));
3289 gcc_assert (IN_RANGE (ix86_schedule, 0, 255));
3290 gcc_assert (IN_RANGE (ix86_tune, 0, 255));
3291 gcc_assert (IN_RANGE (ix86_fpmath, 0, 255));
3292 gcc_assert (IN_RANGE (ix86_branch_cost, 0, 255));
3293
3294 ptr->arch = ix86_arch;
3295 ptr->schedule = ix86_schedule;
3296 ptr->tune = ix86_tune;
3297 ptr->fpmath = ix86_fpmath;
3298 ptr->branch_cost = ix86_branch_cost;
3299 ptr->tune_defaulted = ix86_tune_defaulted;
3300 ptr->arch_specified = ix86_arch_specified;
3301 ptr->ix86_isa_flags_explicit = ix86_isa_flags_explicit;
3302 ptr->target_flags_explicit = target_flags_explicit;
3303 }
3304
3305 /* Restore the current options */
3306
3307 static void
3308 ix86_function_specific_restore (struct cl_target_option *ptr)
3309 {
3310 enum processor_type old_tune = ix86_tune;
3311 enum processor_type old_arch = ix86_arch;
3312 unsigned int ix86_arch_mask, ix86_tune_mask;
3313 int i;
3314
3315 ix86_arch = ptr->arch;
3316 ix86_schedule = ptr->schedule;
3317 ix86_tune = ptr->tune;
3318 ix86_fpmath = ptr->fpmath;
3319 ix86_branch_cost = ptr->branch_cost;
3320 ix86_tune_defaulted = ptr->tune_defaulted;
3321 ix86_arch_specified = ptr->arch_specified;
3322 ix86_isa_flags_explicit = ptr->ix86_isa_flags_explicit;
3323 target_flags_explicit = ptr->target_flags_explicit;
3324
3325 /* Recreate the arch feature tests if the arch changed */
3326 if (old_arch != ix86_arch)
3327 {
3328 ix86_arch_mask = 1u << ix86_arch;
3329 for (i = 0; i < X86_ARCH_LAST; ++i)
3330 ix86_arch_features[i]
3331 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3332 }
3333
3334 /* Recreate the tune optimization tests */
3335 if (old_tune != ix86_tune)
3336 {
3337 ix86_tune_mask = 1u << ix86_tune;
3338 for (i = 0; i < X86_TUNE_LAST; ++i)
3339 ix86_tune_features[i]
3340 = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3341 }
3342 }
3343
3344 /* Print the current options */
3345
3346 static void
3347 ix86_function_specific_print (FILE *file, int indent,
3348 struct cl_target_option *ptr)
3349 {
3350 char *target_string
3351 = ix86_target_string (ptr->ix86_isa_flags, ptr->target_flags,
3352 NULL, NULL, NULL, false);
3353
3354 fprintf (file, "%*sarch = %d (%s)\n",
3355 indent, "",
3356 ptr->arch,
3357 ((ptr->arch < TARGET_CPU_DEFAULT_max)
3358 ? cpu_names[ptr->arch]
3359 : "<unknown>"));
3360
3361 fprintf (file, "%*stune = %d (%s)\n",
3362 indent, "",
3363 ptr->tune,
3364 ((ptr->tune < TARGET_CPU_DEFAULT_max)
3365 ? cpu_names[ptr->tune]
3366 : "<unknown>"));
3367
3368 fprintf (file, "%*sfpmath = %d%s%s\n", indent, "", ptr->fpmath,
3369 (ptr->fpmath & FPMATH_387) ? ", 387" : "",
3370 (ptr->fpmath & FPMATH_SSE) ? ", sse" : "");
3371 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
3372
3373 if (target_string)
3374 {
3375 fprintf (file, "%*s%s\n", indent, "", target_string);
3376 free (target_string);
3377 }
3378 }
3379
3380 \f
3381 /* Inner function to process the attribute((target(...))); take an argument
3382 and set the current options from it. If we have a list, recursively go
3383 over the list. */
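/* Illustration only (hypothetical user code, not part of GCC): the strings
   parsed here come from declarations such as

     __attribute__((target("sse4.2,no-fused-madd,arch=core2")))
     int popcount_all (const unsigned int *p, int n)
     {
       int i, s = 0;
       for (i = 0; i < n; i++)
         s += __builtin_popcount (p[i]);
       return s;
     }

   Each comma-separated entry is looked up in the attrs[] table below; a
   "no-" prefix negates boolean options, while "arch=", "tune=" and
   "fpmath=" carry string arguments.  */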
3384
3385 static bool
3386 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[])
3387 {
3388 char *next_optstr;
3389 bool ret = true;
3390
3391 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
3392 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
3393 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
3394 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
3395
3396 enum ix86_opt_type
3397 {
3398 ix86_opt_unknown,
3399 ix86_opt_yes,
3400 ix86_opt_no,
3401 ix86_opt_str,
3402 ix86_opt_isa
3403 };
3404
3405 static const struct
3406 {
3407 const char *string;
3408 size_t len;
3409 enum ix86_opt_type type;
3410 int opt;
3411 int mask;
3412 } attrs[] = {
3413 /* isa options */
3414 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
3415 IX86_ATTR_ISA ("abm", OPT_mabm),
3416 IX86_ATTR_ISA ("aes", OPT_maes),
3417 IX86_ATTR_ISA ("avx", OPT_mavx),
3418 IX86_ATTR_ISA ("mmx", OPT_mmmx),
3419 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
3420 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
3421 IX86_ATTR_ISA ("sse", OPT_msse),
3422 IX86_ATTR_ISA ("sse2", OPT_msse2),
3423 IX86_ATTR_ISA ("sse3", OPT_msse3),
3424 IX86_ATTR_ISA ("sse4", OPT_msse4),
3425 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
3426 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
3427 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
3428 IX86_ATTR_ISA ("sse5", OPT_msse5),
3429 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
3430
3431 /* string options */
3432 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
3433 IX86_ATTR_STR ("fpmath=", IX86_FUNCTION_SPECIFIC_FPMATH),
3434 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
3435
3436 /* flag options */
3437 IX86_ATTR_YES ("cld",
3438 OPT_mcld,
3439 MASK_CLD),
3440
3441 IX86_ATTR_NO ("fancy-math-387",
3442 OPT_mfancy_math_387,
3443 MASK_NO_FANCY_MATH_387),
3444
3445 IX86_ATTR_NO ("fused-madd",
3446 OPT_mfused_madd,
3447 MASK_NO_FUSED_MADD),
3448
3449 IX86_ATTR_YES ("ieee-fp",
3450 OPT_mieee_fp,
3451 MASK_IEEE_FP),
3452
3453 IX86_ATTR_YES ("inline-all-stringops",
3454 OPT_minline_all_stringops,
3455 MASK_INLINE_ALL_STRINGOPS),
3456
3457 IX86_ATTR_YES ("inline-stringops-dynamically",
3458 OPT_minline_stringops_dynamically,
3459 MASK_INLINE_STRINGOPS_DYNAMICALLY),
3460
3461 IX86_ATTR_NO ("align-stringops",
3462 OPT_mno_align_stringops,
3463 MASK_NO_ALIGN_STRINGOPS),
3464
3465 IX86_ATTR_YES ("recip",
3466 OPT_mrecip,
3467 MASK_RECIP),
3468
3469 };
3470
3471 /* If this is a list, recurse to get the options. */
3472 if (TREE_CODE (args) == TREE_LIST)
3473 {
3474 bool ret = true;
3475
3476 for (; args; args = TREE_CHAIN (args))
3477 if (TREE_VALUE (args)
3478 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args), p_strings))
3479 ret = false;
3480
3481 return ret;
3482 }
3483
3484 else if (TREE_CODE (args) != STRING_CST)
3485 gcc_unreachable ();
3486
3487 /* Handle multiple arguments separated by commas. */
3488 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
3489
3490 while (next_optstr && *next_optstr != '\0')
3491 {
3492 char *p = next_optstr;
3493 char *orig_p = p;
3494 char *comma = strchr (next_optstr, ',');
3495 const char *opt_string;
3496 size_t len, opt_len;
3497 int opt;
3498 bool opt_set_p;
3499 char ch;
3500 unsigned i;
3501 enum ix86_opt_type type = ix86_opt_unknown;
3502 int mask = 0;
3503
3504 if (comma)
3505 {
3506 *comma = '\0';
3507 len = comma - next_optstr;
3508 next_optstr = comma + 1;
3509 }
3510 else
3511 {
3512 len = strlen (p);
3513 next_optstr = NULL;
3514 }
3515
3516 /* Recognize no-xxx. */
3517 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
3518 {
3519 opt_set_p = false;
3520 p += 3;
3521 len -= 3;
3522 }
3523 else
3524 opt_set_p = true;
3525
3526 /* Find the option. */
3527 ch = *p;
3528 opt = N_OPTS;
3529 for (i = 0; i < ARRAY_SIZE (attrs); i++)
3530 {
3531 type = attrs[i].type;
3532 opt_len = attrs[i].len;
3533 if (ch == attrs[i].string[0]
3534 && ((type != ix86_opt_str) ? len == opt_len : len > opt_len)
3535 && memcmp (p, attrs[i].string, opt_len) == 0)
3536 {
3537 opt = attrs[i].opt;
3538 mask = attrs[i].mask;
3539 opt_string = attrs[i].string;
3540 break;
3541 }
3542 }
3543
3544 /* Process the option. */
3545 if (opt == N_OPTS)
3546 {
3547 error ("attribute(target(\"%s\")) is unknown", orig_p);
3548 ret = false;
3549 }
3550
3551 else if (type == ix86_opt_isa)
3552 ix86_handle_option (opt, p, opt_set_p);
3553
3554 else if (type == ix86_opt_yes || type == ix86_opt_no)
3555 {
3556 if (type == ix86_opt_no)
3557 opt_set_p = !opt_set_p;
3558
3559 if (opt_set_p)
3560 target_flags |= mask;
3561 else
3562 target_flags &= ~mask;
3563 }
3564
3565 else if (type == ix86_opt_str)
3566 {
3567 if (p_strings[opt])
3568 {
3569 error ("option(\"%s\") was already specified", opt_string);
3570 ret = false;
3571 }
3572 else
3573 p_strings[opt] = xstrdup (p + opt_len);
3574 }
3575
3576 else
3577 gcc_unreachable ();
3578 }
3579
3580 return ret;
3581 }
3582
3583 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
3584
3585 tree
3586 ix86_valid_target_attribute_tree (tree args)
3587 {
3588 const char *orig_arch_string = ix86_arch_string;
3589 const char *orig_tune_string = ix86_tune_string;
3590 const char *orig_fpmath_string = ix86_fpmath_string;
3591 int orig_tune_defaulted = ix86_tune_defaulted;
3592 int orig_arch_specified = ix86_arch_specified;
3593 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL, NULL };
3594 tree t = NULL_TREE;
3595 int i;
3596 struct cl_target_option *def
3597 = TREE_TARGET_OPTION (target_option_default_node);
3598
3599 /* Process each of the options on the chain. */
3600 if (! ix86_valid_target_attribute_inner_p (args, option_strings))
3601 return NULL_TREE;
3602
3603 /* If the changed options are different from the default, rerun override_options,
3604 and then save the options away. The string options are attribute options,
3605 and will be undone when we copy the save structure. */
3606 if (ix86_isa_flags != def->ix86_isa_flags
3607 || target_flags != def->target_flags
3608 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
3609 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
3610 || option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
3611 {
3612 /* If we are using the default tune= or arch=, undo the string assigned,
3613 and use the default. */
3614 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
3615 ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
3616 else if (!orig_arch_specified)
3617 ix86_arch_string = NULL;
3618
3619 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
3620 ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
3621 else if (orig_tune_defaulted)
3622 ix86_tune_string = NULL;
3623
3624 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
3625 if (option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
3626 ix86_fpmath_string = option_strings[IX86_FUNCTION_SPECIFIC_FPMATH];
3627 else if (!TARGET_64BIT && TARGET_SSE)
3628 ix86_fpmath_string = "sse,387";
3629
3630 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
3631 override_options (false);
3632
3633 /* Add any builtin functions with the new isa if any. */
3634 ix86_add_new_builtins (ix86_isa_flags);
3635
3636 /* Save the current options unless we are validating options for
3637 #pragma. */
3638 t = build_target_option_node ();
3639
3640 ix86_arch_string = orig_arch_string;
3641 ix86_tune_string = orig_tune_string;
3642 ix86_fpmath_string = orig_fpmath_string;
3643
3644 /* Free up memory allocated to hold the strings */
3645 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
3646 if (option_strings[i])
3647 free (option_strings[i]);
3648 }
3649
3650 return t;
3651 }
3652
3653 /* Hook to validate attribute((target("string"))). */
3654
3655 static bool
3656 ix86_valid_target_attribute_p (tree fndecl,
3657 tree ARG_UNUSED (name),
3658 tree args,
3659 int ARG_UNUSED (flags))
3660 {
3661 struct cl_target_option cur_target;
3662 bool ret = true;
3663 tree old_optimize = build_optimization_node ();
3664 tree new_target, new_optimize;
3665 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
3666
3667 /* If the function changed the optimization levels as well as setting target
3668 options, start with the optimizations specified. */
3669 if (func_optimize && func_optimize != old_optimize)
3670 cl_optimization_restore (TREE_OPTIMIZATION (func_optimize));
3671
3672 /* The target attributes may also change some optimization flags, so update
3673 the optimization options if necessary. */
3674 cl_target_option_save (&cur_target);
3675 new_target = ix86_valid_target_attribute_tree (args);
3676 new_optimize = build_optimization_node ();
3677
3678 if (!new_target)
3679 ret = false;
3680
3681 else if (fndecl)
3682 {
3683 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
3684
3685 if (old_optimize != new_optimize)
3686 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
3687 }
3688
3689 cl_target_option_restore (&cur_target);
3690
3691 if (old_optimize != new_optimize)
3692 cl_optimization_restore (TREE_OPTIMIZATION (old_optimize));
3693
3694 return ret;
3695 }
3696
3697 \f
3698 /* Hook to determine if one function can safely inline another. */
3699
3700 static bool
3701 ix86_can_inline_p (tree caller, tree callee)
3702 {
3703 bool ret = false;
3704 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
3705 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
3706
3707 /* If callee has no option attributes, then it is ok to inline. */
3708 if (!callee_tree)
3709 ret = true;
3710
3711 /* If caller has no option attributes, but callee does then it is not ok to
3712 inline. */
3713 else if (!caller_tree)
3714 ret = false;
3715
3716 else
3717 {
3718 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
3719 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
3720
3721 /* The callee's ISA options should be a subset of the caller's, i.e. an SSE5
3722 function can inline an SSE2 function, but an SSE2 function can't inline an
3723 SSE5 function. */
3724 if ((caller_opts->ix86_isa_flags & callee_opts->ix86_isa_flags)
3725 != callee_opts->ix86_isa_flags)
3726 ret = false;
3727
3728 /* See if we have the same non-isa options. */
3729 else if (caller_opts->target_flags != callee_opts->target_flags)
3730 ret = false;
3731
3732 /* See if arch, tune, etc. are the same. */
3733 else if (caller_opts->arch != callee_opts->arch)
3734 ret = false;
3735
3736 else if (caller_opts->tune != callee_opts->tune)
3737 ret = false;
3738
3739 else if (caller_opts->fpmath != callee_opts->fpmath)
3740 ret = false;
3741
3742 else if (caller_opts->branch_cost != callee_opts->branch_cost)
3743 ret = false;
3744
3745 else
3746 ret = true;
3747 }
3748
3749 return ret;
3750 }
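/* Illustration (hypothetical user code): given

     __attribute__((target("sse2")))   static int callee (int x) { return x + 1; }
     __attribute__((target("sse4.2"))) int caller_a (int x) { return callee (x); }
     int caller_b (int x) { return callee (x); }

   caller_a may inline callee, since SSE2 is a subset of its SSE4.2 ISA
   flags and the remaining options match; caller_b has no target attribute
   while callee does, so the inline is refused.  */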
3751
3752 \f
3753 /* Remember the last target of ix86_set_current_function. */
3754 static GTY(()) tree ix86_previous_fndecl;
3755
3756 /* Establish appropriate back-end context for processing the function
3757 FNDECL. The argument might be NULL to indicate processing at top
3758 level, outside of any function scope. */
3759 static void
3760 ix86_set_current_function (tree fndecl)
3761 {
3762 /* Only change the context if the function changes. This hook is called
3763 several times in the course of compiling a function, and we don't want to
3764 slow things down too much or call target_reinit when it isn't safe. */
3765 if (fndecl && fndecl != ix86_previous_fndecl)
3766 {
3767 tree old_tree = (ix86_previous_fndecl
3768 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
3769 : NULL_TREE);
3770
3771 tree new_tree = (fndecl
3772 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
3773 : NULL_TREE);
3774
3775 ix86_previous_fndecl = fndecl;
3776 if (old_tree == new_tree)
3777 ;
3778
3779 else if (new_tree)
3780 {
3781 cl_target_option_restore (TREE_TARGET_OPTION (new_tree));
3782 target_reinit ();
3783 }
3784
3785 else if (old_tree)
3786 {
3787 struct cl_target_option *def
3788 = TREE_TARGET_OPTION (target_option_current_node);
3789
3790 cl_target_option_restore (def);
3791 target_reinit ();
3792 }
3793 }
3794 }
3795
3796 \f
3797 /* Return true if this goes in large data/bss. */
3798
3799 static bool
3800 ix86_in_large_data_p (tree exp)
3801 {
3802 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
3803 return false;
3804
3805 /* Functions are never large data. */
3806 if (TREE_CODE (exp) == FUNCTION_DECL)
3807 return false;
3808
3809 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
3810 {
3811 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
3812 if (strcmp (section, ".ldata") == 0
3813 || strcmp (section, ".lbss") == 0)
3814 return true;
3815 return false;
3816 }
3817 else
3818 {
3819 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
3820
3821 /* If this is an incomplete type with size 0, then we can't put it
3822 in data because it might be too big when completed. */
3823 if (!size || size > ix86_section_threshold)
3824 return true;
3825 }
3826
3827 return false;
3828 }
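/* Rough usage sketch (command line shown for orientation only):

     gcc -mcmodel=medium -mlarge-data-threshold=65536 file.c

   With the medium code model, variables larger than the threshold, or
   explicitly placed in .ldata/.lbss, count as large data and are routed
   to the .l* sections chosen by the hooks below.  */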
3829
3830 /* Switch to the appropriate section for output of DECL.
3831 DECL is either a `VAR_DECL' node or a constant of some sort.
3832 RELOC indicates whether forming the initial value of DECL requires
3833 link-time relocations. */
3834
3835 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
3836 ATTRIBUTE_UNUSED;
3837
3838 static section *
3839 x86_64_elf_select_section (tree decl, int reloc,
3840 unsigned HOST_WIDE_INT align)
3841 {
3842 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
3843 && ix86_in_large_data_p (decl))
3844 {
3845 const char *sname = NULL;
3846 unsigned int flags = SECTION_WRITE;
3847 switch (categorize_decl_for_section (decl, reloc))
3848 {
3849 case SECCAT_DATA:
3850 sname = ".ldata";
3851 break;
3852 case SECCAT_DATA_REL:
3853 sname = ".ldata.rel";
3854 break;
3855 case SECCAT_DATA_REL_LOCAL:
3856 sname = ".ldata.rel.local";
3857 break;
3858 case SECCAT_DATA_REL_RO:
3859 sname = ".ldata.rel.ro";
3860 break;
3861 case SECCAT_DATA_REL_RO_LOCAL:
3862 sname = ".ldata.rel.ro.local";
3863 break;
3864 case SECCAT_BSS:
3865 sname = ".lbss";
3866 flags |= SECTION_BSS;
3867 break;
3868 case SECCAT_RODATA:
3869 case SECCAT_RODATA_MERGE_STR:
3870 case SECCAT_RODATA_MERGE_STR_INIT:
3871 case SECCAT_RODATA_MERGE_CONST:
3872 sname = ".lrodata";
3873 flags = 0;
3874 break;
3875 case SECCAT_SRODATA:
3876 case SECCAT_SDATA:
3877 case SECCAT_SBSS:
3878 gcc_unreachable ();
3879 case SECCAT_TEXT:
3880 case SECCAT_TDATA:
3881 case SECCAT_TBSS:
3882 /* We don't split these for the medium model. Place them into
3883 default sections and hope for the best. */
3884 break;
3885 case SECCAT_EMUTLS_VAR:
3886 case SECCAT_EMUTLS_TMPL:
3887 gcc_unreachable ();
3888 }
3889 if (sname)
3890 {
3891 /* We might get called with string constants, but get_named_section
3892 doesn't like them as they are not DECLs. Also, we need to set
3893 flags in that case. */
3894 if (!DECL_P (decl))
3895 return get_section (sname, flags, NULL);
3896 return get_named_section (decl, sname, reloc);
3897 }
3898 }
3899 return default_elf_select_section (decl, reloc, align);
3900 }
3901
3902 /* Build up a unique section name, expressed as a
3903 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
3904 RELOC indicates whether the initial value of DECL requires
3905 link-time relocations. */
3906
3907 static void ATTRIBUTE_UNUSED
3908 x86_64_elf_unique_section (tree decl, int reloc)
3909 {
3910 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
3911 && ix86_in_large_data_p (decl))
3912 {
3913 const char *prefix = NULL;
3914 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
3915 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
3916
3917 switch (categorize_decl_for_section (decl, reloc))
3918 {
3919 case SECCAT_DATA:
3920 case SECCAT_DATA_REL:
3921 case SECCAT_DATA_REL_LOCAL:
3922 case SECCAT_DATA_REL_RO:
3923 case SECCAT_DATA_REL_RO_LOCAL:
3924 prefix = one_only ? ".ld" : ".ldata";
3925 break;
3926 case SECCAT_BSS:
3927 prefix = one_only ? ".lb" : ".lbss";
3928 break;
3929 case SECCAT_RODATA:
3930 case SECCAT_RODATA_MERGE_STR:
3931 case SECCAT_RODATA_MERGE_STR_INIT:
3932 case SECCAT_RODATA_MERGE_CONST:
3933 prefix = one_only ? ".lr" : ".lrodata";
3934 break;
3935 case SECCAT_SRODATA:
3936 case SECCAT_SDATA:
3937 case SECCAT_SBSS:
3938 gcc_unreachable ();
3939 case SECCAT_TEXT:
3940 case SECCAT_TDATA:
3941 case SECCAT_TBSS:
3942 /* We don't split these for the medium model. Place them into
3943 default sections and hope for the best. */
3944 break;
3945 case SECCAT_EMUTLS_VAR:
3946 prefix = targetm.emutls.var_section;
3947 break;
3948 case SECCAT_EMUTLS_TMPL:
3949 prefix = targetm.emutls.tmpl_section;
3950 break;
3951 }
3952 if (prefix)
3953 {
3954 const char *name, *linkonce;
3955 char *string;
3956
3957 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
3958 name = targetm.strip_name_encoding (name);
3959
3960 /* If we're using one_only, then there needs to be a .gnu.linkonce
3961 prefix to the section name. */
3962 linkonce = one_only ? ".gnu.linkonce" : "";
3963
3964 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
3965
3966 DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
3967 return;
3968 }
3969 }
3970 default_unique_section (decl, reloc);
3971 }
3972
3973 #ifdef COMMON_ASM_OP
3974 /* This says how to output assembler code to declare an
3975 uninitialized external linkage data object.
3976
3977 For medium model x86-64 we need to use .largecomm opcode for
3978 large objects. */
3979 void
3980 x86_elf_aligned_common (FILE *file,
3981 const char *name, unsigned HOST_WIDE_INT size,
3982 int align)
3983 {
3984 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
3985 && size > (unsigned int)ix86_section_threshold)
3986 fprintf (file, ".largecomm\t");
3987 else
3988 fprintf (file, "%s", COMMON_ASM_OP);
3989 assemble_name (file, name);
3990 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
3991 size, align / BITS_PER_UNIT);
3992 }
3993 #endif
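/* Sketch of the expected output (exact spelling depends on COMMON_ASM_OP
   and the assembler in use):

     .largecomm big_array,1048576,32     medium model, size > threshold
     .comm      small_var,4,4            everything else

   where the operands after the name are the size in bytes and the
   alignment converted to bytes (align / BITS_PER_UNIT).  */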
3994
3995 /* Utility function for targets to use in implementing
3996 ASM_OUTPUT_ALIGNED_BSS. */
3997
3998 void
3999 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
4000 const char *name, unsigned HOST_WIDE_INT size,
4001 int align)
4002 {
4003 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4004 && size > (unsigned int)ix86_section_threshold)
4005 switch_to_section (get_named_section (decl, ".lbss", 0));
4006 else
4007 switch_to_section (bss_section);
4008 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
4009 #ifdef ASM_DECLARE_OBJECT_NAME
4010 last_assemble_variable_decl = decl;
4011 ASM_DECLARE_OBJECT_NAME (file, name, decl);
4012 #else
4013 /* Standard thing is just output label for the object. */
4014 ASM_OUTPUT_LABEL (file, name);
4015 #endif /* ASM_DECLARE_OBJECT_NAME */
4016 ASM_OUTPUT_SKIP (file, size ? size : 1);
4017 }
4018 \f
4019 void
4020 optimization_options (int level, int size ATTRIBUTE_UNUSED)
4021 {
4022 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
4023 make the problem with not enough registers even worse. */
4024 #ifdef INSN_SCHEDULING
4025 if (level > 1)
4026 flag_schedule_insns = 0;
4027 #endif
4028
4029 if (TARGET_MACHO)
4030 /* The Darwin libraries never set errno, so we might as well
4031 avoid calling them when that's the only reason we would. */
4032 flag_errno_math = 0;
4033
4034 /* The default values of these switches depend on TARGET_64BIT, which is
4035 not known at this moment. Mark these values with 2 and let the user
4036 override them. In case there is no command line option specifying them,
4037 we will set the defaults in override_options. */
4038 if (optimize >= 1)
4039 flag_omit_frame_pointer = 2;
4040 flag_pcc_struct_return = 2;
4041 flag_asynchronous_unwind_tables = 2;
4042 flag_vect_cost_model = 1;
4043 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
4044 SUBTARGET_OPTIMIZATION_OPTIONS;
4045 #endif
4046 }
4047 \f
4048 /* Decide whether we can make a sibling call to a function. DECL is the
4049 declaration of the function being targeted by the call and EXP is the
4050 CALL_EXPR representing the call. */
4051
4052 static bool
4053 ix86_function_ok_for_sibcall (tree decl, tree exp)
4054 {
4055 tree func;
4056 rtx a, b;
4057
4058 /* If we are generating position-independent code, we cannot sibcall
4059 optimize any indirect call, or a direct call to a global function,
4060 as the PLT requires %ebx be live. */
4061 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
4062 return false;
4063
4064 if (decl)
4065 func = decl;
4066 else
4067 {
4068 func = TREE_TYPE (CALL_EXPR_FN (exp));
4069 if (POINTER_TYPE_P (func))
4070 func = TREE_TYPE (func);
4071 }
4072
4073 /* Check that the return value locations are the same. For example,
4074 if we are returning floats on the 80387 register stack, we cannot
4075 make a sibcall from a function that doesn't return a float to a
4076 function that does or, conversely, from a function that does return
4077 a float to a function that doesn't; the necessary stack adjustment
4078 would not be executed. This is also the place we notice
4079 differences in the return value ABI. Note that it is ok for one
4080 of the functions to have void return type as long as the return
4081 value of the other is passed in a register. */
4082 a = ix86_function_value (TREE_TYPE (exp), func, false);
4083 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
4084 cfun->decl, false);
4085 if (STACK_REG_P (a) || STACK_REG_P (b))
4086 {
4087 if (!rtx_equal_p (a, b))
4088 return false;
4089 }
4090 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
4091 ;
4092 else if (!rtx_equal_p (a, b))
4093 return false;
4094
4095 /* If this call is indirect, we'll need to be able to use a call-clobbered
4096 register for the address of the target function. Make sure that all
4097 such registers are not used for passing parameters. */
4098 if (!decl && !TARGET_64BIT)
4099 {
4100 tree type;
4101
4102 /* We're looking at the CALL_EXPR, we need the type of the function. */
4103 type = CALL_EXPR_FN (exp); /* pointer expression */
4104 type = TREE_TYPE (type); /* pointer type */
4105 type = TREE_TYPE (type); /* function type */
4106
4107 if (ix86_function_regparm (type, NULL) >= 3)
4108 {
4109 /* ??? Need to count the actual number of registers to be used,
4110 not the possible number of registers. Fix later. */
4111 return false;
4112 }
4113 }
4114
4115 /* Dllimport'd functions are also called indirectly. */
4116 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
4117 && !TARGET_64BIT
4118 && decl && DECL_DLLIMPORT_P (decl)
4119 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
4120 return false;
4121
4122 /* If we need to align the outgoing stack, then sibcalling would
4123 unalign the stack, which may break the called function. */
4124 if (ix86_incoming_stack_boundary < PREFERRED_STACK_BOUNDARY)
4125 return false;
4126
4127 /* Otherwise okay. That also includes certain types of indirect calls. */
4128 return true;
4129 }
4130
4131 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
4132 calling convention attributes;
4133 arguments as in struct attribute_spec.handler. */
4134
4135 static tree
4136 ix86_handle_cconv_attribute (tree *node, tree name,
4137 tree args,
4138 int flags ATTRIBUTE_UNUSED,
4139 bool *no_add_attrs)
4140 {
4141 if (TREE_CODE (*node) != FUNCTION_TYPE
4142 && TREE_CODE (*node) != METHOD_TYPE
4143 && TREE_CODE (*node) != FIELD_DECL
4144 && TREE_CODE (*node) != TYPE_DECL)
4145 {
4146 warning (OPT_Wattributes, "%qs attribute only applies to functions",
4147 IDENTIFIER_POINTER (name));
4148 *no_add_attrs = true;
4149 return NULL_TREE;
4150 }
4151
4152 /* Can combine regparm with all attributes but fastcall. */
4153 if (is_attribute_p ("regparm", name))
4154 {
4155 tree cst;
4156
4157 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4158 {
4159 error ("fastcall and regparm attributes are not compatible");
4160 }
4161
4162 cst = TREE_VALUE (args);
4163 if (TREE_CODE (cst) != INTEGER_CST)
4164 {
4165 warning (OPT_Wattributes,
4166 "%qs attribute requires an integer constant argument",
4167 IDENTIFIER_POINTER (name));
4168 *no_add_attrs = true;
4169 }
4170 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
4171 {
4172 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
4173 IDENTIFIER_POINTER (name), REGPARM_MAX);
4174 *no_add_attrs = true;
4175 }
4176
4177 return NULL_TREE;
4178 }
4179
4180 if (TARGET_64BIT)
4181 {
4182 /* Do not warn when emulating the MS ABI. */
4183 if (TREE_CODE (*node) != FUNCTION_TYPE || ix86_function_type_abi (*node)!=MS_ABI)
4184 warning (OPT_Wattributes, "%qs attribute ignored",
4185 IDENTIFIER_POINTER (name));
4186 *no_add_attrs = true;
4187 return NULL_TREE;
4188 }
4189
4190 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
4191 if (is_attribute_p ("fastcall", name))
4192 {
4193 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4194 {
4195 error ("fastcall and cdecl attributes are not compatible");
4196 }
4197 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4198 {
4199 error ("fastcall and stdcall attributes are not compatible");
4200 }
4201 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
4202 {
4203 error ("fastcall and regparm attributes are not compatible");
4204 }
4205 }
4206
4207 /* Can combine stdcall with fastcall (redundant), regparm and
4208 sseregparm. */
4209 else if (is_attribute_p ("stdcall", name))
4210 {
4211 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4212 {
4213 error ("stdcall and cdecl attributes are not compatible");
4214 }
4215 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4216 {
4217 error ("stdcall and fastcall attributes are not compatible");
4218 }
4219 }
4220
4221 /* Can combine cdecl with regparm and sseregparm. */
4222 else if (is_attribute_p ("cdecl", name))
4223 {
4224 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4225 {
4226 error ("stdcall and cdecl attributes are not compatible");
4227 }
4228 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4229 {
4230 error ("fastcall and cdecl attributes are not compatible");
4231 }
4232 }
4233
4234 /* Can combine sseregparm with all attributes. */
4235
4236 return NULL_TREE;
4237 }
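/* Examples of the combinations diagnosed above (hypothetical declarations):

     void __attribute__((fastcall, stdcall))   f (int);  // error: not compatible
     void __attribute__((stdcall, regparm(2))) g (int);  // accepted
     void __attribute__((regparm(8)))           h (int); // warning if 8 > REGPARM_MAX

   On 64-bit targets these attributes are ignored; a warning is emitted
   unless the function follows the MS ABI.  */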
4238
4239 /* Return 0 if the attributes for two types are incompatible, 1 if they
4240 are compatible, and 2 if they are nearly compatible (which causes a
4241 warning to be generated). */
4242
4243 static int
4244 ix86_comp_type_attributes (const_tree type1, const_tree type2)
4245 {
4246 /* Check for mismatch of non-default calling convention. */
4247 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
4248
4249 if (TREE_CODE (type1) != FUNCTION_TYPE
4250 && TREE_CODE (type1) != METHOD_TYPE)
4251 return 1;
4252
4253 /* Check for mismatched fastcall/regparm types. */
4254 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
4255 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
4256 || (ix86_function_regparm (type1, NULL)
4257 != ix86_function_regparm (type2, NULL)))
4258 return 0;
4259
4260 /* Check for mismatched sseregparm types. */
4261 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
4262 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
4263 return 0;
4264
4265 /* Check for mismatched return types (cdecl vs stdcall). */
4266 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
4267 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
4268 return 0;
4269
4270 return 1;
4271 }
4272 \f
4273 /* Return the regparm value for a function with the indicated TYPE and DECL.
4274 DECL may be NULL when calling function indirectly
4275 or considering a libcall. */
4276
4277 static int
4278 ix86_function_regparm (const_tree type, const_tree decl)
4279 {
4280 tree attr;
4281 int regparm = ix86_regparm;
4282
4283 static bool error_issued;
4284
4285 if (TARGET_64BIT)
4286 {
4287 if (ix86_function_type_abi (type) == DEFAULT_ABI)
4288 return regparm;
4289 return DEFAULT_ABI != SYSV_ABI ? X86_64_REGPARM_MAX : X64_REGPARM_MAX;
4290 }
4291
4292 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
4293 if (attr)
4294 {
4295 regparm
4296 = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
4297
4298 if (decl && TREE_CODE (decl) == FUNCTION_DECL)
4299 {
4300 /* We can't use regparm(3) for nested functions because
4301 these pass static chain pointer in %ecx register. */
4302 if (!error_issued && regparm == 3
4303 && decl_function_context (decl)
4304 && !DECL_NO_STATIC_CHAIN (decl))
4305 {
4306 error ("nested functions are limited to 2 register parameters");
4307 error_issued = true;
4308 return 0;
4309 }
4310 }
4311
4312 return regparm;
4313 }
4314
4315 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
4316 return 2;
4317
4318 /* Use register calling convention for local functions when possible. */
4319 if (decl && TREE_CODE (decl) == FUNCTION_DECL
4320 && !profile_flag)
4321 {
4322 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
4323 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
4324 if (i && i->local)
4325 {
4326 int local_regparm, globals = 0, regno;
4327 struct function *f;
4328
4329 /* Make sure no regparm register is taken by a
4330 fixed register variable. */
4331 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
4332 if (fixed_regs[local_regparm])
4333 break;
4334
4335 /* We can't use regparm(3) for nested functions as these use
4336 static chain pointer in third argument. */
4337 if (local_regparm == 3
4338 && decl_function_context (decl)
4339 && !DECL_NO_STATIC_CHAIN (decl))
4340 local_regparm = 2;
4341
4342 /* If the function realigns its stack pointer, the prologue will
4343 clobber %ecx. If we've already generated code for the callee,
4344 the callee DECL_STRUCT_FUNCTION is gone, so we fall back to
4345 scanning the attributes for the self-realigning property. */
4346 f = DECL_STRUCT_FUNCTION (decl);
4347 /* Since the current internal arg pointer won't conflict with
4348 parameter passing regs, there is no need to change stack
4349 realignment or adjust the regparm number.
4350
4351 Each fixed register usage increases register pressure,
4352 so fewer registers should be used for argument passing.
4353 This functionality can be overridden by an explicit
4354 regparm value. */
4355 for (regno = 0; regno <= DI_REG; regno++)
4356 if (fixed_regs[regno])
4357 globals++;
4358
4359 local_regparm
4360 = globals < local_regparm ? local_regparm - globals : 0;
4361
4362 if (local_regparm > regparm)
4363 regparm = local_regparm;
4364 }
4365 }
4366
4367 return regparm;
4368 }
4369
4370 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
4371 DFmode (2) arguments in SSE registers for a function with the
4372 indicated TYPE and DECL. DECL may be NULL when calling function
4373 indirectly or considering a libcall. Otherwise return 0. */
4374
4375 static int
4376 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
4377 {
4378 gcc_assert (!TARGET_64BIT);
4379
4380 /* Use SSE registers to pass SFmode and DFmode arguments if requested
4381 by the sseregparm attribute. */
4382 if (TARGET_SSEREGPARM
4383 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
4384 {
4385 if (!TARGET_SSE)
4386 {
4387 if (warn)
4388 {
4389 if (decl)
4390 error ("Calling %qD with attribute sseregparm without "
4391 "SSE/SSE2 enabled", decl);
4392 else
4393 error ("Calling %qT with attribute sseregparm without "
4394 "SSE/SSE2 enabled", type);
4395 }
4396 return 0;
4397 }
4398
4399 return 2;
4400 }
4401
4402 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
4403 (and DFmode for SSE2) arguments in SSE registers. */
4404 if (decl && TARGET_SSE_MATH && !profile_flag)
4405 {
4406 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
4407 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
4408 if (i && i->local)
4409 return TARGET_SSE2 ? 2 : 1;
4410 }
4411
4412 return 0;
4413 }
4414
4415 /* Return true if EAX is live at the start of the function. Used by
4416 ix86_expand_prologue to determine if we need special help before
4417 calling allocate_stack_worker. */
4418
4419 static bool
4420 ix86_eax_live_at_start_p (void)
4421 {
4422 /* Cheat. Don't bother working forward from ix86_function_regparm
4423 to the function type to whether an actual argument is located in
4424 eax. Instead just look at cfg info, which is still close enough
4425 to correct at this point. This gives false positives for broken
4426 functions that might use uninitialized data that happens to be
4427 allocated in eax, but who cares? */
4428 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
4429 }
4430
4431 /* Value is the number of bytes of arguments automatically
4432 popped when returning from a subroutine call.
4433 FUNDECL is the declaration node of the function (as a tree),
4434 FUNTYPE is the data type of the function (as a tree),
4435 or for a library call it is an identifier node for the subroutine name.
4436 SIZE is the number of bytes of arguments passed on the stack.
4437
4438 On the 80386, the RTD insn may be used to pop them if the number
4439 of args is fixed, but if the number is variable then the caller
4440 must pop them all. RTD can't be used for library calls now
4441 because the library is compiled with the Unix compiler.
4442 Use of RTD is a selectable option, since it is incompatible with
4443 standard Unix calling sequences. If the option is not selected,
4444 the caller must always pop the args.
4445
4446 The attribute stdcall is equivalent to RTD on a per module basis. */
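/* Worked example (illustrative): for

     void __attribute__((stdcall)) f (int a, int b);

   a 32-bit callee pops its 8 bytes of arguments on return ("ret $8"),
   so this function returns 8.  A cdecl function, or any stdarg function,
   returns 0 and the caller pops the arguments itself.  */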
4447
4448 int
4449 ix86_return_pops_args (tree fundecl, tree funtype, int size)
4450 {
4451 int rtd;
4452
4453 /* None of the 64-bit ABIs pop arguments. */
4454 if (TARGET_64BIT)
4455 return 0;
4456
4457 rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
4458
4459 /* Cdecl functions override -mrtd, and never pop the stack. */
4460 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
4461 {
4462 /* Stdcall and fastcall functions will pop the stack if not
4463 variable args. */
4464 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
4465 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
4466 rtd = 1;
4467
4468 if (rtd && ! stdarg_p (funtype))
4469 return size;
4470 }
4471
4472 /* Lose any fake structure return argument if it is passed on the stack. */
4473 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
4474 && !KEEP_AGGREGATE_RETURN_POINTER)
4475 {
4476 int nregs = ix86_function_regparm (funtype, fundecl);
4477 if (nregs == 0)
4478 return GET_MODE_SIZE (Pmode);
4479 }
4480
4481 return 0;
4482 }
4483 \f
4484 /* Argument support functions. */
4485
4486 /* Return true when register may be used to pass function parameters. */
4487 bool
4488 ix86_function_arg_regno_p (int regno)
4489 {
4490 int i;
4491 const int *parm_regs;
4492
4493 if (!TARGET_64BIT)
4494 {
4495 if (TARGET_MACHO)
4496 return (regno < REGPARM_MAX
4497 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
4498 else
4499 return (regno < REGPARM_MAX
4500 || (TARGET_MMX && MMX_REGNO_P (regno)
4501 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
4502 || (TARGET_SSE && SSE_REGNO_P (regno)
4503 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
4504 }
4505
4506 if (TARGET_MACHO)
4507 {
4508 if (SSE_REGNO_P (regno) && TARGET_SSE)
4509 return true;
4510 }
4511 else
4512 {
4513 if (TARGET_SSE && SSE_REGNO_P (regno)
4514 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
4515 return true;
4516 }
4517
4518 /* TODO: The function should depend on current function ABI but
4519 builtins.c would need updating then. Therefore we use the
4520 default ABI. */
4521
4522 /* RAX is used as hidden argument to va_arg functions. */
4523 if (DEFAULT_ABI == SYSV_ABI && regno == AX_REG)
4524 return true;
4525
4526 if (DEFAULT_ABI == MS_ABI)
4527 parm_regs = x86_64_ms_abi_int_parameter_registers;
4528 else
4529 parm_regs = x86_64_int_parameter_registers;
4530 for (i = 0; i < (DEFAULT_ABI == MS_ABI ? X64_REGPARM_MAX
4531 : X86_64_REGPARM_MAX); i++)
4532 if (regno == parm_regs[i])
4533 return true;
4534 return false;
4535 }
4536
4537 /* Return if we do not know how to pass TYPE solely in registers. */
4538
4539 static bool
4540 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
4541 {
4542 if (must_pass_in_stack_var_size_or_pad (mode, type))
4543 return true;
4544
4545 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
4546 The layout_type routine is crafty and tries to trick us into passing
4547 currently unsupported vector types on the stack by using TImode. */
4548 return (!TARGET_64BIT && mode == TImode
4549 && type && TREE_CODE (type) != VECTOR_TYPE);
4550 }
4551
4552 /* Return the size, in bytes, of the area reserved for arguments passed
4553 in registers for the function represented by FNDECL, depending on the
4554 ABI used. */
4555 int
4556 ix86_reg_parm_stack_space (const_tree fndecl)
4557 {
4558 int call_abi = SYSV_ABI;
4559 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
4560 call_abi = ix86_function_abi (fndecl);
4561 else
4562 call_abi = ix86_function_type_abi (fndecl);
4563 if (call_abi == MS_ABI)
4564 return 32;
4565 return 0;
4566 }
4567
4568 /* Returns value SYSV_ABI, MS_ABI dependent on fntype, specifying the
4569 call abi used. */
4570 int
4571 ix86_function_type_abi (const_tree fntype)
4572 {
4573 if (TARGET_64BIT && fntype != NULL)
4574 {
4575 int abi;
4576 if (DEFAULT_ABI == SYSV_ABI)
4577 abi = lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)) ? MS_ABI : SYSV_ABI;
4578 else
4579 abi = lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)) ? SYSV_ABI : MS_ABI;
4580
4581 return abi;
4582 }
4583 return DEFAULT_ABI;
4584 }
4585
4586 int
4587 ix86_function_abi (const_tree fndecl)
4588 {
4589 if (! fndecl)
4590 return DEFAULT_ABI;
4591 return ix86_function_type_abi (TREE_TYPE (fndecl));
4592 }
4593
4594 /* Returns value SYSV_ABI, MS_ABI dependent on cfun, specifying the
4595 call abi used. */
4596 int
4597 ix86_cfun_abi (void)
4598 {
4599 if (! cfun || ! TARGET_64BIT)
4600 return DEFAULT_ABI;
4601 return cfun->machine->call_abi;
4602 }
4603
4604 /* regclass.c */
4605 extern void init_regs (void);
4606
4607 /* Implementation of call abi switching target hook. Specific to FNDECL
4608 the specific call register sets are set. See also CONDITIONAL_REGISTER_USAGE
4609 for more details. */
4610 void
4611 ix86_call_abi_override (const_tree fndecl)
4612 {
4613 if (fndecl == NULL_TREE)
4614 cfun->machine->call_abi = DEFAULT_ABI;
4615 else
4616 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
4617 }
4618
4619 /* MS and SYSV ABI have different set of call used registers. Avoid expensive
4620 re-initialization of init_regs each time we switch function context since
4621 this is needed only during RTL expansion. */
4622 static void
4623 ix86_maybe_switch_abi (void)
4624 {
4625 if (TARGET_64BIT &&
4626 call_used_regs[4 /*RSI*/] == (cfun->machine->call_abi == MS_ABI))
4627 reinit_regs ();
4628 }
4629
4630 /* Initialize a variable CUM of type CUMULATIVE_ARGS
4631 for a call to a function whose data type is FNTYPE.
4632 For a library call, FNTYPE is 0. */
4633
4634 void
4635 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
4636 tree fntype, /* tree ptr for function decl */
4637 rtx libname, /* SYMBOL_REF of library name or 0 */
4638 tree fndecl)
4639 {
4640 struct cgraph_local_info *i = fndecl ? cgraph_local_info (fndecl) : NULL;
4641 memset (cum, 0, sizeof (*cum));
4642
4643 if (fndecl)
4644 cum->call_abi = ix86_function_abi (fndecl);
4645 else
4646 cum->call_abi = ix86_function_type_abi (fntype);
4647 /* Set up the number of registers to use for passing arguments. */
4648
4649 if (cum->call_abi == MS_ABI && !ACCUMULATE_OUTGOING_ARGS)
4650 sorry ("ms_abi attribute requires -maccumulate-outgoing-args or subtarget optimization implying it");
4651 cum->nregs = ix86_regparm;
4652 if (TARGET_64BIT)
4653 {
4654 if (cum->call_abi != DEFAULT_ABI)
4655 cum->nregs = DEFAULT_ABI != SYSV_ABI ? X86_64_REGPARM_MAX
4656 : X64_REGPARM_MAX;
4657 }
4658 if (TARGET_SSE)
4659 {
4660 cum->sse_nregs = SSE_REGPARM_MAX;
4661 if (TARGET_64BIT)
4662 {
4663 if (cum->call_abi != DEFAULT_ABI)
4664 cum->sse_nregs = DEFAULT_ABI != SYSV_ABI ? X86_64_SSE_REGPARM_MAX
4665 : X64_SSE_REGPARM_MAX;
4666 }
4667 }
4668 if (TARGET_MMX)
4669 cum->mmx_nregs = MMX_REGPARM_MAX;
4670 cum->warn_avx = true;
4671 cum->warn_sse = true;
4672 cum->warn_mmx = true;
4673
4674 /* Because the type might mismatch between caller and callee, we need to
4675 use the actual type of the function for local calls.
4676 FIXME: cgraph_analyze can be told to actually record whether a function
4677 uses va_start, so for local functions maybe_vaarg can be made more
4678 aggressive, helping K&R code.
4679 FIXME: once the type system is fixed, we won't need this code anymore. */
4680 if (i && i->local)
4681 fntype = TREE_TYPE (fndecl);
4682 cum->maybe_vaarg = (fntype
4683 ? (!prototype_p (fntype) || stdarg_p (fntype))
4684 : !libname);
4685
4686 if (!TARGET_64BIT)
4687 {
4688 /* If there are variable arguments, then we won't pass anything
4689 in registers in 32-bit mode. */
4690 if (stdarg_p (fntype))
4691 {
4692 cum->nregs = 0;
4693 cum->sse_nregs = 0;
4694 cum->mmx_nregs = 0;
4695 cum->warn_avx = 0;
4696 cum->warn_sse = 0;
4697 cum->warn_mmx = 0;
4698 return;
4699 }
4700
4701 /* Use ecx and edx registers if function has fastcall attribute,
4702 else look for regparm information. */
4703 if (fntype)
4704 {
4705 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
4706 {
4707 cum->nregs = 2;
4708 cum->fastcall = 1;
4709 }
4710 else
4711 cum->nregs = ix86_function_regparm (fntype, fndecl);
4712 }
4713
4714 /* Set up the number of SSE registers used for passing SFmode
4715 and DFmode arguments. Warn for mismatching ABI. */
4716 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
4717 }
4718 }
4719
4720 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
4721 But in the case of vector types, it is some vector mode.
4722
4723 When we have only some of our vector isa extensions enabled, then there
4724 are some modes for which vector_mode_supported_p is false. For these
4725 modes, the generic vector support in gcc will choose some non-vector mode
4726 in order to implement the type. By computing the natural mode, we'll
4727 select the proper ABI location for the operand and not depend on whatever
4728 the middle-end decides to do with these vector types. */
4729
4730 static enum machine_mode
4731 type_natural_mode (const_tree type)
4732 {
4733 enum machine_mode mode = TYPE_MODE (type);
4734
4735 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
4736 {
4737 HOST_WIDE_INT size = int_size_in_bytes (type);
4738 if ((size == 8 || size == 16)
4739 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
4740 && TYPE_VECTOR_SUBPARTS (type) > 1)
4741 {
4742 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
4743
4744 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
4745 mode = MIN_MODE_VECTOR_FLOAT;
4746 else
4747 mode = MIN_MODE_VECTOR_INT;
4748
4749 /* Get the mode which has this inner mode and number of units. */
4750 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
4751 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
4752 && GET_MODE_INNER (mode) == innermode)
4753 return mode;
4754
4755 gcc_unreachable ();
4756 }
4757 }
4758
4759 return mode;
4760 }
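/* Illustration (hypothetical user type): for

     typedef float v4sf __attribute__((vector_size (16)));

   TYPE_MODE may be a non-vector mode when SSE is disabled, but the
   natural mode computed here is V4SFmode (4 SFmode units, 16 bytes), so
   the ABI location of such an argument does not depend on which ISA
   extensions happen to be enabled.  */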
4761
4762 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
4763 this may not agree with the mode that the type system has chosen for the
4764 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
4765 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
4766
4767 static rtx
4768 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
4769 unsigned int regno)
4770 {
4771 rtx tmp;
4772
4773 if (orig_mode != BLKmode)
4774 tmp = gen_rtx_REG (orig_mode, regno);
4775 else
4776 {
4777 tmp = gen_rtx_REG (mode, regno);
4778 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
4779 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
4780 }
4781
4782 return tmp;
4783 }
4784
4785 /* x86-64 register passing implementation. See the x86-64 ABI for details.
4786 The goal of this code is to classify each 8 bytes of the incoming argument
4787 by register class and assign registers accordingly. */
4788
4789 /* Return the union class of CLASS1 and CLASS2.
4790 See the x86-64 PS ABI for details. */
4791
4792 static enum x86_64_reg_class
4793 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
4794 {
4795 /* Rule #1: If both classes are equal, this is the resulting class. */
4796 if (class1 == class2)
4797 return class1;
4798
4799 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
4800 the other class. */
4801 if (class1 == X86_64_NO_CLASS)
4802 return class2;
4803 if (class2 == X86_64_NO_CLASS)
4804 return class1;
4805
4806 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
4807 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
4808 return X86_64_MEMORY_CLASS;
4809
4810 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
4811 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
4812 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
4813 return X86_64_INTEGERSI_CLASS;
4814 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
4815 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
4816 return X86_64_INTEGER_CLASS;
4817
4818 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
4819 MEMORY is used. */
4820 if (class1 == X86_64_X87_CLASS
4821 || class1 == X86_64_X87UP_CLASS
4822 || class1 == X86_64_COMPLEX_X87_CLASS
4823 || class2 == X86_64_X87_CLASS
4824 || class2 == X86_64_X87UP_CLASS
4825 || class2 == X86_64_COMPLEX_X87_CLASS)
4826 return X86_64_MEMORY_CLASS;
4827
4828 /* Rule #6: Otherwise class SSE is used. */
4829 return X86_64_SSE_CLASS;
4830 }
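/* Illustrative applications of the rules above:

     merge_classes (X86_64_NO_CLASS, X86_64_SSE_CLASS)
       == X86_64_SSE_CLASS                               (rule #2)
     merge_classes (X86_64_INTEGERSI_CLASS, X86_64_SSESF_CLASS)
       == X86_64_INTEGERSI_CLASS                         (rule #4)
     merge_classes (X86_64_SSE_CLASS, X86_64_X87_CLASS)
       == X86_64_MEMORY_CLASS                            (rule #5)  */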
4831
4832 /* Classify the argument of type TYPE and mode MODE.
4833 CLASSES will be filled by the register class used to pass each word
4834 of the operand. The number of words is returned. In case the parameter
4835 should be passed in memory, 0 is returned. As a special case for zero
4836 sized containers, classes[0] will be NO_CLASS and 1 is returned.
4837
4838 BIT_OFFSET is used internally for handling records and specifies the
4839 offset in bits, modulo 256, to avoid overflow cases.
4840
4841 See the x86-64 PS ABI for details.
4842 */
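/* Worked example (hypothetical C type, following the psABI rules):

     struct s { double d; int a; int b; };   // 16 bytes, two eightbytes

   The first eightbyte (the double) gets an SSE class and the second (the
   two ints) an integer class, so the struct is passed in one SSE register
   and one general-purpose register.  Aggregates larger than 16 bytes, or
   containing x87 fields, are passed in memory instead.  */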
4843
4844 static int
4845 classify_argument (enum machine_mode mode, const_tree type,
4846 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
4847 {
4848 HOST_WIDE_INT bytes =
4849 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
4850 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4851
4852 /* Variable sized entities are always passed/returned in memory. */
4853 if (bytes < 0)
4854 return 0;
4855
4856 if (mode != VOIDmode
4857 && targetm.calls.must_pass_in_stack (mode, type))
4858 return 0;
4859
4860 if (type && AGGREGATE_TYPE_P (type))
4861 {
4862 int i;
4863 tree field;
4864 enum x86_64_reg_class subclasses[MAX_CLASSES];
4865
4866 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
4867 if (bytes > 16)
4868 return 0;
4869
4870 for (i = 0; i < words; i++)
4871 classes[i] = X86_64_NO_CLASS;
4872
4873 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
4874 signal the memory class, so handle this as a special case. */
4875 if (!words)
4876 {
4877 classes[0] = X86_64_NO_CLASS;
4878 return 1;
4879 }
4880
4881 /* Classify each field of record and merge classes. */
4882 switch (TREE_CODE (type))
4883 {
4884 case RECORD_TYPE:
4885 /* And now merge the fields of structure. */
4886 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
4887 {
4888 if (TREE_CODE (field) == FIELD_DECL)
4889 {
4890 int num;
4891
4892 if (TREE_TYPE (field) == error_mark_node)
4893 continue;
4894
4895 /* Bitfields are always classified as integer. Handle them
4896 early, since later code would consider them to be
4897 misaligned integers. */
4898 if (DECL_BIT_FIELD (field))
4899 {
4900 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
4901 i < ((int_bit_position (field) + (bit_offset % 64))
4902 + tree_low_cst (DECL_SIZE (field), 0)
4903 + 63) / 8 / 8; i++)
4904 classes[i] =
4905 merge_classes (X86_64_INTEGER_CLASS,
4906 classes[i]);
4907 }
4908 else
4909 {
4910 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
4911 TREE_TYPE (field), subclasses,
4912 (int_bit_position (field)
4913 + bit_offset) % 256);
4914 if (!num)
4915 return 0;
4916 for (i = 0; i < num; i++)
4917 {
4918 int pos =
4919 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
4920 classes[i + pos] =
4921 merge_classes (subclasses[i], classes[i + pos]);
4922 }
4923 }
4924 }
4925 }
4926 break;
4927
4928 case ARRAY_TYPE:
4929 /* Arrays are handled as small records. */
4930 {
4931 int num;
4932 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
4933 TREE_TYPE (type), subclasses, bit_offset);
4934 if (!num)
4935 return 0;
4936
4937 /* The partial classes are now full classes. */
4938 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
4939 subclasses[0] = X86_64_SSE_CLASS;
4940 if (subclasses[0] == X86_64_INTEGERSI_CLASS
4941 && !((bit_offset % 64) == 0 && bytes == 4))
4942 subclasses[0] = X86_64_INTEGER_CLASS;
4943
4944 for (i = 0; i < words; i++)
4945 classes[i] = subclasses[i % num];
4946
4947 break;
4948 }
4949 case UNION_TYPE:
4950 case QUAL_UNION_TYPE:
4951 /* Unions are similar to RECORD_TYPE but offset is always 0.
4952 */
4953 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
4954 {
4955 if (TREE_CODE (field) == FIELD_DECL)
4956 {
4957 int num;
4958
4959 if (TREE_TYPE (field) == error_mark_node)
4960 continue;
4961
4962 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
4963 TREE_TYPE (field), subclasses,
4964 bit_offset);
4965 if (!num)
4966 return 0;
4967 for (i = 0; i < num; i++)
4968 classes[i] = merge_classes (subclasses[i], classes[i]);
4969 }
4970 }
4971 break;
4972
4973 default:
4974 gcc_unreachable ();
4975 }
4976
4977 /* Final merger cleanup. */
4978 for (i = 0; i < words; i++)
4979 {
4980 /* If one class is MEMORY, everything should be passed in
4981 memory. */
4982 if (classes[i] == X86_64_MEMORY_CLASS)
4983 return 0;
4984
4985 /* The X86_64_SSEUP_CLASS should be always preceded by
4986 X86_64_SSE_CLASS. */
4987 if (classes[i] == X86_64_SSEUP_CLASS
4988 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
4989 classes[i] = X86_64_SSE_CLASS;
4990
4991 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
4992 if (classes[i] == X86_64_X87UP_CLASS
4993 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
4994 classes[i] = X86_64_SSE_CLASS;
4995 }
4996 return words;
4997 }
4998
4999 /* Compute the alignment needed. We align all types to their natural
5000 boundaries, with the exception of XFmode, which is aligned to 64 bits. */
5001 if (mode != VOIDmode && mode != BLKmode)
5002 {
5003 int mode_alignment = GET_MODE_BITSIZE (mode);
5004
5005 if (mode == XFmode)
5006 mode_alignment = 128;
5007 else if (mode == XCmode)
5008 mode_alignment = 256;
5009 if (COMPLEX_MODE_P (mode))
5010 mode_alignment /= 2;
5011 /* Misaligned fields are always returned in memory. */
5012 if (bit_offset % mode_alignment)
5013 return 0;
5014 }
5015
5016 /* For V1xx modes, just use the base mode. */
5017 if (VECTOR_MODE_P (mode) && mode != V1DImode
5018 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
5019 mode = GET_MODE_INNER (mode);
5020
5021 /* Classification of atomic types. */
5022 switch (mode)
5023 {
5024 case SDmode:
5025 case DDmode:
5026 classes[0] = X86_64_SSE_CLASS;
5027 return 1;
5028 case TDmode:
5029 classes[0] = X86_64_SSE_CLASS;
5030 classes[1] = X86_64_SSEUP_CLASS;
5031 return 2;
5032 case DImode:
5033 case SImode:
5034 case HImode:
5035 case QImode:
5036 case CSImode:
5037 case CHImode:
5038 case CQImode:
5039 {
5040 int size = (bit_offset % 64)+ (int) GET_MODE_BITSIZE (mode);
5041
5042 if (size <= 32)
5043 {
5044 classes[0] = X86_64_INTEGERSI_CLASS;
5045 return 1;
5046 }
5047 else if (size <= 64)
5048 {
5049 classes[0] = X86_64_INTEGER_CLASS;
5050 return 1;
5051 }
5052 else if (size <= 64+32)
5053 {
5054 classes[0] = X86_64_INTEGER_CLASS;
5055 classes[1] = X86_64_INTEGERSI_CLASS;
5056 return 2;
5057 }
5058 else if (size <= 64+64)
5059 {
5060 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5061 return 2;
5062 }
5063 else
5064 gcc_unreachable ();
5065 }
5066 case CDImode:
5067 case TImode:
5068 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5069 return 2;
5070 case CTImode:
5071 case COImode:
5072 case OImode:
5073 return 0;
5074 case SFmode:
5075 if (!(bit_offset % 64))
5076 classes[0] = X86_64_SSESF_CLASS;
5077 else
5078 classes[0] = X86_64_SSE_CLASS;
5079 return 1;
5080 case DFmode:
5081 classes[0] = X86_64_SSEDF_CLASS;
5082 return 1;
5083 case XFmode:
5084 classes[0] = X86_64_X87_CLASS;
5085 classes[1] = X86_64_X87UP_CLASS;
5086 return 2;
5087 case TFmode:
5088 classes[0] = X86_64_SSE_CLASS;
5089 classes[1] = X86_64_SSEUP_CLASS;
5090 return 2;
5091 case SCmode:
5092 classes[0] = X86_64_SSE_CLASS;
5093 return 1;
5094 case DCmode:
5095 classes[0] = X86_64_SSEDF_CLASS;
5096 classes[1] = X86_64_SSEDF_CLASS;
5097 return 2;
5098 case XCmode:
5099 classes[0] = X86_64_COMPLEX_X87_CLASS;
5100 return 1;
5101 case TCmode:
5102 /* This mode is larger than 16 bytes. */
5103 return 0;
5104 case V8SFmode:
5105 case V8SImode:
5106 case V32QImode:
5107 case V16HImode:
5108 case V4DFmode:
5109 case V4DImode:
5110 classes[0] = X86_64_AVX_CLASS;
5111 return 1;
5112 case V4SFmode:
5113 case V4SImode:
5114 case V16QImode:
5115 case V8HImode:
5116 case V2DFmode:
5117 case V2DImode:
5118 classes[0] = X86_64_SSE_CLASS;
5119 classes[1] = X86_64_SSEUP_CLASS;
5120 return 2;
5121 case V1DImode:
5122 case V2SFmode:
5123 case V2SImode:
5124 case V4HImode:
5125 case V8QImode:
5126 classes[0] = X86_64_SSE_CLASS;
5127 return 1;
5128 case BLKmode:
5129 case VOIDmode:
5130 return 0;
5131 default:
5132 gcc_assert (VECTOR_MODE_P (mode));
5133
5134 if (bytes > 16)
5135 return 0;
5136
5137 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
5138
5139 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
5140 classes[0] = X86_64_INTEGERSI_CLASS;
5141 else
5142 classes[0] = X86_64_INTEGER_CLASS;
5143 classes[1] = X86_64_INTEGER_CLASS;
5144 return 1 + (bytes > 8);
5145 }
5146 }
5147
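/* As a worked example of classify_argument: for

       struct s { double d; int a; int b; };

   the first eightbyte holds the double and classifies as
   X86_64_SSEDF_CLASS, while the two ints share the second eightbyte and
   merge X86_64_INTEGERSI_CLASS with X86_64_INTEGER_CLASS into
   X86_64_INTEGER_CLASS.  The function returns 2 with
   classes = { SSEDF, INTEGER }.  */
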
5148 /* Examine the argument and set the number of registers required in each
5149 class. Return 0 iff the parameter should be passed in memory. */
5150 static int
5151 examine_argument (enum machine_mode mode, const_tree type, int in_return,
5152 int *int_nregs, int *sse_nregs)
5153 {
5154 enum x86_64_reg_class regclass[MAX_CLASSES];
5155 int n = classify_argument (mode, type, regclass, 0);
5156
5157 *int_nregs = 0;
5158 *sse_nregs = 0;
5159 if (!n)
5160 return 0;
5161 for (n--; n >= 0; n--)
5162 switch (regclass[n])
5163 {
5164 case X86_64_INTEGER_CLASS:
5165 case X86_64_INTEGERSI_CLASS:
5166 (*int_nregs)++;
5167 break;
5168 case X86_64_AVX_CLASS:
5169 case X86_64_SSE_CLASS:
5170 case X86_64_SSESF_CLASS:
5171 case X86_64_SSEDF_CLASS:
5172 (*sse_nregs)++;
5173 break;
5174 case X86_64_NO_CLASS:
5175 case X86_64_SSEUP_CLASS:
5176 break;
5177 case X86_64_X87_CLASS:
5178 case X86_64_X87UP_CLASS:
5179 if (!in_return)
5180 return 0;
5181 break;
5182 case X86_64_COMPLEX_X87_CLASS:
5183 return in_return ? 2 : 0;
5184 case X86_64_MEMORY_CLASS:
5185 gcc_unreachable ();
5186 }
5187 return 1;
5188 }
5189
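/* Continuing the struct s example above: examine_argument reports
   *int_nregs == 1 and *sse_nregs == 1.  An XFmode long double instead
   classifies as X87/X87UP, so examine_argument returns 0 for arguments
   (pass in memory) and nonzero only for return values.  */
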
5190 /* Construct container for the argument used by GCC interface. See
5191 FUNCTION_ARG for the detailed description. */
5192
5193 static rtx
5194 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
5195 const_tree type, int in_return, int nintregs, int nsseregs,
5196 const int *intreg, int sse_regno)
5197 {
5198 /* The following variables hold the static issued_error state. */
5199 static bool issued_sse_arg_error;
5200 static bool issued_sse_ret_error;
5201 static bool issued_x87_ret_error;
5202
5203 enum machine_mode tmpmode;
5204 int bytes =
5205 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
5206 enum x86_64_reg_class regclass[MAX_CLASSES];
5207 int n;
5208 int i;
5209 int nexps = 0;
5210 int needed_sseregs, needed_intregs;
5211 rtx exp[MAX_CLASSES];
5212 rtx ret;
5213
5214 n = classify_argument (mode, type, regclass, 0);
5215 if (!n)
5216 return NULL;
5217 if (!examine_argument (mode, type, in_return, &needed_intregs,
5218 &needed_sseregs))
5219 return NULL;
5220 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
5221 return NULL;
5222
5223 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
5224 some less clueful developer tries to use floating-point anyway. */
5225 if (needed_sseregs && !TARGET_SSE)
5226 {
5227 if (in_return)
5228 {
5229 if (!issued_sse_ret_error)
5230 {
5231 error ("SSE register return with SSE disabled");
5232 issued_sse_ret_error = true;
5233 }
5234 }
5235 else if (!issued_sse_arg_error)
5236 {
5237 error ("SSE register argument with SSE disabled");
5238 issued_sse_arg_error = true;
5239 }
5240 return NULL;
5241 }
5242
5243 /* Likewise, error if the ABI requires us to return values in the
5244 x87 registers and the user specified -mno-80387. */
5245 if (!TARGET_80387 && in_return)
5246 for (i = 0; i < n; i++)
5247 if (regclass[i] == X86_64_X87_CLASS
5248 || regclass[i] == X86_64_X87UP_CLASS
5249 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
5250 {
5251 if (!issued_x87_ret_error)
5252 {
5253 error ("x87 register return with x87 disabled");
5254 issued_x87_ret_error = true;
5255 }
5256 return NULL;
5257 }
5258
5259 /* First construct the simple cases. Avoid SCmode, since we want to use a
5260 single register to pass this type. */
5261 if (n == 1 && mode != SCmode)
5262 switch (regclass[0])
5263 {
5264 case X86_64_INTEGER_CLASS:
5265 case X86_64_INTEGERSI_CLASS:
5266 return gen_rtx_REG (mode, intreg[0]);
5267 case X86_64_AVX_CLASS:
5268 case X86_64_SSE_CLASS:
5269 case X86_64_SSESF_CLASS:
5270 case X86_64_SSEDF_CLASS:
5271 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
5272 case X86_64_X87_CLASS:
5273 case X86_64_COMPLEX_X87_CLASS:
5274 return gen_rtx_REG (mode, FIRST_STACK_REG);
5275 case X86_64_NO_CLASS:
5276 /* Zero sized array, struct or class. */
5277 return NULL;
5278 default:
5279 gcc_unreachable ();
5280 }
5281 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
5282 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
5283 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
5284
5285 if (n == 2
5286 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
5287 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
5288 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
5289 && regclass[1] == X86_64_INTEGER_CLASS
5290 && (mode == CDImode || mode == TImode || mode == TFmode)
5291 && intreg[0] + 1 == intreg[1])
5292 return gen_rtx_REG (mode, intreg[0]);
5293
5294 /* Otherwise figure out the entries of the PARALLEL. */
5295 for (i = 0; i < n; i++)
5296 {
5297 switch (regclass[i])
5298 {
5299 case X86_64_NO_CLASS:
5300 break;
5301 case X86_64_INTEGER_CLASS:
5302 case X86_64_INTEGERSI_CLASS:
5303 /* Merge TImodes on aligned occasions here too. */
5304 if (i * 8 + 8 > bytes)
5305 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
5306 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
5307 tmpmode = SImode;
5308 else
5309 tmpmode = DImode;
5310 /* We've requested 24 bytes we don't have a mode for. Use DImode. */
5311 if (tmpmode == BLKmode)
5312 tmpmode = DImode;
5313 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5314 gen_rtx_REG (tmpmode, *intreg),
5315 GEN_INT (i*8));
5316 intreg++;
5317 break;
5318 case X86_64_SSESF_CLASS:
5319 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5320 gen_rtx_REG (SFmode,
5321 SSE_REGNO (sse_regno)),
5322 GEN_INT (i*8));
5323 sse_regno++;
5324 break;
5325 case X86_64_SSEDF_CLASS:
5326 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5327 gen_rtx_REG (DFmode,
5328 SSE_REGNO (sse_regno)),
5329 GEN_INT (i*8));
5330 sse_regno++;
5331 break;
5332 case X86_64_SSE_CLASS:
5333 if (i < n - 1 && regclass[i + 1] == X86_64_SSEUP_CLASS)
5334 tmpmode = TImode;
5335 else
5336 tmpmode = DImode;
5337 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5338 gen_rtx_REG (tmpmode,
5339 SSE_REGNO (sse_regno)),
5340 GEN_INT (i*8));
5341 if (tmpmode == TImode)
5342 i++;
5343 sse_regno++;
5344 break;
5345 default:
5346 gcc_unreachable ();
5347 }
5348 }
5349
5350 /* Empty aligned struct, union or class. */
5351 if (nexps == 0)
5352 return NULL;
5353
5354 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
5355 for (i = 0; i < nexps; i++)
5356 XVECEXP (ret, 0, i) = exp [i];
5357 return ret;
5358 }
5359
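/* For the struct s example above, construct_container falls through to the
   PARALLEL case.  Assuming it is the first argument, so the integer slot
   is %rdi and the SSE slot is %xmm0, the result is roughly

       (parallel [(expr_list (reg:DF xmm0) (const_int 0))
                  (expr_list (reg:DI di) (const_int 8))])

   i.e. the double travels in %xmm0 and the two ints, packed, in %rdi.  */
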
5360 /* Update the data in CUM to advance over an argument of mode MODE
5361 and data type TYPE. (TYPE is null for libcalls where that information
5362 may not be available.) */
5363
5364 static void
5365 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5366 tree type, HOST_WIDE_INT bytes, HOST_WIDE_INT words)
5367 {
5368 switch (mode)
5369 {
5370 default:
5371 break;
5372
5373 case BLKmode:
5374 if (bytes < 0)
5375 break;
5376 /* FALLTHRU */
5377
5378 case DImode:
5379 case SImode:
5380 case HImode:
5381 case QImode:
5382 cum->words += words;
5383 cum->nregs -= words;
5384 cum->regno += words;
5385
5386 if (cum->nregs <= 0)
5387 {
5388 cum->nregs = 0;
5389 cum->regno = 0;
5390 }
5391 break;
5392
5393 case DFmode:
5394 if (cum->float_in_sse < 2)
5395 break;
5396 case SFmode:
5397 if (cum->float_in_sse < 1)
5398 break;
5399 /* FALLTHRU */
5400
5401 case OImode:
5402 case V8SFmode:
5403 case V8SImode:
5404 case V32QImode:
5405 case V16HImode:
5406 case V4DFmode:
5407 case V4DImode:
5408 case TImode:
5409 case V16QImode:
5410 case V8HImode:
5411 case V4SImode:
5412 case V2DImode:
5413 case V4SFmode:
5414 case V2DFmode:
5415 if (!type || !AGGREGATE_TYPE_P (type))
5416 {
5417 cum->sse_words += words;
5418 cum->sse_nregs -= 1;
5419 cum->sse_regno += 1;
5420 if (cum->sse_nregs <= 0)
5421 {
5422 cum->sse_nregs = 0;
5423 cum->sse_regno = 0;
5424 }
5425 }
5426 break;
5427
5428 case V8QImode:
5429 case V4HImode:
5430 case V2SImode:
5431 case V2SFmode:
5432 case V1DImode:
5433 if (!type || !AGGREGATE_TYPE_P (type))
5434 {
5435 cum->mmx_words += words;
5436 cum->mmx_nregs -= 1;
5437 cum->mmx_regno += 1;
5438 if (cum->mmx_nregs <= 0)
5439 {
5440 cum->mmx_nregs = 0;
5441 cum->mmx_regno = 0;
5442 }
5443 }
5444 break;
5445 }
5446 }
5447
5448 static void
5449 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5450 tree type, HOST_WIDE_INT words, int named)
5451 {
5452 int int_nregs, sse_nregs;
5453
5454 /* Unnamed 256bit vector mode parameters are passed on stack. */
5455 if (!named && VALID_AVX256_REG_MODE (mode))
5456 return;
5457
5458 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
5459 cum->words += words;
5460 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
5461 {
5462 cum->nregs -= int_nregs;
5463 cum->sse_nregs -= sse_nregs;
5464 cum->regno += int_nregs;
5465 cum->sse_regno += sse_nregs;
5466 }
5467 else
5468 cum->words += words;
5469 }
5470
5471 static void
5472 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
5473 HOST_WIDE_INT words)
5474 {
5475 /* Otherwise, this should have been passed indirectly (by reference). */
5476 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
5477
5478 cum->words += words;
5479 if (cum->nregs > 0)
5480 {
5481 cum->nregs -= 1;
5482 cum->regno += 1;
5483 }
5484 }
5485
5486 void
5487 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5488 tree type, int named)
5489 {
5490 HOST_WIDE_INT bytes, words;
5491
5492 if (mode == BLKmode)
5493 bytes = int_size_in_bytes (type);
5494 else
5495 bytes = GET_MODE_SIZE (mode);
5496 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5497
5498 if (type)
5499 mode = type_natural_mode (type);
5500
5501 if (TARGET_64BIT && (cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI)
5502 function_arg_advance_ms_64 (cum, bytes, words);
5503 else if (TARGET_64BIT)
5504 function_arg_advance_64 (cum, mode, type, words, named);
5505 else
5506 function_arg_advance_32 (cum, mode, type, bytes, words);
5507 }
5508
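/* For example, advancing over the struct s argument from the examples
   above on 64-bit SysV consumes one integer and one SSE register:
   cum->nregs and cum->sse_nregs each drop by one while cum->regno and
   cum->sse_regno each advance by one.  When not enough registers remain,
   only cum->words is advanced and the argument goes on the stack.  */
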
5509 /* Define where to put the arguments to a function.
5510 Value is zero to push the argument on the stack,
5511 or a hard register in which to store the argument.
5512
5513 MODE is the argument's machine mode.
5514 TYPE is the data type of the argument (as a tree).
5515 This is null for libcalls where that information may
5516 not be available.
5517 CUM is a variable of type CUMULATIVE_ARGS which gives info about
5518 the preceding args and about the function being called.
5519 NAMED is nonzero if this argument is a named parameter
5520 (otherwise it is an extra parameter matching an ellipsis). */
5521
5522 static rtx
5523 function_arg_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5524 enum machine_mode orig_mode, tree type,
5525 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
5526 {
5527 static bool warnedavx, warnedsse, warnedmmx;
5528
5529 /* Avoid the AL settings for the Unix64 ABI. */
5530 if (mode == VOIDmode)
5531 return constm1_rtx;
5532
5533 switch (mode)
5534 {
5535 default:
5536 break;
5537
5538 case BLKmode:
5539 if (bytes < 0)
5540 break;
5541 /* FALLTHRU */
5542 case DImode:
5543 case SImode:
5544 case HImode:
5545 case QImode:
5546 if (words <= cum->nregs)
5547 {
5548 int regno = cum->regno;
5549
5550 /* Fastcall allocates the first two DWORD (SImode) or
5551 smaller arguments to ECX and EDX if it isn't an
5552 aggregate type. */
5553 if (cum->fastcall)
5554 {
5555 if (mode == BLKmode
5556 || mode == DImode
5557 || (type && AGGREGATE_TYPE_P (type)))
5558 break;
5559
5560 /* ECX, not EAX, is the first allocated register. */
5561 if (regno == AX_REG)
5562 regno = CX_REG;
5563 }
5564 return gen_rtx_REG (mode, regno);
5565 }
5566 break;
5567
5568 case DFmode:
5569 if (cum->float_in_sse < 2)
5570 break;
5571 case SFmode:
5572 if (cum->float_in_sse < 1)
5573 break;
5574 /* FALLTHRU */
5575 case TImode:
5576 /* In 32bit, we pass TImode in xmm registers. */
5577 case V16QImode:
5578 case V8HImode:
5579 case V4SImode:
5580 case V2DImode:
5581 case V4SFmode:
5582 case V2DFmode:
5583 if (!type || !AGGREGATE_TYPE_P (type))
5584 {
5585 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
5586 {
5587 warnedsse = true;
5588 warning (0, "SSE vector argument without SSE enabled "
5589 "changes the ABI");
5590 }
5591 if (cum->sse_nregs)
5592 return gen_reg_or_parallel (mode, orig_mode,
5593 cum->sse_regno + FIRST_SSE_REG);
5594 }
5595 break;
5596
5597 case OImode:
5598 /* In 32bit, we pass OImode in ymm registers. */
5599 case V8SFmode:
5600 case V8SImode:
5601 case V32QImode:
5602 case V16HImode:
5603 case V4DFmode:
5604 case V4DImode:
5605 if (!type || !AGGREGATE_TYPE_P (type))
5606 {
5607 if (!TARGET_AVX && !warnedavx && cum->warn_avx)
5608 {
5609 warnedavx = true;
5610 warning (0, "AVX vector argument without AVX enabled "
5611 "changes the ABI");
5612 }
5613 if (cum->sse_nregs)
5614 return gen_reg_or_parallel (mode, orig_mode,
5615 cum->sse_regno + FIRST_SSE_REG);
5616 }
5617 break;
5618
5619 case V8QImode:
5620 case V4HImode:
5621 case V2SImode:
5622 case V2SFmode:
5623 case V1DImode:
5624 if (!type || !AGGREGATE_TYPE_P (type))
5625 {
5626 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
5627 {
5628 warnedmmx = true;
5629 warning (0, "MMX vector argument without MMX enabled "
5630 "changes the ABI");
5631 }
5632 if (cum->mmx_nregs)
5633 return gen_reg_or_parallel (mode, orig_mode,
5634 cum->mmx_regno + FIRST_MMX_REG);
5635 }
5636 break;
5637 }
5638
5639 return NULL_RTX;
5640 }
5641
5642 static rtx
5643 function_arg_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5644 enum machine_mode orig_mode, tree type, int named)
5645 {
5646 static bool warnedavx;
5647
5648 /* Handle a hidden AL argument containing number of registers
5649 for varargs x86-64 functions. */
5650 if (mode == VOIDmode)
5651 return GEN_INT (cum->maybe_vaarg
5652 ? (cum->sse_nregs < 0
5653 ? (cum->call_abi == DEFAULT_ABI
5654 ? SSE_REGPARM_MAX
5655 : (DEFAULT_ABI != SYSV_ABI ? X86_64_SSE_REGPARM_MAX
5656 : X64_SSE_REGPARM_MAX))
5657 : cum->sse_regno)
5658 : -1);
5659
5660 switch (mode)
5661 {
5662 default:
5663 break;
5664
5665 case V8SFmode:
5666 case V8SImode:
5667 case V32QImode:
5668 case V16HImode:
5669 case V4DFmode:
5670 case V4DImode:
5671 /* In 64bit, we pass TImode in integer registers and OImode on the
5672 stack. */
5673 if (!type || !AGGREGATE_TYPE_P (type))
5674 {
5675 if (!TARGET_AVX && !warnedavx && cum->warn_avx)
5676 {
5677 warnedavx = true;
5678 warning (0, "AVX vector argument without AVX enabled "
5679 "changes the ABI");
5680 }
5681 }
5682
5683 /* Unnamed 256bit vector mode parameters are passed on stack. */
5684 if (!named)
5685 return NULL;
5686 break;
5687 }
5688
5689 return construct_container (mode, orig_mode, type, 0, cum->nregs,
5690 cum->sse_nregs,
5691 &x86_64_int_parameter_registers [cum->regno],
5692 cum->sse_regno);
5693 }
5694
5695 static rtx
5696 function_arg_ms_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5697 enum machine_mode orig_mode, int named,
5698 HOST_WIDE_INT bytes)
5699 {
5700 unsigned int regno;
5701
5702 /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
5703 We use the value -2 to specify that the current function call is MSABI. */
5704 if (mode == VOIDmode)
5705 return GEN_INT (-2);
5706
5707 /* If we've run out of registers, it goes on the stack. */
5708 if (cum->nregs == 0)
5709 return NULL_RTX;
5710
5711 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
5712
5713 /* Only floating point modes are passed in anything but integer regs. */
5714 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
5715 {
5716 if (named)
5717 regno = cum->regno + FIRST_SSE_REG;
5718 else
5719 {
5720 rtx t1, t2;
5721
5722 /* Unnamed floating parameters are passed in both the
5723 SSE and integer registers. */
5724 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
5725 t2 = gen_rtx_REG (mode, regno);
5726 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
5727 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
5728 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
5729 }
5730 }
5731 /* Handle aggregated types passed in register. */
5732 if (orig_mode == BLKmode)
5733 {
5734 if (bytes > 0 && bytes <= 8)
5735 mode = (bytes > 4 ? DImode : SImode);
5736 if (mode == BLKmode)
5737 mode = DImode;
5738 }
5739
5740 return gen_reg_or_parallel (mode, orig_mode, regno);
5741 }
5742
5743 rtx
5744 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
5745 tree type, int named)
5746 {
5747 enum machine_mode mode = omode;
5748 HOST_WIDE_INT bytes, words;
5749
5750 if (mode == BLKmode)
5751 bytes = int_size_in_bytes (type);
5752 else
5753 bytes = GET_MODE_SIZE (mode);
5754 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5755
5756 /* To simplify the code below, represent vector types with a vector mode
5757 even if MMX/SSE are not active. */
5758 if (type && TREE_CODE (type) == VECTOR_TYPE)
5759 mode = type_natural_mode (type);
5760
5761 if (TARGET_64BIT && (cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI)
5762 return function_arg_ms_64 (cum, mode, omode, named, bytes);
5763 else if (TARGET_64BIT)
5764 return function_arg_64 (cum, mode, omode, type, named);
5765 else
5766 return function_arg_32 (cum, mode, omode, type, bytes, words);
5767 }
5768
5769 /* A C expression that indicates when an argument must be passed by
5770 reference. If nonzero for an argument, a copy of that argument is
5771 made in memory and a pointer to the argument is passed instead of
5772 the argument itself. The pointer is passed in whatever way is
5773 appropriate for passing a pointer to that type. */
5774
5775 static bool
5776 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
5777 enum machine_mode mode ATTRIBUTE_UNUSED,
5778 const_tree type, bool named ATTRIBUTE_UNUSED)
5779 {
5780 /* See Windows x64 Software Convention. */
5781 if (TARGET_64BIT && (cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI)
5782 {
5783 int msize = (int) GET_MODE_SIZE (mode);
5784 if (type)
5785 {
5786 /* Arrays are passed by reference. */
5787 if (TREE_CODE (type) == ARRAY_TYPE)
5788 return true;
5789
5790 if (AGGREGATE_TYPE_P (type))
5791 {
5792 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
5793 are passed by reference. */
5794 msize = int_size_in_bytes (type);
5795 }
5796 }
5797
5798 /* __m128 is passed by reference. */
5799 switch (msize) {
5800 case 1: case 2: case 4: case 8:
5801 break;
5802 default:
5803 return true;
5804 }
5805 }
5806 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
5807 return 1;
5808
5809 return 0;
5810 }
5811
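/* For example, under the Windows x64 convention a 12 byte struct or a
   16 byte __m128 argument is passed by reference because its size is not
   1, 2, 4 or 8 bytes, while a plain int or pointer is passed by value.
   On 64-bit SysV only variable sized types (int_size_in_bytes == -1) are
   forced by reference here.  */
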
5812 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
5813 ABI. */
5814 static bool
5815 contains_aligned_value_p (tree type)
5816 {
5817 enum machine_mode mode = TYPE_MODE (type);
5818 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
5819 || mode == TDmode
5820 || mode == TFmode
5821 || mode == TCmode)
5822 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
5823 return true;
5824 if (TYPE_ALIGN (type) < 128)
5825 return false;
5826
5827 if (AGGREGATE_TYPE_P (type))
5828 {
5829 /* Walk the aggregates recursively. */
5830 switch (TREE_CODE (type))
5831 {
5832 case RECORD_TYPE:
5833 case UNION_TYPE:
5834 case QUAL_UNION_TYPE:
5835 {
5836 tree field;
5837
5838 /* Walk all the structure fields. */
5839 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
5840 {
5841 if (TREE_CODE (field) == FIELD_DECL
5842 && contains_aligned_value_p (TREE_TYPE (field)))
5843 return true;
5844 }
5845 break;
5846 }
5847
5848 case ARRAY_TYPE:
5849 /* Just for use if some languages pass arrays by value. */
5850 if (contains_aligned_value_p (TREE_TYPE (type)))
5851 return true;
5852 break;
5853
5854 default:
5855 gcc_unreachable ();
5856 }
5857 }
5858 return false;
5859 }
5860
5861 /* Gives the alignment boundary, in bits, of an argument with the
5862 specified mode and type. */
5863
5864 int
5865 ix86_function_arg_boundary (enum machine_mode mode, tree type)
5866 {
5867 int align;
5868 if (type)
5869 {
5870 /* Since canonical type is used for call, we convert it to
5871 canonical type if needed. */
5872 if (!TYPE_STRUCTURAL_EQUALITY_P (type))
5873 type = TYPE_CANONICAL (type);
5874 align = TYPE_ALIGN (type);
5875 }
5876 else
5877 align = GET_MODE_ALIGNMENT (mode);
5878 if (align < PARM_BOUNDARY)
5879 align = PARM_BOUNDARY;
5880 /* In 32bit, only _Decimal128 and __float128 are aligned to their
5881 natural boundaries. */
5882 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
5883 {
5884 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
5885 make an exception for SSE modes since these require 128bit
5886 alignment.
5887
5888 The handling here differs from field_alignment. ICC aligns MMX
5889 arguments to 4 byte boundaries, while structure fields are aligned
5890 to 8 byte boundaries. */
5891 if (!type)
5892 {
5893 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
5894 align = PARM_BOUNDARY;
5895 }
5896 else
5897 {
5898 if (!contains_aligned_value_p (type))
5899 align = PARM_BOUNDARY;
5900 }
5901 }
5902 if (align > BIGGEST_ALIGNMENT)
5903 align = BIGGEST_ALIGNMENT;
5904 return align;
5905 }
5906
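/* For example, in 32-bit mode a double argument is aligned to
   PARM_BOUNDARY (32 bits) on the stack, while an __m128 argument keeps
   its 128 bit alignment (with SSE enabled) because contains_aligned_value_p
   sees an SSE register mode.  In 64-bit mode the natural alignment is used
   directly, subject only to the PARM_BOUNDARY minimum and the
   BIGGEST_ALIGNMENT cap.  */
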
5907 /* Return true if N is a possible register number of function value. */
5908
5909 bool
5910 ix86_function_value_regno_p (int regno)
5911 {
5912 switch (regno)
5913 {
5914 case 0:
5915 return true;
5916
5917 case FIRST_FLOAT_REG:
5918 /* TODO: The function should depend on current function ABI but
5919 builtins.c would need updating then. Therefore we use the
5920 default ABI. */
5921 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
5922 return false;
5923 return TARGET_FLOAT_RETURNS_IN_80387;
5924
5925 case FIRST_SSE_REG:
5926 return TARGET_SSE;
5927
5928 case FIRST_MMX_REG:
5929 if (TARGET_MACHO || TARGET_64BIT)
5930 return false;
5931 return TARGET_MMX;
5932 }
5933
5934 return false;
5935 }
5936
5937 /* Define how to find the value returned by a function.
5938 VALTYPE is the data type of the value (as a tree).
5939 If the precise function being called is known, FUNC is its FUNCTION_DECL;
5940 otherwise, FUNC is 0. */
5941
5942 static rtx
5943 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
5944 const_tree fntype, const_tree fn)
5945 {
5946 unsigned int regno;
5947
5948 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
5949 we normally prevent this case when mmx is not available. However
5950 some ABIs may require the result to be returned like DImode. */
5951 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
5952 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
5953
5954 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
5955 we prevent this case when sse is not available. However some ABIs
5956 may require the result to be returned like integer TImode. */
5957 else if (mode == TImode
5958 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
5959 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
5960
5961 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
5962 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
5963 regno = FIRST_FLOAT_REG;
5964 else
5965 /* Most things go in %eax. */
5966 regno = AX_REG;
5967
5968 /* Override FP return register with %xmm0 for local functions when
5969 SSE math is enabled or for functions with sseregparm attribute. */
5970 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
5971 {
5972 int sse_level = ix86_function_sseregparm (fntype, fn, false);
5973 if ((sse_level >= 1 && mode == SFmode)
5974 || (sse_level == 2 && mode == DFmode))
5975 regno = FIRST_SSE_REG;
5976 }
5977
5978 return gen_rtx_REG (orig_mode, regno);
5979 }
5980
5981 static rtx
5982 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
5983 const_tree valtype)
5984 {
5985 rtx ret;
5986
5987 /* Handle libcalls, which don't provide a type node. */
5988 if (valtype == NULL)
5989 {
5990 switch (mode)
5991 {
5992 case SFmode:
5993 case SCmode:
5994 case DFmode:
5995 case DCmode:
5996 case TFmode:
5997 case SDmode:
5998 case DDmode:
5999 case TDmode:
6000 return gen_rtx_REG (mode, FIRST_SSE_REG);
6001 case XFmode:
6002 case XCmode:
6003 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
6004 case TCmode:
6005 return NULL;
6006 default:
6007 return gen_rtx_REG (mode, AX_REG);
6008 }
6009 }
6010
6011 ret = construct_container (mode, orig_mode, valtype, 1,
6012 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
6013 x86_64_int_return_registers, 0);
6014
6015 /* For zero sized structures, construct_container returns NULL, but we
6016 need to keep the rest of the compiler happy by returning a meaningful value. */
6017 if (!ret)
6018 ret = gen_rtx_REG (orig_mode, AX_REG);
6019
6020 return ret;
6021 }
6022
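/* For example, a 64-bit SysV function returning the struct s from the
   examples above gets a PARALLEL placing a DFmode piece in %xmm0 at
   offset 0 and a DImode piece in %rax at offset 8, while a plain double
   comes back in %xmm0 and a long double in %st(0).  */
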
6023 static rtx
6024 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
6025 {
6026 unsigned int regno = AX_REG;
6027
6028 if (TARGET_SSE)
6029 {
6030 switch (GET_MODE_SIZE (mode))
6031 {
6032 case 16:
6033 if((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
6034 && !COMPLEX_MODE_P (mode))
6035 regno = FIRST_SSE_REG;
6036 break;
6037 case 8:
6038 case 4:
6039 if (mode == SFmode || mode == DFmode)
6040 regno = FIRST_SSE_REG;
6041 break;
6042 default:
6043 break;
6044 }
6045 }
6046 return gen_rtx_REG (orig_mode, regno);
6047 }
6048
6049 static rtx
6050 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
6051 enum machine_mode orig_mode, enum machine_mode mode)
6052 {
6053 const_tree fn, fntype;
6054
6055 fn = NULL_TREE;
6056 if (fntype_or_decl && DECL_P (fntype_or_decl))
6057 fn = fntype_or_decl;
6058 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
6059
6060 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
6061 return function_value_ms_64 (orig_mode, mode);
6062 else if (TARGET_64BIT)
6063 return function_value_64 (orig_mode, mode, valtype);
6064 else
6065 return function_value_32 (orig_mode, mode, fntype, fn);
6066 }
6067
6068 static rtx
6069 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
6070 bool outgoing ATTRIBUTE_UNUSED)
6071 {
6072 enum machine_mode mode, orig_mode;
6073
6074 orig_mode = TYPE_MODE (valtype);
6075 mode = type_natural_mode (valtype);
6076 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
6077 }
6078
6079 rtx
6080 ix86_libcall_value (enum machine_mode mode)
6081 {
6082 return ix86_function_value_1 (NULL, NULL, mode, mode);
6083 }
6084
6085 /* Return true iff type is returned in memory. */
6086
6087 static int ATTRIBUTE_UNUSED
6088 return_in_memory_32 (const_tree type, enum machine_mode mode)
6089 {
6090 HOST_WIDE_INT size;
6091
6092 if (mode == BLKmode)
6093 return 1;
6094
6095 size = int_size_in_bytes (type);
6096
6097 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
6098 return 0;
6099
6100 if (VECTOR_MODE_P (mode) || mode == TImode)
6101 {
6102 /* User-created vectors small enough to fit in EAX. */
6103 if (size < 8)
6104 return 0;
6105
6106 /* MMX/3dNow values are returned in MM0,
6107 except when it doesn't exist. */
6108 if (size == 8)
6109 return (TARGET_MMX ? 0 : 1);
6110
6111 /* SSE values are returned in XMM0, except when it doesn't exist. */
6112 if (size == 16)
6113 return (TARGET_SSE ? 0 : 1);
6114 }
6115
6116 if (mode == XFmode)
6117 return 0;
6118
6119 if (size > 12)
6120 return 1;
6121 return 0;
6122 }
6123
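/* For example, in 32-bit mode a 16 byte aggregate (BLKmode) is returned
   in memory, an XFmode long double is returned in st(0) regardless of
   its size, and an 8 byte vector is returned in %mm0 only when MMX is
   enabled.  */
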
6124 static int ATTRIBUTE_UNUSED
6125 return_in_memory_64 (const_tree type, enum machine_mode mode)
6126 {
6127 int needed_intregs, needed_sseregs;
6128 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
6129 }
6130
6131 static int ATTRIBUTE_UNUSED
6132 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
6133 {
6134 HOST_WIDE_INT size = int_size_in_bytes (type);
6135
6136 /* __m128 is returned in xmm0. */
6137 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
6138 && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
6139 return 0;
6140
6141 /* Otherwise, the size must be exactly 1, 2, 4 or 8 bytes. */
6142 return (size != 1 && size != 2 && size != 4 && size != 8);
6143 }
6144
6145 static bool
6146 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
6147 {
6148 #ifdef SUBTARGET_RETURN_IN_MEMORY
6149 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
6150 #else
6151 const enum machine_mode mode = type_natural_mode (type);
6152
6153 if (TARGET_64BIT_MS_ABI)
6154 return return_in_memory_ms_64 (type, mode);
6155 else if (TARGET_64BIT)
6156 return return_in_memory_64 (type, mode);
6157 else
6158 return return_in_memory_32 (type, mode);
6159 #endif
6160 }
6161
6162 /* Return true iff TYPE is returned in memory. This version is used
6163 on Solaris 10. It is similar to the generic ix86_return_in_memory,
6164 but differs notably in that when MMX is available, 8-byte vectors
6165 are returned in memory, rather than in MMX registers. */
6166
6167 bool
6168 ix86_sol10_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
6169 {
6170 int size;
6171 enum machine_mode mode = type_natural_mode (type);
6172
6173 if (TARGET_64BIT)
6174 return return_in_memory_64 (type, mode);
6175
6176 if (mode == BLKmode)
6177 return 1;
6178
6179 size = int_size_in_bytes (type);
6180
6181 if (VECTOR_MODE_P (mode))
6182 {
6183 /* Return in memory only if MMX registers *are* available. This
6184 seems backwards, but it is consistent with the existing
6185 Solaris x86 ABI. */
6186 if (size == 8)
6187 return TARGET_MMX;
6188 if (size == 16)
6189 return !TARGET_SSE;
6190 }
6191 else if (mode == TImode)
6192 return !TARGET_SSE;
6193 else if (mode == XFmode)
6194 return 0;
6195
6196 return size > 12;
6197 }
6198
6199 /* When returning SSE vector types, we have a choice of either
6200 (1) being ABI incompatible with a -march switch, or
6201 (2) generating an error.
6202 Given no good solution, I think the safest thing is one warning.
6203 The user won't be able to use -Werror, but....
6204
6205 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
6206 called in response to actually generating a caller or callee that
6207 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
6208 via aggregate_value_p for general type probing from tree-ssa. */
6209
6210 static rtx
6211 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
6212 {
6213 static bool warnedsse, warnedmmx;
6214
6215 if (!TARGET_64BIT && type)
6216 {
6217 /* Look at the return type of the function, not the function type. */
6218 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
6219
6220 if (!TARGET_SSE && !warnedsse)
6221 {
6222 if (mode == TImode
6223 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
6224 {
6225 warnedsse = true;
6226 warning (0, "SSE vector return without SSE enabled "
6227 "changes the ABI");
6228 }
6229 }
6230
6231 if (!TARGET_MMX && !warnedmmx)
6232 {
6233 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
6234 {
6235 warnedmmx = true;
6236 warning (0, "MMX vector return without MMX enabled "
6237 "changes the ABI");
6238 }
6239 }
6240 }
6241
6242 return NULL;
6243 }
6244
6245 \f
6246 /* Create the va_list data type. */
6247
6248 /* Returns the calling convention specific va_list data type.
6249 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
6250
6251 static tree
6252 ix86_build_builtin_va_list_abi (enum calling_abi abi)
6253 {
6254 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
6255
6256 /* For i386 we use plain pointer to argument area. */
6257 if (!TARGET_64BIT || abi == MS_ABI)
6258 return build_pointer_type (char_type_node);
6259
6260 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
6261 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
6262
6263 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
6264 unsigned_type_node);
6265 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
6266 unsigned_type_node);
6267 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
6268 ptr_type_node);
6269 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
6270 ptr_type_node);
6271
6272 va_list_gpr_counter_field = f_gpr;
6273 va_list_fpr_counter_field = f_fpr;
6274
6275 DECL_FIELD_CONTEXT (f_gpr) = record;
6276 DECL_FIELD_CONTEXT (f_fpr) = record;
6277 DECL_FIELD_CONTEXT (f_ovf) = record;
6278 DECL_FIELD_CONTEXT (f_sav) = record;
6279
6280 TREE_CHAIN (record) = type_decl;
6281 TYPE_NAME (record) = type_decl;
6282 TYPE_FIELDS (record) = f_gpr;
6283 TREE_CHAIN (f_gpr) = f_fpr;
6284 TREE_CHAIN (f_fpr) = f_ovf;
6285 TREE_CHAIN (f_ovf) = f_sav;
6286
6287 layout_type (record);
6288
6289 /* The correct type is an array type of one element. */
6290 return build_array_type (record, build_index_type (size_zero_node));
6291 }
6292
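/* The record built above corresponds roughly to the following C
   declaration from the SysV x86-64 ABI:

       typedef struct __va_list_tag {
         unsigned int gp_offset;
         unsigned int fp_offset;
         void *overflow_arg_area;
         void *reg_save_area;
       } __builtin_va_list[1];  */
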
6293 /* Set up the builtin va_list data type and for 64-bit the additional
6294 calling convention specific va_list data types. */
6295
6296 static tree
6297 ix86_build_builtin_va_list (void)
6298 {
6299 tree ret = ix86_build_builtin_va_list_abi (DEFAULT_ABI);
6300
6301 /* Initialize abi specific va_list builtin types. */
6302 if (TARGET_64BIT)
6303 {
6304 tree t;
6305 if (DEFAULT_ABI == MS_ABI)
6306 {
6307 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
6308 if (TREE_CODE (t) != RECORD_TYPE)
6309 t = build_variant_type_copy (t);
6310 sysv_va_list_type_node = t;
6311 }
6312 else
6313 {
6314 t = ret;
6315 if (TREE_CODE (t) != RECORD_TYPE)
6316 t = build_variant_type_copy (t);
6317 sysv_va_list_type_node = t;
6318 }
6319 if (DEFAULT_ABI != MS_ABI)
6320 {
6321 t = ix86_build_builtin_va_list_abi (MS_ABI);
6322 if (TREE_CODE (t) != RECORD_TYPE)
6323 t = build_variant_type_copy (t);
6324 ms_va_list_type_node = t;
6325 }
6326 else
6327 {
6328 t = ret;
6329 if (TREE_CODE (t) != RECORD_TYPE)
6330 t = build_variant_type_copy (t);
6331 ms_va_list_type_node = t;
6332 }
6333 }
6334
6335 return ret;
6336 }
6337
6338 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
6339
6340 static void
6341 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
6342 {
6343 rtx save_area, mem;
6344 rtx label;
6345 rtx label_ref;
6346 rtx tmp_reg;
6347 rtx nsse_reg;
6348 alias_set_type set;
6349 int i;
6350 int regparm = ix86_regparm;
6351
6352 if (cum->call_abi != DEFAULT_ABI)
6353 regparm = DEFAULT_ABI != SYSV_ABI ? X86_64_REGPARM_MAX : X64_REGPARM_MAX;
6354
6355 /* GPR size of varargs save area. */
6356 if (cfun->va_list_gpr_size)
6357 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
6358 else
6359 ix86_varargs_gpr_size = 0;
6360
6361 /* FPR size of varargs save area. We don't need it if we don't pass
6362 anything in SSE registers. */
6363 if (cum->sse_nregs && cfun->va_list_fpr_size)
6364 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
6365 else
6366 ix86_varargs_fpr_size = 0;
6367
6368 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
6369 return;
6370
6371 save_area = frame_pointer_rtx;
6372 set = get_varargs_alias_set ();
6373
6374 for (i = cum->regno;
6375 i < regparm
6376 && i < cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
6377 i++)
6378 {
6379 mem = gen_rtx_MEM (Pmode,
6380 plus_constant (save_area, i * UNITS_PER_WORD));
6381 MEM_NOTRAP_P (mem) = 1;
6382 set_mem_alias_set (mem, set);
6383 emit_move_insn (mem, gen_rtx_REG (Pmode,
6384 x86_64_int_parameter_registers[i]));
6385 }
6386
6387 if (ix86_varargs_fpr_size)
6388 {
6389 /* Now emit code to save SSE registers. The AX parameter contains the
6390 number of SSE parameter registers used to call this function. We use the
6391 sse_prologue_save insn template, which produces a computed jump across the
6392 SSE saves. We need some preparation work to get this working. */
6393
6394 label = gen_label_rtx ();
6395 label_ref = gen_rtx_LABEL_REF (Pmode, label);
6396
6397 /* Compute the address to jump to:
6398 label - eax*4 + n_named_sse_arguments*4, or
6399 label - eax*5 + n_named_sse_arguments*5 for AVX. */
6400 tmp_reg = gen_reg_rtx (Pmode);
6401 nsse_reg = gen_reg_rtx (Pmode);
6402 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, AX_REG)));
6403 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
6404 gen_rtx_MULT (Pmode, nsse_reg,
6405 GEN_INT (4))));
6406
6407 /* vmovaps is one byte longer than movaps. */
6408 if (TARGET_AVX)
6409 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
6410 gen_rtx_PLUS (Pmode, tmp_reg,
6411 nsse_reg)));
6412
6413 if (cum->sse_regno)
6414 emit_move_insn
6415 (nsse_reg,
6416 gen_rtx_CONST (DImode,
6417 gen_rtx_PLUS (DImode,
6418 label_ref,
6419 GEN_INT (cum->sse_regno
6420 * (TARGET_AVX ? 5 : 4)))));
6421 else
6422 emit_move_insn (nsse_reg, label_ref);
6423 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
6424
6425 /* Compute the address of the memory block we save into. We always use a
6426 pointer pointing 127 bytes after the first byte to store - this is needed
6427 to keep the instruction size limited to 4 bytes (5 bytes for AVX) with a
6428 one byte displacement. */
6429 tmp_reg = gen_reg_rtx (Pmode);
6430 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
6431 plus_constant (save_area,
6432 ix86_varargs_gpr_size + 127)));
6433 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
6434 MEM_NOTRAP_P (mem) = 1;
6435 set_mem_alias_set (mem, set);
6436 set_mem_align (mem, BITS_PER_WORD);
6437
6438 /* And finally do the dirty job! */
6439 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
6440 GEN_INT (cum->sse_regno), label));
6441 }
6442 }
6443
6444 static void
6445 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
6446 {
6447 alias_set_type set = get_varargs_alias_set ();
6448 int i;
6449
6450 for (i = cum->regno; i < X64_REGPARM_MAX; i++)
6451 {
6452 rtx reg, mem;
6453
6454 mem = gen_rtx_MEM (Pmode,
6455 plus_constant (virtual_incoming_args_rtx,
6456 i * UNITS_PER_WORD));
6457 MEM_NOTRAP_P (mem) = 1;
6458 set_mem_alias_set (mem, set);
6459
6460 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
6461 emit_move_insn (mem, reg);
6462 }
6463 }
6464
6465 static void
6466 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6467 tree type, int *pretend_size ATTRIBUTE_UNUSED,
6468 int no_rtl)
6469 {
6470 CUMULATIVE_ARGS next_cum;
6471 tree fntype;
6472
6473 /* This argument doesn't appear to be used anymore. Which is good,
6474 because the old code here didn't suppress rtl generation. */
6475 gcc_assert (!no_rtl);
6476
6477 if (!TARGET_64BIT)
6478 return;
6479
6480 fntype = TREE_TYPE (current_function_decl);
6481
6482 /* For varargs, we do not want to skip the dummy va_dcl argument.
6483 For stdargs, we do want to skip the last named argument. */
6484 next_cum = *cum;
6485 if (stdarg_p (fntype))
6486 function_arg_advance (&next_cum, mode, type, 1);
6487
6488 if (cum->call_abi == MS_ABI)
6489 setup_incoming_varargs_ms_64 (&next_cum);
6490 else
6491 setup_incoming_varargs_64 (&next_cum);
6492 }
6493
6494 /* Checks if TYPE is of kind va_list char *. */
6495
6496 static bool
6497 is_va_list_char_pointer (tree type)
6498 {
6499 tree canonic;
6500
6501 /* For 32-bit it is always true. */
6502 if (!TARGET_64BIT)
6503 return true;
6504 canonic = ix86_canonical_va_list_type (type);
6505 return (canonic == ms_va_list_type_node
6506 || (DEFAULT_ABI == MS_ABI && canonic == va_list_type_node));
6507 }
6508
6509 /* Implement va_start. */
6510
6511 static void
6512 ix86_va_start (tree valist, rtx nextarg)
6513 {
6514 HOST_WIDE_INT words, n_gpr, n_fpr;
6515 tree f_gpr, f_fpr, f_ovf, f_sav;
6516 tree gpr, fpr, ovf, sav, t;
6517 tree type;
6518
6519 /* Only 64bit target needs something special. */
6520 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
6521 {
6522 std_expand_builtin_va_start (valist, nextarg);
6523 return;
6524 }
6525
6526 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
6527 f_fpr = TREE_CHAIN (f_gpr);
6528 f_ovf = TREE_CHAIN (f_fpr);
6529 f_sav = TREE_CHAIN (f_ovf);
6530
6531 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
6532 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
6533 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
6534 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
6535 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
6536
6537 /* Count number of gp and fp argument registers used. */
6538 words = crtl->args.info.words;
6539 n_gpr = crtl->args.info.regno;
6540 n_fpr = crtl->args.info.sse_regno;
6541
6542 if (cfun->va_list_gpr_size)
6543 {
6544 type = TREE_TYPE (gpr);
6545 t = build2 (MODIFY_EXPR, type,
6546 gpr, build_int_cst (type, n_gpr * 8));
6547 TREE_SIDE_EFFECTS (t) = 1;
6548 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6549 }
6550
6551 if (TARGET_SSE && cfun->va_list_fpr_size)
6552 {
6553 type = TREE_TYPE (fpr);
6554 t = build2 (MODIFY_EXPR, type, fpr,
6555 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
6556 TREE_SIDE_EFFECTS (t) = 1;
6557 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6558 }
6559
6560 /* Find the overflow area. */
6561 type = TREE_TYPE (ovf);
6562 t = make_tree (type, crtl->args.internal_arg_pointer);
6563 if (words != 0)
6564 t = build2 (POINTER_PLUS_EXPR, type, t,
6565 size_int (words * UNITS_PER_WORD));
6566 t = build2 (MODIFY_EXPR, type, ovf, t);
6567 TREE_SIDE_EFFECTS (t) = 1;
6568 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6569
6570 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
6571 {
6572 /* Find the register save area.
6573 The function prologue saves it right above the stack frame. */
6574 type = TREE_TYPE (sav);
6575 t = make_tree (type, frame_pointer_rtx);
6576 if (!ix86_varargs_gpr_size)
6577 t = build2 (POINTER_PLUS_EXPR, type, t,
6578 size_int (-8 * X86_64_REGPARM_MAX));
6579 t = build2 (MODIFY_EXPR, type, sav, t);
6580 TREE_SIDE_EFFECTS (t) = 1;
6581 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6582 }
6583 }
6584
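/* For example, in a variadic function declared as f (int x, ...) the
   expansion above initializes gp_offset to 8 (one named GPR argument
   already consumed) and, when SSE is enabled, fp_offset to
   8 * X86_64_REGPARM_MAX == 48 since no named SSE argument was used;
   overflow_arg_area points past any named arguments passed on the stack
   and reg_save_area at the block written by setup_incoming_varargs_64.  */
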
6585 /* Implement va_arg. */
6586
6587 static tree
6588 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
6589 gimple_seq *post_p)
6590 {
6591 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
6592 tree f_gpr, f_fpr, f_ovf, f_sav;
6593 tree gpr, fpr, ovf, sav, t;
6594 int size, rsize;
6595 tree lab_false, lab_over = NULL_TREE;
6596 tree addr, t2;
6597 rtx container;
6598 int indirect_p = 0;
6599 tree ptrtype;
6600 enum machine_mode nat_mode;
6601 int arg_boundary;
6602
6603 /* Only 64bit target needs something special. */
6604 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
6605 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
6606
6607 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
6608 f_fpr = TREE_CHAIN (f_gpr);
6609 f_ovf = TREE_CHAIN (f_fpr);
6610 f_sav = TREE_CHAIN (f_ovf);
6611
6612 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
6613 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
6614 valist = build_va_arg_indirect_ref (valist);
6615 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
6616 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
6617 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
6618
6619 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
6620 if (indirect_p)
6621 type = build_pointer_type (type);
6622 size = int_size_in_bytes (type);
6623 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6624
6625 nat_mode = type_natural_mode (type);
6626 switch (nat_mode)
6627 {
6628 case V8SFmode:
6629 case V8SImode:
6630 case V32QImode:
6631 case V16HImode:
6632 case V4DFmode:
6633 case V4DImode:
6634 /* Unnamed 256bit vector mode parameters are passed on stack. */
6635 if (ix86_cfun_abi () == SYSV_ABI)
6636 {
6637 container = NULL;
6638 break;
6639 }
6640
6641 default:
6642 container = construct_container (nat_mode, TYPE_MODE (type),
6643 type, 0, X86_64_REGPARM_MAX,
6644 X86_64_SSE_REGPARM_MAX, intreg,
6645 0);
6646 break;
6647 }
6648
6649 /* Pull the value out of the saved registers. */
6650
6651 addr = create_tmp_var (ptr_type_node, "addr");
6652 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
6653
6654 if (container)
6655 {
6656 int needed_intregs, needed_sseregs;
6657 bool need_temp;
6658 tree int_addr, sse_addr;
6659
6660 lab_false = create_artificial_label ();
6661 lab_over = create_artificial_label ();
6662
6663 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
6664
6665 need_temp = (!REG_P (container)
6666 && ((needed_intregs && TYPE_ALIGN (type) > 64)
6667 || TYPE_ALIGN (type) > 128));
6668
6669 /* In case we are passing a structure, verify that it is a consecutive
6670 block in the register save area. If not, we need to do moves. */
6671 if (!need_temp && !REG_P (container))
6672 {
6673 /* Verify that all registers are strictly consecutive */
6674 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
6675 {
6676 int i;
6677
6678 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
6679 {
6680 rtx slot = XVECEXP (container, 0, i);
6681 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
6682 || INTVAL (XEXP (slot, 1)) != i * 16)
6683 need_temp = 1;
6684 }
6685 }
6686 else
6687 {
6688 int i;
6689
6690 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
6691 {
6692 rtx slot = XVECEXP (container, 0, i);
6693 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
6694 || INTVAL (XEXP (slot, 1)) != i * 8)
6695 need_temp = 1;
6696 }
6697 }
6698 }
6699 if (!need_temp)
6700 {
6701 int_addr = addr;
6702 sse_addr = addr;
6703 }
6704 else
6705 {
6706 int_addr = create_tmp_var (ptr_type_node, "int_addr");
6707 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
6708 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
6709 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
6710 }
6711
6712 /* First ensure that we fit completely in registers. */
6713 if (needed_intregs)
6714 {
6715 t = build_int_cst (TREE_TYPE (gpr),
6716 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
6717 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
6718 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
6719 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
6720 gimplify_and_add (t, pre_p);
6721 }
6722 if (needed_sseregs)
6723 {
6724 t = build_int_cst (TREE_TYPE (fpr),
6725 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
6726 + X86_64_REGPARM_MAX * 8);
6727 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
6728 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
6729 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
6730 gimplify_and_add (t, pre_p);
6731 }
6732
6733 /* Compute index to start of area used for integer regs. */
6734 if (needed_intregs)
6735 {
6736 /* int_addr = gpr + sav; */
6737 t = fold_convert (sizetype, gpr);
6738 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
6739 gimplify_assign (int_addr, t, pre_p);
6740 }
6741 if (needed_sseregs)
6742 {
6743 /* sse_addr = fpr + sav; */
6744 t = fold_convert (sizetype, fpr);
6745 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
6746 gimplify_assign (sse_addr, t, pre_p);
6747 }
6748 if (need_temp)
6749 {
6750 int i;
6751 tree temp = create_tmp_var (type, "va_arg_tmp");
6752
6753 /* addr = &temp; */
6754 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
6755 gimplify_assign (addr, t, pre_p);
6756
6757 for (i = 0; i < XVECLEN (container, 0); i++)
6758 {
6759 rtx slot = XVECEXP (container, 0, i);
6760 rtx reg = XEXP (slot, 0);
6761 enum machine_mode mode = GET_MODE (reg);
6762 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
6763 tree addr_type = build_pointer_type (piece_type);
6764 tree daddr_type = build_pointer_type_for_mode (piece_type,
6765 ptr_mode, true);
6766 tree src_addr, src;
6767 int src_offset;
6768 tree dest_addr, dest;
6769
6770 if (SSE_REGNO_P (REGNO (reg)))
6771 {
6772 src_addr = sse_addr;
6773 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
6774 }
6775 else
6776 {
6777 src_addr = int_addr;
6778 src_offset = REGNO (reg) * 8;
6779 }
6780 src_addr = fold_convert (addr_type, src_addr);
6781 src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr,
6782 size_int (src_offset));
6783 src = build_va_arg_indirect_ref (src_addr);
6784
6785 dest_addr = fold_convert (daddr_type, addr);
6786 dest_addr = fold_build2 (POINTER_PLUS_EXPR, daddr_type, dest_addr,
6787 size_int (INTVAL (XEXP (slot, 1))));
6788 dest = build_va_arg_indirect_ref (dest_addr);
6789
6790 gimplify_assign (dest, src, pre_p);
6791 }
6792 }
6793
6794 if (needed_intregs)
6795 {
6796 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
6797 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
6798 gimplify_assign (gpr, t, pre_p);
6799 }
6800
6801 if (needed_sseregs)
6802 {
6803 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
6804 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
6805 gimplify_assign (fpr, t, pre_p);
6806 }
6807
6808 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
6809
6810 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
6811 }
6812
6813 /* ... otherwise out of the overflow area. */
6814
6815 /* When we align a parameter on the stack for the caller, if the parameter
6816 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
6817 aligned at MAX_SUPPORTED_STACK_ALIGNMENT. We match the callee
6818 here with the caller. */
6819 arg_boundary = FUNCTION_ARG_BOUNDARY (VOIDmode, type);
6820 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
6821 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
6822
6823 /* Care for on-stack alignment if needed. */
6824 if (arg_boundary <= 64
6825 || integer_zerop (TYPE_SIZE (type)))
6826 t = ovf;
6827 else
6828 {
6829 HOST_WIDE_INT align = arg_boundary / 8;
6830 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), ovf,
6831 size_int (align - 1));
6832 t = fold_convert (sizetype, t);
6833 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
6834 size_int (-align));
6835 t = fold_convert (TREE_TYPE (ovf), t);
6836 }
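/* This is the usual round-up-to-a-multiple trick: for example, with a
   128-bit argument boundary, align == 16, and an overflow pointer of 40,
   the computation gives (40 + 15) & -16 == 48, the next 16-byte aligned
   address.  */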
6837 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
6838 gimplify_assign (addr, t, pre_p);
6839
6840 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t,
6841 size_int (rsize * UNITS_PER_WORD));
6842 gimplify_assign (unshare_expr (ovf), t, pre_p);
6843
6844 if (container)
6845 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
6846
6847 ptrtype = build_pointer_type (type);
6848 addr = fold_convert (ptrtype, addr);
6849
6850 if (indirect_p)
6851 addr = build_va_arg_indirect_ref (addr);
6852 return build_va_arg_indirect_ref (addr);
6853 }
6854 \f
6855 /* Return nonzero if OPNUM's MEM should be matched
6856 in movabs* patterns. */
6857
6858 int
6859 ix86_check_movabs (rtx insn, int opnum)
6860 {
6861 rtx set, mem;
6862
6863 set = PATTERN (insn);
6864 if (GET_CODE (set) == PARALLEL)
6865 set = XVECEXP (set, 0, 0);
6866 gcc_assert (GET_CODE (set) == SET);
6867 mem = XEXP (set, opnum);
6868 while (GET_CODE (mem) == SUBREG)
6869 mem = SUBREG_REG (mem);
6870 gcc_assert (MEM_P (mem));
6871 return (volatile_ok || !MEM_VOLATILE_P (mem));
6872 }
6873 \f
6874 /* Initialize the table of extra 80387 mathematical constants. */
6875
6876 static void
6877 init_ext_80387_constants (void)
6878 {
6879 static const char * cst[5] =
6880 {
6881 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
6882 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
6883 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
6884 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
6885 "3.1415926535897932385128089594061862044", /* 4: fldpi */
6886 };
6887 int i;
6888
6889 for (i = 0; i < 5; i++)
6890 {
6891 real_from_string (&ext_80387_constants_table[i], cst[i]);
6892 /* Ensure each constant is rounded to XFmode precision. */
6893 real_convert (&ext_80387_constants_table[i],
6894 XFmode, &ext_80387_constants_table[i]);
6895 }
6896
6897 ext_80387_constants_init = 1;
6898 }
6899
6900 /* Return a nonzero code if constant X can be loaded with a special x87
6901 instruction; return 0 if not, and -1 if X is not an x87 constant. */
6902
6903 int
6904 standard_80387_constant_p (rtx x)
6905 {
6906 enum machine_mode mode = GET_MODE (x);
6907
6908 REAL_VALUE_TYPE r;
6909
6910 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
6911 return -1;
6912
6913 if (x == CONST0_RTX (mode))
6914 return 1;
6915 if (x == CONST1_RTX (mode))
6916 return 2;
6917
6918 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6919
6920 /* For XFmode constants, try to find a special 80387 instruction when
6921 optimizing for size or on those CPUs that benefit from them. */
6922 if (mode == XFmode
6923 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
6924 {
6925 int i;
6926
6927 if (! ext_80387_constants_init)
6928 init_ext_80387_constants ();
6929
6930 for (i = 0; i < 5; i++)
6931 if (real_identical (&r, &ext_80387_constants_table[i]))
6932 return i + 3;
6933 }
6934
6935 /* A load of the constant -0.0 or -1.0 will be split into a
6936 fldz;fchs or fld1;fchs sequence. */
6937 if (real_isnegzero (&r))
6938 return 8;
6939 if (real_identical (&r, &dconstm1))
6940 return 9;
6941
6942 return 0;
6943 }
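/* For reference, the codes returned above map onto the loads emitted
   below: 1 -> fldz, 2 -> fld1, 3 -> fldlg2, 4 -> fldln2, 5 -> fldl2e,
   6 -> fldl2t, 7 -> fldpi, 8 -> -0.0 (split into fldz; fchs) and
   9 -> -1.0 (split into fld1; fchs).  0 means no special instruction
   exists and -1 means X is not an x87 constant at all.  */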
6944
6945 /* Return the opcode of the special instruction to be used to load
6946 the constant X. */
6947
6948 const char *
6949 standard_80387_constant_opcode (rtx x)
6950 {
6951 switch (standard_80387_constant_p (x))
6952 {
6953 case 1:
6954 return "fldz";
6955 case 2:
6956 return "fld1";
6957 case 3:
6958 return "fldlg2";
6959 case 4:
6960 return "fldln2";
6961 case 5:
6962 return "fldl2e";
6963 case 6:
6964 return "fldl2t";
6965 case 7:
6966 return "fldpi";
6967 case 8:
6968 case 9:
6969 return "#";
6970 default:
6971 gcc_unreachable ();
6972 }
6973 }
6974
6975 /* Return the CONST_DOUBLE representing the 80387 constant that is
6976 loaded by the specified special instruction. The argument IDX
6977 matches the return value from standard_80387_constant_p. */
6978
6979 rtx
6980 standard_80387_constant_rtx (int idx)
6981 {
6982 int i;
6983
6984 if (! ext_80387_constants_init)
6985 init_ext_80387_constants ();
6986
6987 switch (idx)
6988 {
6989 case 3:
6990 case 4:
6991 case 5:
6992 case 6:
6993 case 7:
6994 i = idx - 3;
6995 break;
6996
6997 default:
6998 gcc_unreachable ();
6999 }
7000
7001 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
7002 XFmode);
7003 }
7004
7005 /* Return 1 if MODE is a valid mode for SSE. */
7006 static int
7007 standard_sse_mode_p (enum machine_mode mode)
7008 {
7009 switch (mode)
7010 {
7011 case V16QImode:
7012 case V8HImode:
7013 case V4SImode:
7014 case V2DImode:
7015 case V4SFmode:
7016 case V2DFmode:
7017 return 1;
7018
7019 default:
7020 return 0;
7021 }
7022 }
7023
7024 /* Return 1 if X is all zeros. If X is all ones, return 2 when X is in
7025 a 128-bit SSE mode and SSE2 is enabled, or 3 when X is in a 256-bit
7026 AVX mode and AVX is enabled. */
7027
7028 int
7029 standard_sse_constant_p (rtx x)
7030 {
7031 enum machine_mode mode = GET_MODE (x);
7032
7033 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
7034 return 1;
7035 if (vector_all_ones_operand (x, mode))
7036 {
7037 if (standard_sse_mode_p (mode))
7038 return TARGET_SSE2 ? 2 : -2;
7039 else if (VALID_AVX256_REG_MODE (mode))
7040 return TARGET_AVX ? 3 : -3;
7041 }
7042
7043 return 0;
7044 }
7045
7046 /* Return the opcode of the special instruction to be used to load
7047 the constant X. */
7048
7049 const char *
7050 standard_sse_constant_opcode (rtx insn, rtx x)
7051 {
7052 switch (standard_sse_constant_p (x))
7053 {
7054 case 1:
7055 switch (get_attr_mode (insn))
7056 {
7057 case MODE_V4SF:
7058 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
7059 case MODE_V2DF:
7060 return TARGET_AVX ? "vxorpd\t%0, %0, %0" : "xorpd\t%0, %0";
7061 case MODE_TI:
7062 return TARGET_AVX ? "vpxor\t%0, %0, %0" : "pxor\t%0, %0";
7063 case MODE_V8SF:
7064 return "vxorps\t%x0, %x0, %x0";
7065 case MODE_V4DF:
7066 return "vxorpd\t%x0, %x0, %x0";
7067 case MODE_OI:
7068 return "vpxor\t%x0, %x0, %x0";
7069 default:
7070 gcc_unreachable ();
7071 }
7072 case 2:
7073 if (TARGET_AVX)
7074 switch (get_attr_mode (insn))
7075 {
7076 case MODE_V4SF:
7077 case MODE_V2DF:
7078 case MODE_TI:
7079 return "vpcmpeqd\t%0, %0, %0";
7080 break;
7081 default:
7082 gcc_unreachable ();
7083 }
7084 else
7085 return "pcmpeqd\t%0, %0";
7086 }
7087 gcc_unreachable ();
7088 }
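/* In other words, an all-zero vector constant is materialized by xoring
   a register with itself (xorps/xorpd/pxor or their VEX forms), and an
   all-ones constant by comparing a register with itself for equality
   (pcmpeqd), so neither requires a load from the constant pool.  */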
7089
7090 /* Return 1 if OP contains a symbol reference. */
7091
7092 int
7093 symbolic_reference_mentioned_p (rtx op)
7094 {
7095 const char *fmt;
7096 int i;
7097
7098 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
7099 return 1;
7100
7101 fmt = GET_RTX_FORMAT (GET_CODE (op));
7102 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
7103 {
7104 if (fmt[i] == 'E')
7105 {
7106 int j;
7107
7108 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
7109 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
7110 return 1;
7111 }
7112
7113 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
7114 return 1;
7115 }
7116
7117 return 0;
7118 }
7119
7120 /* Return 1 if it is appropriate to emit `ret' instructions in the
7121 body of a function. Do this only if the epilogue is simple, needing a
7122 couple of insns. Prior to reloading, we can't tell how many registers
7123 must be saved, so return 0 then. Return 0 if there is no frame
7124 marker to de-allocate. */
7125
7126 int
7127 ix86_can_use_return_insn_p (void)
7128 {
7129 struct ix86_frame frame;
7130
7131 if (! reload_completed || frame_pointer_needed)
7132 return 0;
7133
7134 /* Don't allow more than 32k of pops, since that's all we can do
7135 with one instruction. */
7136 if (crtl->args.pops_args
7137 && crtl->args.size >= 32768)
7138 return 0;
7139
7140 ix86_compute_frame_layout (&frame);
7141 return frame.to_allocate == 0 && frame.nregs == 0;
7142 }
7143 \f
7144 /* Value should be nonzero if functions must have frame pointers.
7145 Zero means the frame pointer need not be set up (and parms may
7146 be accessed via the stack pointer) in functions that seem suitable. */
7147
7148 int
7149 ix86_frame_pointer_required (void)
7150 {
7151 /* If we accessed previous frames, then the generated code expects
7152 to be able to access the saved ebp value in our frame. */
7153 if (cfun->machine->accesses_prev_frame)
7154 return 1;
7155
7156 /* Several x86 OSes need a frame pointer for other reasons,
7157 usually pertaining to setjmp. */
7158 if (SUBTARGET_FRAME_POINTER_REQUIRED)
7159 return 1;
7160
7161 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
7162 the frame pointer by default. Turn it back on now if we've not
7163 got a leaf function. */
7164 if (TARGET_OMIT_LEAF_FRAME_POINTER
7165 && (!current_function_is_leaf
7166 || ix86_current_function_calls_tls_descriptor))
7167 return 1;
7168
7169 if (crtl->profile)
7170 return 1;
7171
7172 return 0;
7173 }
7174
7175 /* Record that the current function accesses previous call frames. */
7176
7177 void
7178 ix86_setup_frame_addresses (void)
7179 {
7180 cfun->machine->accesses_prev_frame = 1;
7181 }
7182 \f
7183 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
7184 # define USE_HIDDEN_LINKONCE 1
7185 #else
7186 # define USE_HIDDEN_LINKONCE 0
7187 #endif
7188
7189 static int pic_labels_used;
7190
7191 /* Fills in the label name that should be used for a pc thunk for
7192 the given register. */
7193
7194 static void
7195 get_pc_thunk_name (char name[32], unsigned int regno)
7196 {
7197 gcc_assert (!TARGET_64BIT);
7198
7199 if (USE_HIDDEN_LINKONCE)
7200 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
7201 else
7202 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
7203 }
7204
7205
7206 /* At end of file, emit for -fpic each pc thunk that was used: it loads
7207 its register with the return address of the caller and then returns. */
7208
7209 void
7210 ix86_file_end (void)
7211 {
7212 rtx xops[2];
7213 int regno;
7214
7215 for (regno = 0; regno < 8; ++regno)
7216 {
7217 char name[32];
7218
7219 if (! ((pic_labels_used >> regno) & 1))
7220 continue;
7221
7222 get_pc_thunk_name (name, regno);
7223
7224 #if TARGET_MACHO
7225 if (TARGET_MACHO)
7226 {
7227 switch_to_section (darwin_sections[text_coal_section]);
7228 fputs ("\t.weak_definition\t", asm_out_file);
7229 assemble_name (asm_out_file, name);
7230 fputs ("\n\t.private_extern\t", asm_out_file);
7231 assemble_name (asm_out_file, name);
7232 fputs ("\n", asm_out_file);
7233 ASM_OUTPUT_LABEL (asm_out_file, name);
7234 }
7235 else
7236 #endif
7237 if (USE_HIDDEN_LINKONCE)
7238 {
7239 tree decl;
7240
7241 decl = build_decl (FUNCTION_DECL, get_identifier (name),
7242 error_mark_node);
7243 TREE_PUBLIC (decl) = 1;
7244 TREE_STATIC (decl) = 1;
7245 DECL_ONE_ONLY (decl) = 1;
7246
7247 (*targetm.asm_out.unique_section) (decl, 0);
7248 switch_to_section (get_named_section (decl, NULL, 0));
7249
7250 (*targetm.asm_out.globalize_label) (asm_out_file, name);
7251 fputs ("\t.hidden\t", asm_out_file);
7252 assemble_name (asm_out_file, name);
7253 fputc ('\n', asm_out_file);
7254 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
7255 }
7256 else
7257 {
7258 switch_to_section (text_section);
7259 ASM_OUTPUT_LABEL (asm_out_file, name);
7260 }
7261
7262 xops[0] = gen_rtx_REG (Pmode, regno);
7263 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
7264 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
7265 output_asm_insn ("ret", xops);
7266 }
7267
7268 if (NEED_INDICATE_EXEC_STACK)
7269 file_end_indicate_exec_stack ();
7270 }
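/* As a rough sketch, the thunk emitted above for, say, %ebx reads

	__i686.get_pc_thunk.bx:
		movl	(%esp), %ebx
		ret

   i.e. it copies its own return address (the address of the insn
   following the call in the caller) into the requested register.  */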
7271
7272 /* Emit code for the SET_GOT patterns. */
7273
7274 const char *
7275 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
7276 {
7277 rtx xops[3];
7278
7279 xops[0] = dest;
7280
7281 if (TARGET_VXWORKS_RTP && flag_pic)
7282 {
7283 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
7284 xops[2] = gen_rtx_MEM (Pmode,
7285 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
7286 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
7287
7288 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
7289 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
7290 an unadorned address. */
7291 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
7292 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
7293 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
7294 return "";
7295 }
7296
7297 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
7298
7299 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
7300 {
7301 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
7302
7303 if (!flag_pic)
7304 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
7305 else
7306 output_asm_insn ("call\t%a2", xops);
7307
7308 #if TARGET_MACHO
7309 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
7310 is what will be referenced by the Mach-O PIC subsystem. */
7311 if (!label)
7312 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
7313 #endif
7314
7315 (*targetm.asm_out.internal_label) (asm_out_file, "L",
7316 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
7317
7318 if (flag_pic)
7319 output_asm_insn ("pop%z0\t%0", xops);
7320 }
7321 else
7322 {
7323 char name[32];
7324 get_pc_thunk_name (name, REGNO (dest));
7325 pic_labels_used |= 1 << REGNO (dest);
7326
7327 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
7328 xops[2] = gen_rtx_MEM (QImode, xops[2]);
7329 output_asm_insn ("call\t%X2", xops);
7330 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
7331 is what will be referenced by the Mach-O PIC subsystem. */
7332 #if TARGET_MACHO
7333 if (!label)
7334 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
7335 else
7336 targetm.asm_out.internal_label (asm_out_file, "L",
7337 CODE_LABEL_NUMBER (label));
7338 #endif
7339 }
7340
7341 if (TARGET_MACHO)
7342 return "";
7343
7344 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
7345 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
7346 else
7347 output_asm_insn ("add%z0\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
7348
7349 return "";
7350 }
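/* On ELF targets with TARGET_DEEP_BRANCH_PREDICTION the sequence emitted
   above is roughly

	call	__i686.get_pc_thunk.bx
	addl	$_GLOBAL_OFFSET_TABLE_, %ebx

   while without the thunk it is a call to a local label, a pop of the
   destination register, and the same add with a [.-label] adjustment.
   This is only a sketch; the exact text comes from the templates above.  */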
7351
7352 /* Generate a "push" pattern for input ARG. */
7353
7354 static rtx
7355 gen_push (rtx arg)
7356 {
7357 return gen_rtx_SET (VOIDmode,
7358 gen_rtx_MEM (Pmode,
7359 gen_rtx_PRE_DEC (Pmode,
7360 stack_pointer_rtx)),
7361 arg);
7362 }
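/* For example, gen_push (hard_frame_pointer_rtx) in 32-bit mode yields
   (set (mem:SI (pre_dec:SI (reg:SI sp))) (reg:SI bp)), which matches the
   push patterns in i386.md and assembles to a single "push %ebp".  */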
7363
7364 /* Return the number of an unused call-clobbered register that is
7365 available for the entire function, or INVALID_REGNUM if there is none. */
7366
7367 static unsigned int
7368 ix86_select_alt_pic_regnum (void)
7369 {
7370 if (current_function_is_leaf && !crtl->profile
7371 && !ix86_current_function_calls_tls_descriptor)
7372 {
7373 int i, drap;
7374 /* Can't use the same register for both PIC and DRAP. */
7375 if (crtl->drap_reg)
7376 drap = REGNO (crtl->drap_reg);
7377 else
7378 drap = -1;
7379 for (i = 2; i >= 0; --i)
7380 if (i != drap && !df_regs_ever_live_p (i))
7381 return i;
7382 }
7383
7384 return INVALID_REGNUM;
7385 }
7386
7387 /* Return 1 if we need to save REGNO. */
7388 static int
7389 ix86_save_reg (unsigned int regno, int maybe_eh_return)
7390 {
7391 if (pic_offset_table_rtx
7392 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
7393 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
7394 || crtl->profile
7395 || crtl->calls_eh_return
7396 || crtl->uses_const_pool))
7397 {
7398 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
7399 return 0;
7400 return 1;
7401 }
7402
7403 if (crtl->calls_eh_return && maybe_eh_return)
7404 {
7405 unsigned i;
7406 for (i = 0; ; i++)
7407 {
7408 unsigned test = EH_RETURN_DATA_REGNO (i);
7409 if (test == INVALID_REGNUM)
7410 break;
7411 if (test == regno)
7412 return 1;
7413 }
7414 }
7415
7416 if (crtl->drap_reg
7417 && regno == REGNO (crtl->drap_reg))
7418 return 1;
7419
7420 return (df_regs_ever_live_p (regno)
7421 && !call_used_regs[regno]
7422 && !fixed_regs[regno]
7423 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
7424 }
7425
7426 /* Return the number of saved general purpose registers. */
7427
7428 static int
7429 ix86_nsaved_regs (void)
7430 {
7431 int nregs = 0;
7432 int regno;
7433
7434 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7435 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7436 nregs ++;
7437 return nregs;
7438 }
7439
7440 /* Return the number of saved SSE registers. */
7441
7442 static int
7443 ix86_nsaved_sseregs (void)
7444 {
7445 int nregs = 0;
7446 int regno;
7447
7448 if (ix86_cfun_abi () != MS_ABI)
7449 return 0;
7450 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7451 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7452 nregs ++;
7453 return nregs;
7454 }
7455
7456 /* Given FROM and TO register numbers, say whether this elimination is
7457 allowed. If stack alignment is needed, we can only replace argument
7458 pointer with hard frame pointer, or replace frame pointer with stack
7459 pointer. Otherwise, frame pointer elimination is automatically
7460 handled and all other eliminations are valid. */
7461
7462 int
7463 ix86_can_eliminate (int from, int to)
7464 {
7465 if (stack_realign_fp)
7466 return ((from == ARG_POINTER_REGNUM
7467 && to == HARD_FRAME_POINTER_REGNUM)
7468 || (from == FRAME_POINTER_REGNUM
7469 && to == STACK_POINTER_REGNUM));
7470 else
7471 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : 1;
7472 }
7473
7474 /* Return the offset between two registers, one to be eliminated, and the other
7475 its replacement, at the start of a routine. */
7476
7477 HOST_WIDE_INT
7478 ix86_initial_elimination_offset (int from, int to)
7479 {
7480 struct ix86_frame frame;
7481 ix86_compute_frame_layout (&frame);
7482
7483 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
7484 return frame.hard_frame_pointer_offset;
7485 else if (from == FRAME_POINTER_REGNUM
7486 && to == HARD_FRAME_POINTER_REGNUM)
7487 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
7488 else
7489 {
7490 gcc_assert (to == STACK_POINTER_REGNUM);
7491
7492 if (from == ARG_POINTER_REGNUM)
7493 return frame.stack_pointer_offset;
7494
7495 gcc_assert (from == FRAME_POINTER_REGNUM);
7496 return frame.stack_pointer_offset - frame.frame_pointer_offset;
7497 }
7498 }
7499
7500 /* Fill structure ix86_frame about frame of currently computed function. */
7501
7502 static void
7503 ix86_compute_frame_layout (struct ix86_frame *frame)
7504 {
7505 HOST_WIDE_INT total_size;
7506 unsigned int stack_alignment_needed;
7507 HOST_WIDE_INT offset;
7508 unsigned int preferred_alignment;
7509 HOST_WIDE_INT size = get_frame_size ();
7510
7511 frame->nregs = ix86_nsaved_regs ();
7512 frame->nsseregs = ix86_nsaved_sseregs ();
7513 total_size = size;
7514
7515 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
7516 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
7517
7518 /* The MS ABI seems to require the stack alignment to always be 16, except
7519 inside function prologues. */
7520 if (ix86_cfun_abi () == MS_ABI && preferred_alignment < 16)
7521 {
7522 preferred_alignment = 16;
7523 stack_alignment_needed = 16;
7524 crtl->preferred_stack_boundary = 128;
7525 crtl->stack_alignment_needed = 128;
7526 }
7527
7528 gcc_assert (!size || stack_alignment_needed);
7529 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
7530 gcc_assert (preferred_alignment <= stack_alignment_needed);
7531
7532 /* During reload iterations the number of registers saved can change.
7533 Recompute the value as needed. Do not recompute when the number of
7534 registers didn't change, as reload calls this function multiple times
7535 and does not expect the decision to change within a single iteration. */
7536 if (!optimize_function_for_size_p (cfun)
7537 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
7538 {
7539 int count = frame->nregs;
7540
7541 cfun->machine->use_fast_prologue_epilogue_nregs = count;
7542 /* The fast prologue uses move instead of push to save registers. This
7543 is significantly longer, but also executes faster as modern hardware
7544 can execute the moves in parallel, but can't do that for push/pop.
7545
7546 Be careful about choosing which prologue to emit: when the function
7547 takes many instructions to execute, we may as well use the slow
7548 version, and likewise when the function is known to be outside any
7549 hot spot (this is only known with profile feedback). Weight the size
7550 of the function by the number of registers to save, as it is cheap to
7551 use one or two push instructions but very slow to use many of them. */
7552 if (count)
7553 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
7554 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
7555 || (flag_branch_probabilities
7556 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
7557 cfun->machine->use_fast_prologue_epilogue = false;
7558 else
7559 cfun->machine->use_fast_prologue_epilogue
7560 = !expensive_function_p (count);
7561 }
7562 if (TARGET_PROLOGUE_USING_MOVE
7563 && cfun->machine->use_fast_prologue_epilogue)
7564 frame->save_regs_using_mov = true;
7565 else
7566 frame->save_regs_using_mov = false;
7567
7568
7569 /* Skip return address and saved base pointer. */
7570 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
7571
7572 frame->hard_frame_pointer_offset = offset;
7573
7574 /* Round the offset up to the required alignment, because the realigned
7575 frame starts here. */
7576 if (stack_realign_fp)
7577 offset = (offset + stack_alignment_needed -1) & -stack_alignment_needed;
7578
7579 /* Register save area */
7580 offset += frame->nregs * UNITS_PER_WORD;
7581
7582 /* Align SSE reg save area. */
7583 if (frame->nsseregs)
7584 frame->padding0 = ((offset + 16 - 1) & -16) - offset;
7585 else
7586 frame->padding0 = 0;
7587
7588 /* SSE register save area. */
7589 offset += frame->padding0 + frame->nsseregs * 16;
7590
7591 /* Va-arg area */
7592 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
7593 offset += frame->va_arg_size;
7594
7595 /* Align start of frame for local function. */
7596 frame->padding1 = ((offset + stack_alignment_needed - 1)
7597 & -stack_alignment_needed) - offset;
7598
7599 offset += frame->padding1;
7600
7601 /* Frame pointer points here. */
7602 frame->frame_pointer_offset = offset;
7603
7604 offset += size;
7605
7606 /* Add the outgoing arguments area. It can be skipped if we eliminated
7607 all the function calls as dead code.
7608 Skipping is however impossible when the function calls alloca: the
7609 alloca expander assumes that the last crtl->outgoing_args_size bytes
7610 of the stack frame are unused. */
7611 if (ACCUMULATE_OUTGOING_ARGS
7612 && (!current_function_is_leaf || cfun->calls_alloca
7613 || ix86_current_function_calls_tls_descriptor))
7614 {
7615 offset += crtl->outgoing_args_size;
7616 frame->outgoing_arguments_size = crtl->outgoing_args_size;
7617 }
7618 else
7619 frame->outgoing_arguments_size = 0;
7620
7621 /* Align stack boundary. Only needed if we're calling another function
7622 or using alloca. */
7623 if (!current_function_is_leaf || cfun->calls_alloca
7624 || ix86_current_function_calls_tls_descriptor)
7625 frame->padding2 = ((offset + preferred_alignment - 1)
7626 & -preferred_alignment) - offset;
7627 else
7628 frame->padding2 = 0;
7629
7630 offset += frame->padding2;
7631
7632 /* We've reached end of stack frame. */
7633 frame->stack_pointer_offset = offset;
7634
7635 /* Size prologue needs to allocate. */
7636 frame->to_allocate =
7637 (size + frame->padding1 + frame->padding2
7638 + frame->outgoing_arguments_size + frame->va_arg_size);
7639
7640 if ((!frame->to_allocate && frame->nregs <= 1)
7641 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
7642 frame->save_regs_using_mov = false;
7643
7644 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE && current_function_sp_is_unchanging
7645 && current_function_is_leaf
7646 && !ix86_current_function_calls_tls_descriptor)
7647 {
7648 frame->red_zone_size = frame->to_allocate;
7649 if (frame->save_regs_using_mov)
7650 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
7651 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
7652 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
7653 }
7654 else
7655 frame->red_zone_size = 0;
7656 frame->to_allocate -= frame->red_zone_size;
7657 frame->stack_pointer_offset -= frame->red_zone_size;
7658 #if 0
7659 fprintf (stderr, "\n");
7660 fprintf (stderr, "size: %ld\n", (long)size);
7661 fprintf (stderr, "nregs: %ld\n", (long)frame->nregs);
7662 fprintf (stderr, "nsseregs: %ld\n", (long)frame->nsseregs);
7663 fprintf (stderr, "padding0: %ld\n", (long)frame->padding0);
7664 fprintf (stderr, "alignment1: %ld\n", (long)stack_alignment_needed);
7665 fprintf (stderr, "padding1: %ld\n", (long)frame->padding1);
7666 fprintf (stderr, "va_arg: %ld\n", (long)frame->va_arg_size);
7667 fprintf (stderr, "padding2: %ld\n", (long)frame->padding2);
7668 fprintf (stderr, "to_allocate: %ld\n", (long)frame->to_allocate);
7669 fprintf (stderr, "red_zone_size: %ld\n", (long)frame->red_zone_size);
7670 fprintf (stderr, "frame_pointer_offset: %ld\n", (long)frame->frame_pointer_offset);
7671 fprintf (stderr, "hard_frame_pointer_offset: %ld\n",
7672 (long)frame->hard_frame_pointer_offset);
7673 fprintf (stderr, "stack_pointer_offset: %ld\n", (long)frame->stack_pointer_offset);
7674 fprintf (stderr, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf);
7675 fprintf (stderr, "cfun->calls_alloca: %ld\n", (long)cfun->calls_alloca);
7676 fprintf (stderr, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor);
7677 #endif
7678 }
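/* A rough sketch of the layout computed above, going from the incoming
   argument pointer toward lower addresses (any field may be empty):

	return address
	saved frame pointer		<- hard_frame_pointer_offset
	realignment padding (stack_realign_fp only)
	saved general purpose registers
	padding0
	saved SSE registers
	va_arg register save area
	padding1			<- frame_pointer_offset
	local variables (get_frame_size ())
	outgoing arguments
	padding2			<- stack_pointer_offset

   to_allocate covers the va_arg area through padding2, less whatever
   ends up in the red zone.  */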
7679
7680 /* Emit code to save registers in the prologue. */
7681
7682 static void
7683 ix86_emit_save_regs (void)
7684 {
7685 unsigned int regno;
7686 rtx insn;
7687
7688 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
7689 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7690 {
7691 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
7692 RTX_FRAME_RELATED_P (insn) = 1;
7693 }
7694 }
7695
7696 /* Emit code to save registers using MOV insns. The first register
7697 is stored at POINTER + OFFSET. */
7698 static void
7699 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
7700 {
7701 unsigned int regno;
7702 rtx insn;
7703
7704 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7705 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7706 {
7707 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
7708 Pmode, offset),
7709 gen_rtx_REG (Pmode, regno));
7710 RTX_FRAME_RELATED_P (insn) = 1;
7711 offset += UNITS_PER_WORD;
7712 }
7713 }
7714
7715 /* Emit code to save SSE registers using MOV insns. The first register
7716 is stored at POINTER + OFFSET. */
7717 static void
7718 ix86_emit_save_sse_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
7719 {
7720 unsigned int regno;
7721 rtx insn;
7722 rtx mem;
7723
7724 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7725 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7726 {
7727 mem = adjust_address (gen_rtx_MEM (TImode, pointer), TImode, offset);
7728 set_mem_align (mem, 128);
7729 insn = emit_move_insn (mem, gen_rtx_REG (TImode, regno));
7730 RTX_FRAME_RELATED_P (insn) = 1;
7731 offset += 16;
7732 }
7733 }
7734
7735 /* Expand prologue or epilogue stack adjustment.
7736 The pattern exists to put a dependency on all ebp-based memory accesses.
7737 STYLE should be negative if instructions should be marked as frame related,
7738 zero if %r11 register is live and cannot be freely used and positive
7739 otherwise. */
7740
7741 static void
7742 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
7743 {
7744 rtx insn;
7745
7746 if (! TARGET_64BIT)
7747 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
7748 else if (x86_64_immediate_operand (offset, DImode))
7749 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
7750 else
7751 {
7752 rtx r11;
7753 /* r11 is used by indirect sibcall return as well, set before the
7754 epilogue and used after the epilogue. ATM indirect sibcall
7755 shouldn't be used together with huge frame sizes in one
7756 function because of the frame_size check in sibcall.c. */
7757 gcc_assert (style);
7758 r11 = gen_rtx_REG (DImode, R11_REG);
7759 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
7760 if (style < 0)
7761 RTX_FRAME_RELATED_P (insn) = 1;
7762 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
7763 offset));
7764 }
7765 if (style < 0)
7766 RTX_FRAME_RELATED_P (insn) = 1;
7767 }
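/* Typical use, as in the prologue below:
   pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
			      GEN_INT (-allocate), -1);
   carves the frame out of the stack, and because STYLE is negative the
   adjustment is marked frame related so that unwind info is emitted
   for it.  */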
7768
7769 /* Find an available register to be used as the dynamic realign argument
7770 pointer register. Such a register will be written in the prologue and
7771 used at the beginning of the body, so it must not be
7772 1. a parameter passing register.
7773 2. the GOT pointer.
7774 We reuse the static-chain register if it is available. Otherwise, we
7775 use DI for i386 and R13 for x86-64. We chose R13 since it has a
7776 shorter encoding.
7777 
7778 Return: the regno of the chosen register. */
7779
7780 static unsigned int
7781 find_drap_reg (void)
7782 {
7783 tree decl = cfun->decl;
7784
7785 if (TARGET_64BIT)
7786 {
7787 /* Use R13 for a nested function or a function that needs a static
7788 chain. Since a function with a tail call may use any caller-saved
7789 register in its epilogue, DRAP must not use a caller-saved
7790 register in that case. */
7791 if ((decl_function_context (decl)
7792 && !DECL_NO_STATIC_CHAIN (decl))
7793 || crtl->tail_call_emit)
7794 return R13_REG;
7795
7796 return R10_REG;
7797 }
7798 else
7799 {
7800 /* Use DI for a nested function or a function that needs a static
7801 chain. Since a function with a tail call may use any caller-saved
7802 register in its epilogue, DRAP must not use a caller-saved
7803 register in that case. */
7804 if ((decl_function_context (decl)
7805 && !DECL_NO_STATIC_CHAIN (decl))
7806 || crtl->tail_call_emit)
7807 return DI_REG;
7808
7809 /* Reuse static chain register if it isn't used for parameter
7810 passing. */
7811 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2
7812 && !lookup_attribute ("fastcall",
7813 TYPE_ATTRIBUTES (TREE_TYPE (decl))))
7814 return CX_REG;
7815 else
7816 return DI_REG;
7817 }
7818 }
7819
7820 /* Update incoming stack boundary and estimated stack alignment. */
7821
7822 static void
7823 ix86_update_stack_boundary (void)
7824 {
7825 /* Prefer the one specified at command line. */
7826 ix86_incoming_stack_boundary
7827 = (ix86_user_incoming_stack_boundary
7828 ? ix86_user_incoming_stack_boundary
7829 : ix86_default_incoming_stack_boundary);
7830
7831 /* Incoming stack alignment can be changed on individual functions
7832 via force_align_arg_pointer attribute. We use the smallest
7833 incoming stack boundary. */
7834 if (ix86_incoming_stack_boundary > MIN_STACK_BOUNDARY
7835 && lookup_attribute (ix86_force_align_arg_pointer_string,
7836 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
7837 ix86_incoming_stack_boundary = MIN_STACK_BOUNDARY;
7838
7839 /* The incoming stack frame has to be aligned at least at
7840 parm_stack_boundary. */
7841 if (ix86_incoming_stack_boundary < crtl->parm_stack_boundary)
7842 ix86_incoming_stack_boundary = crtl->parm_stack_boundary;
7843
7844 /* The stack at the entry of main is aligned by the runtime. We use
7845 the smallest incoming stack boundary. */
7846 if (ix86_incoming_stack_boundary > MAIN_STACK_BOUNDARY
7847 && DECL_NAME (current_function_decl)
7848 && MAIN_NAME_P (DECL_NAME (current_function_decl))
7849 && DECL_FILE_SCOPE_P (current_function_decl))
7850 ix86_incoming_stack_boundary = MAIN_STACK_BOUNDARY;
7851
7852 /* x86_64 varargs need 16-byte stack alignment for the register save
7853 area. */
7854 if (TARGET_64BIT
7855 && cfun->stdarg
7856 && crtl->stack_alignment_estimated < 128)
7857 crtl->stack_alignment_estimated = 128;
7858 }
7859
7860 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
7861 needed or an rtx for DRAP otherwise. */
7862
7863 static rtx
7864 ix86_get_drap_rtx (void)
7865 {
7866 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
7867 crtl->need_drap = true;
7868
7869 if (stack_realign_drap)
7870 {
7871 /* Assign DRAP to vDRAP and return vDRAP. */
7872 unsigned int regno = find_drap_reg ();
7873 rtx drap_vreg;
7874 rtx arg_ptr;
7875 rtx seq, insn;
7876
7877 arg_ptr = gen_rtx_REG (Pmode, regno);
7878 crtl->drap_reg = arg_ptr;
7879
7880 start_sequence ();
7881 drap_vreg = copy_to_reg (arg_ptr);
7882 seq = get_insns ();
7883 end_sequence ();
7884
7885 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
7886 RTX_FRAME_RELATED_P (insn) = 1;
7887 return drap_vreg;
7888 }
7889 else
7890 return NULL;
7891 }
7892
7893 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
7894
7895 static rtx
7896 ix86_internal_arg_pointer (void)
7897 {
7898 return virtual_incoming_args_rtx;
7899 }
7900
7901 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
7902 This is called from dwarf2out.c to emit call frame instructions
7903 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
7904 static void
7905 ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
7906 {
7907 rtx unspec = SET_SRC (pattern);
7908 gcc_assert (GET_CODE (unspec) == UNSPEC);
7909
7910 switch (index)
7911 {
7912 case UNSPEC_REG_SAVE:
7913 dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
7914 SET_DEST (pattern));
7915 break;
7916 case UNSPEC_DEF_CFA:
7917 dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
7918 INTVAL (XVECEXP (unspec, 0, 0)));
7919 break;
7920 default:
7921 gcc_unreachable ();
7922 }
7923 }
7924
7925 /* Finalize the stack_realign_needed flag, which guides the prologue and
7926 epilogue to be generated in the correct form. */
7927 static void
7928 ix86_finalize_stack_realign_flags (void)
7929 {
7930 /* Check if stack realignment is really needed after reload, and
7931 store the result in cfun. */
7932 unsigned int incoming_stack_boundary
7933 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
7934 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
7935 unsigned int stack_realign = (incoming_stack_boundary
7936 < (current_function_is_leaf
7937 ? crtl->max_used_stack_slot_alignment
7938 : crtl->stack_alignment_needed));
7939
7940 if (crtl->stack_realign_finalized)
7941 {
7942 /* After stack_realign_needed is finalized, we can no longer
7943 change it. */
7944 gcc_assert (crtl->stack_realign_needed == stack_realign);
7945 }
7946 else
7947 {
7948 crtl->stack_realign_needed = stack_realign;
7949 crtl->stack_realign_finalized = true;
7950 }
7951 }
7952
7953 /* Expand the prologue into a bunch of separate insns. */
7954
7955 void
7956 ix86_expand_prologue (void)
7957 {
7958 rtx insn;
7959 bool pic_reg_used;
7960 struct ix86_frame frame;
7961 HOST_WIDE_INT allocate;
7962
7963 ix86_finalize_stack_realign_flags ();
7964
7965 /* DRAP should not coexist with stack_realign_fp */
7966 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
7967
7968 ix86_compute_frame_layout (&frame);
7969
7970 /* Emit prologue code to adjust stack alignment and set up DRAP, in case
7971 DRAP is needed and stack realignment is really needed after reload. */
7972 if (crtl->drap_reg && crtl->stack_realign_needed)
7973 {
7974 rtx x, y;
7975 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
7976 int param_ptr_offset = (call_used_regs[REGNO (crtl->drap_reg)]
7977 ? 0 : UNITS_PER_WORD);
7978
7979 gcc_assert (stack_realign_drap);
7980
7981 /* Grab the argument pointer. */
7982 x = plus_constant (stack_pointer_rtx,
7983 (UNITS_PER_WORD + param_ptr_offset));
7984 y = crtl->drap_reg;
7985
7986 /* Only need to push the parameter pointer reg if it is a
7987 caller-saved reg. */
7988 if (!call_used_regs[REGNO (crtl->drap_reg)])
7989 {
7990 /* Push arg pointer reg */
7991 insn = emit_insn (gen_push (y));
7992 RTX_FRAME_RELATED_P (insn) = 1;
7993 }
7994
7995 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
7996 RTX_FRAME_RELATED_P (insn) = 1;
7997
7998 /* Align the stack. */
7999 insn = emit_insn ((*ix86_gen_andsp) (stack_pointer_rtx,
8000 stack_pointer_rtx,
8001 GEN_INT (-align_bytes)));
8002 RTX_FRAME_RELATED_P (insn) = 1;
8003
8004 /* Replicate the return address on the stack so that return
8005 address can be reached via (argp - 1) slot. This is needed
8006 to implement macro RETURN_ADDR_RTX and intrinsic function
8007 expand_builtin_return_addr etc. */
8008 x = crtl->drap_reg;
8009 x = gen_frame_mem (Pmode,
8010 plus_constant (x, -UNITS_PER_WORD));
8011 insn = emit_insn (gen_push (x));
8012 RTX_FRAME_RELATED_P (insn) = 1;
8013 }
8014
8015 /* Note: AT&T enter does NOT have reversed args. Enter is probably
8016 slower on all targets. Also sdb doesn't like it. */
8017
8018 if (frame_pointer_needed)
8019 {
8020 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
8021 RTX_FRAME_RELATED_P (insn) = 1;
8022
8023 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
8024 RTX_FRAME_RELATED_P (insn) = 1;
8025 }
8026
8027 if (stack_realign_fp)
8028 {
8029 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
8030 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
8031
8032 /* Align the stack. */
8033 insn = emit_insn ((*ix86_gen_andsp) (stack_pointer_rtx,
8034 stack_pointer_rtx,
8035 GEN_INT (-align_bytes)));
8036 RTX_FRAME_RELATED_P (insn) = 1;
8037 }
8038
8039 allocate = frame.to_allocate + frame.nsseregs * 16 + frame.padding0;
8040
8041 if (!frame.save_regs_using_mov)
8042 ix86_emit_save_regs ();
8043 else
8044 allocate += frame.nregs * UNITS_PER_WORD;
8045
8046 /* When using the red zone we may start saving registers before allocating
8047 the stack frame, saving one cycle of the prologue. However, avoid
8048 doing this if we are going to have to probe the stack, since at
8049 least on x86_64 the stack probe can turn into a call that clobbers
8050 a red zone location. */
8051 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE && frame.save_regs_using_mov
8052 && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT))
8053 ix86_emit_save_regs_using_mov ((frame_pointer_needed
8054 && !crtl->stack_realign_needed)
8055 ? hard_frame_pointer_rtx
8056 : stack_pointer_rtx,
8057 -frame.nregs * UNITS_PER_WORD);
8058
8059 if (allocate == 0)
8060 ;
8061 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
8062 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8063 GEN_INT (-allocate), -1);
8064 else
8065 {
8066 /* Only valid for Win32. */
8067 rtx eax = gen_rtx_REG (Pmode, AX_REG);
8068 bool eax_live;
8069 rtx t;
8070
8071 gcc_assert (!TARGET_64BIT || cfun->machine->call_abi == MS_ABI);
8072
8073 if (cfun->machine->call_abi == MS_ABI)
8074 eax_live = false;
8075 else
8076 eax_live = ix86_eax_live_at_start_p ();
8077
8078 if (eax_live)
8079 {
8080 emit_insn (gen_push (eax));
8081 allocate -= UNITS_PER_WORD;
8082 }
8083
8084 emit_move_insn (eax, GEN_INT (allocate));
8085
8086 if (TARGET_64BIT)
8087 insn = gen_allocate_stack_worker_64 (eax, eax);
8088 else
8089 insn = gen_allocate_stack_worker_32 (eax, eax);
8090 insn = emit_insn (insn);
8091 RTX_FRAME_RELATED_P (insn) = 1;
8092 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
8093 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
8094 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
8095 t, REG_NOTES (insn));
8096
8097 if (eax_live)
8098 {
8099 if (frame_pointer_needed)
8100 t = plus_constant (hard_frame_pointer_rtx,
8101 allocate
8102 - frame.to_allocate
8103 - frame.nregs * UNITS_PER_WORD);
8104 else
8105 t = plus_constant (stack_pointer_rtx, allocate);
8106 emit_move_insn (eax, gen_rtx_MEM (Pmode, t));
8107 }
8108 }
8109
8110 if (frame.save_regs_using_mov
8111 && !(!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE
8112 && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)))
8113 {
8114 if (!frame_pointer_needed
8115 || !frame.to_allocate
8116 || crtl->stack_realign_needed)
8117 ix86_emit_save_regs_using_mov (stack_pointer_rtx,
8118 frame.to_allocate
8119 + frame.nsseregs * 16 + frame.padding0);
8120 else
8121 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
8122 -frame.nregs * UNITS_PER_WORD);
8123 }
8124 if (!frame_pointer_needed
8125 || !frame.to_allocate
8126 || crtl->stack_realign_needed)
8127 ix86_emit_save_sse_regs_using_mov (stack_pointer_rtx,
8128 frame.to_allocate);
8129 else
8130 ix86_emit_save_sse_regs_using_mov (hard_frame_pointer_rtx,
8131 - frame.nregs * UNITS_PER_WORD
8132 - frame.nsseregs * 16
8133 - frame.padding0);
8134
8135 pic_reg_used = false;
8136 if (pic_offset_table_rtx
8137 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
8138 || crtl->profile))
8139 {
8140 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
8141
8142 if (alt_pic_reg_used != INVALID_REGNUM)
8143 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
8144
8145 pic_reg_used = true;
8146 }
8147
8148 if (pic_reg_used)
8149 {
8150 if (TARGET_64BIT)
8151 {
8152 if (ix86_cmodel == CM_LARGE_PIC)
8153 {
8154 rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
8155 rtx label = gen_label_rtx ();
8156 emit_label (label);
8157 LABEL_PRESERVE_P (label) = 1;
8158 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
8159 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
8160 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
8161 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
8162 pic_offset_table_rtx, tmp_reg));
8163 }
8164 else
8165 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
8166 }
8167 else
8168 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
8169 }
8170
8171 /* Prevent function calls from being scheduled before the call to mcount.
8172 In the pic_reg_used case, make sure that the got load isn't deleted. */
8173 if (crtl->profile)
8174 {
8175 if (pic_reg_used)
8176 emit_insn (gen_prologue_use (pic_offset_table_rtx));
8177 emit_insn (gen_blockage ());
8178 }
8179
8180 if (crtl->drap_reg && !crtl->stack_realign_needed)
8181 {
8182 /* vDRAP is set up, but after reload it turns out stack realignment
8183 isn't necessary; here we emit prologue code to set up DRAP
8184 without the stack realignment adjustment. */
8185 int drap_bp_offset = UNITS_PER_WORD * 2;
8186 rtx x = plus_constant (hard_frame_pointer_rtx, drap_bp_offset);
8187 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, x));
8188 }
8189
8190 /* Emit cld instruction if stringops are used in the function. */
8191 if (TARGET_CLD && ix86_current_function_needs_cld)
8192 emit_insn (gen_cld ());
8193 }
8194
8195 /* Emit code to restore saved registers using MOV insns. First register
8196 is restored from POINTER + OFFSET. */
8197 static void
8198 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
8199 int maybe_eh_return)
8200 {
8201 int regno;
8202 rtx base_address = gen_rtx_MEM (Pmode, pointer);
8203
8204 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8205 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
8206 {
8207 /* Ensure that adjust_address won't be forced to produce a pointer
8208 out of the range allowed by the x86-64 instruction set. */
8209 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
8210 {
8211 rtx r11;
8212
8213 r11 = gen_rtx_REG (DImode, R11_REG);
8214 emit_move_insn (r11, GEN_INT (offset));
8215 emit_insn (gen_adddi3 (r11, r11, pointer));
8216 base_address = gen_rtx_MEM (Pmode, r11);
8217 offset = 0;
8218 }
8219 emit_move_insn (gen_rtx_REG (Pmode, regno),
8220 adjust_address (base_address, Pmode, offset));
8221 offset += UNITS_PER_WORD;
8222 }
8223 }
8224
8225 /* Emit code to restore saved SSE registers using MOV insns. The first
8226 register is restored from POINTER + OFFSET. */
8227 static void
8228 ix86_emit_restore_sse_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
8229 int maybe_eh_return)
8230 {
8231 int regno;
8232 rtx base_address = gen_rtx_MEM (TImode, pointer);
8233 rtx mem;
8234
8235 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8236 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
8237 {
8238 /* Ensure that adjust_address won't be forced to produce a pointer
8239 out of the range allowed by the x86-64 instruction set. */
8240 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
8241 {
8242 rtx r11;
8243
8244 r11 = gen_rtx_REG (DImode, R11_REG);
8245 emit_move_insn (r11, GEN_INT (offset));
8246 emit_insn (gen_adddi3 (r11, r11, pointer));
8247 base_address = gen_rtx_MEM (TImode, r11);
8248 offset = 0;
8249 }
8250 mem = adjust_address (base_address, TImode, offset);
8251 set_mem_align (mem, 128);
8252 emit_move_insn (gen_rtx_REG (TImode, regno), mem);
8253 offset += 16;
8254 }
8255 }
8256
8257 /* Restore function stack, frame, and registers. */
8258
8259 void
8260 ix86_expand_epilogue (int style)
8261 {
8262 int regno;
8263 int sp_valid;
8264 struct ix86_frame frame;
8265 HOST_WIDE_INT offset;
8266
8267 ix86_finalize_stack_realign_flags ();
8268
8269 /* When stack is realigned, SP must be valid. */
8270 sp_valid = (!frame_pointer_needed
8271 || current_function_sp_is_unchanging
8272 || stack_realign_fp);
8273
8274 ix86_compute_frame_layout (&frame);
8275
8276 /* Calculate start of saved registers relative to ebp. Special care
8277 must be taken for the normal return case of a function using
8278 eh_return: the eax and edx registers are marked as saved, but not
8279 restored along this path. */
8280 offset = frame.nregs;
8281 if (crtl->calls_eh_return && style != 2)
8282 offset -= 2;
8283 offset *= -UNITS_PER_WORD;
8284 offset -= frame.nsseregs * 16 + frame.padding0;
8285
8286 /* If we're only restoring one register and sp is not valid, then
8287 use a move instruction to restore the register, since it's
8288 less work than reloading sp and popping the register.
8289 
8290 The default code results in a stack adjustment using an add/lea
8291 instruction, while this code results in a LEAVE instruction (or discrete
8292 equivalent), so it is profitable in some other cases as well, especially
8293 when there are no registers to restore. We also use this code when
8294 TARGET_USE_LEAVE and there is exactly one register to pop. This
8295 heuristic may need some tuning in the future. */
8296 if ((!sp_valid && frame.nregs <= 1)
8297 || (TARGET_EPILOGUE_USING_MOVE
8298 && cfun->machine->use_fast_prologue_epilogue
8299 && (frame.nregs > 1 || frame.to_allocate))
8300 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
8301 || (frame_pointer_needed && TARGET_USE_LEAVE
8302 && cfun->machine->use_fast_prologue_epilogue
8303 && frame.nregs == 1)
8304 || crtl->calls_eh_return)
8305 {
8306 /* Restore registers. We can use ebp or esp to address the memory
8307 locations. If both are available, default to ebp, since offsets
8308 are known to be small. The only exception is esp pointing directly
8309 to the end of the block of saved registers, where we may simplify
8310 the addressing mode.
8311 
8312 If we are realigning the stack with bp and sp, the register restores
8313 can't be addressed via bp; sp must be used instead. */
8314
8315 if (!frame_pointer_needed
8316 || (sp_valid && !frame.to_allocate)
8317 || stack_realign_fp)
8318 {
8319 ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
8320 frame.to_allocate, style == 2);
8321 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
8322 frame.to_allocate
8323 + frame.nsseregs * 16
8324 + frame.padding0, style == 2);
8325 }
8326 else
8327 {
8328 ix86_emit_restore_sse_regs_using_mov (hard_frame_pointer_rtx,
8329 offset, style == 2);
8330 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
8331 offset
8332 + frame.nsseregs * 16
8333 + frame.padding0, style == 2);
8334 }
8335
8336 /* eh_return epilogues need %ecx added to the stack pointer. */
8337 if (style == 2)
8338 {
8339 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
8340
8341 /* Stack align doesn't work with eh_return. */
8342 gcc_assert (!crtl->stack_realign_needed);
8343
8344 if (frame_pointer_needed)
8345 {
8346 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
8347 tmp = plus_constant (tmp, UNITS_PER_WORD);
8348 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
8349
8350 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
8351 emit_move_insn (hard_frame_pointer_rtx, tmp);
8352
8353 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
8354 const0_rtx, style);
8355 }
8356 else
8357 {
8358 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
8359 tmp = plus_constant (tmp, (frame.to_allocate
8360 + frame.nregs * UNITS_PER_WORD
8361 + frame.nsseregs * 16
8362 + frame.padding0));
8363 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
8364 }
8365 }
8366 else if (!frame_pointer_needed)
8367 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8368 GEN_INT (frame.to_allocate
8369 + frame.nregs * UNITS_PER_WORD
8370 + frame.nsseregs * 16
8371 + frame.padding0),
8372 style);
8373 /* If not an i386, mov & pop is faster than "leave". */
8374 else if (TARGET_USE_LEAVE || optimize_function_for_size_p (cfun)
8375 || !cfun->machine->use_fast_prologue_epilogue)
8376 emit_insn ((*ix86_gen_leave) ());
8377 else
8378 {
8379 pro_epilogue_adjust_stack (stack_pointer_rtx,
8380 hard_frame_pointer_rtx,
8381 const0_rtx, style);
8382
8383 emit_insn ((*ix86_gen_pop1) (hard_frame_pointer_rtx));
8384 }
8385 }
8386 else
8387 {
8388 /* The first step is to deallocate the stack frame so that we can
8389 pop the registers.
8390 
8391 If we realign the stack with the frame pointer, then the stack pointer
8392 cannot be recovered via lea $offset(%bp), %sp, because
8393 there is a padding area between bp and sp for the realignment;
8394 "add $to_allocate, %sp" must be used instead. */
8395 if (!sp_valid)
8396 {
8397 gcc_assert (frame_pointer_needed);
8398 gcc_assert (!stack_realign_fp);
8399 pro_epilogue_adjust_stack (stack_pointer_rtx,
8400 hard_frame_pointer_rtx,
8401 GEN_INT (offset), style);
8402 ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
8403 frame.to_allocate, style == 2);
8404 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8405 GEN_INT (frame.nsseregs * 16), style);
8406 }
8407 else if (frame.to_allocate || frame.nsseregs)
8408 {
8409 ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
8410 frame.to_allocate,
8411 style == 2);
8412 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8413 GEN_INT (frame.to_allocate
8414 + frame.nsseregs * 16
8415 + frame.padding0), style);
8416 }
8417
8418 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8419 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
8420 emit_insn ((*ix86_gen_pop1) (gen_rtx_REG (Pmode, regno)));
8421 if (frame_pointer_needed)
8422 {
8423 /* Leave results in shorter dependency chains on CPUs that are
8424 able to grok it fast. */
8425 if (TARGET_USE_LEAVE)
8426 emit_insn ((*ix86_gen_leave) ());
8427 else
8428 {
8429 /* If stack realignment really happened, restoring the stack
8430 pointer from the hard frame pointer is a must when not using
8431 leave. */
8432 if (stack_realign_fp)
8433 pro_epilogue_adjust_stack (stack_pointer_rtx,
8434 hard_frame_pointer_rtx,
8435 const0_rtx, style);
8436 emit_insn ((*ix86_gen_pop1) (hard_frame_pointer_rtx));
8437 }
8438 }
8439 }
8440
8441 if (crtl->drap_reg && crtl->stack_realign_needed)
8442 {
8443 int param_ptr_offset = (call_used_regs[REGNO (crtl->drap_reg)]
8444 ? 0 : UNITS_PER_WORD);
8445 gcc_assert (stack_realign_drap);
8446 emit_insn ((*ix86_gen_add3) (stack_pointer_rtx,
8447 crtl->drap_reg,
8448 GEN_INT (-(UNITS_PER_WORD
8449 + param_ptr_offset))));
8450 if (!call_used_regs[REGNO (crtl->drap_reg)])
8451 emit_insn ((*ix86_gen_pop1) (crtl->drap_reg));
8452
8453 }
8454
8455 /* Sibcall epilogues don't want a return instruction. */
8456 if (style == 0)
8457 return;
8458
8459 if (crtl->args.pops_args && crtl->args.size)
8460 {
8461 rtx popc = GEN_INT (crtl->args.pops_args);
8462
8463 /* i386 can only pop 64K bytes. If asked to pop more, pop
8464 return address, do explicit add, and jump indirectly to the
8465 caller. */
8466
8467 if (crtl->args.pops_args >= 65536)
8468 {
8469 rtx ecx = gen_rtx_REG (SImode, CX_REG);
8470
8471 /* There is no "pascal" calling convention in any 64bit ABI. */
8472 gcc_assert (!TARGET_64BIT);
8473
8474 emit_insn (gen_popsi1 (ecx));
8475 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
8476 emit_jump_insn (gen_return_indirect_internal (ecx));
8477 }
8478 else
8479 emit_jump_insn (gen_return_pop_internal (popc));
8480 }
8481 else
8482 emit_jump_insn (gen_return_internal ());
8483 }
8484
8485 /* Reset from the function's potential modifications. */
8486
8487 static void
8488 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
8489 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
8490 {
8491 if (pic_offset_table_rtx)
8492 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
8493 #if TARGET_MACHO
8494 /* Mach-O doesn't support labels at the end of objects, so if
8495 it looks like we might want one, insert a NOP. */
8496 {
8497 rtx insn = get_last_insn ();
8498 while (insn
8499 && NOTE_P (insn)
8500 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
8501 insn = PREV_INSN (insn);
8502 if (insn
8503 && (LABEL_P (insn)
8504 || (NOTE_P (insn)
8505 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
8506 fputs ("\tnop\n", file);
8507 }
8508 #endif
8509
8510 }
8511 \f
8512 /* Extract the parts of an RTL expression that is a valid memory address
8513 for an instruction. Return 0 if the structure of the address is
8514 grossly off. Return -1 if the address contains ASHIFT, so it is not
8515 strictly valid, but is still used for computing the length of an lea instruction. */
8516
8517 int
8518 ix86_decompose_address (rtx addr, struct ix86_address *out)
8519 {
8520 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
8521 rtx base_reg, index_reg;
8522 HOST_WIDE_INT scale = 1;
8523 rtx scale_rtx = NULL_RTX;
8524 int retval = 1;
8525 enum ix86_address_seg seg = SEG_DEFAULT;
8526
8527 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
8528 base = addr;
8529 else if (GET_CODE (addr) == PLUS)
8530 {
8531 rtx addends[4], op;
8532 int n = 0, i;
8533
8534 op = addr;
8535 do
8536 {
8537 if (n >= 4)
8538 return 0;
8539 addends[n++] = XEXP (op, 1);
8540 op = XEXP (op, 0);
8541 }
8542 while (GET_CODE (op) == PLUS);
8543 if (n >= 4)
8544 return 0;
8545 addends[n] = op;
8546
8547 for (i = n; i >= 0; --i)
8548 {
8549 op = addends[i];
8550 switch (GET_CODE (op))
8551 {
8552 case MULT:
8553 if (index)
8554 return 0;
8555 index = XEXP (op, 0);
8556 scale_rtx = XEXP (op, 1);
8557 break;
8558
8559 case UNSPEC:
8560 if (XINT (op, 1) == UNSPEC_TP
8561 && TARGET_TLS_DIRECT_SEG_REFS
8562 && seg == SEG_DEFAULT)
8563 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
8564 else
8565 return 0;
8566 break;
8567
8568 case REG:
8569 case SUBREG:
8570 if (!base)
8571 base = op;
8572 else if (!index)
8573 index = op;
8574 else
8575 return 0;
8576 break;
8577
8578 case CONST:
8579 case CONST_INT:
8580 case SYMBOL_REF:
8581 case LABEL_REF:
8582 if (disp)
8583 return 0;
8584 disp = op;
8585 break;
8586
8587 default:
8588 return 0;
8589 }
8590 }
8591 }
8592 else if (GET_CODE (addr) == MULT)
8593 {
8594 index = XEXP (addr, 0); /* index*scale */
8595 scale_rtx = XEXP (addr, 1);
8596 }
8597 else if (GET_CODE (addr) == ASHIFT)
8598 {
8599 rtx tmp;
8600
8601 /* We're called for lea too, which implements ashift on occasion. */
8602 index = XEXP (addr, 0);
8603 tmp = XEXP (addr, 1);
8604 if (!CONST_INT_P (tmp))
8605 return 0;
8606 scale = INTVAL (tmp);
8607 if ((unsigned HOST_WIDE_INT) scale > 3)
8608 return 0;
8609 scale = 1 << scale;
8610 retval = -1;
8611 }
8612 else
8613 disp = addr; /* displacement */
8614
8615 /* Extract the integral value of scale. */
8616 if (scale_rtx)
8617 {
8618 if (!CONST_INT_P (scale_rtx))
8619 return 0;
8620 scale = INTVAL (scale_rtx);
8621 }
8622
8623 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
8624 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
8625
8626 /* Allow arg pointer and stack pointer as index if there is no scaling. */
8627 if (base_reg && index_reg && scale == 1
8628 && (index_reg == arg_pointer_rtx
8629 || index_reg == frame_pointer_rtx
8630 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
8631 {
8632 rtx tmp;
8633 tmp = base, base = index, index = tmp;
8634 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
8635 }
8636
8637 /* Special case: %ebp cannot be encoded as a base without a displacement. */
8638 if ((base_reg == hard_frame_pointer_rtx
8639 || base_reg == frame_pointer_rtx
8640 || base_reg == arg_pointer_rtx) && !disp)
8641 disp = const0_rtx;
8642
8643 /* Special case: on K6, [%esi] makes the instruction vector decoded.
8644 Avoid this by transforming to [%esi+0].
8645 Reload calls address legitimization without cfun defined, so we need
8646 to test cfun for being non-NULL. */
8647 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
8648 && base_reg && !index_reg && !disp
8649 && REG_P (base_reg)
8650 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
8651 disp = const0_rtx;
8652
8653 /* Special case: encode reg+reg instead of reg*2. */
8654 if (!base && index && scale && scale == 2)
8655 base = index, base_reg = index_reg, scale = 1;
8656
8657 /* Special case: scaling cannot be encoded without base or displacement. */
8658 if (!base && !disp && index && scale != 1)
8659 disp = const0_rtx;
8660
8661 out->base = base;
8662 out->index = index;
8663 out->disp = disp;
8664 out->scale = scale;
8665 out->seg = seg;
8666
8667 return retval;
8668 }
8669 \f
8670 /* Return cost of the memory address x.
8671 For i386, it is better to use a complex address than let gcc copy
8672 the address into a reg and make a new pseudo. But not if the address
8673 requires two regs - that would mean more pseudos with longer
8674 lifetimes. */
8675 static int
8676 ix86_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
8677 {
8678 struct ix86_address parts;
8679 int cost = 1;
8680 int ok = ix86_decompose_address (x, &parts);
8681
8682 gcc_assert (ok);
8683
8684 if (parts.base && GET_CODE (parts.base) == SUBREG)
8685 parts.base = SUBREG_REG (parts.base);
8686 if (parts.index && GET_CODE (parts.index) == SUBREG)
8687 parts.index = SUBREG_REG (parts.index);
8688
8689 /* Attempt to minimize number of registers in the address. */
8690 if ((parts.base
8691 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
8692 || (parts.index
8693 && (!REG_P (parts.index)
8694 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
8695 cost++;
8696
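/* Charge one more when both the base and the index are distinct
   operands that are not hard registers.  */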
8697 if (parts.base
8698 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
8699 && parts.index
8700 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
8701 && parts.base != parts.index)
8702 cost++;
8703
8704 /* The AMD-K6 doesn't like addresses with the ModR/M byte set to 00_xxx_100b,
8705 since its predecode logic can't detect the length of such instructions
8706 and decoding degenerates to the vector decoder. Increase the cost of such
8707 addresses here. The penalty is at least 2 cycles. It may be worthwhile
8708 to split such addresses or even to refuse them entirely.
8709
8710 The following addressing modes are affected:
8711 [base+scale*index]
8712 [scale*index+disp]
8713 [base+index]
8714
8715 The first and last cases may be avoidable by explicitly coding a zero
8716 displacement into the memory address, but I don't have an AMD-K6 machine
8717 handy to check this theory. */
8718
8719 if (TARGET_K6
8720 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
8721 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
8722 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
8723 cost += 10;
8724
8725 return cost;
8726 }
8727 \f
8728 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
8729 this is used to form addresses to local data when -fPIC is in
8730 use. */
8731
8732 static bool
8733 darwin_local_data_pic (rtx disp)
8734 {
8735 return (GET_CODE (disp) == UNSPEC
8736 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
8737 }
8738
8739 /* Determine if a given RTX is a valid constant. We already know this
8740 satisfies CONSTANT_P. */
8741
8742 bool
8743 legitimate_constant_p (rtx x)
8744 {
8745 switch (GET_CODE (x))
8746 {
8747 case CONST:
8748 x = XEXP (x, 0);
8749
8750 if (GET_CODE (x) == PLUS)
8751 {
8752 if (!CONST_INT_P (XEXP (x, 1)))
8753 return false;
8754 x = XEXP (x, 0);
8755 }
8756
8757 if (TARGET_MACHO && darwin_local_data_pic (x))
8758 return true;
8759
8760 /* Only some unspecs are valid as "constants". */
8761 if (GET_CODE (x) == UNSPEC)
8762 switch (XINT (x, 1))
8763 {
8764 case UNSPEC_GOT:
8765 case UNSPEC_GOTOFF:
8766 case UNSPEC_PLTOFF:
8767 return TARGET_64BIT;
8768 case UNSPEC_TPOFF:
8769 case UNSPEC_NTPOFF:
8770 x = XVECEXP (x, 0, 0);
8771 return (GET_CODE (x) == SYMBOL_REF
8772 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
8773 case UNSPEC_DTPOFF:
8774 x = XVECEXP (x, 0, 0);
8775 return (GET_CODE (x) == SYMBOL_REF
8776 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
8777 default:
8778 return false;
8779 }
8780
8781 /* We must have drilled down to a symbol. */
8782 if (GET_CODE (x) == LABEL_REF)
8783 return true;
8784 if (GET_CODE (x) != SYMBOL_REF)
8785 return false;
8786 /* FALLTHRU */
8787
8788 case SYMBOL_REF:
8789 /* TLS symbols are never valid. */
8790 if (SYMBOL_REF_TLS_MODEL (x))
8791 return false;
8792
8793 /* DLLIMPORT symbols are never valid. */
8794 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
8795 && SYMBOL_REF_DLLIMPORT_P (x))
8796 return false;
8797 break;
8798
8799 case CONST_DOUBLE:
8800 if (GET_MODE (x) == TImode
8801 && x != CONST0_RTX (TImode)
8802 && !TARGET_64BIT)
8803 return false;
8804 break;
8805
8806 case CONST_VECTOR:
8807 if (x == CONST0_RTX (GET_MODE (x)))
8808 return true;
8809 return false;
8810
8811 default:
8812 break;
8813 }
8814
8815 /* Otherwise we handle everything else in the move patterns. */
8816 return true;
8817 }
8818
8819 /* Determine if it's legal to put X into the constant pool. This
8820 is not possible for the address of thread-local symbols, which
8821 is checked above. */
8822
8823 static bool
8824 ix86_cannot_force_const_mem (rtx x)
8825 {
8826 /* We can always put integral constants and vectors in memory. */
8827 switch (GET_CODE (x))
8828 {
8829 case CONST_INT:
8830 case CONST_DOUBLE:
8831 case CONST_VECTOR:
8832 return false;
8833
8834 default:
8835 break;
8836 }
8837 return !legitimate_constant_p (x);
8838 }
8839
8840 /* Determine if a given RTX is a valid constant address. */
8841
8842 bool
8843 constant_address_p (rtx x)
8844 {
8845 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
8846 }
8847
8848 /* Nonzero if the constant value X is a legitimate general operand
8849 when generating PIC code. It is given that flag_pic is on and
8850 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
8851
8852 bool
8853 legitimate_pic_operand_p (rtx x)
8854 {
8855 rtx inner;
8856
8857 switch (GET_CODE (x))
8858 {
8859 case CONST:
8860 inner = XEXP (x, 0);
8861 if (GET_CODE (inner) == PLUS
8862 && CONST_INT_P (XEXP (inner, 1)))
8863 inner = XEXP (inner, 0);
8864
8865 /* Only some unspecs are valid as "constants". */
8866 if (GET_CODE (inner) == UNSPEC)
8867 switch (XINT (inner, 1))
8868 {
8869 case UNSPEC_GOT:
8870 case UNSPEC_GOTOFF:
8871 case UNSPEC_PLTOFF:
8872 return TARGET_64BIT;
8873 case UNSPEC_TPOFF:
8874 x = XVECEXP (inner, 0, 0);
8875 return (GET_CODE (x) == SYMBOL_REF
8876 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
8877 case UNSPEC_MACHOPIC_OFFSET:
8878 return legitimate_pic_address_disp_p (x);
8879 default:
8880 return false;
8881 }
8882 /* FALLTHRU */
8883
8884 case SYMBOL_REF:
8885 case LABEL_REF:
8886 return legitimate_pic_address_disp_p (x);
8887
8888 default:
8889 return true;
8890 }
8891 }
8892
8893 /* Determine if a given CONST RTX is a valid memory displacement
8894 in PIC mode. */
8895
8896 int
8897 legitimate_pic_address_disp_p (rtx disp)
8898 {
8899 bool saw_plus;
8900
8901 /* In 64bit mode we can allow direct addresses of symbols and labels
8902 when they are not dynamic symbols. */
8903 if (TARGET_64BIT)
8904 {
8905 rtx op0 = disp, op1;
8906
8907 switch (GET_CODE (disp))
8908 {
8909 case LABEL_REF:
8910 return true;
8911
8912 case CONST:
8913 if (GET_CODE (XEXP (disp, 0)) != PLUS)
8914 break;
8915 op0 = XEXP (XEXP (disp, 0), 0);
8916 op1 = XEXP (XEXP (disp, 0), 1);
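/* Only accept a modest constant offset (+/-16MB) on top of the
   symbol; larger offsets could reach outside the referenced object.  */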
8917 if (!CONST_INT_P (op1)
8918 || INTVAL (op1) >= 16*1024*1024
8919 || INTVAL (op1) < -16*1024*1024)
8920 break;
8921 if (GET_CODE (op0) == LABEL_REF)
8922 return true;
8923 if (GET_CODE (op0) != SYMBOL_REF)
8924 break;
8925 /* FALLTHRU */
8926
8927 case SYMBOL_REF:
8928 /* TLS references should always be enclosed in UNSPEC. */
8929 if (SYMBOL_REF_TLS_MODEL (op0))
8930 return false;
8931 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
8932 && ix86_cmodel != CM_LARGE_PIC)
8933 return true;
8934 break;
8935
8936 default:
8937 break;
8938 }
8939 }
8940 if (GET_CODE (disp) != CONST)
8941 return 0;
8942 disp = XEXP (disp, 0);
8943
8944 if (TARGET_64BIT)
8945 {
8946 /* It is not safe to allow PLUS expressions here; this limits the allowed
8947 distance of GOT references. We should not need these anyway. */
8948 if (GET_CODE (disp) != UNSPEC
8949 || (XINT (disp, 1) != UNSPEC_GOTPCREL
8950 && XINT (disp, 1) != UNSPEC_GOTOFF
8951 && XINT (disp, 1) != UNSPEC_PLTOFF))
8952 return 0;
8953
8954 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
8955 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
8956 return 0;
8957 return 1;
8958 }
8959
8960 saw_plus = false;
8961 if (GET_CODE (disp) == PLUS)
8962 {
8963 if (!CONST_INT_P (XEXP (disp, 1)))
8964 return 0;
8965 disp = XEXP (disp, 0);
8966 saw_plus = true;
8967 }
8968
8969 if (TARGET_MACHO && darwin_local_data_pic (disp))
8970 return 1;
8971
8972 if (GET_CODE (disp) != UNSPEC)
8973 return 0;
8974
8975 switch (XINT (disp, 1))
8976 {
8977 case UNSPEC_GOT:
8978 if (saw_plus)
8979 return false;
8980 /* We need to check for both symbols and labels because VxWorks loads
8981 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
8982 details. */
8983 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
8984 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
8985 case UNSPEC_GOTOFF:
8986 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
8987 While the ABI also specifies a 32bit relocation, we don't produce it in
8988 the small PIC model at all. */
8989 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
8990 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
8991 && !TARGET_64BIT)
8992 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
8993 return false;
8994 case UNSPEC_GOTTPOFF:
8995 case UNSPEC_GOTNTPOFF:
8996 case UNSPEC_INDNTPOFF:
8997 if (saw_plus)
8998 return false;
8999 disp = XVECEXP (disp, 0, 0);
9000 return (GET_CODE (disp) == SYMBOL_REF
9001 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
9002 case UNSPEC_NTPOFF:
9003 disp = XVECEXP (disp, 0, 0);
9004 return (GET_CODE (disp) == SYMBOL_REF
9005 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
9006 case UNSPEC_DTPOFF:
9007 disp = XVECEXP (disp, 0, 0);
9008 return (GET_CODE (disp) == SYMBOL_REF
9009 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
9010 }
9011
9012 return 0;
9013 }
9014
9015 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
9016 memory address for an instruction. The MODE argument is the machine mode
9017 for the MEM expression that wants to use this address.
9018
9019 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
9020 convert common non-canonical forms to canonical form so that they will
9021 be recognized. */
9022
9023 int
9024 legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
9025 rtx addr, int strict)
9026 {
9027 struct ix86_address parts;
9028 rtx base, index, disp;
9029 HOST_WIDE_INT scale;
9030 const char *reason = NULL;
9031 rtx reason_rtx = NULL_RTX;
9032
9033 if (ix86_decompose_address (addr, &parts) <= 0)
9034 {
9035 reason = "decomposition failed";
9036 goto report_error;
9037 }
9038
9039 base = parts.base;
9040 index = parts.index;
9041 disp = parts.disp;
9042 scale = parts.scale;
9043
9044 /* Validate base register.
9045
9046 Don't allow SUBREGs that span more than a word here. Doing so can lead to spill
9047 failures when the base is one word out of a two word structure, which is
9048 represented internally as a DImode int. */
9049
9050 if (base)
9051 {
9052 rtx reg;
9053 reason_rtx = base;
9054
9055 if (REG_P (base))
9056 reg = base;
9057 else if (GET_CODE (base) == SUBREG
9058 && REG_P (SUBREG_REG (base))
9059 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
9060 <= UNITS_PER_WORD)
9061 reg = SUBREG_REG (base);
9062 else
9063 {
9064 reason = "base is not a register";
9065 goto report_error;
9066 }
9067
9068 if (GET_MODE (base) != Pmode)
9069 {
9070 reason = "base is not in Pmode";
9071 goto report_error;
9072 }
9073
9074 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
9075 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
9076 {
9077 reason = "base is not valid";
9078 goto report_error;
9079 }
9080 }
9081
9082 /* Validate index register.
9083
9084 Don't allow SUBREGs that span more than a word here -- same as above. */
9085
9086 if (index)
9087 {
9088 rtx reg;
9089 reason_rtx = index;
9090
9091 if (REG_P (index))
9092 reg = index;
9093 else if (GET_CODE (index) == SUBREG
9094 && REG_P (SUBREG_REG (index))
9095 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
9096 <= UNITS_PER_WORD)
9097 reg = SUBREG_REG (index);
9098 else
9099 {
9100 reason = "index is not a register";
9101 goto report_error;
9102 }
9103
9104 if (GET_MODE (index) != Pmode)
9105 {
9106 reason = "index is not in Pmode";
9107 goto report_error;
9108 }
9109
9110 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
9111 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
9112 {
9113 reason = "index is not valid";
9114 goto report_error;
9115 }
9116 }
9117
9118 /* Validate scale factor. */
9119 if (scale != 1)
9120 {
9121 reason_rtx = GEN_INT (scale);
9122 if (!index)
9123 {
9124 reason = "scale without index";
9125 goto report_error;
9126 }
9127
9128 if (scale != 2 && scale != 4 && scale != 8)
9129 {
9130 reason = "scale is not a valid multiplier";
9131 goto report_error;
9132 }
9133 }
9134
9135 /* Validate displacement. */
9136 if (disp)
9137 {
9138 reason_rtx = disp;
9139
9140 if (GET_CODE (disp) == CONST
9141 && GET_CODE (XEXP (disp, 0)) == UNSPEC
9142 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
9143 switch (XINT (XEXP (disp, 0), 1))
9144 {
9145 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
9146 used. While the ABI also specifies 32bit relocations, we don't produce
9147 them at all and use IP-relative addressing instead. */
9148 case UNSPEC_GOT:
9149 case UNSPEC_GOTOFF:
9150 gcc_assert (flag_pic);
9151 if (!TARGET_64BIT)
9152 goto is_legitimate_pic;
9153 reason = "64bit address unspec";
9154 goto report_error;
9155
9156 case UNSPEC_GOTPCREL:
9157 gcc_assert (flag_pic);
9158 goto is_legitimate_pic;
9159
9160 case UNSPEC_GOTTPOFF:
9161 case UNSPEC_GOTNTPOFF:
9162 case UNSPEC_INDNTPOFF:
9163 case UNSPEC_NTPOFF:
9164 case UNSPEC_DTPOFF:
9165 break;
9166
9167 default:
9168 reason = "invalid address unspec";
9169 goto report_error;
9170 }
9171
9172 else if (SYMBOLIC_CONST (disp)
9173 && (flag_pic
9174 || (TARGET_MACHO
9175 #if TARGET_MACHO
9176 && MACHOPIC_INDIRECT
9177 && !machopic_operand_p (disp)
9178 #endif
9179 )))
9180 {
9181
9182 is_legitimate_pic:
9183 if (TARGET_64BIT && (index || base))
9184 {
9185 /* foo@dtpoff(%rX) is ok. */
9186 if (GET_CODE (disp) != CONST
9187 || GET_CODE (XEXP (disp, 0)) != PLUS
9188 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
9189 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
9190 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
9191 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
9192 {
9193 reason = "non-constant pic memory reference";
9194 goto report_error;
9195 }
9196 }
9197 else if (! legitimate_pic_address_disp_p (disp))
9198 {
9199 reason = "displacement is an invalid pic construct";
9200 goto report_error;
9201 }
9202
9203 /* This code used to verify that a symbolic pic displacement
9204 includes the pic_offset_table_rtx register.
9205
9206 While this is a good idea, unfortunately these constructs may
9207 be created by the "adds using lea" optimization for incorrect
9208 code like:
9209
9210 int a;
9211 int foo(int i)
9212 {
9213 return *(&a+i);
9214 }
9215
9216 This code is nonsensical, but it results in addressing the
9217 GOT table with pic_offset_table_rtx as the base. We can't
9218 just refuse it easily, since it gets matched by the
9219 "addsi3" pattern, which later gets split into an lea when
9220 the output register differs from the input. While this
9221 could be handled by a separate addsi pattern for this case
9222 that never results in an lea, disabling this test seems to
9223 be the easier and correct fix for the crash. */
9224 }
9225 else if (GET_CODE (disp) != LABEL_REF
9226 && !CONST_INT_P (disp)
9227 && (GET_CODE (disp) != CONST
9228 || !legitimate_constant_p (disp))
9229 && (GET_CODE (disp) != SYMBOL_REF
9230 || !legitimate_constant_p (disp)))
9231 {
9232 reason = "displacement is not constant";
9233 goto report_error;
9234 }
9235 else if (TARGET_64BIT
9236 && !x86_64_immediate_operand (disp, VOIDmode))
9237 {
9238 reason = "displacement is out of range";
9239 goto report_error;
9240 }
9241 }
9242
9243 /* Everything looks valid. */
9244 return TRUE;
9245
9246 report_error:
9247 return FALSE;
9248 }
9249 \f
9250 /* Return a unique alias set for the GOT. */
9251
9252 static alias_set_type
9253 ix86_GOT_alias_set (void)
9254 {
9255 static alias_set_type set = -1;
9256 if (set == -1)
9257 set = new_alias_set ();
9258 return set;
9259 }
9260
9261 /* Return a legitimate reference for ORIG (an address) using the
9262 register REG. If REG is 0, a new pseudo is generated.
9263
9264 There are two types of references that must be handled:
9265
9266 1. Global data references must load the address from the GOT, via
9267 the PIC reg. An insn is emitted to do this load, and the reg is
9268 returned.
9269
9270 2. Static data references, constant pool addresses, and code labels
9271 compute the address as an offset from the GOT, whose base is in
9272 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
9273 differentiate them from global data objects. The returned
9274 address is the PIC reg + an unspec constant.
9275
9276 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
9277 reg also appears in the address. */
9278
9279 static rtx
9280 legitimize_pic_address (rtx orig, rtx reg)
9281 {
9282 rtx addr = orig;
9283 rtx new_rtx = orig;
9284 rtx base;
9285
9286 #if TARGET_MACHO
9287 if (TARGET_MACHO && !TARGET_64BIT)
9288 {
9289 if (reg == 0)
9290 reg = gen_reg_rtx (Pmode);
9291 /* Use the generic Mach-O PIC machinery. */
9292 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
9293 }
9294 #endif
9295
9296 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
9297 new_rtx = addr;
9298 else if (TARGET_64BIT
9299 && ix86_cmodel != CM_SMALL_PIC
9300 && gotoff_operand (addr, Pmode))
9301 {
9302 rtx tmpreg;
9303 /* This symbol may be referenced via a displacement from the PIC
9304 base address (@GOTOFF). */
9305
9306 if (reload_in_progress)
9307 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9308 if (GET_CODE (addr) == CONST)
9309 addr = XEXP (addr, 0);
9310 if (GET_CODE (addr) == PLUS)
9311 {
9312 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
9313 UNSPEC_GOTOFF);
9314 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
9315 }
9316 else
9317 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
9318 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9319 if (!reg)
9320 tmpreg = gen_reg_rtx (Pmode);
9321 else
9322 tmpreg = reg;
9323 emit_move_insn (tmpreg, new_rtx);
9324
9325 if (reg != 0)
9326 {
9327 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
9328 tmpreg, 1, OPTAB_DIRECT);
9329 new_rtx = reg;
9330 }
9331 else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
9332 }
9333 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
9334 {
9335 /* This symbol may be referenced via a displacement from the PIC
9336 base address (@GOTOFF). */
9337
9338 if (reload_in_progress)
9339 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9340 if (GET_CODE (addr) == CONST)
9341 addr = XEXP (addr, 0);
9342 if (GET_CODE (addr) == PLUS)
9343 {
9344 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
9345 UNSPEC_GOTOFF);
9346 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
9347 }
9348 else
9349 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
9350 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9351 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
9352
9353 if (reg != 0)
9354 {
9355 emit_move_insn (reg, new_rtx);
9356 new_rtx = reg;
9357 }
9358 }
9359 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
9360 /* We can't use @GOTOFF for text labels on VxWorks;
9361 see gotoff_operand. */
9362 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
9363 {
9364 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
9365 {
9366 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
9367 return legitimize_dllimport_symbol (addr, true);
9368 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
9369 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
9370 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
9371 {
9372 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true);
9373 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
9374 }
9375 }
9376
9377 if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
9378 {
9379 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
9380 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9381 new_rtx = gen_const_mem (Pmode, new_rtx);
9382 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
9383
9384 if (reg == 0)
9385 reg = gen_reg_rtx (Pmode);
9386 /* Use gen_movsi directly, otherwise the address is loaded
9387 into a register for CSE. We don't want to CSE these addresses;
9388 instead we CSE addresses from the GOT table, so skip this. */
9389 emit_insn (gen_movsi (reg, new_rtx));
9390 new_rtx = reg;
9391 }
9392 else
9393 {
9394 /* This symbol must be referenced via a load from the
9395 Global Offset Table (@GOT). */
9396
9397 if (reload_in_progress)
9398 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9399 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
9400 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9401 if (TARGET_64BIT)
9402 new_rtx = force_reg (Pmode, new_rtx);
9403 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
9404 new_rtx = gen_const_mem (Pmode, new_rtx);
9405 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
9406
9407 if (reg == 0)
9408 reg = gen_reg_rtx (Pmode);
9409 emit_move_insn (reg, new_rtx);
9410 new_rtx = reg;
9411 }
9412 }
9413 else
9414 {
9415 if (CONST_INT_P (addr)
9416 && !x86_64_immediate_operand (addr, VOIDmode))
9417 {
9418 if (reg)
9419 {
9420 emit_move_insn (reg, addr);
9421 new_rtx = reg;
9422 }
9423 else
9424 new_rtx = force_reg (Pmode, addr);
9425 }
9426 else if (GET_CODE (addr) == CONST)
9427 {
9428 addr = XEXP (addr, 0);
9429
9430 /* We must match stuff we generated earlier. Assume the only
9431 unspecs that can get here are ours. Not that we could do
9432 anything with them anyway.... */
9433 if (GET_CODE (addr) == UNSPEC
9434 || (GET_CODE (addr) == PLUS
9435 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
9436 return orig;
9437 gcc_assert (GET_CODE (addr) == PLUS);
9438 }
9439 if (GET_CODE (addr) == PLUS)
9440 {
9441 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
9442
9443 /* Check first to see if this is a constant offset from a @GOTOFF
9444 symbol reference. */
9445 if (gotoff_operand (op0, Pmode)
9446 && CONST_INT_P (op1))
9447 {
9448 if (!TARGET_64BIT)
9449 {
9450 if (reload_in_progress)
9451 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9452 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
9453 UNSPEC_GOTOFF);
9454 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
9455 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9456 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
9457
9458 if (reg != 0)
9459 {
9460 emit_move_insn (reg, new_rtx);
9461 new_rtx = reg;
9462 }
9463 }
9464 else
9465 {
9466 if (INTVAL (op1) < -16*1024*1024
9467 || INTVAL (op1) >= 16*1024*1024)
9468 {
9469 if (!x86_64_immediate_operand (op1, Pmode))
9470 op1 = force_reg (Pmode, op1);
9471 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
9472 }
9473 }
9474 }
9475 else
9476 {
9477 base = legitimize_pic_address (XEXP (addr, 0), reg);
9478 new_rtx = legitimize_pic_address (XEXP (addr, 1),
9479 base == reg ? NULL_RTX : reg);
9480
9481 if (CONST_INT_P (new_rtx))
9482 new_rtx = plus_constant (base, INTVAL (new_rtx));
9483 else
9484 {
9485 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
9486 {
9487 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
9488 new_rtx = XEXP (new_rtx, 1);
9489 }
9490 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
9491 }
9492 }
9493 }
9494 }
9495 return new_rtx;
9496 }
9497 \f
9498 /* Load the thread pointer. If TO_REG is true, force it into a register. */
9499
9500 static rtx
9501 get_thread_pointer (int to_reg)
9502 {
9503 rtx tp, reg, insn;
9504
9505 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
9506 if (!to_reg)
9507 return tp;
9508
9509 reg = gen_reg_rtx (Pmode);
9510 insn = gen_rtx_SET (VOIDmode, reg, tp);
9511 insn = emit_insn (insn);
9512
9513 return reg;
9514 }
9515
9516 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
9517 false if we expect this to be used for a memory address and true if
9518 we expect to load the address into a register. */
9519
9520 static rtx
9521 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
9522 {
9523 rtx dest, base, off, pic, tp;
9524 int type;
9525
9526 switch (model)
9527 {
9528 case TLS_MODEL_GLOBAL_DYNAMIC:
9529 dest = gen_reg_rtx (Pmode);
9530 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
9531
9532 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
9533 {
9534 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
9535
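/* Build the __tls_get_addr call in its own sequence and emit it as a
   libcall block that copies the result from %rax into DEST, with X
   recorded as the equivalent value.  */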
9536 start_sequence ();
9537 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
9538 insns = get_insns ();
9539 end_sequence ();
9540
9541 RTL_CONST_CALL_P (insns) = 1;
9542 emit_libcall_block (insns, dest, rax, x);
9543 }
9544 else if (TARGET_64BIT && TARGET_GNU2_TLS)
9545 emit_insn (gen_tls_global_dynamic_64 (dest, x));
9546 else
9547 emit_insn (gen_tls_global_dynamic_32 (dest, x));
9548
9549 if (TARGET_GNU2_TLS)
9550 {
9551 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
9552
9553 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
9554 }
9555 break;
9556
9557 case TLS_MODEL_LOCAL_DYNAMIC:
9558 base = gen_reg_rtx (Pmode);
9559 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
9560
9561 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
9562 {
9563 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, note;
9564
9565 start_sequence ();
9566 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
9567 insns = get_insns ();
9568 end_sequence ();
9569
9570 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
9571 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
9572 RTL_CONST_CALL_P (insns) = 1;
9573 emit_libcall_block (insns, base, rax, note);
9574 }
9575 else if (TARGET_64BIT && TARGET_GNU2_TLS)
9576 emit_insn (gen_tls_local_dynamic_base_64 (base));
9577 else
9578 emit_insn (gen_tls_local_dynamic_base_32 (base));
9579
9580 if (TARGET_GNU2_TLS)
9581 {
9582 rtx x = ix86_tls_module_base ();
9583
9584 set_unique_reg_note (get_last_insn (), REG_EQUIV,
9585 gen_rtx_MINUS (Pmode, x, tp));
9586 }
9587
9588 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
9589 off = gen_rtx_CONST (Pmode, off);
9590
9591 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
9592
9593 if (TARGET_GNU2_TLS)
9594 {
9595 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
9596
9597 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
9598 }
9599
9600 break;
9601
9602 case TLS_MODEL_INITIAL_EXEC:
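/* Decide how the thread-pointer offset is loaded: 64-bit code uses a
   RIP-relative @GOTTPOFF load, 32-bit PIC code goes through the GOT via
   the PIC register, and non-PIC code either materializes the GOT
   pointer or uses an absolute @INDNTPOFF reference.  */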
9603 if (TARGET_64BIT)
9604 {
9605 pic = NULL;
9606 type = UNSPEC_GOTNTPOFF;
9607 }
9608 else if (flag_pic)
9609 {
9610 if (reload_in_progress)
9611 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9612 pic = pic_offset_table_rtx;
9613 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
9614 }
9615 else if (!TARGET_ANY_GNU_TLS)
9616 {
9617 pic = gen_reg_rtx (Pmode);
9618 emit_insn (gen_set_got (pic));
9619 type = UNSPEC_GOTTPOFF;
9620 }
9621 else
9622 {
9623 pic = NULL;
9624 type = UNSPEC_INDNTPOFF;
9625 }
9626
9627 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
9628 off = gen_rtx_CONST (Pmode, off);
9629 if (pic)
9630 off = gen_rtx_PLUS (Pmode, pic, off);
9631 off = gen_const_mem (Pmode, off);
9632 set_mem_alias_set (off, ix86_GOT_alias_set ());
9633
9634 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
9635 {
9636 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
9637 off = force_reg (Pmode, off);
9638 return gen_rtx_PLUS (Pmode, base, off);
9639 }
9640 else
9641 {
9642 base = get_thread_pointer (true);
9643 dest = gen_reg_rtx (Pmode);
9644 emit_insn (gen_subsi3 (dest, base, off));
9645 }
9646 break;
9647
9648 case TLS_MODEL_LOCAL_EXEC:
9649 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
9650 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
9651 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
9652 off = gen_rtx_CONST (Pmode, off);
9653
9654 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
9655 {
9656 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
9657 return gen_rtx_PLUS (Pmode, base, off);
9658 }
9659 else
9660 {
9661 base = get_thread_pointer (true);
9662 dest = gen_reg_rtx (Pmode);
9663 emit_insn (gen_subsi3 (dest, base, off));
9664 }
9665 break;
9666
9667 default:
9668 gcc_unreachable ();
9669 }
9670
9671 return dest;
9672 }
9673
9674 /* Create or return the unique __imp_DECL dllimport symbol corresponding
9675 to symbol DECL. */
9676
9677 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
9678 htab_t dllimport_map;
9679
9680 static tree
9681 get_dllimport_decl (tree decl)
9682 {
9683 struct tree_map *h, in;
9684 void **loc;
9685 const char *name;
9686 const char *prefix;
9687 size_t namelen, prefixlen;
9688 char *imp_name;
9689 tree to;
9690 rtx rtl;
9691
9692 if (!dllimport_map)
9693 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
9694
9695 in.hash = htab_hash_pointer (decl);
9696 in.base.from = decl;
9697 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
9698 h = (struct tree_map *) *loc;
9699 if (h)
9700 return h->to;
9701
9702 *loc = h = GGC_NEW (struct tree_map);
9703 h->hash = in.hash;
9704 h->base.from = decl;
9705 h->to = to = build_decl (VAR_DECL, NULL, ptr_type_node);
9706 DECL_ARTIFICIAL (to) = 1;
9707 DECL_IGNORED_P (to) = 1;
9708 DECL_EXTERNAL (to) = 1;
9709 TREE_READONLY (to) = 1;
9710
9711 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
9712 name = targetm.strip_name_encoding (name);
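/* The import symbol is the stripped name with an "__imp_" prefix; when
   user labels normally carry a leading underscore (and the name is not
   a fastcall name), add it back as the extra underscore in "__imp__".  */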
9713 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
9714 ? "*__imp_" : "*__imp__";
9715 namelen = strlen (name);
9716 prefixlen = strlen (prefix);
9717 imp_name = (char *) alloca (namelen + prefixlen + 1);
9718 memcpy (imp_name, prefix, prefixlen);
9719 memcpy (imp_name + prefixlen, name, namelen + 1);
9720
9721 name = ggc_alloc_string (imp_name, namelen + prefixlen);
9722 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
9723 SET_SYMBOL_REF_DECL (rtl, to);
9724 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
9725
9726 rtl = gen_const_mem (Pmode, rtl);
9727 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
9728
9729 SET_DECL_RTL (to, rtl);
9730 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
9731
9732 return to;
9733 }
9734
9735 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
9736 true if we require the result be a register. */
9737
9738 static rtx
9739 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
9740 {
9741 tree imp_decl;
9742 rtx x;
9743
9744 gcc_assert (SYMBOL_REF_DECL (symbol));
9745 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
9746
9747 x = DECL_RTL (imp_decl);
9748 if (want_reg)
9749 x = force_reg (Pmode, x);
9750 return x;
9751 }
9752
9753 /* Try machine-dependent ways of modifying an illegitimate address
9754 to be legitimate. If we find one, return the new, valid address.
9755 This macro is used in only one place: `memory_address' in explow.c.
9756
9757 OLDX is the address as it was before break_out_memory_refs was called.
9758 In some cases it is useful to look at this to decide what needs to be done.
9759
9760 MODE and WIN are passed so that this macro can use
9761 GO_IF_LEGITIMATE_ADDRESS.
9762
9763 It is always safe for this macro to do nothing. It exists to recognize
9764 opportunities to optimize the output.
9765
9766 For the 80386, we handle X+REG by loading X into a register R and
9767 using R+REG. R will go in a general reg and indexing will be used.
9768 However, if REG is a broken-out memory address or multiplication,
9769 nothing needs to be done because REG can certainly go in a general reg.
9770
9771 When -fpic is used, special handling is needed for symbolic references.
9772 See comments by legitimize_pic_address in i386.c for details. */
9773
9774 rtx
9775 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
9776 {
9777 int changed = 0;
9778 unsigned log;
9779
9780 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
9781 if (log)
9782 return legitimize_tls_address (x, (enum tls_model) log, false);
9783 if (GET_CODE (x) == CONST
9784 && GET_CODE (XEXP (x, 0)) == PLUS
9785 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
9786 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
9787 {
9788 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
9789 (enum tls_model) log, false);
9790 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
9791 }
9792
9793 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
9794 {
9795 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
9796 return legitimize_dllimport_symbol (x, true);
9797 if (GET_CODE (x) == CONST
9798 && GET_CODE (XEXP (x, 0)) == PLUS
9799 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
9800 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
9801 {
9802 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
9803 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
9804 }
9805 }
9806
9807 if (flag_pic && SYMBOLIC_CONST (x))
9808 return legitimize_pic_address (x, 0);
9809
9810 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
9811 if (GET_CODE (x) == ASHIFT
9812 && CONST_INT_P (XEXP (x, 1))
9813 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
9814 {
9815 changed = 1;
9816 log = INTVAL (XEXP (x, 1));
9817 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
9818 GEN_INT (1 << log));
9819 }
9820
9821 if (GET_CODE (x) == PLUS)
9822 {
9823 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
9824
9825 if (GET_CODE (XEXP (x, 0)) == ASHIFT
9826 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
9827 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
9828 {
9829 changed = 1;
9830 log = INTVAL (XEXP (XEXP (x, 0), 1));
9831 XEXP (x, 0) = gen_rtx_MULT (Pmode,
9832 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
9833 GEN_INT (1 << log));
9834 }
9835
9836 if (GET_CODE (XEXP (x, 1)) == ASHIFT
9837 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
9838 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
9839 {
9840 changed = 1;
9841 log = INTVAL (XEXP (XEXP (x, 1), 1));
9842 XEXP (x, 1) = gen_rtx_MULT (Pmode,
9843 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
9844 GEN_INT (1 << log));
9845 }
9846
9847 /* Put multiply first if it isn't already. */
9848 if (GET_CODE (XEXP (x, 1)) == MULT)
9849 {
9850 rtx tmp = XEXP (x, 0);
9851 XEXP (x, 0) = XEXP (x, 1);
9852 XEXP (x, 1) = tmp;
9853 changed = 1;
9854 }
9855
9856 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
9857 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
9858 created by virtual register instantiation, register elimination, and
9859 similar optimizations. */
9860 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
9861 {
9862 changed = 1;
9863 x = gen_rtx_PLUS (Pmode,
9864 gen_rtx_PLUS (Pmode, XEXP (x, 0),
9865 XEXP (XEXP (x, 1), 0)),
9866 XEXP (XEXP (x, 1), 1));
9867 }
9868
9869 /* Canonicalize
9870 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
9871 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
9872 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
9873 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
9874 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
9875 && CONSTANT_P (XEXP (x, 1)))
9876 {
9877 rtx constant;
9878 rtx other = NULL_RTX;
9879
9880 if (CONST_INT_P (XEXP (x, 1)))
9881 {
9882 constant = XEXP (x, 1);
9883 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
9884 }
9885 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
9886 {
9887 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
9888 other = XEXP (x, 1);
9889 }
9890 else
9891 constant = 0;
9892
9893 if (constant)
9894 {
9895 changed = 1;
9896 x = gen_rtx_PLUS (Pmode,
9897 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
9898 XEXP (XEXP (XEXP (x, 0), 1), 0)),
9899 plus_constant (other, INTVAL (constant)));
9900 }
9901 }
9902
9903 if (changed && legitimate_address_p (mode, x, FALSE))
9904 return x;
9905
9906 if (GET_CODE (XEXP (x, 0)) == MULT)
9907 {
9908 changed = 1;
9909 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
9910 }
9911
9912 if (GET_CODE (XEXP (x, 1)) == MULT)
9913 {
9914 changed = 1;
9915 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
9916 }
9917
9918 if (changed
9919 && REG_P (XEXP (x, 1))
9920 && REG_P (XEXP (x, 0)))
9921 return x;
9922
9923 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
9924 {
9925 changed = 1;
9926 x = legitimize_pic_address (x, 0);
9927 }
9928
9929 if (changed && legitimate_address_p (mode, x, FALSE))
9930 return x;
9931
9932 if (REG_P (XEXP (x, 0)))
9933 {
9934 rtx temp = gen_reg_rtx (Pmode);
9935 rtx val = force_operand (XEXP (x, 1), temp);
9936 if (val != temp)
9937 emit_move_insn (temp, val);
9938
9939 XEXP (x, 1) = temp;
9940 return x;
9941 }
9942
9943 else if (REG_P (XEXP (x, 1)))
9944 {
9945 rtx temp = gen_reg_rtx (Pmode);
9946 rtx val = force_operand (XEXP (x, 0), temp);
9947 if (val != temp)
9948 emit_move_insn (temp, val);
9949
9950 XEXP (x, 0) = temp;
9951 return x;
9952 }
9953 }
9954
9955 return x;
9956 }
9957 \f
9958 /* Print an integer constant expression in assembler syntax. Addition
9959 and subtraction are the only arithmetic that may appear in these
9960 expressions. FILE is the stdio stream to write to, X is the rtx, and
9961 CODE is the operand print code from the output string. */
9962
9963 static void
9964 output_pic_addr_const (FILE *file, rtx x, int code)
9965 {
9966 char buf[256];
9967
9968 switch (GET_CODE (x))
9969 {
9970 case PC:
9971 gcc_assert (flag_pic);
9972 putc ('.', file);
9973 break;
9974
9975 case SYMBOL_REF:
9976 if (! TARGET_MACHO || TARGET_64BIT)
9977 output_addr_const (file, x);
9978 else
9979 {
9980 const char *name = XSTR (x, 0);
9981
9982 /* Mark the decl as referenced so that cgraph will
9983 output the function. */
9984 if (SYMBOL_REF_DECL (x))
9985 mark_decl_referenced (SYMBOL_REF_DECL (x));
9986
9987 #if TARGET_MACHO
9988 if (MACHOPIC_INDIRECT
9989 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
9990 name = machopic_indirection_name (x, /*stub_p=*/true);
9991 #endif
9992 assemble_name (file, name);
9993 }
9994 if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI)
9995 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
9996 fputs ("@PLT", file);
9997 break;
9998
9999 case LABEL_REF:
10000 x = XEXP (x, 0);
10001 /* FALLTHRU */
10002 case CODE_LABEL:
10003 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
10004 assemble_name (asm_out_file, buf);
10005 break;
10006
10007 case CONST_INT:
10008 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
10009 break;
10010
10011 case CONST:
10012 /* This used to output parentheses around the expression,
10013 but that does not work on the 386 (either ATT or BSD assembler). */
10014 output_pic_addr_const (file, XEXP (x, 0), code);
10015 break;
10016
10017 case CONST_DOUBLE:
10018 if (GET_MODE (x) == VOIDmode)
10019 {
10020 /* We can use %d if the number is <32 bits and positive. */
10021 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
10022 fprintf (file, "0x%lx%08lx",
10023 (unsigned long) CONST_DOUBLE_HIGH (x),
10024 (unsigned long) CONST_DOUBLE_LOW (x));
10025 else
10026 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
10027 }
10028 else
10029 /* We can't handle floating point constants;
10030 PRINT_OPERAND must handle them. */
10031 output_operand_lossage ("floating constant misused");
10032 break;
10033
10034 case PLUS:
10035 /* Some assemblers need integer constants to appear first. */
10036 if (CONST_INT_P (XEXP (x, 0)))
10037 {
10038 output_pic_addr_const (file, XEXP (x, 0), code);
10039 putc ('+', file);
10040 output_pic_addr_const (file, XEXP (x, 1), code);
10041 }
10042 else
10043 {
10044 gcc_assert (CONST_INT_P (XEXP (x, 1)));
10045 output_pic_addr_const (file, XEXP (x, 1), code);
10046 putc ('+', file);
10047 output_pic_addr_const (file, XEXP (x, 0), code);
10048 }
10049 break;
10050
10051 case MINUS:
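/* Bracket the difference so the assembler parses it as a single
   expression; Mach-O does not need the grouping.  */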
10052 if (!TARGET_MACHO)
10053 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
10054 output_pic_addr_const (file, XEXP (x, 0), code);
10055 putc ('-', file);
10056 output_pic_addr_const (file, XEXP (x, 1), code);
10057 if (!TARGET_MACHO)
10058 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
10059 break;
10060
10061 case UNSPEC:
10062 gcc_assert (XVECLEN (x, 0) == 1);
10063 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
10064 switch (XINT (x, 1))
10065 {
10066 case UNSPEC_GOT:
10067 fputs ("@GOT", file);
10068 break;
10069 case UNSPEC_GOTOFF:
10070 fputs ("@GOTOFF", file);
10071 break;
10072 case UNSPEC_PLTOFF:
10073 fputs ("@PLTOFF", file);
10074 break;
10075 case UNSPEC_GOTPCREL:
10076 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
10077 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
10078 break;
10079 case UNSPEC_GOTTPOFF:
10080 /* FIXME: This might be @TPOFF in Sun ld too. */
10081 fputs ("@GOTTPOFF", file);
10082 break;
10083 case UNSPEC_TPOFF:
10084 fputs ("@TPOFF", file);
10085 break;
10086 case UNSPEC_NTPOFF:
10087 if (TARGET_64BIT)
10088 fputs ("@TPOFF", file);
10089 else
10090 fputs ("@NTPOFF", file);
10091 break;
10092 case UNSPEC_DTPOFF:
10093 fputs ("@DTPOFF", file);
10094 break;
10095 case UNSPEC_GOTNTPOFF:
10096 if (TARGET_64BIT)
10097 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
10098 "@GOTTPOFF(%rip)": "@GOTTPOFF[rip]", file);
10099 else
10100 fputs ("@GOTNTPOFF", file);
10101 break;
10102 case UNSPEC_INDNTPOFF:
10103 fputs ("@INDNTPOFF", file);
10104 break;
10105 #if TARGET_MACHO
10106 case UNSPEC_MACHOPIC_OFFSET:
10107 putc ('-', file);
10108 machopic_output_function_base_name (file);
10109 break;
10110 #endif
10111 default:
10112 output_operand_lossage ("invalid UNSPEC as operand");
10113 break;
10114 }
10115 break;
10116
10117 default:
10118 output_operand_lossage ("invalid expression as operand");
10119 }
10120 }
10121
10122 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
10123 We need to emit DTP-relative relocations. */
10124
10125 static void ATTRIBUTE_UNUSED
10126 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
10127 {
10128 fputs (ASM_LONG, file);
10129 output_addr_const (file, x);
10130 fputs ("@DTPOFF", file);
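/* ASM_LONG emits a 4-byte value; for an 8-byte relocation pad the
   upper half with an explicit zero.  */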
10131 switch (size)
10132 {
10133 case 4:
10134 break;
10135 case 8:
10136 fputs (", 0", file);
10137 break;
10138 default:
10139 gcc_unreachable ();
10140 }
10141 }
10142
10143 /* Return true if X is a representation of the PIC register. This copes
10144 with calls from ix86_find_base_term, where the register might have
10145 been replaced by a cselib value. */
10146
10147 static bool
10148 ix86_pic_register_p (rtx x)
10149 {
10150 if (GET_CODE (x) == VALUE)
10151 return (pic_offset_table_rtx
10152 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
10153 else
10154 return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
10155 }
10156
10157 /* In the name of slightly smaller debug output, and to cater to
10158 general assembler lossage, recognize PIC+GOTOFF and turn it back
10159 into a direct symbol reference.
10160
10161 On Darwin, this is necessary to avoid a crash, because Darwin
10162 has a different PIC label for each routine but the DWARF debugging
10163 information is not associated with any particular routine, so it's
10164 necessary to remove references to the PIC label from RTL stored by
10165 the DWARF output code. */
10166
10167 static rtx
10168 ix86_delegitimize_address (rtx orig_x)
10169 {
10170 rtx x = orig_x;
10171 /* reg_addend is NULL or a multiple of some register. */
10172 rtx reg_addend = NULL_RTX;
10173 /* const_addend is NULL or a const_int. */
10174 rtx const_addend = NULL_RTX;
10175 /* This is the result, or NULL. */
10176 rtx result = NULL_RTX;
10177
10178 if (MEM_P (x))
10179 x = XEXP (x, 0);
10180
10181 if (TARGET_64BIT)
10182 {
10183 if (GET_CODE (x) != CONST
10184 || GET_CODE (XEXP (x, 0)) != UNSPEC
10185 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
10186 || !MEM_P (orig_x))
10187 return orig_x;
10188 return XVECEXP (XEXP (x, 0), 0, 0);
10189 }
10190
10191 if (GET_CODE (x) != PLUS
10192 || GET_CODE (XEXP (x, 1)) != CONST)
10193 return orig_x;
10194
10195 if (ix86_pic_register_p (XEXP (x, 0)))
10196 /* %ebx + GOT/GOTOFF */
10197 ;
10198 else if (GET_CODE (XEXP (x, 0)) == PLUS)
10199 {
10200 /* %ebx + %reg * scale + GOT/GOTOFF */
10201 reg_addend = XEXP (x, 0);
10202 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
10203 reg_addend = XEXP (reg_addend, 1);
10204 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
10205 reg_addend = XEXP (reg_addend, 0);
10206 else
10207 return orig_x;
10208 if (!REG_P (reg_addend)
10209 && GET_CODE (reg_addend) != MULT
10210 && GET_CODE (reg_addend) != ASHIFT)
10211 return orig_x;
10212 }
10213 else
10214 return orig_x;
10215
10216 x = XEXP (XEXP (x, 1), 0);
10217 if (GET_CODE (x) == PLUS
10218 && CONST_INT_P (XEXP (x, 1)))
10219 {
10220 const_addend = XEXP (x, 1);
10221 x = XEXP (x, 0);
10222 }
10223
10224 if (GET_CODE (x) == UNSPEC
10225 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x))
10226 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
10227 result = XVECEXP (x, 0, 0);
10228
10229 if (TARGET_MACHO && darwin_local_data_pic (x)
10230 && !MEM_P (orig_x))
10231 result = XVECEXP (x, 0, 0);
10232
10233 if (! result)
10234 return orig_x;
10235
10236 if (const_addend)
10237 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
10238 if (reg_addend)
10239 result = gen_rtx_PLUS (Pmode, reg_addend, result);
10240 return result;
10241 }
10242
10243 /* If X is a machine specific address (i.e. a symbol or label being
10244 referenced as a displacement from the GOT implemented using an
10245 UNSPEC), then return the base term. Otherwise return X. */
10246
10247 rtx
10248 ix86_find_base_term (rtx x)
10249 {
10250 rtx term;
10251
10252 if (TARGET_64BIT)
10253 {
10254 if (GET_CODE (x) != CONST)
10255 return x;
10256 term = XEXP (x, 0);
10257 if (GET_CODE (term) == PLUS
10258 && (CONST_INT_P (XEXP (term, 1))
10259 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
10260 term = XEXP (term, 0);
10261 if (GET_CODE (term) != UNSPEC
10262 || XINT (term, 1) != UNSPEC_GOTPCREL)
10263 return x;
10264
10265 return XVECEXP (term, 0, 0);
10266 }
10267
10268 return ix86_delegitimize_address (x);
10269 }
10270 \f
10271 static void
10272 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
10273 int fp, FILE *file)
10274 {
10275 const char *suffix;
10276
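/* FP comparisons have already been arranged so that they map onto a
   plain integer condition on the flags register; translate the code
   and continue with CCmode.  */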
10277 if (mode == CCFPmode || mode == CCFPUmode)
10278 {
10279 enum rtx_code second_code, bypass_code;
10280 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
10281 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
10282 code = ix86_fp_compare_code_to_integer (code);
10283 mode = CCmode;
10284 }
10285 if (reverse)
10286 code = reverse_condition (code);
10287
10288 switch (code)
10289 {
10290 case EQ:
10291 switch (mode)
10292 {
10293 case CCAmode:
10294 suffix = "a";
10295 break;
10296
10297 case CCCmode:
10298 suffix = "c";
10299 break;
10300
10301 case CCOmode:
10302 suffix = "o";
10303 break;
10304
10305 case CCSmode:
10306 suffix = "s";
10307 break;
10308
10309 default:
10310 suffix = "e";
10311 }
10312 break;
10313 case NE:
10314 switch (mode)
10315 {
10316 case CCAmode:
10317 suffix = "na";
10318 break;
10319
10320 case CCCmode:
10321 suffix = "nc";
10322 break;
10323
10324 case CCOmode:
10325 suffix = "no";
10326 break;
10327
10328 case CCSmode:
10329 suffix = "ns";
10330 break;
10331
10332 default:
10333 suffix = "ne";
10334 }
10335 break;
10336 case GT:
10337 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
10338 suffix = "g";
10339 break;
10340 case GTU:
10341 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
10342 Those same assemblers have the same but opposite lossage on cmov. */
10343 if (mode == CCmode)
10344 suffix = fp ? "nbe" : "a";
10345 else if (mode == CCCmode)
10346 suffix = "b";
10347 else
10348 gcc_unreachable ();
10349 break;
10350 case LT:
10351 switch (mode)
10352 {
10353 case CCNOmode:
10354 case CCGOCmode:
10355 suffix = "s";
10356 break;
10357
10358 case CCmode:
10359 case CCGCmode:
10360 suffix = "l";
10361 break;
10362
10363 default:
10364 gcc_unreachable ();
10365 }
10366 break;
10367 case LTU:
10368 gcc_assert (mode == CCmode || mode == CCCmode);
10369 suffix = "b";
10370 break;
10371 case GE:
10372 switch (mode)
10373 {
10374 case CCNOmode:
10375 case CCGOCmode:
10376 suffix = "ns";
10377 break;
10378
10379 case CCmode:
10380 case CCGCmode:
10381 suffix = "ge";
10382 break;
10383
10384 default:
10385 gcc_unreachable ();
10386 }
10387 break;
10388 case GEU:
10389 /* ??? As above. */
10390 gcc_assert (mode == CCmode || mode == CCCmode);
10391 suffix = fp ? "nb" : "ae";
10392 break;
10393 case LE:
10394 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
10395 suffix = "le";
10396 break;
10397 case LEU:
10398 /* ??? As above. */
10399 if (mode == CCmode)
10400 suffix = "be";
10401 else if (mode == CCCmode)
10402 suffix = fp ? "nb" : "ae";
10403 else
10404 gcc_unreachable ();
10405 break;
10406 case UNORDERED:
10407 suffix = fp ? "u" : "p";
10408 break;
10409 case ORDERED:
10410 suffix = fp ? "nu" : "np";
10411 break;
10412 default:
10413 gcc_unreachable ();
10414 }
10415 fputs (suffix, file);
10416 }
10417
10418 /* Print the name of register X to FILE based on its machine mode and number.
10419 If CODE is 'w', pretend the mode is HImode.
10420 If CODE is 'b', pretend the mode is QImode.
10421 If CODE is 'k', pretend the mode is SImode.
10422 If CODE is 'q', pretend the mode is DImode.
10423 If CODE is 'x', pretend the mode is V4SFmode.
10424 If CODE is 't', pretend the mode is V8SFmode.
10425 If CODE is 'h', pretend the reg is the 'high' byte register.
10426 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
10427 If CODE is 'd', duplicate the operand for AVX instruction.
10428 */
10429
10430 void
10431 print_reg (rtx x, int code, FILE *file)
10432 {
10433 const char *reg;
10434 bool duplicated = code == 'd' && TARGET_AVX;
10435
10436 gcc_assert (x == pc_rtx
10437 || (REGNO (x) != ARG_POINTER_REGNUM
10438 && REGNO (x) != FRAME_POINTER_REGNUM
10439 && REGNO (x) != FLAGS_REG
10440 && REGNO (x) != FPSR_REG
10441 && REGNO (x) != FPCR_REG));
10442
10443 if (ASSEMBLER_DIALECT == ASM_ATT)
10444 putc ('%', file);
10445
10446 if (x == pc_rtx)
10447 {
10448 gcc_assert (TARGET_64BIT);
10449 fputs ("rip", file);
10450 return;
10451 }
10452
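/* Translate the operand-code letter into an operand size in bytes;
   3 selects the x87 stack-register name and 0 the high byte register.  */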
10453 if (code == 'w' || MMX_REG_P (x))
10454 code = 2;
10455 else if (code == 'b')
10456 code = 1;
10457 else if (code == 'k')
10458 code = 4;
10459 else if (code == 'q')
10460 code = 8;
10461 else if (code == 'y')
10462 code = 3;
10463 else if (code == 'h')
10464 code = 0;
10465 else if (code == 'x')
10466 code = 16;
10467 else if (code == 't')
10468 code = 32;
10469 else
10470 code = GET_MODE_SIZE (GET_MODE (x));
10471
10472 /* Irritatingly, AMD extended registers use a different naming convention
10473 from the normal registers. */
10474 if (REX_INT_REG_P (x))
10475 {
10476 gcc_assert (TARGET_64BIT);
10477 switch (code)
10478 {
10479 case 0:
10480 error ("extended registers have no high halves");
10481 break;
10482 case 1:
10483 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
10484 break;
10485 case 2:
10486 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
10487 break;
10488 case 4:
10489 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
10490 break;
10491 case 8:
10492 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
10493 break;
10494 default:
10495 error ("unsupported operand size for extended register");
10496 break;
10497 }
10498 return;
10499 }
10500
10501 reg = NULL;
10502 switch (code)
10503 {
10504 case 3:
10505 if (STACK_TOP_P (x))
10506 {
10507 reg = "st(0)";
10508 break;
10509 }
10510 /* FALLTHRU */
10511 case 8:
10512 case 4:
10513 case 12:
10514 if (! ANY_FP_REG_P (x))
10515 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
10516 /* FALLTHRU */
10517 case 16:
10518 case 2:
10519 normal:
10520 reg = hi_reg_name[REGNO (x)];
10521 break;
10522 case 1:
10523 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
10524 goto normal;
10525 reg = qi_reg_name[REGNO (x)];
10526 break;
10527 case 0:
10528 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
10529 goto normal;
10530 reg = qi_high_reg_name[REGNO (x)];
10531 break;
10532 case 32:
10533 if (SSE_REG_P (x))
10534 {
10535 gcc_assert (!duplicated);
10536 putc ('y', file);
10537 fputs (hi_reg_name[REGNO (x)] + 1, file);
10538 return;
10539 }
10540 break;
10541 default:
10542 gcc_unreachable ();
10543 }
10544
10545 fputs (reg, file);
10546 if (duplicated)
10547 {
10548 if (ASSEMBLER_DIALECT == ASM_ATT)
10549 fprintf (file, ", %%%s", reg);
10550 else
10551 fprintf (file, ", %s", reg);
10552 }
10553 }
10554
10555 /* Locate some local-dynamic symbol still in use by this function
10556 so that we can print its name in some tls_local_dynamic_base
10557 pattern. */
10558
10559 static int
10560 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
10561 {
10562 rtx x = *px;
10563
10564 if (GET_CODE (x) == SYMBOL_REF
10565 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
10566 {
10567 cfun->machine->some_ld_name = XSTR (x, 0);
10568 return 1;
10569 }
10570
10571 return 0;
10572 }
10573
10574 static const char *
10575 get_some_local_dynamic_name (void)
10576 {
10577 rtx insn;
10578
10579 if (cfun->machine->some_ld_name)
10580 return cfun->machine->some_ld_name;
10581
10582 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
10583 if (INSN_P (insn)
10584 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
10585 return cfun->machine->some_ld_name;
10586
10587 gcc_unreachable ();
10588 }
10589
10590 /* Meaning of CODE:
10591 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
10592 C -- print opcode suffix for set/cmov insn.
10593 c -- like C, but print reversed condition
10594 E,e -- likewise, but for compare-and-branch fused insn.
10595 F,f -- likewise, but for floating-point.
10596 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
10597 otherwise nothing
10598 R -- print the prefix for register names.
10599 z -- print the opcode suffix for the size of the current operand.
10600 * -- print a star (in certain assembler syntax)
10601 A -- print an absolute memory reference.
10602 w -- print the operand as if it's a "word" (HImode) even if it isn't.
10603 s -- print a shift double count, followed by the assembler's argument
10604 delimiter.
10605 b -- print the QImode name of the register for the indicated operand.
10606 %b0 would print %al if operands[0] is reg 0.
10607 w -- likewise, print the HImode name of the register.
10608 k -- likewise, print the SImode name of the register.
10609 q -- likewise, print the DImode name of the register.
10610 x -- likewise, print the V4SFmode name of the register.
10611 t -- likewise, print the V8SFmode name of the register.
10612 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
10613 y -- print "st(0)" instead of "st" as a register.
10614 d -- print duplicated register operand for AVX instruction.
10615 D -- print condition for SSE cmp instruction.
10616 P -- if PIC, print an @PLT suffix.
10617 X -- don't print any sort of PIC '@' suffix for a symbol.
10618 & -- print some in-use local-dynamic symbol name.
10619 H -- print a memory address offset by 8; used for sse high-parts
10620 Y -- print condition for SSE5 com* instruction.
10621 + -- print a branch hint as 'cs' or 'ds' prefix
10622 ; -- print a semicolon (after prefixes due to bug in older gas).
10623 */
10624
10625 void
10626 print_operand (FILE *file, rtx x, int code)
10627 {
10628 if (code)
10629 {
10630 switch (code)
10631 {
10632 case '*':
10633 if (ASSEMBLER_DIALECT == ASM_ATT)
10634 putc ('*', file);
10635 return;
10636
10637 case '&':
10638 assemble_name (file, get_some_local_dynamic_name ());
10639 return;
10640
10641 case 'A':
10642 switch (ASSEMBLER_DIALECT)
10643 {
10644 case ASM_ATT:
10645 putc ('*', file);
10646 break;
10647
10648 case ASM_INTEL:
10649 /* Intel syntax. For absolute addresses, registers should not
10650 be surrounded by brackets. */
10651 if (!REG_P (x))
10652 {
10653 putc ('[', file);
10654 PRINT_OPERAND (file, x, 0);
10655 putc (']', file);
10656 return;
10657 }
10658 break;
10659
10660 default:
10661 gcc_unreachable ();
10662 }
10663
10664 PRINT_OPERAND (file, x, 0);
10665 return;
10666
10667
10668 case 'L':
10669 if (ASSEMBLER_DIALECT == ASM_ATT)
10670 putc ('l', file);
10671 return;
10672
10673 case 'W':
10674 if (ASSEMBLER_DIALECT == ASM_ATT)
10675 putc ('w', file);
10676 return;
10677
10678 case 'B':
10679 if (ASSEMBLER_DIALECT == ASM_ATT)
10680 putc ('b', file);
10681 return;
10682
10683 case 'Q':
10684 if (ASSEMBLER_DIALECT == ASM_ATT)
10685 putc ('l', file);
10686 return;
10687
10688 case 'S':
10689 if (ASSEMBLER_DIALECT == ASM_ATT)
10690 putc ('s', file);
10691 return;
10692
10693 case 'T':
10694 if (ASSEMBLER_DIALECT == ASM_ATT)
10695 putc ('t', file);
10696 return;
10697
10698 case 'z':
10699 /* 387 opcodes don't get size suffixes if the operands are
10700 registers. */
10701 if (STACK_REG_P (x))
10702 return;
10703
10704 /* Likewise if using Intel opcodes. */
10705 if (ASSEMBLER_DIALECT == ASM_INTEL)
10706 return;
10707
10708 /* Derive the opcode suffix from the size of the operand. */
10709 switch (GET_MODE_SIZE (GET_MODE (x)))
10710 {
10711 case 1:
10712 putc ('b', file);
10713 return;
10714
10715 case 2:
10716 if (MEM_P (x))
10717 {
10718 #ifdef HAVE_GAS_FILDS_FISTS
10719 putc ('s', file);
10720 #endif
10721 return;
10722 }
10723 else
10724 putc ('w', file);
10725 return;
10726
10727 case 4:
10728 if (GET_MODE (x) == SFmode)
10729 {
10730 putc ('s', file);
10731 return;
10732 }
10733 else
10734 putc ('l', file);
10735 return;
10736
10737 case 12:
10738 case 16:
10739 putc ('t', file);
10740 return;
10741
10742 case 8:
10743 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
10744 {
10745 if (MEM_P (x))
10746 {
10747 #ifdef GAS_MNEMONICS
10748 putc ('q', file);
10749 #else
10750 putc ('l', file);
10751 putc ('l', file);
10752 #endif
10753 }
10754 else
10755 putc ('q', file);
10756 }
10757 else
10758 putc ('l', file);
10759 return;
10760
10761 default:
10762 gcc_unreachable ();
10763 }
10764
10765 case 'd':
10766 case 'b':
10767 case 'w':
10768 case 'k':
10769 case 'q':
10770 case 'h':
10771 case 't':
10772 case 'y':
10773 case 'x':
10774 case 'X':
10775 case 'P':
10776 break;
10777
10778 case 's':
10779 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
10780 {
10781 PRINT_OPERAND (file, x, 0);
10782 fputs (", ", file);
10783 }
10784 return;
10785
10786 case 'D':
10787 /* Little bit of braindamage here. The SSE compare instructions
10788 use completely different names for the comparisons than the
10789 fp conditional moves do. */
10790 if (TARGET_AVX)
10791 {
10792 switch (GET_CODE (x))
10793 {
10794 case EQ:
10795 fputs ("eq", file);
10796 break;
10797 case UNEQ:
10798 fputs ("eq_us", file);
10799 break;
10800 case LT:
10801 fputs ("lt", file);
10802 break;
10803 case UNLT:
10804 fputs ("nge", file);
10805 break;
10806 case LE:
10807 fputs ("le", file);
10808 break;
10809 case UNLE:
10810 fputs ("ngt", file);
10811 break;
10812 case UNORDERED:
10813 fputs ("unord", file);
10814 break;
10815 case NE:
10816 fputs ("neq", file);
10817 break;
10818 case LTGT:
10819 fputs ("neq_oq", file);
10820 break;
10821 case GE:
10822 fputs ("ge", file);
10823 break;
10824 case UNGE:
10825 fputs ("nlt", file);
10826 break;
10827 case GT:
10828 fputs ("gt", file);
10829 break;
10830 case UNGT:
10831 fputs ("nle", file);
10832 break;
10833 case ORDERED:
10834 fputs ("ord", file);
10835 break;
10836 default:
10837 output_operand_lossage ("operand is not a condition code, invalid operand code 'D'");
10838 return;
10839 }
10840 }
10841 else
10842 {
10843 switch (GET_CODE (x))
10844 {
10845 case EQ:
10846 case UNEQ:
10847 fputs ("eq", file);
10848 break;
10849 case LT:
10850 case UNLT:
10851 fputs ("lt", file);
10852 break;
10853 case LE:
10854 case UNLE:
10855 fputs ("le", file);
10856 break;
10857 case UNORDERED:
10858 fputs ("unord", file);
10859 break;
10860 case NE:
10861 case LTGT:
10862 fputs ("neq", file);
10863 break;
10864 case UNGE:
10865 case GE:
10866 fputs ("nlt", file);
10867 break;
10868 case UNGT:
10869 case GT:
10870 fputs ("nle", file);
10871 break;
10872 case ORDERED:
10873 fputs ("ord", file);
10874 break;
10875 default:
10876 output_operand_lossage ("operand is not a condition code, invalid operand code 'D'");
10877 return;
10878 }
10879 }
10880 return;
10881 case 'O':
10882 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
10883 if (ASSEMBLER_DIALECT == ASM_ATT)
10884 {
10885 switch (GET_MODE (x))
10886 {
10887 case HImode: putc ('w', file); break;
10888 case SImode:
10889 case SFmode: putc ('l', file); break;
10890 case DImode:
10891 case DFmode: putc ('q', file); break;
10892 default: gcc_unreachable ();
10893 }
10894 putc ('.', file);
10895 }
10896 #endif
10897 return;
10898 case 'C':
10899 if (!COMPARISON_P (x))
10900 {
10901 output_operand_lossage ("operand is neither a constant nor a "
10902 "condition code, invalid operand code "
10903 "'C'");
10904 return;
10905 }
10906 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
10907 return;
10908 case 'F':
10909 if (!COMPARISON_P (x))
10910 {
10911 output_operand_lossage ("operand is neither a constant nor a "
10912 "condition code, invalid operand code "
10913 "'F'");
10914 return;
10915 }
10916 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
10917 if (ASSEMBLER_DIALECT == ASM_ATT)
10918 putc ('.', file);
10919 #endif
10920 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
10921 return;
10922
10923 /* Like above, but reverse condition */
10924 case 'c':
10925 /* Check to see if argument to %c is really a constant
10926 and not a condition code which needs to be reversed. */
10927 if (!COMPARISON_P (x))
10928 {
10929 output_operand_lossage ("operand is neither a constant nor a "
10930 "condition code, invalid operand "
10931 "code 'c'");
10932 return;
10933 }
10934 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
10935 return;
10936 case 'f':
10937 if (!COMPARISON_P (x))
10938 {
10939 output_operand_lossage ("operand is neither a constant nor a "
10940 "condition code, invalid operand "
10941 "code 'f'");
10942 return;
10943 }
10944 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
10945 if (ASSEMBLER_DIALECT == ASM_ATT)
10946 putc ('.', file);
10947 #endif
10948 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
10949 return;
10950
10951 case 'E':
10952 put_condition_code (GET_CODE (x), CCmode, 0, 0, file);
10953 return;
10954
10955 case 'e':
10956 put_condition_code (GET_CODE (x), CCmode, 1, 0, file);
10957 return;
10958
10959 case 'H':
10960 /* It doesn't actually matter what mode we use here, as we're
10961 only going to use this for printing. */
10962 x = adjust_address_nv (x, DImode, 8);
10963 break;
10964
10965 case '+':
10966 {
10967 rtx x;
10968
10969 if (!optimize
10970 || optimize_function_for_size_p (cfun) || !TARGET_BRANCH_PREDICTION_HINTS)
10971 return;
10972
10973 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
10974 if (x)
10975 {
10976 int pred_val = INTVAL (XEXP (x, 0));
10977
10978 if (pred_val < REG_BR_PROB_BASE * 45 / 100
10979 || pred_val > REG_BR_PROB_BASE * 55 / 100)
10980 {
10981 int taken = pred_val > REG_BR_PROB_BASE / 2;
10982 int cputaken = final_forward_branch_p (current_output_insn) == 0;
10983
10984 /* Emit hints only when the default branch prediction
10985 heuristics would fail. */
10986 if (taken != cputaken)
10987 {
10988 /* We use 3e (DS) prefix for taken branches and
10989 2e (CS) prefix for not taken branches. */
10990 if (taken)
10991 fputs ("ds ; ", file);
10992 else
10993 fputs ("cs ; ", file);
10994 }
10995 }
10996 }
10997 return;
10998 }
10999
11000 case 'Y':
11001 switch (GET_CODE (x))
11002 {
11003 case NE:
11004 fputs ("neq", file);
11005 break;
11006 case EQ:
11007 fputs ("eq", file);
11008 break;
11009 case GE:
11010 case GEU:
11011 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
11012 break;
11013 case GT:
11014 case GTU:
11015 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
11016 break;
11017 case LE:
11018 case LEU:
11019 fputs ("le", file);
11020 break;
11021 case LT:
11022 case LTU:
11023 fputs ("lt", file);
11024 break;
11025 case UNORDERED:
11026 fputs ("unord", file);
11027 break;
11028 case ORDERED:
11029 fputs ("ord", file);
11030 break;
11031 case UNEQ:
11032 fputs ("ueq", file);
11033 break;
11034 case UNGE:
11035 fputs ("nlt", file);
11036 break;
11037 case UNGT:
11038 fputs ("nle", file);
11039 break;
11040 case UNLE:
11041 fputs ("ule", file);
11042 break;
11043 case UNLT:
11044 fputs ("ult", file);
11045 break;
11046 case LTGT:
11047 fputs ("une", file);
11048 break;
11049 default:
11050 output_operand_lossage ("operand is not a condition code, invalid operand code 'Y'");
11051 return;
11052 }
11053 return;
11054
11055 case ';':
11056 #if TARGET_MACHO
11057 fputs (" ; ", file);
11058 #else
11059 fputc (' ', file);
11060 #endif
11061 return;
11062
11063 default:
11064 output_operand_lossage ("invalid operand code '%c'", code);
11065 }
11066 }
11067
11068 if (REG_P (x))
11069 print_reg (x, code, file);
11070
11071 else if (MEM_P (x))
11072 {
11073 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
11074 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
11075 && GET_MODE (x) != BLKmode)
11076 {
11077 const char * size;
11078 switch (GET_MODE_SIZE (GET_MODE (x)))
11079 {
11080 case 1: size = "BYTE"; break;
11081 case 2: size = "WORD"; break;
11082 case 4: size = "DWORD"; break;
11083 case 8: size = "QWORD"; break;
11084 case 12: size = "XWORD"; break;
11085 case 16:
11086 if (GET_MODE (x) == XFmode)
11087 size = "XWORD";
11088 else
11089 size = "XMMWORD";
11090 break;
11091 default:
11092 gcc_unreachable ();
11093 }
11094
11095 /* Check for explicit size override (codes 'b', 'w' and 'k') */
11096 if (code == 'b')
11097 size = "BYTE";
11098 else if (code == 'w')
11099 size = "WORD";
11100 else if (code == 'k')
11101 size = "DWORD";
11102
11103 fputs (size, file);
11104 fputs (" PTR ", file);
11105 }
11106
11107 x = XEXP (x, 0);
11108 /* Avoid (%rip) for call operands. */
11109 if (CONSTANT_ADDRESS_P (x) && code == 'P'
11110 && !CONST_INT_P (x))
11111 output_addr_const (file, x);
11112 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
11113 output_operand_lossage ("invalid constraints for operand");
11114 else
11115 output_address (x);
11116 }
11117
11118 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
11119 {
11120 REAL_VALUE_TYPE r;
11121 long l;
11122
11123 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
11124 REAL_VALUE_TO_TARGET_SINGLE (r, l);
11125
11126 if (ASSEMBLER_DIALECT == ASM_ATT)
11127 putc ('$', file);
11128 fprintf (file, "0x%08lx", (long unsigned int) l);
11129 }
11130
11131 /* These float cases don't actually occur as immediate operands. */
11132 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
11133 {
11134 char dstr[30];
11135
11136 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
11137 fprintf (file, "%s", dstr);
11138 }
11139
11140 else if (GET_CODE (x) == CONST_DOUBLE
11141 && GET_MODE (x) == XFmode)
11142 {
11143 char dstr[30];
11144
11145 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
11146 fprintf (file, "%s", dstr);
11147 }
11148
11149 else
11150 {
11151 /* We have patterns that allow zero sets of memory, for instance.
11152 In 64-bit mode, we should probably support all 8-byte vectors,
11153 since we can in fact encode that into an immediate. */
11154 if (GET_CODE (x) == CONST_VECTOR)
11155 {
11156 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
11157 x = const0_rtx;
11158 }
11159
11160 if (code != 'P')
11161 {
11162 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
11163 {
11164 if (ASSEMBLER_DIALECT == ASM_ATT)
11165 putc ('$', file);
11166 }
11167 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
11168 || GET_CODE (x) == LABEL_REF)
11169 {
11170 if (ASSEMBLER_DIALECT == ASM_ATT)
11171 putc ('$', file);
11172 else
11173 fputs ("OFFSET FLAT:", file);
11174 }
11175 }
11176 if (CONST_INT_P (x))
11177 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
11178 else if (flag_pic)
11179 output_pic_addr_const (file, x, code);
11180 else
11181 output_addr_const (file, x);
11182 }
11183 }
11184 \f
11185 /* Print a memory operand whose address is ADDR. */
11186
11187 void
11188 print_operand_address (FILE *file, rtx addr)
11189 {
11190 struct ix86_address parts;
11191 rtx base, index, disp;
11192 int scale;
11193 int ok = ix86_decompose_address (addr, &parts);
11194
11195 gcc_assert (ok);
11196
11197 base = parts.base;
11198 index = parts.index;
11199 disp = parts.disp;
11200 scale = parts.scale;
11201
11202 switch (parts.seg)
11203 {
11204 case SEG_DEFAULT:
11205 break;
11206 case SEG_FS:
11207 case SEG_GS:
11208 if (ASSEMBLER_DIALECT == ASM_ATT)
11209 putc ('%', file);
11210 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
11211 break;
11212 default:
11213 gcc_unreachable ();
11214 }
11215
11216 /* Use one byte shorter RIP relative addressing for 64bit mode. */
11217 if (TARGET_64BIT && !base && !index)
11218 {
11219 rtx symbol = disp;
11220
11221 if (GET_CODE (disp) == CONST
11222 && GET_CODE (XEXP (disp, 0)) == PLUS
11223 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
11224 symbol = XEXP (XEXP (disp, 0), 0);
11225
11226 if (GET_CODE (symbol) == LABEL_REF
11227 || (GET_CODE (symbol) == SYMBOL_REF
11228 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
11229 base = pc_rtx;
11230 }
11231 if (!base && !index)
11232 {
11233 /* Displacement only requires special attention. */
11234
11235 if (CONST_INT_P (disp))
11236 {
11237 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
11238 fputs ("ds:", file);
11239 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
11240 }
11241 else if (flag_pic)
11242 output_pic_addr_const (file, disp, 0);
11243 else
11244 output_addr_const (file, disp);
11245 }
11246 else
11247 {
11248 if (ASSEMBLER_DIALECT == ASM_ATT)
11249 {
11250 if (disp)
11251 {
11252 if (flag_pic)
11253 output_pic_addr_const (file, disp, 0);
11254 else if (GET_CODE (disp) == LABEL_REF)
11255 output_asm_label (disp);
11256 else
11257 output_addr_const (file, disp);
11258 }
11259
11260 putc ('(', file);
11261 if (base)
11262 print_reg (base, 0, file);
11263 if (index)
11264 {
11265 putc (',', file);
11266 print_reg (index, 0, file);
11267 if (scale != 1)
11268 fprintf (file, ",%d", scale);
11269 }
11270 putc (')', file);
11271 }
11272 else
11273 {
11274 rtx offset = NULL_RTX;
11275
11276 if (disp)
11277 {
11278 /* Pull out the offset of a symbol; print any symbol itself. */
11279 if (GET_CODE (disp) == CONST
11280 && GET_CODE (XEXP (disp, 0)) == PLUS
11281 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
11282 {
11283 offset = XEXP (XEXP (disp, 0), 1);
11284 disp = gen_rtx_CONST (VOIDmode,
11285 XEXP (XEXP (disp, 0), 0));
11286 }
11287
11288 if (flag_pic)
11289 output_pic_addr_const (file, disp, 0);
11290 else if (GET_CODE (disp) == LABEL_REF)
11291 output_asm_label (disp);
11292 else if (CONST_INT_P (disp))
11293 offset = disp;
11294 else
11295 output_addr_const (file, disp);
11296 }
11297
11298 putc ('[', file);
11299 if (base)
11300 {
11301 print_reg (base, 0, file);
11302 if (offset)
11303 {
11304 if (INTVAL (offset) >= 0)
11305 putc ('+', file);
11306 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
11307 }
11308 }
11309 else if (offset)
11310 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
11311 else
11312 putc ('0', file);
11313
11314 if (index)
11315 {
11316 putc ('+', file);
11317 print_reg (index, 0, file);
11318 if (scale != 1)
11319 fprintf (file, "*%d", scale);
11320 }
11321 putc (']', file);
11322 }
11323 }
11324 }
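/* For example, a base+index*scale+disp address is printed by the code above
   as "disp(%base,%index,scale)" in AT&T syntax and as
   "[base+index*scale+disp]" in Intel syntax.  */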
11325
11326 bool
11327 output_addr_const_extra (FILE *file, rtx x)
11328 {
11329 rtx op;
11330
11331 if (GET_CODE (x) != UNSPEC)
11332 return false;
11333
11334 op = XVECEXP (x, 0, 0);
11335 switch (XINT (x, 1))
11336 {
11337 case UNSPEC_GOTTPOFF:
11338 output_addr_const (file, op);
11339 /* FIXME: This might be @TPOFF in Sun ld. */
11340 fputs ("@GOTTPOFF", file);
11341 break;
11342 case UNSPEC_TPOFF:
11343 output_addr_const (file, op);
11344 fputs ("@TPOFF", file);
11345 break;
11346 case UNSPEC_NTPOFF:
11347 output_addr_const (file, op);
11348 if (TARGET_64BIT)
11349 fputs ("@TPOFF", file);
11350 else
11351 fputs ("@NTPOFF", file);
11352 break;
11353 case UNSPEC_DTPOFF:
11354 output_addr_const (file, op);
11355 fputs ("@DTPOFF", file);
11356 break;
11357 case UNSPEC_GOTNTPOFF:
11358 output_addr_const (file, op);
11359 if (TARGET_64BIT)
11360 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
11361 "@GOTTPOFF(%rip)" : "@GOTTPOFF[rip]", file);
11362 else
11363 fputs ("@GOTNTPOFF", file);
11364 break;
11365 case UNSPEC_INDNTPOFF:
11366 output_addr_const (file, op);
11367 fputs ("@INDNTPOFF", file);
11368 break;
11369 #if TARGET_MACHO
11370 case UNSPEC_MACHOPIC_OFFSET:
11371 output_addr_const (file, op);
11372 putc ('-', file);
11373 machopic_output_function_base_name (file);
11374 break;
11375 #endif
11376
11377 default:
11378 return false;
11379 }
11380
11381 return true;
11382 }
11383 \f
11384 /* Split one or more DImode RTL references into pairs of SImode
11385 references. The RTL can be REG, offsettable MEM, integer constant, or
11386 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
11387 split and "num" is its length. lo_half and hi_half are output arrays
11388 that parallel "operands". */
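/* For example, a DImode MEM at address A is split into SImode MEMs at A and
   A+4 via adjust_address, while a DImode register value is split with
   simplify_gen_subreg into its low and high SImode words (byte offsets 0
   and 4).  */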
11389
11390 void
11391 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
11392 {
11393 while (num--)
11394 {
11395 rtx op = operands[num];
11396
11397 /* simplify_subreg refuses to split volatile memory references,
11398 but we still have to handle them. */
11399 if (MEM_P (op))
11400 {
11401 lo_half[num] = adjust_address (op, SImode, 0);
11402 hi_half[num] = adjust_address (op, SImode, 4);
11403 }
11404 else
11405 {
11406 lo_half[num] = simplify_gen_subreg (SImode, op,
11407 GET_MODE (op) == VOIDmode
11408 ? DImode : GET_MODE (op), 0);
11409 hi_half[num] = simplify_gen_subreg (SImode, op,
11410 GET_MODE (op) == VOIDmode
11411 ? DImode : GET_MODE (op), 4);
11412 }
11413 }
11414 }
11415 /* Split one or more TImode RTL references into pairs of DImode
11416 references. The RTL can be REG, offsettable MEM, integer constant, or
11417 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
11418 split and "num" is its length. lo_half and hi_half are output arrays
11419 that parallel "operands". */
11420
11421 void
11422 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
11423 {
11424 while (num--)
11425 {
11426 rtx op = operands[num];
11427
11428 /* simplify_subreg refuses to split volatile memory references, but we
11429 still have to handle them. */
11430 if (MEM_P (op))
11431 {
11432 lo_half[num] = adjust_address (op, DImode, 0);
11433 hi_half[num] = adjust_address (op, DImode, 8);
11434 }
11435 else
11436 {
11437 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
11438 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
11439 }
11440 }
11441 }
11442 \f
11443 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
11444 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
11445 is the expression of the binary operation. The output may either be
11446 emitted here, or returned to the caller, like all output_* functions.
11447
11448 There is no guarantee that the operands are the same mode, as they
11449 might be within FLOAT or FLOAT_EXTEND expressions. */
11450
11451 #ifndef SYSV386_COMPAT
11452 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
11453 wants to fix the assemblers because that causes incompatibility
11454 with gcc. No-one wants to fix gcc because that causes
11455 incompatibility with assemblers... You can use the option of
11456 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
11457 #define SYSV386_COMPAT 1
11458 #endif
11459
11460 const char *
11461 output_387_binary_op (rtx insn, rtx *operands)
11462 {
11463 static char buf[40];
11464 const char *p;
11465 const char *ssep;
11466 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
11467
11468 #ifdef ENABLE_CHECKING
11469 /* Even if we do not want to check the inputs, this documents the input
11470 constraints, which helps in understanding the following code. */
11471 if (STACK_REG_P (operands[0])
11472 && ((REG_P (operands[1])
11473 && REGNO (operands[0]) == REGNO (operands[1])
11474 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
11475 || (REG_P (operands[2])
11476 && REGNO (operands[0]) == REGNO (operands[2])
11477 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
11478 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
11479 ; /* ok */
11480 else
11481 gcc_assert (is_sse);
11482 #endif
11483
11484 switch (GET_CODE (operands[3]))
11485 {
11486 case PLUS:
11487 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
11488 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
11489 p = "fiadd";
11490 else
11491 p = "fadd";
11492 ssep = "vadd";
11493 break;
11494
11495 case MINUS:
11496 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
11497 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
11498 p = "fisub";
11499 else
11500 p = "fsub";
11501 ssep = "vsub";
11502 break;
11503
11504 case MULT:
11505 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
11506 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
11507 p = "fimul";
11508 else
11509 p = "fmul";
11510 ssep = "vmul";
11511 break;
11512
11513 case DIV:
11514 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
11515 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
11516 p = "fidiv";
11517 else
11518 p = "fdiv";
11519 ssep = "vdiv";
11520 break;
11521
11522 default:
11523 gcc_unreachable ();
11524 }
11525
11526 if (is_sse)
11527 {
11528 if (TARGET_AVX)
11529 {
11530 strcpy (buf, ssep);
11531 if (GET_MODE (operands[0]) == SFmode)
11532 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
11533 else
11534 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
11535 }
11536 else
11537 {
11538 strcpy (buf, ssep + 1);
11539 if (GET_MODE (operands[0]) == SFmode)
11540 strcat (buf, "ss\t{%2, %0|%0, %2}");
11541 else
11542 strcat (buf, "sd\t{%2, %0|%0, %2}");
11543 }
11544 return buf;
11545 }
11546 strcpy (buf, p);
11547
11548 switch (GET_CODE (operands[3]))
11549 {
11550 case MULT:
11551 case PLUS:
11552 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
11553 {
11554 rtx temp = operands[2];
11555 operands[2] = operands[1];
11556 operands[1] = temp;
11557 }
11558
11559 /* We know operands[0] == operands[1]. */
11560
11561 if (MEM_P (operands[2]))
11562 {
11563 p = "%z2\t%2";
11564 break;
11565 }
11566
11567 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
11568 {
11569 if (STACK_TOP_P (operands[0]))
11570 /* How is it that we are storing to a dead operand[2]?
11571 Well, presumably operands[1] is dead too. We can't
11572 store the result to st(0) as st(0) gets popped on this
11573 instruction. Instead store to operands[2] (which I
11574 think has to be st(1)). st(1) will be popped later.
11575 gcc <= 2.8.1 didn't have this check and generated
11576 assembly code that the Unixware assembler rejected. */
11577 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
11578 else
11579 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
11580 break;
11581 }
11582
11583 if (STACK_TOP_P (operands[0]))
11584 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
11585 else
11586 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
11587 break;
11588
11589 case MINUS:
11590 case DIV:
11591 if (MEM_P (operands[1]))
11592 {
11593 p = "r%z1\t%1";
11594 break;
11595 }
11596
11597 if (MEM_P (operands[2]))
11598 {
11599 p = "%z2\t%2";
11600 break;
11601 }
11602
11603 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
11604 {
11605 #if SYSV386_COMPAT
11606 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
11607 derived assemblers, confusingly reverse the direction of
11608 the operation for fsub{r} and fdiv{r} when the
11609 destination register is not st(0). The Intel assembler
11610 doesn't have this brain damage. Read !SYSV386_COMPAT to
11611 figure out what the hardware really does. */
11612 if (STACK_TOP_P (operands[0]))
11613 p = "{p\t%0, %2|rp\t%2, %0}";
11614 else
11615 p = "{rp\t%2, %0|p\t%0, %2}";
11616 #else
11617 if (STACK_TOP_P (operands[0]))
11618 /* As above for fmul/fadd, we can't store to st(0). */
11619 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
11620 else
11621 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
11622 #endif
11623 break;
11624 }
11625
11626 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
11627 {
11628 #if SYSV386_COMPAT
11629 if (STACK_TOP_P (operands[0]))
11630 p = "{rp\t%0, %1|p\t%1, %0}";
11631 else
11632 p = "{p\t%1, %0|rp\t%0, %1}";
11633 #else
11634 if (STACK_TOP_P (operands[0]))
11635 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
11636 else
11637 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
11638 #endif
11639 break;
11640 }
11641
11642 if (STACK_TOP_P (operands[0]))
11643 {
11644 if (STACK_TOP_P (operands[1]))
11645 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
11646 else
11647 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
11648 break;
11649 }
11650 else if (STACK_TOP_P (operands[1]))
11651 {
11652 #if SYSV386_COMPAT
11653 p = "{\t%1, %0|r\t%0, %1}";
11654 #else
11655 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
11656 #endif
11657 }
11658 else
11659 {
11660 #if SYSV386_COMPAT
11661 p = "{r\t%2, %0|\t%0, %2}";
11662 #else
11663 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
11664 #endif
11665 }
11666 break;
11667
11668 default:
11669 gcc_unreachable ();
11670 }
11671
11672 strcat (buf, p);
11673 return buf;
11674 }
11675
11676 /* Return needed mode for entity in optimize_mode_switching pass. */
11677
11678 int
11679 ix86_mode_needed (int entity, rtx insn)
11680 {
11681 enum attr_i387_cw mode;
11682
11683 /* The mode UNINITIALIZED is used to store the control word after a
11684 function call or ASM pattern. The mode ANY specifies that the function
11685 has no requirements on the control word and makes no changes in the
11686 bits we are interested in. */
11687
11688 if (CALL_P (insn)
11689 || (NONJUMP_INSN_P (insn)
11690 && (asm_noperands (PATTERN (insn)) >= 0
11691 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
11692 return I387_CW_UNINITIALIZED;
11693
11694 if (recog_memoized (insn) < 0)
11695 return I387_CW_ANY;
11696
11697 mode = get_attr_i387_cw (insn);
11698
11699 switch (entity)
11700 {
11701 case I387_TRUNC:
11702 if (mode == I387_CW_TRUNC)
11703 return mode;
11704 break;
11705
11706 case I387_FLOOR:
11707 if (mode == I387_CW_FLOOR)
11708 return mode;
11709 break;
11710
11711 case I387_CEIL:
11712 if (mode == I387_CW_CEIL)
11713 return mode;
11714 break;
11715
11716 case I387_MASK_PM:
11717 if (mode == I387_CW_MASK_PM)
11718 return mode;
11719 break;
11720
11721 default:
11722 gcc_unreachable ();
11723 }
11724
11725 return I387_CW_ANY;
11726 }
11727
11728 /* Output code to initialize control word copies used by trunc?f?i and
11729 rounding patterns. CURRENT_MODE is set to current control word,
11730 while NEW_MODE is set to new control word. */
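/* In the x87 control word, bits 10-11 form the rounding-control field
   (00 = to nearest, 01 = down/floor, 10 = up/ceil, 11 = toward zero/truncate)
   and bit 5 masks the precision exception; this is why the code below ORs in
   0x0400, 0x0800, 0x0c00 or 0x0020 respectively.  */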
11731
11732 void
11733 emit_i387_cw_initialization (int mode)
11734 {
11735 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
11736 rtx new_mode;
11737
11738 enum ix86_stack_slot slot;
11739
11740 rtx reg = gen_reg_rtx (HImode);
11741
11742 emit_insn (gen_x86_fnstcw_1 (stored_mode));
11743 emit_move_insn (reg, copy_rtx (stored_mode));
11744
11745 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
11746 || optimize_function_for_size_p (cfun))
11747 {
11748 switch (mode)
11749 {
11750 case I387_CW_TRUNC:
11751 /* round toward zero (truncate) */
11752 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
11753 slot = SLOT_CW_TRUNC;
11754 break;
11755
11756 case I387_CW_FLOOR:
11757 /* round down toward -oo */
11758 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
11759 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
11760 slot = SLOT_CW_FLOOR;
11761 break;
11762
11763 case I387_CW_CEIL:
11764 /* round up toward +oo */
11765 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
11766 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
11767 slot = SLOT_CW_CEIL;
11768 break;
11769
11770 case I387_CW_MASK_PM:
11771 /* mask precision exception for nearbyint() */
11772 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
11773 slot = SLOT_CW_MASK_PM;
11774 break;
11775
11776 default:
11777 gcc_unreachable ();
11778 }
11779 }
11780 else
11781 {
11782 switch (mode)
11783 {
11784 case I387_CW_TRUNC:
11785 /* round toward zero (truncate) */
11786 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
11787 slot = SLOT_CW_TRUNC;
11788 break;
11789
11790 case I387_CW_FLOOR:
11791 /* round down toward -oo */
11792 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
11793 slot = SLOT_CW_FLOOR;
11794 break;
11795
11796 case I387_CW_CEIL:
11797 /* round up toward +oo */
11798 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
11799 slot = SLOT_CW_CEIL;
11800 break;
11801
11802 case I387_CW_MASK_PM:
11803 /* mask precision exception for nearbyint() */
11804 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
11805 slot = SLOT_CW_MASK_PM;
11806 break;
11807
11808 default:
11809 gcc_unreachable ();
11810 }
11811 }
11812
11813 gcc_assert (slot < MAX_386_STACK_LOCALS);
11814
11815 new_mode = assign_386_stack_local (HImode, slot);
11816 emit_move_insn (new_mode, reg);
11817 }
11818
11819 /* Output code for INSN to convert a float to a signed int. OPERANDS
11820 are the insn operands. The output may be [HSD]Imode and the input
11821 operand may be [SDX]Fmode. */
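/* For example, for a DImode destination with a live stack top and a
   non-default rounding mode, the emitted sequence is roughly:
	fld	%st(0)
	fldcw	new-control-word
	fistpq	mem		(or "fistpll" with older assemblers)
	fldcw	saved-control-word
   (illustrative only; the exact suffix comes from the 'z' operand code).  */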
11822
11823 const char *
11824 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
11825 {
11826 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
11827 int dimode_p = GET_MODE (operands[0]) == DImode;
11828 int round_mode = get_attr_i387_cw (insn);
11829
11830 /* Jump through a hoop or two for DImode, since the hardware has no
11831 non-popping instruction. We used to do this a different way, but
11832 that was somewhat fragile and broke with post-reload splitters. */
11833 if ((dimode_p || fisttp) && !stack_top_dies)
11834 output_asm_insn ("fld\t%y1", operands);
11835
11836 gcc_assert (STACK_TOP_P (operands[1]));
11837 gcc_assert (MEM_P (operands[0]));
11838 gcc_assert (GET_MODE (operands[1]) != TFmode);
11839
11840 if (fisttp)
11841 output_asm_insn ("fisttp%z0\t%0", operands);
11842 else
11843 {
11844 if (round_mode != I387_CW_ANY)
11845 output_asm_insn ("fldcw\t%3", operands);
11846 if (stack_top_dies || dimode_p)
11847 output_asm_insn ("fistp%z0\t%0", operands);
11848 else
11849 output_asm_insn ("fist%z0\t%0", operands);
11850 if (round_mode != I387_CW_ANY)
11851 output_asm_insn ("fldcw\t%2", operands);
11852 }
11853
11854 return "";
11855 }
11856
11857 /* Output code for x87 ffreep insn. The OPNO argument, which may only
11858 have the values zero or one, indicates the ffreep insn's operand
11859 from the OPERANDS array. */
11860
11861 static const char *
11862 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
11863 {
11864 if (TARGET_USE_FFREEP)
11865 #if HAVE_AS_IX86_FFREEP
11866 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
11867 #else
11868 {
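      /* ffreep %st(N) encodes as the two bytes 0xdf 0xc0+N.  The ".word"
	 directive built below emits the value 0xcNdf, whose little-endian
	 byte order is exactly that encoding, for assemblers that do not
	 know the ffreep mnemonic.  */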
11869 static char retval[] = ".word\t0xc_df";
11870 int regno = REGNO (operands[opno]);
11871
11872 gcc_assert (FP_REGNO_P (regno));
11873
11874 retval[9] = '0' + (regno - FIRST_STACK_REG);
11875 return retval;
11876 }
11877 #endif
11878
11879 return opno ? "fstp\t%y1" : "fstp\t%y0";
11880 }
11881
11882
11883 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
11884 should be used. UNORDERED_P is true when fucom should be used. */
11885
11886 const char *
11887 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
11888 {
11889 int stack_top_dies;
11890 rtx cmp_op0, cmp_op1;
11891 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
11892
11893 if (eflags_p)
11894 {
11895 cmp_op0 = operands[0];
11896 cmp_op1 = operands[1];
11897 }
11898 else
11899 {
11900 cmp_op0 = operands[1];
11901 cmp_op1 = operands[2];
11902 }
11903
11904 if (is_sse)
11905 {
11906 static const char ucomiss[] = "vucomiss\t{%1, %0|%0, %1}";
11907 static const char ucomisd[] = "vucomisd\t{%1, %0|%0, %1}";
11908 static const char comiss[] = "vcomiss\t{%1, %0|%0, %1}";
11909 static const char comisd[] = "vcomisd\t{%1, %0|%0, %1}";
11910
11911 if (GET_MODE (operands[0]) == SFmode)
11912 if (unordered_p)
11913 return &ucomiss[TARGET_AVX ? 0 : 1];
11914 else
11915 return &comiss[TARGET_AVX ? 0 : 1];
11916 else
11917 if (unordered_p)
11918 return &ucomisd[TARGET_AVX ? 0 : 1];
11919 else
11920 return &comisd[TARGET_AVX ? 0 : 1];
11921 }
11922
11923 gcc_assert (STACK_TOP_P (cmp_op0));
11924
11925 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
11926
11927 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
11928 {
11929 if (stack_top_dies)
11930 {
11931 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
11932 return output_387_ffreep (operands, 1);
11933 }
11934 else
11935 return "ftst\n\tfnstsw\t%0";
11936 }
11937
11938 if (STACK_REG_P (cmp_op1)
11939 && stack_top_dies
11940 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
11941 && REGNO (cmp_op1) != FIRST_STACK_REG)
11942 {
11943 /* If the top of the 387 stack dies, and the other operand is
11944 also a stack register that dies, then this must be a
11945 `fcompp' float compare. */
11946
11947 if (eflags_p)
11948 {
11949 /* There is no double popping fcomi variant. Fortunately,
11950 eflags is immune from the fstp's cc clobbering. */
11951 if (unordered_p)
11952 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
11953 else
11954 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
11955 return output_387_ffreep (operands, 0);
11956 }
11957 else
11958 {
11959 if (unordered_p)
11960 return "fucompp\n\tfnstsw\t%0";
11961 else
11962 return "fcompp\n\tfnstsw\t%0";
11963 }
11964 }
11965 else
11966 {
11967 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
11968
11969 static const char * const alt[16] =
11970 {
11971 "fcom%z2\t%y2\n\tfnstsw\t%0",
11972 "fcomp%z2\t%y2\n\tfnstsw\t%0",
11973 "fucom%z2\t%y2\n\tfnstsw\t%0",
11974 "fucomp%z2\t%y2\n\tfnstsw\t%0",
11975
11976 "ficom%z2\t%y2\n\tfnstsw\t%0",
11977 "ficomp%z2\t%y2\n\tfnstsw\t%0",
11978 NULL,
11979 NULL,
11980
11981 "fcomi\t{%y1, %0|%0, %y1}",
11982 "fcomip\t{%y1, %0|%0, %y1}",
11983 "fucomi\t{%y1, %0|%0, %y1}",
11984 "fucomip\t{%y1, %0|%0, %y1}",
11985
11986 NULL,
11987 NULL,
11988 NULL,
11989 NULL
11990 };
11991
11992 int mask;
11993 const char *ret;
11994
11995 mask = eflags_p << 3;
11996 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
11997 mask |= unordered_p << 1;
11998 mask |= stack_top_dies;
11999
12000 gcc_assert (mask < 16);
12001 ret = alt[mask];
12002 gcc_assert (ret);
12003
12004 return ret;
12005 }
12006 }
12007
12008 void
12009 ix86_output_addr_vec_elt (FILE *file, int value)
12010 {
12011 const char *directive = ASM_LONG;
12012
12013 #ifdef ASM_QUAD
12014 if (TARGET_64BIT)
12015 directive = ASM_QUAD;
12016 #else
12017 gcc_assert (!TARGET_64BIT);
12018 #endif
12019
12020 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
12021 }
12022
12023 void
12024 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
12025 {
12026 const char *directive = ASM_LONG;
12027
12028 #ifdef ASM_QUAD
12029 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
12030 directive = ASM_QUAD;
12031 #else
12032 gcc_assert (!TARGET_64BIT);
12033 #endif
12034 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
12035 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
12036 fprintf (file, "%s%s%d-%s%d\n",
12037 directive, LPREFIX, value, LPREFIX, rel);
12038 else if (HAVE_AS_GOTOFF_IN_DATA)
12039 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
12040 #if TARGET_MACHO
12041 else if (TARGET_MACHO)
12042 {
12043 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
12044 machopic_output_function_base_name (file);
12045 fprintf(file, "\n");
12046 }
12047 #endif
12048 else
12049 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
12050 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
12051 }
12052 \f
12053 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
12054 for the target. */
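/* "xor reg, reg" is shorter (2 bytes for a 32-bit register versus 5 for
   "mov $0, reg") but clobbers the flags, hence the CLOBBER attached below
   when the xor form is chosen.  */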
12055
12056 void
12057 ix86_expand_clear (rtx dest)
12058 {
12059 rtx tmp;
12060
12061 /* We play register width games, which are only valid after reload. */
12062 gcc_assert (reload_completed);
12063
12064 /* Avoid HImode and its attendant prefix byte. */
12065 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
12066 dest = gen_rtx_REG (SImode, REGNO (dest));
12067 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
12068
12069 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
12070 if (reload_completed && (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ()))
12071 {
12072 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
12073 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
12074 }
12075
12076 emit_insn (tmp);
12077 }
12078
12079 /* X is an unchanging MEM. If it is a constant pool reference, return
12080 the constant pool rtx, else NULL. */
12081
12082 rtx
12083 maybe_get_pool_constant (rtx x)
12084 {
12085 x = ix86_delegitimize_address (XEXP (x, 0));
12086
12087 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
12088 return get_pool_constant (x);
12089
12090 return NULL_RTX;
12091 }
12092
12093 void
12094 ix86_expand_move (enum machine_mode mode, rtx operands[])
12095 {
12096 rtx op0, op1;
12097 enum tls_model model;
12098
12099 op0 = operands[0];
12100 op1 = operands[1];
12101
12102 if (GET_CODE (op1) == SYMBOL_REF)
12103 {
12104 model = SYMBOL_REF_TLS_MODEL (op1);
12105 if (model)
12106 {
12107 op1 = legitimize_tls_address (op1, model, true);
12108 op1 = force_operand (op1, op0);
12109 if (op1 == op0)
12110 return;
12111 }
12112 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
12113 && SYMBOL_REF_DLLIMPORT_P (op1))
12114 op1 = legitimize_dllimport_symbol (op1, false);
12115 }
12116 else if (GET_CODE (op1) == CONST
12117 && GET_CODE (XEXP (op1, 0)) == PLUS
12118 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
12119 {
12120 rtx addend = XEXP (XEXP (op1, 0), 1);
12121 rtx symbol = XEXP (XEXP (op1, 0), 0);
12122 rtx tmp = NULL;
12123
12124 model = SYMBOL_REF_TLS_MODEL (symbol);
12125 if (model)
12126 tmp = legitimize_tls_address (symbol, model, true);
12127 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
12128 && SYMBOL_REF_DLLIMPORT_P (symbol))
12129 tmp = legitimize_dllimport_symbol (symbol, true);
12130
12131 if (tmp)
12132 {
12133 tmp = force_operand (tmp, NULL);
12134 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
12135 op0, 1, OPTAB_DIRECT);
12136 if (tmp == op0)
12137 return;
12138 }
12139 }
12140
12141 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
12142 {
12143 if (TARGET_MACHO && !TARGET_64BIT)
12144 {
12145 #if TARGET_MACHO
12146 if (MACHOPIC_PURE)
12147 {
12148 rtx temp = ((reload_in_progress
12149 || ((op0 && REG_P (op0))
12150 && mode == Pmode))
12151 ? op0 : gen_reg_rtx (Pmode));
12152 op1 = machopic_indirect_data_reference (op1, temp);
12153 op1 = machopic_legitimize_pic_address (op1, mode,
12154 temp == op1 ? 0 : temp);
12155 }
12156 else if (MACHOPIC_INDIRECT)
12157 op1 = machopic_indirect_data_reference (op1, 0);
12158 if (op0 == op1)
12159 return;
12160 #endif
12161 }
12162 else
12163 {
12164 if (MEM_P (op0))
12165 op1 = force_reg (Pmode, op1);
12166 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
12167 {
12168 rtx reg = !can_create_pseudo_p () ? op0 : NULL_RTX;
12169 op1 = legitimize_pic_address (op1, reg);
12170 if (op0 == op1)
12171 return;
12172 }
12173 }
12174 }
12175 else
12176 {
12177 if (MEM_P (op0)
12178 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
12179 || !push_operand (op0, mode))
12180 && MEM_P (op1))
12181 op1 = force_reg (mode, op1);
12182
12183 if (push_operand (op0, mode)
12184 && ! general_no_elim_operand (op1, mode))
12185 op1 = copy_to_mode_reg (mode, op1);
12186
12187 /* Force large constants in 64-bit compilation into a register
12188 to get them CSEed. */
12189 if (can_create_pseudo_p ()
12190 && (mode == DImode) && TARGET_64BIT
12191 && immediate_operand (op1, mode)
12192 && !x86_64_zext_immediate_operand (op1, VOIDmode)
12193 && !register_operand (op0, mode)
12194 && optimize)
12195 op1 = copy_to_mode_reg (mode, op1);
12196
12197 if (can_create_pseudo_p ()
12198 && FLOAT_MODE_P (mode)
12199 && GET_CODE (op1) == CONST_DOUBLE)
12200 {
12201 /* If we are loading a floating point constant to a register,
12202 force the value to memory now, since we'll get better code
12203 out the back end. */
12204
12205 op1 = validize_mem (force_const_mem (mode, op1));
12206 if (!register_operand (op0, mode))
12207 {
12208 rtx temp = gen_reg_rtx (mode);
12209 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
12210 emit_move_insn (op0, temp);
12211 return;
12212 }
12213 }
12214 }
12215
12216 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
12217 }
12218
12219 void
12220 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
12221 {
12222 rtx op0 = operands[0], op1 = operands[1];
12223 unsigned int align = GET_MODE_ALIGNMENT (mode);
12224
12225 /* Force constants other than zero into memory. We do not know how
12226 the instructions used to build constants modify the upper 64 bits
12227 of the register; once we have that information we may be able
12228 to handle some of them more efficiently. */
12229 if (can_create_pseudo_p ()
12230 && register_operand (op0, mode)
12231 && (CONSTANT_P (op1)
12232 || (GET_CODE (op1) == SUBREG
12233 && CONSTANT_P (SUBREG_REG (op1))))
12234 && standard_sse_constant_p (op1) <= 0)
12235 op1 = validize_mem (force_const_mem (mode, op1));
12236
12237 /* We need to check memory alignment for SSE mode since attributes
12238 can make operands unaligned. */
12239 if (can_create_pseudo_p ()
12240 && SSE_REG_MODE_P (mode)
12241 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
12242 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
12243 {
12244 rtx tmp[2];
12245
12246 /* ix86_expand_vector_move_misalign() does not like constants ... */
12247 if (CONSTANT_P (op1)
12248 || (GET_CODE (op1) == SUBREG
12249 && CONSTANT_P (SUBREG_REG (op1))))
12250 op1 = validize_mem (force_const_mem (mode, op1));
12251
12252 /* ... nor both arguments in memory. */
12253 if (!register_operand (op0, mode)
12254 && !register_operand (op1, mode))
12255 op1 = force_reg (mode, op1);
12256
12257 tmp[0] = op0; tmp[1] = op1;
12258 ix86_expand_vector_move_misalign (mode, tmp);
12259 return;
12260 }
12261
12262 /* Make operand1 a register if it isn't already. */
12263 if (can_create_pseudo_p ()
12264 && !register_operand (op0, mode)
12265 && !register_operand (op1, mode))
12266 {
12267 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
12268 return;
12269 }
12270
12271 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
12272 }
12273
12274 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
12275 straight to ix86_expand_vector_move. */
12276 /* Code generation for scalar reg-reg moves of single and double precision data:
12277 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
12278 movaps reg, reg
12279 else
12280 movss reg, reg
12281 if (x86_sse_partial_reg_dependency == true)
12282 movapd reg, reg
12283 else
12284 movsd reg, reg
12285
12286 Code generation for scalar loads of double precision data:
12287 if (x86_sse_split_regs == true)
12288 movlpd mem, reg (gas syntax)
12289 else
12290 movsd mem, reg
12291
12292 Code generation for unaligned packed loads of single precision data
12293 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
12294 if (x86_sse_unaligned_move_optimal)
12295 movups mem, reg
12296
12297 if (x86_sse_partial_reg_dependency == true)
12298 {
12299 xorps reg, reg
12300 movlps mem, reg
12301 movhps mem+8, reg
12302 }
12303 else
12304 {
12305 movlps mem, reg
12306 movhps mem+8, reg
12307 }
12308
12309 Code generation for unaligned packed loads of double precision data
12310 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
12311 if (x86_sse_unaligned_move_optimal)
12312 movupd mem, reg
12313
12314 if (x86_sse_split_regs == true)
12315 {
12316 movlpd mem, reg
12317 movhpd mem+8, reg
12318 }
12319 else
12320 {
12321 movsd mem, reg
12322 movhpd mem+8, reg
12323 }
12324 */
12325
12326 void
12327 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
12328 {
12329 rtx op0, op1, m;
12330
12331 op0 = operands[0];
12332 op1 = operands[1];
12333
12334 if (TARGET_AVX)
12335 {
12336 switch (GET_MODE_CLASS (mode))
12337 {
12338 case MODE_VECTOR_INT:
12339 case MODE_INT:
12340 switch (GET_MODE_SIZE (mode))
12341 {
12342 case 16:
12343 op0 = gen_lowpart (V16QImode, op0);
12344 op1 = gen_lowpart (V16QImode, op1);
12345 emit_insn (gen_avx_movdqu (op0, op1));
12346 break;
12347 case 32:
12348 op0 = gen_lowpart (V32QImode, op0);
12349 op1 = gen_lowpart (V32QImode, op1);
12350 emit_insn (gen_avx_movdqu256 (op0, op1));
12351 break;
12352 default:
12353 gcc_unreachable ();
12354 }
12355 break;
12356 case MODE_VECTOR_FLOAT:
12357 op0 = gen_lowpart (mode, op0);
12358 op1 = gen_lowpart (mode, op1);
12359
12360 switch (mode)
12361 {
12362 case V4SFmode:
12363 emit_insn (gen_avx_movups (op0, op1));
12364 break;
12365 case V8SFmode:
12366 emit_insn (gen_avx_movups256 (op0, op1));
12367 break;
12368 case V2DFmode:
12369 emit_insn (gen_avx_movupd (op0, op1));
12370 break;
12371 case V4DFmode:
12372 emit_insn (gen_avx_movupd256 (op0, op1));
12373 break;
12374 default:
12375 gcc_unreachable ();
12376 }
12377 break;
12378
12379 default:
12380 gcc_unreachable ();
12381 }
12382
12383 return;
12384 }
12385
12386 if (MEM_P (op1))
12387 {
12388 /* If we're optimizing for size, movups is the smallest. */
12389 if (optimize_insn_for_size_p ())
12390 {
12391 op0 = gen_lowpart (V4SFmode, op0);
12392 op1 = gen_lowpart (V4SFmode, op1);
12393 emit_insn (gen_sse_movups (op0, op1));
12394 return;
12395 }
12396
12397 /* ??? If we have typed data, then it would appear that using
12398 movdqu is the only way to get unaligned data loaded with
12399 integer type. */
12400 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
12401 {
12402 op0 = gen_lowpart (V16QImode, op0);
12403 op1 = gen_lowpart (V16QImode, op1);
12404 emit_insn (gen_sse2_movdqu (op0, op1));
12405 return;
12406 }
12407
12408 if (TARGET_SSE2 && mode == V2DFmode)
12409 {
12410 rtx zero;
12411
12412 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
12413 {
12414 op0 = gen_lowpart (V2DFmode, op0);
12415 op1 = gen_lowpart (V2DFmode, op1);
12416 emit_insn (gen_sse2_movupd (op0, op1));
12417 return;
12418 }
12419
12420 /* When SSE registers are split into halves, we can avoid
12421 writing to the top half twice. */
12422 if (TARGET_SSE_SPLIT_REGS)
12423 {
12424 emit_clobber (op0);
12425 zero = op0;
12426 }
12427 else
12428 {
12429 /* ??? Not sure about the best option for the Intel chips.
12430 The following would seem to satisfy; the register is
12431 entirely cleared, breaking the dependency chain. We
12432 then store to the upper half, with a dependency depth
12433 of one. A rumor has it that Intel recommends two movsd
12434 followed by an unpacklpd, but this is unconfirmed. And
12435 given that the dependency depth of the unpacklpd would
12436 still be one, I'm not sure why this would be better. */
12437 zero = CONST0_RTX (V2DFmode);
12438 }
12439
12440 m = adjust_address (op1, DFmode, 0);
12441 emit_insn (gen_sse2_loadlpd (op0, zero, m));
12442 m = adjust_address (op1, DFmode, 8);
12443 emit_insn (gen_sse2_loadhpd (op0, op0, m));
12444 }
12445 else
12446 {
12447 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
12448 {
12449 op0 = gen_lowpart (V4SFmode, op0);
12450 op1 = gen_lowpart (V4SFmode, op1);
12451 emit_insn (gen_sse_movups (op0, op1));
12452 return;
12453 }
12454
12455 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
12456 emit_move_insn (op0, CONST0_RTX (mode));
12457 else
12458 emit_clobber (op0);
12459
12460 if (mode != V4SFmode)
12461 op0 = gen_lowpart (V4SFmode, op0);
12462 m = adjust_address (op1, V2SFmode, 0);
12463 emit_insn (gen_sse_loadlps (op0, op0, m));
12464 m = adjust_address (op1, V2SFmode, 8);
12465 emit_insn (gen_sse_loadhps (op0, op0, m));
12466 }
12467 }
12468 else if (MEM_P (op0))
12469 {
12470 /* If we're optimizing for size, movups is the smallest. */
12471 if (optimize_insn_for_size_p ())
12472 {
12473 op0 = gen_lowpart (V4SFmode, op0);
12474 op1 = gen_lowpart (V4SFmode, op1);
12475 emit_insn (gen_sse_movups (op0, op1));
12476 return;
12477 }
12478
12479 /* ??? Similar to above, only less clear because of quote
12480 typeless stores unquote. */
12481 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
12482 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
12483 {
12484 op0 = gen_lowpart (V16QImode, op0);
12485 op1 = gen_lowpart (V16QImode, op1);
12486 emit_insn (gen_sse2_movdqu (op0, op1));
12487 return;
12488 }
12489
12490 if (TARGET_SSE2 && mode == V2DFmode)
12491 {
12492 m = adjust_address (op0, DFmode, 0);
12493 emit_insn (gen_sse2_storelpd (m, op1));
12494 m = adjust_address (op0, DFmode, 8);
12495 emit_insn (gen_sse2_storehpd (m, op1));
12496 }
12497 else
12498 {
12499 if (mode != V4SFmode)
12500 op1 = gen_lowpart (V4SFmode, op1);
12501 m = adjust_address (op0, V2SFmode, 0);
12502 emit_insn (gen_sse_storelps (m, op1));
12503 m = adjust_address (op0, V2SFmode, 8);
12504 emit_insn (gen_sse_storehps (m, op1));
12505 }
12506 }
12507 else
12508 gcc_unreachable ();
12509 }
12510
12511 /* Expand a push in MODE. This is some mode for which we do not support
12512 proper push instructions, at least from the registers that we expect
12513 the value to live in. */
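/* That is, first decrement the stack pointer by GET_MODE_SIZE (mode), then
   store X with an ordinary move into the memory now at the top of the stack
   (roughly "sub $16, %esp" followed by a vector store for a 16-byte mode;
   the exact store insn depends on the mode and operand).  */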
12514
12515 void
12516 ix86_expand_push (enum machine_mode mode, rtx x)
12517 {
12518 rtx tmp;
12519
12520 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
12521 GEN_INT (-GET_MODE_SIZE (mode)),
12522 stack_pointer_rtx, 1, OPTAB_DIRECT);
12523 if (tmp != stack_pointer_rtx)
12524 emit_move_insn (stack_pointer_rtx, tmp);
12525
12526 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
12527 emit_move_insn (tmp, x);
12528 }
12529
12530 /* Helper function of ix86_fixup_binary_operands to canonicalize
12531 operand order. Returns true if the operands should be swapped. */
12532
12533 static bool
12534 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
12535 rtx operands[])
12536 {
12537 rtx dst = operands[0];
12538 rtx src1 = operands[1];
12539 rtx src2 = operands[2];
12540
12541 /* If the operation is not commutative, we can't do anything. */
12542 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
12543 return false;
12544
12545 /* Highest priority is that src1 should match dst. */
12546 if (rtx_equal_p (dst, src1))
12547 return false;
12548 if (rtx_equal_p (dst, src2))
12549 return true;
12550
12551 /* Next highest priority is that immediate constants come second. */
12552 if (immediate_operand (src2, mode))
12553 return false;
12554 if (immediate_operand (src1, mode))
12555 return true;
12556
12557 /* Lowest priority is that memory references should come second. */
12558 if (MEM_P (src2))
12559 return false;
12560 if (MEM_P (src1))
12561 return true;
12562
12563 return false;
12564 }
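/* For example, for a commutative (plus:SI (reg:SI 1) (reg:SI 0)) whose
   destination is (reg:SI 0), swapping the sources lets the two-address add
   use operand 0 as both destination and first source, avoiding an extra
   move.  */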
12565
12566
12567 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
12568 destination to use for the operation. If different from the true
12569 destination in operands[0], a copy operation will be required. */
12570
12571 rtx
12572 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
12573 rtx operands[])
12574 {
12575 rtx dst = operands[0];
12576 rtx src1 = operands[1];
12577 rtx src2 = operands[2];
12578
12579 /* Canonicalize operand order. */
12580 if (ix86_swap_binary_operands_p (code, mode, operands))
12581 {
12582 rtx temp;
12583
12584 /* It is invalid to swap operands of different modes. */
12585 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
12586
12587 temp = src1;
12588 src1 = src2;
12589 src2 = temp;
12590 }
12591
12592 /* Both source operands cannot be in memory. */
12593 if (MEM_P (src1) && MEM_P (src2))
12594 {
12595 /* Optimization: Only read from memory once. */
12596 if (rtx_equal_p (src1, src2))
12597 {
12598 src2 = force_reg (mode, src2);
12599 src1 = src2;
12600 }
12601 else
12602 src2 = force_reg (mode, src2);
12603 }
12604
12605 /* If the destination is memory, and we do not have matching source
12606 operands, do things in registers. */
12607 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
12608 dst = gen_reg_rtx (mode);
12609
12610 /* Source 1 cannot be a constant. */
12611 if (CONSTANT_P (src1))
12612 src1 = force_reg (mode, src1);
12613
12614 /* Source 1 cannot be a non-matching memory. */
12615 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
12616 src1 = force_reg (mode, src1);
12617
12618 operands[1] = src1;
12619 operands[2] = src2;
12620 return dst;
12621 }
12622
12623 /* Similarly, but assume that the destination has already been
12624 set up properly. */
12625
12626 void
12627 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
12628 enum machine_mode mode, rtx operands[])
12629 {
12630 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
12631 gcc_assert (dst == operands[0]);
12632 }
12633
12634 /* Attempt to expand a binary operator. Make the expansion closer to the
12635 actual machine, than just general_operand, which will allow 3 separate
12636 memory references (one output, two input) in a single insn. */
12637
12638 void
12639 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
12640 rtx operands[])
12641 {
12642 rtx src1, src2, dst, op, clob;
12643
12644 dst = ix86_fixup_binary_operands (code, mode, operands);
12645 src1 = operands[1];
12646 src2 = operands[2];
12647
12648 /* Emit the instruction. */
12649
12650 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
12651 if (reload_in_progress)
12652 {
12653 /* Reload doesn't know about the flags register, and doesn't know that
12654 it doesn't want to clobber it. We can only do this with PLUS. */
12655 gcc_assert (code == PLUS);
12656 emit_insn (op);
12657 }
12658 else
12659 {
12660 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
12661 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
12662 }
12663
12664 /* Fix up the destination if needed. */
12665 if (dst != operands[0])
12666 emit_move_insn (operands[0], dst);
12667 }
12668
12669 /* Return TRUE or FALSE depending on whether the binary operator meets the
12670 appropriate constraints. */
12671
12672 int
12673 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
12674 rtx operands[3])
12675 {
12676 rtx dst = operands[0];
12677 rtx src1 = operands[1];
12678 rtx src2 = operands[2];
12679
12680 /* Both source operands cannot be in memory. */
12681 if (MEM_P (src1) && MEM_P (src2))
12682 return 0;
12683
12684 /* Canonicalize operand order for commutative operators. */
12685 if (ix86_swap_binary_operands_p (code, mode, operands))
12686 {
12687 rtx temp = src1;
12688 src1 = src2;
12689 src2 = temp;
12690 }
12691
12692 /* If the destination is memory, we must have a matching source operand. */
12693 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
12694 return 0;
12695
12696 /* Source 1 cannot be a constant. */
12697 if (CONSTANT_P (src1))
12698 return 0;
12699
12700 /* Source 1 cannot be a non-matching memory. */
12701 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
12702 return 0;
12703
12704 return 1;
12705 }
12706
12707 /* Attempt to expand a unary operator. Make the expansion closer to the
12708 actual machine, than just general_operand, which will allow 2 separate
12709 memory references (one output, one input) in a single insn. */
12710
12711 void
12712 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
12713 rtx operands[])
12714 {
12715 int matching_memory;
12716 rtx src, dst, op, clob;
12717
12718 dst = operands[0];
12719 src = operands[1];
12720
12721 /* If the destination is memory, and we do not have matching source
12722 operands, do things in registers. */
12723 matching_memory = 0;
12724 if (MEM_P (dst))
12725 {
12726 if (rtx_equal_p (dst, src))
12727 matching_memory = 1;
12728 else
12729 dst = gen_reg_rtx (mode);
12730 }
12731
12732 /* When source operand is memory, destination must match. */
12733 if (MEM_P (src) && !matching_memory)
12734 src = force_reg (mode, src);
12735
12736 /* Emit the instruction. */
12737
12738 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
12739 if (reload_in_progress || code == NOT)
12740 {
12741 /* Reload doesn't know about the flags register, and doesn't know that
12742 it doesn't want to clobber it. */
12743 gcc_assert (code == NOT);
12744 emit_insn (op);
12745 }
12746 else
12747 {
12748 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
12749 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
12750 }
12751
12752 /* Fix up the destination if needed. */
12753 if (dst != operands[0])
12754 emit_move_insn (operands[0], dst);
12755 }
12756
12757 /* Return TRUE or FALSE depending on whether the unary operator meets the
12758 appropriate constraints. */
12759
12760 int
12761 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
12762 enum machine_mode mode ATTRIBUTE_UNUSED,
12763 rtx operands[2] ATTRIBUTE_UNUSED)
12764 {
12765 /* If one of operands is memory, source and destination must match. */
12766 if ((MEM_P (operands[0])
12767 || MEM_P (operands[1]))
12768 && ! rtx_equal_p (operands[0], operands[1]))
12769 return FALSE;
12770 return TRUE;
12771 }
12772
12773 /* Post-reload splitter for converting an SF or DFmode value in an
12774 SSE register into an unsigned SImode. */
12775
12776 void
12777 ix86_split_convert_uns_si_sse (rtx operands[])
12778 {
12779 enum machine_mode vecmode;
12780 rtx value, large, zero_or_two31, input, two31, x;
12781
12782 large = operands[1];
12783 zero_or_two31 = operands[2];
12784 input = operands[3];
12785 two31 = operands[4];
12786 vecmode = GET_MODE (large);
12787 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
12788
12789 /* Load up the value into the low element. We must ensure that the other
12790 elements are valid floats -- zero is the easiest such value. */
12791 if (MEM_P (input))
12792 {
12793 if (vecmode == V4SFmode)
12794 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
12795 else
12796 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
12797 }
12798 else
12799 {
12800 input = gen_rtx_REG (vecmode, REGNO (input));
12801 emit_move_insn (value, CONST0_RTX (vecmode));
12802 if (vecmode == V4SFmode)
12803 emit_insn (gen_sse_movss (value, value, input));
12804 else
12805 emit_insn (gen_sse2_movsd (value, value, input));
12806 }
12807
12808 emit_move_insn (large, two31);
12809 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
12810
12811 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
12812 emit_insn (gen_rtx_SET (VOIDmode, large, x));
12813
12814 x = gen_rtx_AND (vecmode, zero_or_two31, large);
12815 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
12816
12817 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
12818 emit_insn (gen_rtx_SET (VOIDmode, value, x));
12819
12820 large = gen_rtx_REG (V4SImode, REGNO (large));
12821 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
12822
12823 x = gen_rtx_REG (V4SImode, REGNO (value));
12824 if (vecmode == V4SFmode)
12825 emit_insn (gen_sse2_cvttps2dq (x, value));
12826 else
12827 emit_insn (gen_sse2_cvttpd2dq (x, value));
12828 value = x;
12829
12830 emit_insn (gen_xorv4si3 (value, value, large));
12831 }
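
/* A standalone C sketch of the same lowering, for illustration only
   (not compiled; the helper name is hypothetical).  It assumes an
   input in [0, 2**32) and the truncating semantics of cvttsd2si.  */
#if 0
#include <stdint.h>

static uint32_t
convert_uns_si_sketch (double x)
{
  const double two31 = 2147483648.0;	/* 0x1.0p31 */
  /* "large" plays the role of the LE-generated mask: it selects
     inputs that do not fit in a signed 32-bit integer.  */
  int large = (two31 <= x);
  /* Subtract 2**31 only from those inputs (the AND with the mask)...  */
  double biased = large ? x - two31 : x;
  /* ...convert with the signed truncating conversion, then put the
     stolen top bit back with an XOR of 0x80000000 (the mask shifted
     left by 31).  */
  return (uint32_t) (int32_t) biased ^ (large ? 0x80000000u : 0u);
}
#endif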
12832
12833 /* Convert an unsigned DImode value into a DFmode, using only SSE.
12834 Expects the 64-bit DImode to be supplied in a pair of integral
12835 registers. Requires SSE2; will use SSE3 if available. For x86_32,
12836 -mfpmath=sse, !optimize_size only. */
12837
12838 void
12839 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
12840 {
12841 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
12842 rtx int_xmm, fp_xmm;
12843 rtx biases, exponents;
12844 rtx x;
12845
12846 int_xmm = gen_reg_rtx (V4SImode);
12847 if (TARGET_INTER_UNIT_MOVES)
12848 emit_insn (gen_movdi_to_sse (int_xmm, input));
12849 else if (TARGET_SSE_SPLIT_REGS)
12850 {
12851 emit_clobber (int_xmm);
12852 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
12853 }
12854 else
12855 {
12856 x = gen_reg_rtx (V2DImode);
12857 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
12858 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
12859 }
12860
12861 x = gen_rtx_CONST_VECTOR (V4SImode,
12862 gen_rtvec (4, GEN_INT (0x43300000UL),
12863 GEN_INT (0x45300000UL),
12864 const0_rtx, const0_rtx));
12865 exponents = validize_mem (force_const_mem (V4SImode, x));
12866
12867 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
12868 emit_insn (gen_sse2_punpckldq (int_xmm, int_xmm, exponents));
12869
12870 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
12871 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
12872 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
12873 (0x1.0p84 + double(fp_value_hi_xmm)).
12874 Note these exponents differ by 32. */
12875
12876 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
12877
12878 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
12879 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
12880 real_ldexp (&bias_lo_rvt, &dconst1, 52);
12881 real_ldexp (&bias_hi_rvt, &dconst1, 84);
12882 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
12883 x = const_double_from_real_value (bias_hi_rvt, DFmode);
12884 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
12885 biases = validize_mem (force_const_mem (V2DFmode, biases));
12886 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
12887
12888 /* Add the upper and lower DFmode values together. */
12889 if (TARGET_SSE3)
12890 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
12891 else
12892 {
12893 x = copy_to_mode_reg (V2DFmode, fp_xmm);
12894 emit_insn (gen_sse2_unpckhpd (fp_xmm, fp_xmm, fp_xmm));
12895 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
12896 }
12897
12898 ix86_expand_vector_extract (false, target, fp_xmm, 0);
12899 }
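
/* A standalone C sketch of the exponent-splicing trick used above,
   for illustration only (not compiled; the helper name is
   hypothetical, and C99 hex float literals are assumed).  Pasting
   0x43300000 over the low 32 bits yields the double 2**52 + lo, and
   0x45300000 over the high 32 bits yields 2**84 + hi * 2**32; after
   subtracting the two biases, a single rounded addition produces the
   result.  */
#if 0
#include <stdint.h>
#include <string.h>

static double
convert_uns_didf_sketch (uint64_t x)
{
  double lo, hi;
  uint64_t lo_bits = ((uint64_t) 0x43300000u << 32) | (uint32_t) x;
  uint64_t hi_bits = ((uint64_t) 0x45300000u << 32) | (uint32_t) (x >> 32);
  memcpy (&lo, &lo_bits, sizeof lo);	/* lo == 0x1.0p52 + low32 */
  memcpy (&hi, &hi_bits, sizeof hi);	/* hi == 0x1.0p84 + high32 * 2**32 */
  /* Both subtractions are exact; only the final addition rounds.  */
  return (lo - 0x1.0p52) + (hi - 0x1.0p84);
}
#endif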
12900
12901 /* Not used, but eases macroization of patterns. */
12902 void
12903 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
12904 rtx input ATTRIBUTE_UNUSED)
12905 {
12906 gcc_unreachable ();
12907 }
12908
12909 /* Convert an unsigned SImode value into a DFmode. Only currently used
12910 for SSE, but applicable anywhere. */
12911
12912 void
12913 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
12914 {
12915 REAL_VALUE_TYPE TWO31r;
12916 rtx x, fp;
12917
12918 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
12919 NULL, 1, OPTAB_DIRECT);
12920
12921 fp = gen_reg_rtx (DFmode);
12922 emit_insn (gen_floatsidf2 (fp, x));
12923
12924 real_ldexp (&TWO31r, &dconst1, 31);
12925 x = const_double_from_real_value (TWO31r, DFmode);
12926
12927 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
12928 if (x != target)
12929 emit_move_insn (target, x);
12930 }
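
/* A standalone C sketch of the bias trick above, for illustration
   only (not compiled; the helper name is hypothetical, and the usual
   two's complement wraparound on the unsigned-to-signed conversion is
   assumed).  Flipping the sign bit maps [0, 2**32) onto
   [-2**31, 2**31), which the signed conversion handles exactly; the
   bias is then undone in the FP domain, where 2**31 is exact.  */
#if 0
#include <stdint.h>

static double
convert_uns_sidf_sketch (uint32_t x)
{
  int32_t biased = (int32_t) (x + 0x80000000u);	/* x - 2**31 */
  return (double) biased + 2147483648.0;	/* + 2**31 */
}
#endif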
12931
12932 /* Convert a signed DImode value into a DFmode. Only used for SSE in
12933 32-bit mode; otherwise we have a direct convert instruction. */
12934
12935 void
12936 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
12937 {
12938 REAL_VALUE_TYPE TWO32r;
12939 rtx fp_lo, fp_hi, x;
12940
12941 fp_lo = gen_reg_rtx (DFmode);
12942 fp_hi = gen_reg_rtx (DFmode);
12943
12944 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
12945
12946 real_ldexp (&TWO32r, &dconst1, 32);
12947 x = const_double_from_real_value (TWO32r, DFmode);
12948 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
12949
12950 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
12951
12952 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
12953 0, OPTAB_DIRECT);
12954 if (x != target)
12955 emit_move_insn (target, x);
12956 }
12957
12958 /* Convert an unsigned SImode value into a SFmode, using only SSE.
12959 For x86_32, -mfpmath=sse, !optimize_size only. */
12960 void
12961 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
12962 {
12963 REAL_VALUE_TYPE ONE16r;
12964 rtx fp_hi, fp_lo, int_hi, int_lo, x;
12965
12966 real_ldexp (&ONE16r, &dconst1, 16);
12967 x = const_double_from_real_value (ONE16r, SFmode);
12968 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
12969 NULL, 0, OPTAB_DIRECT);
12970 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
12971 NULL, 0, OPTAB_DIRECT);
12972 fp_hi = gen_reg_rtx (SFmode);
12973 fp_lo = gen_reg_rtx (SFmode);
12974 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
12975 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
12976 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
12977 0, OPTAB_DIRECT);
12978 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
12979 0, OPTAB_DIRECT);
12980 if (!rtx_equal_p (target, fp_hi))
12981 emit_move_insn (target, fp_hi);
12982 }
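
/* A standalone C sketch of the 16-bit split above, for illustration
   only (not compiled; the helper name is hypothetical).  Both halves
   convert to float exactly and the scale by 2**16 is exact, so only
   the final addition rounds -- giving the same result as a direct
   unsigned conversion.  */
#if 0
#include <stdint.h>

static float
convert_uns_sisf_sketch (uint32_t x)
{
  float fp_lo = (float) (x & 0xffff);	/* low 16 bits, exact */
  float fp_hi = (float) (x >> 16);	/* high 16 bits, exact */
  return fp_hi * 65536.0f + fp_lo;	/* one rounding */
}
#endif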
12983
12984 /* A subroutine of ix86_build_signbit_mask_vector. If VECT is true,
12985 then replicate the value for all elements of the vector
12986 register. */
12987
12988 rtx
12989 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
12990 {
12991 rtvec v;
12992 switch (mode)
12993 {
12994 case SImode:
12995 gcc_assert (vect);
12996 v = gen_rtvec (4, value, value, value, value);
12997 return gen_rtx_CONST_VECTOR (V4SImode, v);
12998
12999 case DImode:
13000 gcc_assert (vect);
13001 v = gen_rtvec (2, value, value);
13002 return gen_rtx_CONST_VECTOR (V2DImode, v);
13003
13004 case SFmode:
13005 if (vect)
13006 v = gen_rtvec (4, value, value, value, value);
13007 else
13008 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
13009 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
13010 return gen_rtx_CONST_VECTOR (V4SFmode, v);
13011
13012 case DFmode:
13013 if (vect)
13014 v = gen_rtvec (2, value, value);
13015 else
13016 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
13017 return gen_rtx_CONST_VECTOR (V2DFmode, v);
13018
13019 default:
13020 gcc_unreachable ();
13021 }
13022 }
13023
13024 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
13025 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
13026 for an SSE register. If VECT is true, then replicate the mask for
13027 all elements of the vector register. If INVERT is true, then create
13028 a mask excluding the sign bit. */
13029
13030 rtx
13031 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
13032 {
13033 enum machine_mode vec_mode, imode;
13034 HOST_WIDE_INT hi, lo;
13035 int shift = 63;
13036 rtx v;
13037 rtx mask;
13038
13039 /* Find the sign bit, sign extended to 2*HWI. */
13040 switch (mode)
13041 {
13042 case SImode:
13043 case SFmode:
13044 imode = SImode;
13045 vec_mode = (mode == SImode) ? V4SImode : V4SFmode;
13046 lo = 0x80000000, hi = lo < 0;
13047 break;
13048
13049 case DImode:
13050 case DFmode:
13051 imode = DImode;
13052 vec_mode = (mode == DImode) ? V2DImode : V2DFmode;
13053 if (HOST_BITS_PER_WIDE_INT >= 64)
13054 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
13055 else
13056 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
13057 break;
13058
13059 case TImode:
13060 case TFmode:
13061 vec_mode = VOIDmode;
13062 if (HOST_BITS_PER_WIDE_INT >= 64)
13063 {
13064 imode = TImode;
13065 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
13066 }
13067 else
13068 {
13069 rtvec vec;
13070
13071 imode = DImode;
13072 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
13073
13074 if (invert)
13075 {
13076 lo = ~lo, hi = ~hi;
13077 v = constm1_rtx;
13078 }
13079 else
13080 v = const0_rtx;
13081
13082 mask = immed_double_const (lo, hi, imode);
13083
13084 vec = gen_rtvec (2, v, mask);
13085 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
13086 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
13087
13088 return v;
13089 }
13090 break;
13091
13092 default:
13093 gcc_unreachable ();
13094 }
13095
13096 if (invert)
13097 lo = ~lo, hi = ~hi;
13098
13099 /* Force this value into the low part of a fp vector constant. */
13100 mask = immed_double_const (lo, hi, imode);
13101 mask = gen_lowpart (mode, mask);
13102
13103 if (vec_mode == VOIDmode)
13104 return force_reg (mode, mask);
13105
13106 v = ix86_build_const_vector (mode, vect, mask);
13107 return force_reg (vec_mode, v);
13108 }
13109
13110 /* Generate code for floating point ABS or NEG. */
13111
13112 void
13113 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
13114 rtx operands[])
13115 {
13116 rtx mask, set, use, clob, dst, src;
13117 bool use_sse = false;
13118 bool vector_mode = VECTOR_MODE_P (mode);
13119 enum machine_mode elt_mode = mode;
13120
13121 if (vector_mode)
13122 {
13123 elt_mode = GET_MODE_INNER (mode);
13124 use_sse = true;
13125 }
13126 else if (mode == TFmode)
13127 use_sse = true;
13128 else if (TARGET_SSE_MATH)
13129 use_sse = SSE_FLOAT_MODE_P (mode);
13130
13131 /* NEG and ABS performed with SSE use bitwise mask operations.
13132 Create the appropriate mask now. */
13133 if (use_sse)
13134 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
13135 else
13136 mask = NULL_RTX;
13137
13138 dst = operands[0];
13139 src = operands[1];
13140
13141 if (vector_mode)
13142 {
13143 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
13144 set = gen_rtx_SET (VOIDmode, dst, set);
13145 emit_insn (set);
13146 }
13147 else
13148 {
13149 set = gen_rtx_fmt_e (code, mode, src);
13150 set = gen_rtx_SET (VOIDmode, dst, set);
13151 if (mask)
13152 {
13153 use = gen_rtx_USE (VOIDmode, mask);
13154 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
13155 emit_insn (gen_rtx_PARALLEL (VOIDmode,
13156 gen_rtvec (3, set, use, clob)));
13157 }
13158 else
13159 emit_insn (set);
13160 }
13161 }
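
/* A standalone C sketch of the mask operations above, for
   illustration only (not compiled; helper names are hypothetical).
   NEG is an XOR with the sign-bit mask, ABS is an AND with the
   inverted mask, which is what the SSE patterns emit.  */
#if 0
#include <stdint.h>
#include <string.h>

static double
fp_neg_sketch (double x)
{
  uint64_t bits, signmask = (uint64_t) 1 << 63;
  memcpy (&bits, &x, sizeof bits);
  bits ^= signmask;			/* flip the sign bit */
  memcpy (&x, &bits, sizeof x);
  return x;
}

static double
fp_abs_sketch (double x)
{
  uint64_t bits, signmask = (uint64_t) 1 << 63;
  memcpy (&bits, &x, sizeof bits);
  bits &= ~signmask;			/* clear the sign bit */
  memcpy (&x, &bits, sizeof x);
  return x;
}
#endif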
13162
13163 /* Expand a copysign operation. Special case operand 0 being a constant. */
13164
13165 void
13166 ix86_expand_copysign (rtx operands[])
13167 {
13168 enum machine_mode mode;
13169 rtx dest, op0, op1, mask, nmask;
13170
13171 dest = operands[0];
13172 op0 = operands[1];
13173 op1 = operands[2];
13174
13175 mode = GET_MODE (dest);
13176
13177 if (GET_CODE (op0) == CONST_DOUBLE)
13178 {
13179 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
13180
13181 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
13182 op0 = simplify_unary_operation (ABS, mode, op0, mode);
13183
13184 if (mode == SFmode || mode == DFmode)
13185 {
13186 enum machine_mode vmode;
13187
13188 vmode = mode == SFmode ? V4SFmode : V2DFmode;
13189
13190 if (op0 == CONST0_RTX (mode))
13191 op0 = CONST0_RTX (vmode);
13192 else
13193 {
13194 rtvec v;
13195
13196 if (mode == SFmode)
13197 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
13198 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
13199 else
13200 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
13201
13202 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
13203 }
13204 }
13205 else if (op0 != CONST0_RTX (mode))
13206 op0 = force_reg (mode, op0);
13207
13208 mask = ix86_build_signbit_mask (mode, 0, 0);
13209
13210 if (mode == SFmode)
13211 copysign_insn = gen_copysignsf3_const;
13212 else if (mode == DFmode)
13213 copysign_insn = gen_copysigndf3_const;
13214 else
13215 copysign_insn = gen_copysigntf3_const;
13216
13217 emit_insn (copysign_insn (dest, op0, op1, mask));
13218 }
13219 else
13220 {
13221 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
13222
13223 nmask = ix86_build_signbit_mask (mode, 0, 1);
13224 mask = ix86_build_signbit_mask (mode, 0, 0);
13225
13226 if (mode == SFmode)
13227 copysign_insn = gen_copysignsf3_var;
13228 else if (mode == DFmode)
13229 copysign_insn = gen_copysigndf3_var;
13230 else
13231 copysign_insn = gen_copysigntf3_var;
13232
13233 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
13234 }
13235 }
13236
13237 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
13238 be a constant, and so has already been expanded into a vector constant. */
13239
13240 void
13241 ix86_split_copysign_const (rtx operands[])
13242 {
13243 enum machine_mode mode, vmode;
13244 rtx dest, op0, op1, mask, x;
13245
13246 dest = operands[0];
13247 op0 = operands[1];
13248 op1 = operands[2];
13249 mask = operands[3];
13250
13251 mode = GET_MODE (dest);
13252 vmode = GET_MODE (mask);
13253
13254 dest = simplify_gen_subreg (vmode, dest, mode, 0);
13255 x = gen_rtx_AND (vmode, dest, mask);
13256 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13257
13258 if (op0 != CONST0_RTX (vmode))
13259 {
13260 x = gen_rtx_IOR (vmode, dest, op0);
13261 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13262 }
13263 }
13264
13265 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
13266 so we have to do two masks. */
13267
13268 void
13269 ix86_split_copysign_var (rtx operands[])
13270 {
13271 enum machine_mode mode, vmode;
13272 rtx dest, scratch, op0, op1, mask, nmask, x;
13273
13274 dest = operands[0];
13275 scratch = operands[1];
13276 op0 = operands[2];
13277 op1 = operands[3];
13278 nmask = operands[4];
13279 mask = operands[5];
13280
13281 mode = GET_MODE (dest);
13282 vmode = GET_MODE (mask);
13283
13284 if (rtx_equal_p (op0, op1))
13285 {
13286 /* Shouldn't happen often (it's useless, obviously), but when it does
13287 we'd generate incorrect code if we continue below. */
13288 emit_move_insn (dest, op0);
13289 return;
13290 }
13291
13292 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
13293 {
13294 gcc_assert (REGNO (op1) == REGNO (scratch));
13295
13296 x = gen_rtx_AND (vmode, scratch, mask);
13297 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
13298
13299 dest = mask;
13300 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
13301 x = gen_rtx_NOT (vmode, dest);
13302 x = gen_rtx_AND (vmode, x, op0);
13303 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13304 }
13305 else
13306 {
13307 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
13308 {
13309 x = gen_rtx_AND (vmode, scratch, mask);
13310 }
13311 else /* alternative 2,4 */
13312 {
13313 gcc_assert (REGNO (mask) == REGNO (scratch));
13314 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
13315 x = gen_rtx_AND (vmode, scratch, op1);
13316 }
13317 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
13318
13319 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
13320 {
13321 dest = simplify_gen_subreg (vmode, op0, mode, 0);
13322 x = gen_rtx_AND (vmode, dest, nmask);
13323 }
13324 else /* alternative 3,4 */
13325 {
13326 gcc_assert (REGNO (nmask) == REGNO (dest));
13327 dest = nmask;
13328 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
13329 x = gen_rtx_AND (vmode, dest, op0);
13330 }
13331 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13332 }
13333
13334 x = gen_rtx_IOR (vmode, dest, scratch);
13335 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13336 }
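
/* A standalone C sketch of the mask-based copysign above, for
   illustration only (not compiled; the helper name is hypothetical).
   The magnitude is ANDed with the inverted sign mask ("nmask"), the
   sign source with the sign mask ("mask"), and the two are ORed.  */
#if 0
#include <stdint.h>
#include <string.h>

static double
copysign_sketch (double mag, double sgn)
{
  const uint64_t signmask = (uint64_t) 1 << 63;
  uint64_t m, s;
  memcpy (&m, &mag, sizeof m);
  memcpy (&s, &sgn, sizeof s);
  m = (m & ~signmask) | (s & signmask);
  memcpy (&mag, &m, sizeof mag);
  return mag;
}
#endif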
13337
13338 /* Return TRUE or FALSE depending on whether the first SET in INSN
13339 has source and destination with matching CC modes, and that the
13340 CC mode is at least as constrained as REQ_MODE. */
13341
13342 int
13343 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
13344 {
13345 rtx set;
13346 enum machine_mode set_mode;
13347
13348 set = PATTERN (insn);
13349 if (GET_CODE (set) == PARALLEL)
13350 set = XVECEXP (set, 0, 0);
13351 gcc_assert (GET_CODE (set) == SET);
13352 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
13353
13354 set_mode = GET_MODE (SET_DEST (set));
13355 switch (set_mode)
13356 {
13357 case CCNOmode:
13358 if (req_mode != CCNOmode
13359 && (req_mode != CCmode
13360 || XEXP (SET_SRC (set), 1) != const0_rtx))
13361 return 0;
13362 break;
13363 case CCmode:
13364 if (req_mode == CCGCmode)
13365 return 0;
13366 /* FALLTHRU */
13367 case CCGCmode:
13368 if (req_mode == CCGOCmode || req_mode == CCNOmode)
13369 return 0;
13370 /* FALLTHRU */
13371 case CCGOCmode:
13372 if (req_mode == CCZmode)
13373 return 0;
13374 /* FALLTHRU */
13375 case CCAmode:
13376 case CCCmode:
13377 case CCOmode:
13378 case CCSmode:
13379 case CCZmode:
13380 break;
13381
13382 default:
13383 gcc_unreachable ();
13384 }
13385
13386 return (GET_MODE (SET_SRC (set)) == set_mode);
13387 }
13388
13389 /* Generate insn patterns to do an integer compare of OPERANDS. */
13390
13391 static rtx
13392 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
13393 {
13394 enum machine_mode cmpmode;
13395 rtx tmp, flags;
13396
13397 cmpmode = SELECT_CC_MODE (code, op0, op1);
13398 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
13399
13400 /* This is very simple, but making the interface the same as in the
13401 FP case makes the rest of the code easier. */
13402 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
13403 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
13404
13405 /* Return the test that should be put into the flags user, i.e.
13406 the bcc, scc, or cmov instruction. */
13407 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
13408 }
13409
13410 /* Figure out whether to use ordered or unordered fp comparisons.
13411 Return the appropriate mode to use. */
13412
13413 enum machine_mode
13414 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
13415 {
13416 /* ??? In order to make all comparisons reversible, we do all comparisons
13417      non-trapping when compiling for IEEE.  Once gcc is able to distinguish
13418      between trapping and nontrapping forms of comparisons, we can make
13419      inequality comparisons trapping again, since that results in better code
13420      when using FCOM based compares.  */
13421 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
13422 }
13423
13424 enum machine_mode
13425 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
13426 {
13427 enum machine_mode mode = GET_MODE (op0);
13428
13429 if (SCALAR_FLOAT_MODE_P (mode))
13430 {
13431 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
13432 return ix86_fp_compare_mode (code);
13433 }
13434
13435 switch (code)
13436 {
13437 /* Only zero flag is needed. */
13438 case EQ: /* ZF=0 */
13439 case NE: /* ZF!=0 */
13440 return CCZmode;
13441 /* Codes needing carry flag. */
13442 case GEU: /* CF=0 */
13443 case LTU: /* CF=1 */
13444 /* Detect overflow checks. They need just the carry flag. */
13445 if (GET_CODE (op0) == PLUS
13446 && rtx_equal_p (op1, XEXP (op0, 0)))
13447 return CCCmode;
13448 else
13449 return CCmode;
13450 case GTU: /* CF=0 & ZF=0 */
13451 case LEU: /* CF=1 | ZF=1 */
13452 /* Detect overflow checks. They need just the carry flag. */
13453 if (GET_CODE (op0) == MINUS
13454 && rtx_equal_p (op1, XEXP (op0, 0)))
13455 return CCCmode;
13456 else
13457 return CCmode;
13458 /* Codes possibly doable only with sign flag when
13459 comparing against zero. */
13460 case GE: /* SF=OF or SF=0 */
13461 case LT: /* SF<>OF or SF=1 */
13462 if (op1 == const0_rtx)
13463 return CCGOCmode;
13464 else
13465 /* For other cases Carry flag is not required. */
13466 return CCGCmode;
13467     /* Codes doable only with the sign flag when comparing
13468        against zero, but we lack a jump instruction for that,
13469        so we need to use relational tests against the overflow
13470        flag, which thus needs to be zero.  */
13471 case GT: /* ZF=0 & SF=OF */
13472 case LE: /* ZF=1 | SF<>OF */
13473 if (op1 == const0_rtx)
13474 return CCNOmode;
13475 else
13476 return CCGCmode;
13477     /* The strcmp pattern does a (use flags), and combine may ask us for the
13478        proper mode.  */
13479 case USE:
13480 return CCmode;
13481 default:
13482 gcc_unreachable ();
13483 }
13484 }
13485
13486 /* Return the fixed registers used for condition codes. */
13487
13488 static bool
13489 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
13490 {
13491 *p1 = FLAGS_REG;
13492 *p2 = FPSR_REG;
13493 return true;
13494 }
13495
13496 /* If two condition code modes are compatible, return a condition code
13497 mode which is compatible with both. Otherwise, return
13498 VOIDmode. */
13499
13500 static enum machine_mode
13501 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
13502 {
13503 if (m1 == m2)
13504 return m1;
13505
13506 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
13507 return VOIDmode;
13508
13509 if ((m1 == CCGCmode && m2 == CCGOCmode)
13510 || (m1 == CCGOCmode && m2 == CCGCmode))
13511 return CCGCmode;
13512
13513 switch (m1)
13514 {
13515 default:
13516 gcc_unreachable ();
13517
13518 case CCmode:
13519 case CCGCmode:
13520 case CCGOCmode:
13521 case CCNOmode:
13522 case CCAmode:
13523 case CCCmode:
13524 case CCOmode:
13525 case CCSmode:
13526 case CCZmode:
13527 switch (m2)
13528 {
13529 default:
13530 return VOIDmode;
13531
13532 case CCmode:
13533 case CCGCmode:
13534 case CCGOCmode:
13535 case CCNOmode:
13536 case CCAmode:
13537 case CCCmode:
13538 case CCOmode:
13539 case CCSmode:
13540 case CCZmode:
13541 return CCmode;
13542 }
13543
13544 case CCFPmode:
13545 case CCFPUmode:
13546 /* These are only compatible with themselves, which we already
13547 checked above. */
13548 return VOIDmode;
13549 }
13550 }
13551
13552 /* Split comparison code CODE into comparisons we can do using branch
13553    instructions.  BYPASS_CODE is the comparison code for the branch that will
13554    branch around FIRST_CODE and SECOND_CODE.  If one of the branches
13555    is not required, its value is set to UNKNOWN.
13556 We never require more than two branches. */
13557
13558 void
13559 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
13560 enum rtx_code *first_code,
13561 enum rtx_code *second_code)
13562 {
13563 *first_code = code;
13564 *bypass_code = UNKNOWN;
13565 *second_code = UNKNOWN;
13566
13567 /* The fcomi comparison sets flags as follows:
13568
13569 cmp ZF PF CF
13570 > 0 0 0
13571 < 0 0 1
13572 = 1 0 0
13573 un 1 1 1 */
13574
13575 switch (code)
13576 {
13577 case GT: /* GTU - CF=0 & ZF=0 */
13578 case GE: /* GEU - CF=0 */
13579 case ORDERED: /* PF=0 */
13580 case UNORDERED: /* PF=1 */
13581 case UNEQ: /* EQ - ZF=1 */
13582 case UNLT: /* LTU - CF=1 */
13583 case UNLE: /* LEU - CF=1 | ZF=1 */
13584 case LTGT: /* EQ - ZF=0 */
13585 break;
13586 case LT: /* LTU - CF=1 - fails on unordered */
13587 *first_code = UNLT;
13588 *bypass_code = UNORDERED;
13589 break;
13590 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
13591 *first_code = UNLE;
13592 *bypass_code = UNORDERED;
13593 break;
13594 case EQ: /* EQ - ZF=1 - fails on unordered */
13595 *first_code = UNEQ;
13596 *bypass_code = UNORDERED;
13597 break;
13598 case NE: /* NE - ZF=0 - fails on unordered */
13599 *first_code = LTGT;
13600 *second_code = UNORDERED;
13601 break;
13602 case UNGE: /* GEU - CF=0 - fails on unordered */
13603 *first_code = GE;
13604 *second_code = UNORDERED;
13605 break;
13606 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
13607 *first_code = GT;
13608 *second_code = UNORDERED;
13609 break;
13610 default:
13611 gcc_unreachable ();
13612 }
13613 if (!TARGET_IEEE_FP)
13614 {
13615 *second_code = UNKNOWN;
13616 *bypass_code = UNKNOWN;
13617 }
13618 }
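
/* A standalone C model of the flag table and of the splitting above,
   for illustration only (not compiled; helper names are
   hypothetical).  For example, plain LT would be true for unordered
   operands (CF=1), so under TARGET_IEEE_FP it is split into an
   UNORDERED bypass branch (PF=1) followed by an UNLT test (CF=1).  */
#if 0
#include <math.h>
#include <stdbool.h>

struct fcomi_flags { bool zf, pf, cf; };

static struct fcomi_flags
fcomi_sketch (double a, double b)
{
  struct fcomi_flags f = { false, false, false };
  if (isunordered (a, b))
    f.zf = f.pf = f.cf = true;		/* un: 1 1 1 */
  else if (a < b)
    f.cf = true;			/* <:  0 0 1 */
  else if (a == b)
    f.zf = true;			/* =:  1 0 0 */
					/* >:  0 0 0 */
  return f;
}

static bool
ieee_lt_sketch (double a, double b)
{
  struct fcomi_flags f = fcomi_sketch (a, b);
  if (f.pf)				/* bypass_code == UNORDERED */
    return false;
  return f.cf;				/* first_code == UNLT */
}
#endif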
13619
13620 /* Return the cost of a comparison done using fcom + arithmetic operations on AX.
13621    All the following functions use the number of instructions as a cost metric.
13622    In the future this should be tweaked to compute bytes for optimize_size and
13623    take into account the performance of various instructions on various CPUs.  */
13624 static int
13625 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
13626 {
13627 if (!TARGET_IEEE_FP)
13628 return 4;
13629 /* The cost of code output by ix86_expand_fp_compare. */
13630 switch (code)
13631 {
13632 case UNLE:
13633 case UNLT:
13634 case LTGT:
13635 case GT:
13636 case GE:
13637 case UNORDERED:
13638 case ORDERED:
13639 case UNEQ:
13640 return 4;
13641 break;
13642 case LT:
13643 case NE:
13644 case EQ:
13645 case UNGE:
13646 return 5;
13647 break;
13648 case LE:
13649 case UNGT:
13650 return 6;
13651 break;
13652 default:
13653 gcc_unreachable ();
13654 }
13655 }
13656
13657 /* Return cost of comparison done using fcomi operation.
13658 See ix86_fp_comparison_arithmetics_cost for the metrics. */
13659 static int
13660 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
13661 {
13662 enum rtx_code bypass_code, first_code, second_code;
13663 /* Return arbitrarily high cost when instruction is not supported - this
13664 prevents gcc from using it. */
13665 if (!TARGET_CMOVE)
13666 return 1024;
13667 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
13668 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
13669 }
13670
13671 /* Return cost of comparison done using sahf operation.
13672 See ix86_fp_comparison_arithmetics_cost for the metrics. */
13673 static int
13674 ix86_fp_comparison_sahf_cost (enum rtx_code code)
13675 {
13676 enum rtx_code bypass_code, first_code, second_code;
13677   /* Return arbitrarily high cost when instruction is not preferred - this
13678      prevents gcc from using it. */
13679 if (!(TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ())))
13680 return 1024;
13681 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
13682 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
13683 }
13684
13685 /* Compute cost of the comparison done using any method.
13686 See ix86_fp_comparison_arithmetics_cost for the metrics. */
13687 static int
13688 ix86_fp_comparison_cost (enum rtx_code code)
13689 {
13690 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
13691 int min;
13692
13693 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
13694 sahf_cost = ix86_fp_comparison_sahf_cost (code);
13695
13696 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
13697 if (min > sahf_cost)
13698 min = sahf_cost;
13699 if (min > fcomi_cost)
13700 min = fcomi_cost;
13701 return min;
13702 }
13703
13704 /* Return true if we should use an FCOMI instruction for this
13705 fp comparison. */
13706
13707 int
13708 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
13709 {
13710 enum rtx_code swapped_code = swap_condition (code);
13711
13712 return ((ix86_fp_comparison_cost (code)
13713 == ix86_fp_comparison_fcomi_cost (code))
13714 || (ix86_fp_comparison_cost (swapped_code)
13715 == ix86_fp_comparison_fcomi_cost (swapped_code)));
13716 }
13717
13718 /* Swap, force into registers, or otherwise massage the two operands
13719 to a fp comparison. The operands are updated in place; the new
13720 comparison code is returned. */
13721
13722 static enum rtx_code
13723 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
13724 {
13725 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
13726 rtx op0 = *pop0, op1 = *pop1;
13727 enum machine_mode op_mode = GET_MODE (op0);
13728 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
13729
13730 /* All of the unordered compare instructions only work on registers.
13731 The same is true of the fcomi compare instructions. The XFmode
13732 compare instructions require registers except when comparing
13733 against zero or when converting operand 1 from fixed point to
13734 floating point. */
13735
13736 if (!is_sse
13737 && (fpcmp_mode == CCFPUmode
13738 || (op_mode == XFmode
13739 && ! (standard_80387_constant_p (op0) == 1
13740 || standard_80387_constant_p (op1) == 1)
13741 && GET_CODE (op1) != FLOAT)
13742 || ix86_use_fcomi_compare (code)))
13743 {
13744 op0 = force_reg (op_mode, op0);
13745 op1 = force_reg (op_mode, op1);
13746 }
13747 else
13748 {
13749 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
13750 things around if they appear profitable, otherwise force op0
13751 into a register. */
13752
13753 if (standard_80387_constant_p (op0) == 0
13754 || (MEM_P (op0)
13755 && ! (standard_80387_constant_p (op1) == 0
13756 || MEM_P (op1))))
13757 {
13758 rtx tmp;
13759 tmp = op0, op0 = op1, op1 = tmp;
13760 code = swap_condition (code);
13761 }
13762
13763 if (!REG_P (op0))
13764 op0 = force_reg (op_mode, op0);
13765
13766 if (CONSTANT_P (op1))
13767 {
13768 int tmp = standard_80387_constant_p (op1);
13769 if (tmp == 0)
13770 op1 = validize_mem (force_const_mem (op_mode, op1));
13771 else if (tmp == 1)
13772 {
13773 if (TARGET_CMOVE)
13774 op1 = force_reg (op_mode, op1);
13775 }
13776 else
13777 op1 = force_reg (op_mode, op1);
13778 }
13779 }
13780
13781 /* Try to rearrange the comparison to make it cheaper. */
13782 if (ix86_fp_comparison_cost (code)
13783 > ix86_fp_comparison_cost (swap_condition (code))
13784 && (REG_P (op1) || can_create_pseudo_p ()))
13785 {
13786 rtx tmp;
13787 tmp = op0, op0 = op1, op1 = tmp;
13788 code = swap_condition (code);
13789 if (!REG_P (op0))
13790 op0 = force_reg (op_mode, op0);
13791 }
13792
13793 *pop0 = op0;
13794 *pop1 = op1;
13795 return code;
13796 }
13797
13798 /* Convert comparison codes we use to represent FP comparison to integer
13799 code that will result in proper branch. Return UNKNOWN if no such code
13800 is available. */
13801
13802 enum rtx_code
13803 ix86_fp_compare_code_to_integer (enum rtx_code code)
13804 {
13805 switch (code)
13806 {
13807 case GT:
13808 return GTU;
13809 case GE:
13810 return GEU;
13811 case ORDERED:
13812 case UNORDERED:
13813 return code;
13814 break;
13815 case UNEQ:
13816 return EQ;
13817 break;
13818 case UNLT:
13819 return LTU;
13820 break;
13821 case UNLE:
13822 return LEU;
13823 break;
13824 case LTGT:
13825 return NE;
13826 break;
13827 default:
13828 return UNKNOWN;
13829 }
13830 }
13831
13832 /* Generate insn patterns to do a floating point compare of OPERANDS. */
13833
13834 static rtx
13835 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
13836 rtx *second_test, rtx *bypass_test)
13837 {
13838 enum machine_mode fpcmp_mode, intcmp_mode;
13839 rtx tmp, tmp2;
13840 int cost = ix86_fp_comparison_cost (code);
13841 enum rtx_code bypass_code, first_code, second_code;
13842
13843 fpcmp_mode = ix86_fp_compare_mode (code);
13844 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
13845
13846 if (second_test)
13847 *second_test = NULL_RTX;
13848 if (bypass_test)
13849 *bypass_test = NULL_RTX;
13850
13851 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
13852
13853 /* Do fcomi/sahf based test when profitable. */
13854 if (ix86_fp_comparison_arithmetics_cost (code) > cost
13855 && (bypass_code == UNKNOWN || bypass_test)
13856 && (second_code == UNKNOWN || second_test))
13857 {
13858 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
13859 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
13860 tmp);
13861 if (TARGET_CMOVE)
13862 emit_insn (tmp);
13863 else
13864 {
13865 gcc_assert (TARGET_SAHF);
13866
13867 if (!scratch)
13868 scratch = gen_reg_rtx (HImode);
13869 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
13870
13871 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
13872 }
13873
13874 /* The FP codes work out to act like unsigned. */
13875 intcmp_mode = fpcmp_mode;
13876 code = first_code;
13877 if (bypass_code != UNKNOWN)
13878 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
13879 gen_rtx_REG (intcmp_mode, FLAGS_REG),
13880 const0_rtx);
13881 if (second_code != UNKNOWN)
13882 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
13883 gen_rtx_REG (intcmp_mode, FLAGS_REG),
13884 const0_rtx);
13885 }
13886 else
13887 {
13888 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
13889 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
13890 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
13891 if (!scratch)
13892 scratch = gen_reg_rtx (HImode);
13893 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
13894
13895 /* In the unordered case, we have to check C2 for NaN's, which
13896 doesn't happen to work out to anything nice combination-wise.
13897 So do some bit twiddling on the value we've got in AH to come
13898 up with an appropriate set of condition codes. */
13899
13900 intcmp_mode = CCNOmode;
13901 switch (code)
13902 {
13903 case GT:
13904 case UNGT:
13905 if (code == GT || !TARGET_IEEE_FP)
13906 {
13907 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
13908 code = EQ;
13909 }
13910 else
13911 {
13912 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
13913 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
13914 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
13915 intcmp_mode = CCmode;
13916 code = GEU;
13917 }
13918 break;
13919 case LT:
13920 case UNLT:
13921 if (code == LT && TARGET_IEEE_FP)
13922 {
13923 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
13924 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
13925 intcmp_mode = CCmode;
13926 code = EQ;
13927 }
13928 else
13929 {
13930 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
13931 code = NE;
13932 }
13933 break;
13934 case GE:
13935 case UNGE:
13936 if (code == GE || !TARGET_IEEE_FP)
13937 {
13938 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
13939 code = EQ;
13940 }
13941 else
13942 {
13943 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
13944 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
13945 GEN_INT (0x01)));
13946 code = NE;
13947 }
13948 break;
13949 case LE:
13950 case UNLE:
13951 if (code == LE && TARGET_IEEE_FP)
13952 {
13953 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
13954 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
13955 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
13956 intcmp_mode = CCmode;
13957 code = LTU;
13958 }
13959 else
13960 {
13961 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
13962 code = NE;
13963 }
13964 break;
13965 case EQ:
13966 case UNEQ:
13967 if (code == EQ && TARGET_IEEE_FP)
13968 {
13969 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
13970 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
13971 intcmp_mode = CCmode;
13972 code = EQ;
13973 }
13974 else
13975 {
13976 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
13977 code = NE;
13978 break;
13979 }
13980 break;
13981 case NE:
13982 case LTGT:
13983 if (code == NE && TARGET_IEEE_FP)
13984 {
13985 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
13986 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
13987 GEN_INT (0x40)));
13988 code = NE;
13989 }
13990 else
13991 {
13992 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
13993 code = EQ;
13994 }
13995 break;
13996
13997 case UNORDERED:
13998 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
13999 code = NE;
14000 break;
14001 case ORDERED:
14002 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
14003 code = EQ;
14004 break;
14005
14006 default:
14007 gcc_unreachable ();
14008 }
14009 }
14010
14011 /* Return the test that should be put into the flags user, i.e.
14012 the bcc, scc, or cmov instruction. */
14013 return gen_rtx_fmt_ee (code, VOIDmode,
14014 gen_rtx_REG (intcmp_mode, FLAGS_REG),
14015 const0_rtx);
14016 }
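
/* A standalone C model of the fnstsw/AH bit tests above, for
   illustration only (not compiled; helper names are hypothetical).
   After an fcom, the high byte of the FPU status word carries
   C0 = 0x01, C2 = 0x04 and C3 = 0x40, so 0x45 is C3|C2|C0.  */
#if 0
#include <math.h>
#include <stdbool.h>

static unsigned char
fnstsw_ah_sketch (double a, double b)
{
  if (isunordered (a, b))
    return 0x45;			/* C3=1 C2=1 C0=1 */
  if (a < b)
    return 0x01;			/* C0=1 */
  if (a == b)
    return 0x40;			/* C3=1 */
  return 0x00;				/* a > b */
}

/* The !TARGET_IEEE_FP GT test: "test $0x45, %ah; je".  */
static bool
gt_sketch (double a, double b)
{
  return (fnstsw_ah_sketch (a, b) & 0x45) == 0;
}

/* The UNORDERED test: "test $0x04, %ah; jne".  */
static bool
unordered_sketch (double a, double b)
{
  return (fnstsw_ah_sketch (a, b) & 0x04) != 0;
}
#endif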
14017
14018 rtx
14019 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
14020 {
14021 rtx op0, op1, ret;
14022 op0 = ix86_compare_op0;
14023 op1 = ix86_compare_op1;
14024
14025 if (second_test)
14026 *second_test = NULL_RTX;
14027 if (bypass_test)
14028 *bypass_test = NULL_RTX;
14029
14030 if (ix86_compare_emitted)
14031 {
14032 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
14033 ix86_compare_emitted = NULL_RTX;
14034 }
14035 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
14036 {
14037 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
14038 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
14039 second_test, bypass_test);
14040 }
14041 else
14042 ret = ix86_expand_int_compare (code, op0, op1);
14043
14044 return ret;
14045 }
14046
14047 /* Return true if the CODE will result in nontrivial jump sequence. */
14048 bool
14049 ix86_fp_jump_nontrivial_p (enum rtx_code code)
14050 {
14051 enum rtx_code bypass_code, first_code, second_code;
14052 if (!TARGET_CMOVE)
14053 return true;
14054 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
14055 return bypass_code != UNKNOWN || second_code != UNKNOWN;
14056 }
14057
14058 void
14059 ix86_expand_branch (enum rtx_code code, rtx label)
14060 {
14061 rtx tmp;
14062
14063 /* If we have emitted a compare insn, go straight to simple.
14064 ix86_expand_compare won't emit anything if ix86_compare_emitted
14065 is non NULL. */
14066 if (ix86_compare_emitted)
14067 goto simple;
14068
14069 switch (GET_MODE (ix86_compare_op0))
14070 {
14071 case QImode:
14072 case HImode:
14073 case SImode:
14074 simple:
14075 tmp = ix86_expand_compare (code, NULL, NULL);
14076 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
14077 gen_rtx_LABEL_REF (VOIDmode, label),
14078 pc_rtx);
14079 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
14080 return;
14081
14082 case SFmode:
14083 case DFmode:
14084 case XFmode:
14085 {
14086 rtvec vec;
14087 int use_fcomi;
14088 enum rtx_code bypass_code, first_code, second_code;
14089
14090 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
14091 &ix86_compare_op1);
14092
14093 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
14094
14095 /* Check whether we will use the natural sequence with one jump. If
14096 	   so, we can expand the jump early.  Otherwise delay expansion by
14097 	   creating a compound insn so as not to confuse the optimizers.  */
14098 if (bypass_code == UNKNOWN && second_code == UNKNOWN)
14099 {
14100 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
14101 gen_rtx_LABEL_REF (VOIDmode, label),
14102 pc_rtx, NULL_RTX, NULL_RTX);
14103 }
14104 else
14105 {
14106 tmp = gen_rtx_fmt_ee (code, VOIDmode,
14107 ix86_compare_op0, ix86_compare_op1);
14108 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
14109 gen_rtx_LABEL_REF (VOIDmode, label),
14110 pc_rtx);
14111 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
14112
14113 use_fcomi = ix86_use_fcomi_compare (code);
14114 vec = rtvec_alloc (3 + !use_fcomi);
14115 RTVEC_ELT (vec, 0) = tmp;
14116 RTVEC_ELT (vec, 1)
14117 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FPSR_REG));
14118 RTVEC_ELT (vec, 2)
14119 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FLAGS_REG));
14120 if (! use_fcomi)
14121 RTVEC_ELT (vec, 3)
14122 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
14123
14124 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
14125 }
14126 return;
14127 }
14128
14129 case DImode:
14130 if (TARGET_64BIT)
14131 goto simple;
14132 case TImode:
14133 /* Expand DImode branch into multiple compare+branch. */
14134 {
14135 rtx lo[2], hi[2], label2;
14136 enum rtx_code code1, code2, code3;
14137 enum machine_mode submode;
14138
14139 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
14140 {
14141 tmp = ix86_compare_op0;
14142 ix86_compare_op0 = ix86_compare_op1;
14143 ix86_compare_op1 = tmp;
14144 code = swap_condition (code);
14145 }
14146 if (GET_MODE (ix86_compare_op0) == DImode)
14147 {
14148 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
14149 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
14150 submode = SImode;
14151 }
14152 else
14153 {
14154 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
14155 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
14156 submode = DImode;
14157 }
14158
14159 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
14160 avoid two branches. This costs one extra insn, so disable when
14161 optimizing for size. */
14162
14163 if ((code == EQ || code == NE)
14164 && (!optimize_insn_for_size_p ()
14165 || hi[1] == const0_rtx || lo[1] == const0_rtx))
14166 {
14167 rtx xor0, xor1;
14168
14169 xor1 = hi[0];
14170 if (hi[1] != const0_rtx)
14171 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
14172 NULL_RTX, 0, OPTAB_WIDEN);
14173
14174 xor0 = lo[0];
14175 if (lo[1] != const0_rtx)
14176 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
14177 NULL_RTX, 0, OPTAB_WIDEN);
14178
14179 tmp = expand_binop (submode, ior_optab, xor1, xor0,
14180 NULL_RTX, 0, OPTAB_WIDEN);
14181
14182 ix86_compare_op0 = tmp;
14183 ix86_compare_op1 = const0_rtx;
14184 ix86_expand_branch (code, label);
14185 return;
14186 }
14187
14188 	/* Otherwise, if we are doing a less-than or greater-than-or-equal
14189 	   comparison, op1 is a constant and the low word is zero, then we can
14190 	   just examine the high word.  Similarly for a low word of -1 and
14191 	   less-than-or-equal or greater-than.  */
14192
14193 if (CONST_INT_P (hi[1]))
14194 switch (code)
14195 {
14196 case LT: case LTU: case GE: case GEU:
14197 if (lo[1] == const0_rtx)
14198 {
14199 ix86_compare_op0 = hi[0];
14200 ix86_compare_op1 = hi[1];
14201 ix86_expand_branch (code, label);
14202 return;
14203 }
14204 break;
14205 case LE: case LEU: case GT: case GTU:
14206 if (lo[1] == constm1_rtx)
14207 {
14208 ix86_compare_op0 = hi[0];
14209 ix86_compare_op1 = hi[1];
14210 ix86_expand_branch (code, label);
14211 return;
14212 }
14213 break;
14214 default:
14215 break;
14216 }
14217
14218 /* Otherwise, we need two or three jumps. */
14219
14220 label2 = gen_label_rtx ();
14221
14222 code1 = code;
14223 code2 = swap_condition (code);
14224 code3 = unsigned_condition (code);
14225
14226 switch (code)
14227 {
14228 case LT: case GT: case LTU: case GTU:
14229 break;
14230
14231 case LE: code1 = LT; code2 = GT; break;
14232 case GE: code1 = GT; code2 = LT; break;
14233 case LEU: code1 = LTU; code2 = GTU; break;
14234 case GEU: code1 = GTU; code2 = LTU; break;
14235
14236 case EQ: code1 = UNKNOWN; code2 = NE; break;
14237 case NE: code2 = UNKNOWN; break;
14238
14239 default:
14240 gcc_unreachable ();
14241 }
14242
14243 /*
14244 * a < b =>
14245 * if (hi(a) < hi(b)) goto true;
14246 * if (hi(a) > hi(b)) goto false;
14247 * if (lo(a) < lo(b)) goto true;
14248 * false:
14249 */
14250
14251 ix86_compare_op0 = hi[0];
14252 ix86_compare_op1 = hi[1];
14253
14254 if (code1 != UNKNOWN)
14255 ix86_expand_branch (code1, label);
14256 if (code2 != UNKNOWN)
14257 ix86_expand_branch (code2, label2);
14258
14259 ix86_compare_op0 = lo[0];
14260 ix86_compare_op1 = lo[1];
14261 ix86_expand_branch (code3, label);
14262
14263 if (code2 != UNKNOWN)
14264 emit_label (label2);
14265 return;
14266 }
14267
14268 default:
14269 gcc_unreachable ();
14270 }
14271 }
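
/* A standalone C sketch of the double-word branch lowering above, for
   illustration only (not compiled; helper names are hypothetical).  */
#if 0
#include <stdbool.h>
#include <stdint.h>

/* Equality without a second branch: (hi0^hi1)|(lo0^lo1) is zero iff
   the two 64-bit values are equal.  */
static bool
di_eq_sketch (uint32_t lo0, int32_t hi0, uint32_t lo1, int32_t hi1)
{
  return (((uint32_t) hi0 ^ (uint32_t) hi1) | (lo0 ^ lo1)) == 0;
}

/* The "two or three jumps" scheme for signed less-than:
     if (hi(a) < hi(b)) goto true;
     if (hi(a) > hi(b)) goto false;
     if (lo(a) < lo(b)) goto true;   -- the low words compare unsigned.  */
static bool
di_lt_sketch (uint32_t lo0, int32_t hi0, uint32_t lo1, int32_t hi1)
{
  if (hi0 < hi1)
    return true;
  if (hi0 > hi1)
    return false;
  return lo0 < lo1;			/* code3 = unsigned_condition (LT) */
}
#endif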
14272
14273 /* Split branch based on floating point condition. */
14274 void
14275 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
14276 rtx target1, rtx target2, rtx tmp, rtx pushed)
14277 {
14278 rtx second, bypass;
14279 rtx label = NULL_RTX;
14280 rtx condition;
14281 int bypass_probability = -1, second_probability = -1, probability = -1;
14282 rtx i;
14283
14284 if (target2 != pc_rtx)
14285 {
14286 rtx tmp = target2;
14287 code = reverse_condition_maybe_unordered (code);
14288 target2 = target1;
14289 target1 = tmp;
14290 }
14291
14292 condition = ix86_expand_fp_compare (code, op1, op2,
14293 tmp, &second, &bypass);
14294
14295 /* Remove pushed operand from stack. */
14296 if (pushed)
14297 ix86_free_from_memory (GET_MODE (pushed));
14298
14299 if (split_branch_probability >= 0)
14300 {
14301 /* Distribute the probabilities across the jumps.
14302 Assume the BYPASS and SECOND to be always test
14303 for UNORDERED. */
14304 probability = split_branch_probability;
14305
14306       /* A value of 1 is low enough that the probability does not need
14307 	 to be updated.  Later we may run some experiments and see
14308 if unordered values are more frequent in practice. */
14309 if (bypass)
14310 bypass_probability = 1;
14311 if (second)
14312 second_probability = 1;
14313 }
14314 if (bypass != NULL_RTX)
14315 {
14316 label = gen_label_rtx ();
14317 i = emit_jump_insn (gen_rtx_SET
14318 (VOIDmode, pc_rtx,
14319 gen_rtx_IF_THEN_ELSE (VOIDmode,
14320 bypass,
14321 gen_rtx_LABEL_REF (VOIDmode,
14322 label),
14323 pc_rtx)));
14324 if (bypass_probability >= 0)
14325 REG_NOTES (i)
14326 = gen_rtx_EXPR_LIST (REG_BR_PROB,
14327 GEN_INT (bypass_probability),
14328 REG_NOTES (i));
14329 }
14330 i = emit_jump_insn (gen_rtx_SET
14331 (VOIDmode, pc_rtx,
14332 gen_rtx_IF_THEN_ELSE (VOIDmode,
14333 condition, target1, target2)));
14334 if (probability >= 0)
14335 REG_NOTES (i)
14336 = gen_rtx_EXPR_LIST (REG_BR_PROB,
14337 GEN_INT (probability),
14338 REG_NOTES (i));
14339 if (second != NULL_RTX)
14340 {
14341 i = emit_jump_insn (gen_rtx_SET
14342 (VOIDmode, pc_rtx,
14343 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
14344 target2)));
14345 if (second_probability >= 0)
14346 REG_NOTES (i)
14347 = gen_rtx_EXPR_LIST (REG_BR_PROB,
14348 GEN_INT (second_probability),
14349 REG_NOTES (i));
14350 }
14351 if (label != NULL_RTX)
14352 emit_label (label);
14353 }
14354
14355 int
14356 ix86_expand_setcc (enum rtx_code code, rtx dest)
14357 {
14358 rtx ret, tmp, tmpreg, equiv;
14359 rtx second_test, bypass_test;
14360
14361 if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
14362 return 0; /* FAIL */
14363
14364 gcc_assert (GET_MODE (dest) == QImode);
14365
14366 ret = ix86_expand_compare (code, &second_test, &bypass_test);
14367 PUT_MODE (ret, QImode);
14368
14369 tmp = dest;
14370 tmpreg = dest;
14371
14372 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
14373 if (bypass_test || second_test)
14374 {
14375 rtx test = second_test;
14376 int bypass = 0;
14377 rtx tmp2 = gen_reg_rtx (QImode);
14378 if (bypass_test)
14379 {
14380 gcc_assert (!second_test);
14381 test = bypass_test;
14382 bypass = 1;
14383 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
14384 }
14385 PUT_MODE (test, QImode);
14386 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
14387
14388 if (bypass)
14389 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
14390 else
14391 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
14392 }
14393
14394 /* Attach a REG_EQUAL note describing the comparison result. */
14395 if (ix86_compare_op0 && ix86_compare_op1)
14396 {
14397 equiv = simplify_gen_relational (code, QImode,
14398 GET_MODE (ix86_compare_op0),
14399 ix86_compare_op0, ix86_compare_op1);
14400 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
14401 }
14402
14403 return 1; /* DONE */
14404 }
14405
14406 /* Expand comparison setting or clearing carry flag. Return true when
14407 successful and set pop for the operation. */
14408 static bool
14409 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
14410 {
14411 enum machine_mode mode =
14412 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
14413
14414 /* Do not handle DImode compares that go through special path. */
14415 if (mode == (TARGET_64BIT ? TImode : DImode))
14416 return false;
14417
14418 if (SCALAR_FLOAT_MODE_P (mode))
14419 {
14420 rtx second_test = NULL, bypass_test = NULL;
14421 rtx compare_op, compare_seq;
14422
14423 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
14424
14425 /* Shortcut: following common codes never translate
14426 into carry flag compares. */
14427 if (code == EQ || code == NE || code == UNEQ || code == LTGT
14428 || code == ORDERED || code == UNORDERED)
14429 return false;
14430
14431       /* These comparisons require the zero flag; swap operands so they don't.  */
14432 if ((code == GT || code == UNLE || code == LE || code == UNGT)
14433 && !TARGET_IEEE_FP)
14434 {
14435 rtx tmp = op0;
14436 op0 = op1;
14437 op1 = tmp;
14438 code = swap_condition (code);
14439 }
14440
14441 /* Try to expand the comparison and verify that we end up with
14442 	 a carry flag based comparison.  This fails only when we decide
14443 	 to expand the comparison using arithmetic, which is not a very
14444 	 common scenario.  */
14445 start_sequence ();
14446 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
14447 &second_test, &bypass_test);
14448 compare_seq = get_insns ();
14449 end_sequence ();
14450
14451 if (second_test || bypass_test)
14452 return false;
14453
14454 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
14455 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
14456 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
14457 else
14458 code = GET_CODE (compare_op);
14459
14460 if (code != LTU && code != GEU)
14461 return false;
14462
14463 emit_insn (compare_seq);
14464 *pop = compare_op;
14465 return true;
14466 }
14467
14468 if (!INTEGRAL_MODE_P (mode))
14469 return false;
14470
14471 switch (code)
14472 {
14473 case LTU:
14474 case GEU:
14475 break;
14476
14477 /* Convert a==0 into (unsigned)a<1. */
14478 case EQ:
14479 case NE:
14480 if (op1 != const0_rtx)
14481 return false;
14482 op1 = const1_rtx;
14483 code = (code == EQ ? LTU : GEU);
14484 break;
14485
14486     /* Convert a>b into b<a or a>=b+1.  */
14487 case GTU:
14488 case LEU:
14489 if (CONST_INT_P (op1))
14490 {
14491 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
14492 	    /* Bail out on overflow.  We could still swap the operands, but that
14493 	       would force loading the constant into a register.  */
14494 if (op1 == const0_rtx
14495 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
14496 return false;
14497 code = (code == GTU ? GEU : LTU);
14498 }
14499 else
14500 {
14501 rtx tmp = op1;
14502 op1 = op0;
14503 op0 = tmp;
14504 code = (code == GTU ? LTU : GEU);
14505 }
14506 break;
14507
14508 /* Convert a>=0 into (unsigned)a<0x80000000. */
14509 case LT:
14510 case GE:
14511 if (mode == DImode || op1 != const0_rtx)
14512 return false;
14513 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
14514 code = (code == LT ? GEU : LTU);
14515 break;
14516 case LE:
14517 case GT:
14518 if (mode == DImode || op1 != constm1_rtx)
14519 return false;
14520 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
14521 code = (code == LE ? GEU : LTU);
14522 break;
14523
14524 default:
14525 return false;
14526 }
14527 /* Swapping operands may cause constant to appear as first operand. */
14528 if (!nonimmediate_operand (op0, VOIDmode))
14529 {
14530 if (!can_create_pseudo_p ())
14531 return false;
14532 op0 = force_reg (mode, op0);
14533 }
14534 ix86_compare_op0 = op0;
14535 ix86_compare_op1 = op1;
14536 *pop = ix86_expand_compare (code, NULL, NULL);
14537 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
14538 return true;
14539 }
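
/* Standalone C examples of the rewrites above, for illustration only
   (not compiled; helper names and the constant 41 are hypothetical).
   Each test is turned into an unsigned < or >= so that a single
   sbb/adc can consume the carry flag.  */
#if 0
#include <stdbool.h>
#include <stdint.h>

static bool
eq0_sketch (uint32_t a)		/* a == 0   ->   a < 1 (unsigned) */
{
  return a < 1u;
}

static bool
gtu_sketch (uint32_t a)		/* a > 41   ->   a >= 42 (unsigned) */
{
  return a >= 42u;
}

static bool
ge0_sketch (int32_t a)		/* a >= 0   ->   (unsigned) a < 0x80000000 */
{
  return (uint32_t) a < 0x80000000u;
}
#endif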
14540
14541 int
14542 ix86_expand_int_movcc (rtx operands[])
14543 {
14544 enum rtx_code code = GET_CODE (operands[1]), compare_code;
14545 rtx compare_seq, compare_op;
14546 rtx second_test, bypass_test;
14547 enum machine_mode mode = GET_MODE (operands[0]);
14548   bool sign_bit_compare_p = false;
14549
14550 start_sequence ();
14551 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
14552 compare_seq = get_insns ();
14553 end_sequence ();
14554
14555 compare_code = GET_CODE (compare_op);
14556
14557 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
14558 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
14559 sign_bit_compare_p = true;
14560
14561 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
14562 HImode insns, we'd be swallowed in word prefix ops. */
14563
14564 if ((mode != HImode || TARGET_FAST_PREFIX)
14565 && (mode != (TARGET_64BIT ? TImode : DImode))
14566 && CONST_INT_P (operands[2])
14567 && CONST_INT_P (operands[3]))
14568 {
14569 rtx out = operands[0];
14570 HOST_WIDE_INT ct = INTVAL (operands[2]);
14571 HOST_WIDE_INT cf = INTVAL (operands[3]);
14572 HOST_WIDE_INT diff;
14573
14574 diff = ct - cf;
14575       /* Sign bit compares are better done using shifts than by using
14576 	 sbb.  */
14577 if (sign_bit_compare_p
14578 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
14579 ix86_compare_op1, &compare_op))
14580 {
14581 /* Detect overlap between destination and compare sources. */
14582 rtx tmp = out;
14583
14584 if (!sign_bit_compare_p)
14585 {
14586 bool fpcmp = false;
14587
14588 compare_code = GET_CODE (compare_op);
14589
14590 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
14591 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
14592 {
14593 fpcmp = true;
14594 compare_code = ix86_fp_compare_code_to_integer (compare_code);
14595 }
14596
14597 /* To simplify rest of code, restrict to the GEU case. */
14598 if (compare_code == LTU)
14599 {
14600 HOST_WIDE_INT tmp = ct;
14601 ct = cf;
14602 cf = tmp;
14603 compare_code = reverse_condition (compare_code);
14604 code = reverse_condition (code);
14605 }
14606 else
14607 {
14608 if (fpcmp)
14609 PUT_CODE (compare_op,
14610 reverse_condition_maybe_unordered
14611 (GET_CODE (compare_op)));
14612 else
14613 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
14614 }
14615 diff = ct - cf;
14616
14617 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
14618 || reg_overlap_mentioned_p (out, ix86_compare_op1))
14619 tmp = gen_reg_rtx (mode);
14620
14621 if (mode == DImode)
14622 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
14623 else
14624 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
14625 }
14626 else
14627 {
14628 if (code == GT || code == GE)
14629 code = reverse_condition (code);
14630 else
14631 {
14632 HOST_WIDE_INT tmp = ct;
14633 ct = cf;
14634 cf = tmp;
14635 diff = ct - cf;
14636 }
14637 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
14638 ix86_compare_op1, VOIDmode, 0, -1);
14639 }
14640
14641 if (diff == 1)
14642 {
14643 /*
14644 * cmpl op0,op1
14645 * sbbl dest,dest
14646 * [addl dest, ct]
14647 *
14648 * Size 5 - 8.
14649 */
14650 if (ct)
14651 tmp = expand_simple_binop (mode, PLUS,
14652 tmp, GEN_INT (ct),
14653 copy_rtx (tmp), 1, OPTAB_DIRECT);
14654 }
14655 else if (cf == -1)
14656 {
14657 /*
14658 * cmpl op0,op1
14659 * sbbl dest,dest
14660 * orl $ct, dest
14661 *
14662 * Size 8.
14663 */
14664 tmp = expand_simple_binop (mode, IOR,
14665 tmp, GEN_INT (ct),
14666 copy_rtx (tmp), 1, OPTAB_DIRECT);
14667 }
14668 else if (diff == -1 && ct)
14669 {
14670 /*
14671 * cmpl op0,op1
14672 * sbbl dest,dest
14673 * notl dest
14674 * [addl dest, cf]
14675 *
14676 * Size 8 - 11.
14677 */
14678 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
14679 if (cf)
14680 tmp = expand_simple_binop (mode, PLUS,
14681 copy_rtx (tmp), GEN_INT (cf),
14682 copy_rtx (tmp), 1, OPTAB_DIRECT);
14683 }
14684 else
14685 {
14686 /*
14687 * cmpl op0,op1
14688 * sbbl dest,dest
14689 * [notl dest]
14690 * andl cf - ct, dest
14691 * [addl dest, ct]
14692 *
14693 * Size 8 - 11.
14694 */
14695
14696 if (cf == 0)
14697 {
14698 cf = ct;
14699 ct = 0;
14700 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
14701 }
14702
14703 tmp = expand_simple_binop (mode, AND,
14704 copy_rtx (tmp),
14705 gen_int_mode (cf - ct, mode),
14706 copy_rtx (tmp), 1, OPTAB_DIRECT);
14707 if (ct)
14708 tmp = expand_simple_binop (mode, PLUS,
14709 copy_rtx (tmp), GEN_INT (ct),
14710 copy_rtx (tmp), 1, OPTAB_DIRECT);
14711 }
14712
14713 if (!rtx_equal_p (tmp, out))
14714 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
14715
14716 return 1; /* DONE */
14717 }
14718
14719 if (diff < 0)
14720 {
14721 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
14722
14723 HOST_WIDE_INT tmp;
14724 tmp = ct, ct = cf, cf = tmp;
14725 diff = -diff;
14726
14727 if (SCALAR_FLOAT_MODE_P (cmp_mode))
14728 {
14729 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
14730
14731 /* We may be reversing an unordered compare to a normal compare, which
14732 is not valid in general (we may convert a non-trapping condition
14733 into a trapping one); however, on i386 we currently emit all
14734 comparisons unordered. */
14735 compare_code = reverse_condition_maybe_unordered (compare_code);
14736 code = reverse_condition_maybe_unordered (code);
14737 }
14738 else
14739 {
14740 compare_code = reverse_condition (compare_code);
14741 code = reverse_condition (code);
14742 }
14743 }
14744
14745 compare_code = UNKNOWN;
14746 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
14747 && CONST_INT_P (ix86_compare_op1))
14748 {
14749 if (ix86_compare_op1 == const0_rtx
14750 && (code == LT || code == GE))
14751 compare_code = code;
14752 else if (ix86_compare_op1 == constm1_rtx)
14753 {
14754 if (code == LE)
14755 compare_code = LT;
14756 else if (code == GT)
14757 compare_code = GE;
14758 }
14759 }
14760
14761 /* Optimize dest = (op0 < 0) ? -1 : cf. */
14762 if (compare_code != UNKNOWN
14763 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
14764 && (cf == -1 || ct == -1))
14765 {
14766 /* If lea code below could be used, only optimize
14767 if it results in a 2 insn sequence. */
14768
14769 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
14770 || diff == 3 || diff == 5 || diff == 9)
14771 || (compare_code == LT && ct == -1)
14772 || (compare_code == GE && cf == -1))
14773 {
14774 /*
14775 * notl op1 (if necessary)
14776 * sarl $31, op1
14777 * orl cf, op1
14778 */
14779 if (ct != -1)
14780 {
14781 cf = ct;
14782 ct = -1;
14783 code = reverse_condition (code);
14784 }
14785
14786 out = emit_store_flag (out, code, ix86_compare_op0,
14787 ix86_compare_op1, VOIDmode, 0, -1);
14788
14789 out = expand_simple_binop (mode, IOR,
14790 out, GEN_INT (cf),
14791 out, 1, OPTAB_DIRECT);
14792 if (out != operands[0])
14793 emit_move_insn (operands[0], out);
14794
14795 return 1; /* DONE */
14796 }
14797 }
14798
14799
14800 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
14801 || diff == 3 || diff == 5 || diff == 9)
14802 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
14803 && (mode != DImode
14804 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
14805 {
14806 /*
14807 * xorl dest,dest
14808 * cmpl op1,op2
14809 * setcc dest
14810 * lea cf(dest*(ct-cf)),dest
14811 *
14812 * Size 14.
14813 *
14814 * This also catches the degenerate setcc-only case.
14815 */
14816
14817 rtx tmp;
14818 int nops;
14819
14820 out = emit_store_flag (out, code, ix86_compare_op0,
14821 ix86_compare_op1, VOIDmode, 0, 1);
14822
14823 nops = 0;
14824 /* On x86_64 the lea instruction operates on Pmode, so we need
14825 to get the arithmetic done in the proper mode to match. */
14826 if (diff == 1)
14827 tmp = copy_rtx (out);
14828 else
14829 {
14830 rtx out1;
14831 out1 = copy_rtx (out);
14832 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
14833 nops++;
14834 if (diff & 1)
14835 {
14836 tmp = gen_rtx_PLUS (mode, tmp, out1);
14837 nops++;
14838 }
14839 }
14840 if (cf != 0)
14841 {
14842 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
14843 nops++;
14844 }
14845 if (!rtx_equal_p (tmp, out))
14846 {
14847 if (nops == 1)
14848 out = force_operand (tmp, copy_rtx (out));
14849 else
14850 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
14851 }
14852 if (!rtx_equal_p (out, operands[0]))
14853 emit_move_insn (operands[0], copy_rtx (out));
14854
14855 return 1; /* DONE */
14856 }
14857
14858 /*
14859 * General case: Jumpful:
14860 * xorl dest,dest cmpl op1, op2
14861 * cmpl op1, op2 movl ct, dest
14862 * setcc dest jcc 1f
14863 * decl dest movl cf, dest
14864 * andl (cf-ct),dest 1:
14865 * addl ct,dest
14866 *
14867 * Size 20. Size 14.
14868 *
14869 * This is reasonably steep, but branch mispredict costs are
14870 * high on modern cpus, so consider failing only if optimizing
14871 * for space.
14872 */
14873
14874 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
14875 && BRANCH_COST (optimize_insn_for_speed_p (),
14876 false) >= 2)
14877 {
14878 if (cf == 0)
14879 {
14880 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
14881
14882 cf = ct;
14883 ct = 0;
14884
14885 if (SCALAR_FLOAT_MODE_P (cmp_mode))
14886 {
14887 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
14888
14889 /* We may be reversing an unordered compare to a normal compare,
14890 which is not valid in general (we may convert a non-trapping
14891 condition into a trapping one); however, on i386 we currently
14892 emit all comparisons unordered. */
14893 code = reverse_condition_maybe_unordered (code);
14894 }
14895 else
14896 {
14897 code = reverse_condition (code);
14898 if (compare_code != UNKNOWN)
14899 compare_code = reverse_condition (compare_code);
14900 }
14901 }
14902
14903 if (compare_code != UNKNOWN)
14904 {
14905 /* notl op1 (if needed)
14906 sarl $31, op1
14907 andl (cf-ct), op1
14908 addl ct, op1
14909
14910 For x < 0 (resp. x <= -1) there will be no notl,
14911 so if possible swap the constants to get rid of the
14912 complement.
14913 True/false will be -1/0 while code below (store flag
14914 followed by decrement) is 0/-1, so the constants need
14915 to be exchanged once more. */
14916
14917 if (compare_code == GE || !cf)
14918 {
14919 code = reverse_condition (code);
14920 compare_code = LT;
14921 }
14922 else
14923 {
14924 HOST_WIDE_INT tmp = cf;
14925 cf = ct;
14926 ct = tmp;
14927 }
14928
14929 out = emit_store_flag (out, code, ix86_compare_op0,
14930 ix86_compare_op1, VOIDmode, 0, -1);
14931 }
14932 else
14933 {
14934 out = emit_store_flag (out, code, ix86_compare_op0,
14935 ix86_compare_op1, VOIDmode, 0, 1);
14936
14937 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
14938 copy_rtx (out), 1, OPTAB_DIRECT);
14939 }
14940
14941 out = expand_simple_binop (mode, AND, copy_rtx (out),
14942 gen_int_mode (cf - ct, mode),
14943 copy_rtx (out), 1, OPTAB_DIRECT);
14944 if (ct)
14945 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
14946 copy_rtx (out), 1, OPTAB_DIRECT);
14947 if (!rtx_equal_p (out, operands[0]))
14948 emit_move_insn (operands[0], copy_rtx (out));
14949
14950 return 1; /* DONE */
14951 }
14952 }
14953
14954 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
14955 {
14956 /* Try a few things more with specific constants and a variable. */
14957
14958 optab op;
14959 rtx var, orig_out, out, tmp;
14960
14961 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
14962 return 0; /* FAIL */
14963
14964 /* If one of the two operands is an interesting constant, load a
14965 constant with the above and mask it in with a logical operation. */
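/* For example, dest = (a < b) ? 0 : var is handled by recursing to
compute tmp = (a < b) ? 0 : -1 and then emitting dest = var & tmp;
the analogous -1 case uses IOR instead of AND. */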
14966
14967 if (CONST_INT_P (operands[2]))
14968 {
14969 var = operands[3];
14970 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
14971 operands[3] = constm1_rtx, op = and_optab;
14972 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
14973 operands[3] = const0_rtx, op = ior_optab;
14974 else
14975 return 0; /* FAIL */
14976 }
14977 else if (CONST_INT_P (operands[3]))
14978 {
14979 var = operands[2];
14980 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
14981 operands[2] = constm1_rtx, op = and_optab;
14982 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
14983 operands[2] = const0_rtx, op = ior_optab;
14984 else
14985 return 0; /* FAIL */
14986 }
14987 else
14988 return 0; /* FAIL */
14989
14990 orig_out = operands[0];
14991 tmp = gen_reg_rtx (mode);
14992 operands[0] = tmp;
14993
14994 /* Recurse to get the constant loaded. */
14995 if (ix86_expand_int_movcc (operands) == 0)
14996 return 0; /* FAIL */
14997
14998 /* Mask in the interesting variable. */
14999 out = expand_binop (mode, op, var, tmp, orig_out, 0,
15000 OPTAB_WIDEN);
15001 if (!rtx_equal_p (out, orig_out))
15002 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
15003
15004 return 1; /* DONE */
15005 }
15006
15007 /*
15008 * For comparison with above,
15009 *
15010 * movl cf,dest
15011 * movl ct,tmp
15012 * cmpl op1,op2
15013 * cmovcc tmp,dest
15014 *
15015 * Size 15.
15016 */
15017
15018 if (! nonimmediate_operand (operands[2], mode))
15019 operands[2] = force_reg (mode, operands[2]);
15020 if (! nonimmediate_operand (operands[3], mode))
15021 operands[3] = force_reg (mode, operands[3]);
15022
15023 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
15024 {
15025 rtx tmp = gen_reg_rtx (mode);
15026 emit_move_insn (tmp, operands[3]);
15027 operands[3] = tmp;
15028 }
15029 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
15030 {
15031 rtx tmp = gen_reg_rtx (mode);
15032 emit_move_insn (tmp, operands[2]);
15033 operands[2] = tmp;
15034 }
15035
15036 if (! register_operand (operands[2], VOIDmode)
15037 && (mode == QImode
15038 || ! register_operand (operands[3], VOIDmode)))
15039 operands[2] = force_reg (mode, operands[2]);
15040
15041 if (mode == QImode
15042 && ! register_operand (operands[3], VOIDmode))
15043 operands[3] = force_reg (mode, operands[3]);
15044
15045 emit_insn (compare_seq);
15046 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
15047 gen_rtx_IF_THEN_ELSE (mode,
15048 compare_op, operands[2],
15049 operands[3])));
15050 if (bypass_test)
15051 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
15052 gen_rtx_IF_THEN_ELSE (mode,
15053 bypass_test,
15054 copy_rtx (operands[3]),
15055 copy_rtx (operands[0]))));
15056 if (second_test)
15057 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
15058 gen_rtx_IF_THEN_ELSE (mode,
15059 second_test,
15060 copy_rtx (operands[2]),
15061 copy_rtx (operands[0]))));
15062
15063 return 1; /* DONE */
15064 }
15065
15066 /* Swap, force into registers, or otherwise massage the two operands
15067 to an sse comparison with a mask result. Thus we differ a bit from
15068 ix86_prepare_fp_compare_args which expects to produce a flags result.
15069
15070 The DEST operand exists to help determine whether to commute commutative
15071 operators. The POP0/POP1 operands are updated in place. The new
15072 comparison code is returned, or UNKNOWN if not implementable. */
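/* For example, GT has no direct SSE compare predicate, so a > b is
rewritten below as b < a: the operands are swapped and LT is
returned. */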
15073
15074 static enum rtx_code
15075 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
15076 rtx *pop0, rtx *pop1)
15077 {
15078 rtx tmp;
15079
15080 switch (code)
15081 {
15082 case LTGT:
15083 case UNEQ:
15084 /* We have no LTGT as an operator. We could implement it with
15085 NE & ORDERED, but this requires an extra temporary. It's
15086 not clear that it's worth it. */
15087 return UNKNOWN;
15088
15089 case LT:
15090 case LE:
15091 case UNGT:
15092 case UNGE:
15093 /* These are supported directly. */
15094 break;
15095
15096 case EQ:
15097 case NE:
15098 case UNORDERED:
15099 case ORDERED:
15100 /* For commutative operators, try to canonicalize the destination
15101 operand to be first in the comparison - this helps reload to
15102 avoid extra moves. */
15103 if (!dest || !rtx_equal_p (dest, *pop1))
15104 break;
15105 /* FALLTHRU */
15106
15107 case GE:
15108 case GT:
15109 case UNLE:
15110 case UNLT:
15111 /* These are not supported directly. Swap the comparison operands
15112 to transform into something that is supported. */
15113 tmp = *pop0;
15114 *pop0 = *pop1;
15115 *pop1 = tmp;
15116 code = swap_condition (code);
15117 break;
15118
15119 default:
15120 gcc_unreachable ();
15121 }
15122
15123 return code;
15124 }
15125
15126 /* Detect conditional moves that exactly match min/max operational
15127 semantics. Note that this is IEEE safe, as long as we don't
15128 interchange the operands.
15129
15130 Returns FALSE if this conditional move doesn't match a MIN/MAX,
15131 and TRUE if the operation is successful and instructions are emitted. */
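/* For example, dest = (a < b) ? a : b matches a MIN and
dest = (a < b) ? b : a matches a MAX; these expand to SMIN/SMAX
patterns when both -ffinite-math-only and -funsafe-math-optimizations
are in effect, and to the IEEE-safe UNSPEC_IEEE_MIN/MAX patterns
otherwise. */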
15132
15133 static bool
15134 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
15135 rtx cmp_op1, rtx if_true, rtx if_false)
15136 {
15137 enum machine_mode mode;
15138 bool is_min;
15139 rtx tmp;
15140
15141 if (code == LT)
15142 ;
15143 else if (code == UNGE)
15144 {
15145 tmp = if_true;
15146 if_true = if_false;
15147 if_false = tmp;
15148 }
15149 else
15150 return false;
15151
15152 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
15153 is_min = true;
15154 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
15155 is_min = false;
15156 else
15157 return false;
15158
15159 mode = GET_MODE (dest);
15160
15161 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
15162 but MODE may be a vector mode and thus not appropriate. */
15163 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
15164 {
15165 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
15166 rtvec v;
15167
15168 if_true = force_reg (mode, if_true);
15169 v = gen_rtvec (2, if_true, if_false);
15170 tmp = gen_rtx_UNSPEC (mode, v, u);
15171 }
15172 else
15173 {
15174 code = is_min ? SMIN : SMAX;
15175 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
15176 }
15177
15178 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
15179 return true;
15180 }
15181
15182 /* Expand an sse vector comparison. Return the register with the result. */
15183
15184 static rtx
15185 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
15186 rtx op_true, rtx op_false)
15187 {
15188 enum machine_mode mode = GET_MODE (dest);
15189 rtx x;
15190
15191 cmp_op0 = force_reg (mode, cmp_op0);
15192 if (!nonimmediate_operand (cmp_op1, mode))
15193 cmp_op1 = force_reg (mode, cmp_op1);
15194
15195 if (optimize
15196 || reg_overlap_mentioned_p (dest, op_true)
15197 || reg_overlap_mentioned_p (dest, op_false))
15198 dest = gen_reg_rtx (mode);
15199
15200 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
15201 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15202
15203 return dest;
15204 }
15205
15206 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
15207 operations. This is used for both scalar and vector conditional moves. */
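/* In the general (non-SSE5) case this computes
dest = (cmp & op_true) | (~cmp & op_false); when one arm is the zero
vector, a single AND (or NOT/AND) suffices. */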
15208
15209 static void
15210 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
15211 {
15212 enum machine_mode mode = GET_MODE (dest);
15213 rtx t2, t3, x;
15214
15215 if (op_false == CONST0_RTX (mode))
15216 {
15217 op_true = force_reg (mode, op_true);
15218 x = gen_rtx_AND (mode, cmp, op_true);
15219 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15220 }
15221 else if (op_true == CONST0_RTX (mode))
15222 {
15223 op_false = force_reg (mode, op_false);
15224 x = gen_rtx_NOT (mode, cmp);
15225 x = gen_rtx_AND (mode, x, op_false);
15226 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15227 }
15228 else if (TARGET_SSE5)
15229 {
15230 rtx pcmov = gen_rtx_SET (mode, dest,
15231 gen_rtx_IF_THEN_ELSE (mode, cmp,
15232 op_true,
15233 op_false));
15234 emit_insn (pcmov);
15235 }
15236 else
15237 {
15238 op_true = force_reg (mode, op_true);
15239 op_false = force_reg (mode, op_false);
15240
15241 t2 = gen_reg_rtx (mode);
15242 if (optimize)
15243 t3 = gen_reg_rtx (mode);
15244 else
15245 t3 = dest;
15246
15247 x = gen_rtx_AND (mode, op_true, cmp);
15248 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
15249
15250 x = gen_rtx_NOT (mode, cmp);
15251 x = gen_rtx_AND (mode, x, op_false);
15252 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
15253
15254 x = gen_rtx_IOR (mode, t3, t2);
15255 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15256 }
15257 }
15258
15259 /* Expand a floating-point conditional move. Return true if successful. */
15260
15261 int
15262 ix86_expand_fp_movcc (rtx operands[])
15263 {
15264 enum machine_mode mode = GET_MODE (operands[0]);
15265 enum rtx_code code = GET_CODE (operands[1]);
15266 rtx tmp, compare_op, second_test, bypass_test;
15267
15268 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
15269 {
15270 enum machine_mode cmode;
15271
15272 /* Since we've no cmove for sse registers, don't force bad register
15273 allocation just to gain access to it. Deny movcc when the
15274 comparison mode doesn't match the move mode. */
15275 cmode = GET_MODE (ix86_compare_op0);
15276 if (cmode == VOIDmode)
15277 cmode = GET_MODE (ix86_compare_op1);
15278 if (cmode != mode)
15279 return 0;
15280
15281 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
15282 &ix86_compare_op0,
15283 &ix86_compare_op1);
15284 if (code == UNKNOWN)
15285 return 0;
15286
15287 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
15288 ix86_compare_op1, operands[2],
15289 operands[3]))
15290 return 1;
15291
15292 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
15293 ix86_compare_op1, operands[2], operands[3]);
15294 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
15295 return 1;
15296 }
15297
15298 /* The floating point conditional move instructions don't directly
15299 support conditions resulting from a signed integer comparison. */
15300
15301 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
15302
15303 /* The floating point conditional move instructions don't directly
15304 support signed integer comparisons. */
15305
15306 if (!fcmov_comparison_operator (compare_op, VOIDmode))
15307 {
15308 gcc_assert (!second_test && !bypass_test);
15309 tmp = gen_reg_rtx (QImode);
15310 ix86_expand_setcc (code, tmp);
15311 code = NE;
15312 ix86_compare_op0 = tmp;
15313 ix86_compare_op1 = const0_rtx;
15314 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
15315 }
15316 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
15317 {
15318 tmp = gen_reg_rtx (mode);
15319 emit_move_insn (tmp, operands[3]);
15320 operands[3] = tmp;
15321 }
15322 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
15323 {
15324 tmp = gen_reg_rtx (mode);
15325 emit_move_insn (tmp, operands[2]);
15326 operands[2] = tmp;
15327 }
15328
15329 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
15330 gen_rtx_IF_THEN_ELSE (mode, compare_op,
15331 operands[2], operands[3])));
15332 if (bypass_test)
15333 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
15334 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
15335 operands[3], operands[0])));
15336 if (second_test)
15337 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
15338 gen_rtx_IF_THEN_ELSE (mode, second_test,
15339 operands[2], operands[0])));
15340
15341 return 1;
15342 }
15343
15344 /* Expand a floating-point vector conditional move; a vcond operation
15345 rather than a movcc operation. */
15346
15347 bool
15348 ix86_expand_fp_vcond (rtx operands[])
15349 {
15350 enum rtx_code code = GET_CODE (operands[3]);
15351 rtx cmp;
15352
15353 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
15354 &operands[4], &operands[5]);
15355 if (code == UNKNOWN)
15356 return false;
15357
15358 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
15359 operands[5], operands[1], operands[2]))
15360 return true;
15361
15362 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
15363 operands[1], operands[2]);
15364 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
15365 return true;
15366 }
15367
15368 /* Expand a signed/unsigned integral vector conditional move. */
15369
15370 bool
15371 ix86_expand_int_vcond (rtx operands[])
15372 {
15373 enum machine_mode mode = GET_MODE (operands[0]);
15374 enum rtx_code code = GET_CODE (operands[3]);
15375 bool negate = false;
15376 rtx x, cop0, cop1;
15377
15378 cop0 = operands[4];
15379 cop1 = operands[5];
15380
15381 /* SSE5 supports all of the comparisons on all vector int types. */
15382 if (!TARGET_SSE5)
15383 {
15384 /* Canonicalize the comparison to EQ, GT, GTU. */
15385 switch (code)
15386 {
15387 case EQ:
15388 case GT:
15389 case GTU:
15390 break;
15391
15392 case NE:
15393 case LE:
15394 case LEU:
15395 code = reverse_condition (code);
15396 negate = true;
15397 break;
15398
15399 case GE:
15400 case GEU:
15401 code = reverse_condition (code);
15402 negate = true;
15403 /* FALLTHRU */
15404
15405 case LT:
15406 case LTU:
15407 code = swap_condition (code);
15408 x = cop0, cop0 = cop1, cop1 = x;
15409 break;
15410
15411 default:
15412 gcc_unreachable ();
15413 }
15414
15415 /* Only SSE4.1/SSE4.2 supports V2DImode. */
15416 if (mode == V2DImode)
15417 {
15418 switch (code)
15419 {
15420 case EQ:
15421 /* SSE4.1 supports EQ. */
15422 if (!TARGET_SSE4_1)
15423 return false;
15424 break;
15425
15426 case GT:
15427 case GTU:
15428 /* SSE4.2 supports GT/GTU. */
15429 if (!TARGET_SSE4_2)
15430 return false;
15431 break;
15432
15433 default:
15434 gcc_unreachable ();
15435 }
15436 }
15437
15438 /* Unsigned parallel compare is not supported by the hardware. Play some
15439 tricks to turn this into a signed comparison against 0. */
15440 if (code == GTU)
15441 {
15442 cop0 = force_reg (mode, cop0);
15443
15444 switch (mode)
15445 {
15446 case V4SImode:
15447 case V2DImode:
15448 {
15449 rtx t1, t2, mask;
15450
15451 /* Perform a parallel modulo subtraction. */
15452 t1 = gen_reg_rtx (mode);
15453 emit_insn ((mode == V4SImode
15454 ? gen_subv4si3
15455 : gen_subv2di3) (t1, cop0, cop1));
15456
15457 /* Extract the original sign bit of op0. */
15458 mask = ix86_build_signbit_mask (GET_MODE_INNER (mode),
15459 true, false);
15460 t2 = gen_reg_rtx (mode);
15461 emit_insn ((mode == V4SImode
15462 ? gen_andv4si3
15463 : gen_andv2di3) (t2, cop0, mask));
15464
15465 /* XOR it back into the result of the subtraction. This results
15466 in the sign bit set iff we saw unsigned underflow. */
15467 x = gen_reg_rtx (mode);
15468 emit_insn ((mode == V4SImode
15469 ? gen_xorv4si3
15470 : gen_xorv2di3) (x, t1, t2));
15471
15472 code = GT;
15473 }
15474 break;
15475
15476 case V16QImode:
15477 case V8HImode:
15478 /* Perform a parallel unsigned saturating subtraction. */
15479 x = gen_reg_rtx (mode);
15480 emit_insn (gen_rtx_SET (VOIDmode, x,
15481 gen_rtx_US_MINUS (mode, cop0, cop1)));
15482
15483 code = EQ;
15484 negate = !negate;
15485 break;
15486
15487 default:
15488 gcc_unreachable ();
15489 }
15490
15491 cop0 = x;
15492 cop1 = CONST0_RTX (mode);
15493 }
15494 }
15495
15496 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
15497 operands[1+negate], operands[2-negate]);
15498
15499 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
15500 operands[2-negate]);
15501 return true;
15502 }
15503
15504 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
15505 true if we should do zero extension, else sign extension. HIGH_P is
15506 true if we want the N/2 high elements, else the low elements. */
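/* For example, with a V16QImode input the selected eight elements are
widened to V8HImode by interleaving OP[1] with either a zero vector
(zero extension) or with the mask (0 > OP[1]), whose elements are
all ones exactly where OP[1] is negative (sign extension). */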
15507
15508 void
15509 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
15510 {
15511 enum machine_mode imode = GET_MODE (operands[1]);
15512 rtx (*unpack)(rtx, rtx, rtx);
15513 rtx se, dest;
15514
15515 switch (imode)
15516 {
15517 case V16QImode:
15518 if (high_p)
15519 unpack = gen_vec_interleave_highv16qi;
15520 else
15521 unpack = gen_vec_interleave_lowv16qi;
15522 break;
15523 case V8HImode:
15524 if (high_p)
15525 unpack = gen_vec_interleave_highv8hi;
15526 else
15527 unpack = gen_vec_interleave_lowv8hi;
15528 break;
15529 case V4SImode:
15530 if (high_p)
15531 unpack = gen_vec_interleave_highv4si;
15532 else
15533 unpack = gen_vec_interleave_lowv4si;
15534 break;
15535 default:
15536 gcc_unreachable ();
15537 }
15538
15539 dest = gen_lowpart (imode, operands[0]);
15540
15541 if (unsigned_p)
15542 se = force_reg (imode, CONST0_RTX (imode));
15543 else
15544 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
15545 operands[1], pc_rtx, pc_rtx);
15546
15547 emit_insn (unpack (dest, operands[1], se));
15548 }
15549
15550 /* This function performs the same task as ix86_expand_sse_unpack,
15551 but with SSE4.1 instructions. */
15552
15553 void
15554 ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
15555 {
15556 enum machine_mode imode = GET_MODE (operands[1]);
15557 rtx (*unpack)(rtx, rtx);
15558 rtx src, dest;
15559
15560 switch (imode)
15561 {
15562 case V16QImode:
15563 if (unsigned_p)
15564 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
15565 else
15566 unpack = gen_sse4_1_extendv8qiv8hi2;
15567 break;
15568 case V8HImode:
15569 if (unsigned_p)
15570 unpack = gen_sse4_1_zero_extendv4hiv4si2;
15571 else
15572 unpack = gen_sse4_1_extendv4hiv4si2;
15573 break;
15574 case V4SImode:
15575 if (unsigned_p)
15576 unpack = gen_sse4_1_zero_extendv2siv2di2;
15577 else
15578 unpack = gen_sse4_1_extendv2siv2di2;
15579 break;
15580 default:
15581 gcc_unreachable ();
15582 }
15583
15584 dest = operands[0];
15585 if (high_p)
15586 {
15587 /* Shift higher 8 bytes to lower 8 bytes. */
15588 src = gen_reg_rtx (imode);
15589 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, src),
15590 gen_lowpart (TImode, operands[1]),
15591 GEN_INT (64)));
15592 }
15593 else
15594 src = operands[1];
15595
15596 emit_insn (unpack (dest, src));
15597 }
15598
15599 /* This function performs the same task as ix86_expand_sse_unpack,
15600 but with sse5 instructions. */
15601
15602 void
15603 ix86_expand_sse5_unpack (rtx operands[2], bool unsigned_p, bool high_p)
15604 {
15605 enum machine_mode imode = GET_MODE (operands[1]);
15606 int pperm_bytes[16];
15607 int i;
15608 int h = (high_p) ? 8 : 0;
15609 int h2;
15610 int sign_extend;
15611 rtvec v = rtvec_alloc (16);
15612 rtvec vs;
15613 rtx x, p;
15614 rtx op0 = operands[0], op1 = operands[1];
15615
15616 switch (imode)
15617 {
15618 case V16QImode:
15619 vs = rtvec_alloc (8);
15620 h2 = (high_p) ? 8 : 0;
15621 for (i = 0; i < 8; i++)
15622 {
15623 pperm_bytes[2*i+0] = PPERM_SRC | PPERM_SRC2 | i | h;
15624 pperm_bytes[2*i+1] = ((unsigned_p)
15625 ? PPERM_ZERO
15626 : PPERM_SIGN | PPERM_SRC2 | i | h);
15627 }
15628
15629 for (i = 0; i < 16; i++)
15630 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
15631
15632 for (i = 0; i < 8; i++)
15633 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
15634
15635 p = gen_rtx_PARALLEL (VOIDmode, vs);
15636 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
15637 if (unsigned_p)
15638 emit_insn (gen_sse5_pperm_zero_v16qi_v8hi (op0, op1, p, x));
15639 else
15640 emit_insn (gen_sse5_pperm_sign_v16qi_v8hi (op0, op1, p, x));
15641 break;
15642
15643 case V8HImode:
15644 vs = rtvec_alloc (4);
15645 h2 = (high_p) ? 4 : 0;
15646 for (i = 0; i < 4; i++)
15647 {
15648 sign_extend = ((unsigned_p)
15649 ? PPERM_ZERO
15650 : PPERM_SIGN | PPERM_SRC2 | ((2*i) + 1 + h));
15651 pperm_bytes[4*i+0] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 0 + h);
15652 pperm_bytes[4*i+1] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 1 + h);
15653 pperm_bytes[4*i+2] = sign_extend;
15654 pperm_bytes[4*i+3] = sign_extend;
15655 }
15656
15657 for (i = 0; i < 16; i++)
15658 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
15659
15660 for (i = 0; i < 4; i++)
15661 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
15662
15663 p = gen_rtx_PARALLEL (VOIDmode, vs);
15664 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
15665 if (unsigned_p)
15666 emit_insn (gen_sse5_pperm_zero_v8hi_v4si (op0, op1, p, x));
15667 else
15668 emit_insn (gen_sse5_pperm_sign_v8hi_v4si (op0, op1, p, x));
15669 break;
15670
15671 case V4SImode:
15672 vs = rtvec_alloc (2);
15673 h2 = (high_p) ? 2 : 0;
15674 for (i = 0; i < 2; i++)
15675 {
15676 sign_extend = ((unsigned_p)
15677 ? PPERM_ZERO
15678 : PPERM_SIGN | PPERM_SRC2 | ((4*i) + 3 + h));
15679 pperm_bytes[8*i+0] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 0 + h);
15680 pperm_bytes[8*i+1] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 1 + h);
15681 pperm_bytes[8*i+2] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 2 + h);
15682 pperm_bytes[8*i+3] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 3 + h);
15683 pperm_bytes[8*i+4] = sign_extend;
15684 pperm_bytes[8*i+5] = sign_extend;
15685 pperm_bytes[8*i+6] = sign_extend;
15686 pperm_bytes[8*i+7] = sign_extend;
15687 }
15688
15689 for (i = 0; i < 16; i++)
15690 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
15691
15692 for (i = 0; i < 2; i++)
15693 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
15694
15695 p = gen_rtx_PARALLEL (VOIDmode, vs);
15696 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
15697 if (unsigned_p)
15698 emit_insn (gen_sse5_pperm_zero_v4si_v2di (op0, op1, p, x));
15699 else
15700 emit_insn (gen_sse5_pperm_sign_v4si_v2di (op0, op1, p, x));
15701 break;
15702
15703 default:
15704 gcc_unreachable ();
15705 }
15706
15707 return;
15708 }
15709
15710 /* Truncate and pack OPERANDS[1] (low half of the result) and OPERANDS[2]
15711 (high half) into the next narrower integer vector type. */
15712 void
15713 ix86_expand_sse5_pack (rtx operands[3])
15714 {
15715 enum machine_mode imode = GET_MODE (operands[0]);
15716 int pperm_bytes[16];
15717 int i;
15718 rtvec v = rtvec_alloc (16);
15719 rtx x;
15720 rtx op0 = operands[0];
15721 rtx op1 = operands[1];
15722 rtx op2 = operands[2];
15723
15724 switch (imode)
15725 {
15726 case V16QImode:
15727 for (i = 0; i < 8; i++)
15728 {
15729 pperm_bytes[i+0] = PPERM_SRC | PPERM_SRC1 | (i*2);
15730 pperm_bytes[i+8] = PPERM_SRC | PPERM_SRC2 | (i*2);
15731 }
15732
15733 for (i = 0; i < 16; i++)
15734 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
15735
15736 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
15737 emit_insn (gen_sse5_pperm_pack_v8hi_v16qi (op0, op1, op2, x));
15738 break;
15739
15740 case V8HImode:
15741 for (i = 0; i < 4; i++)
15742 {
15743 pperm_bytes[(2*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 0);
15744 pperm_bytes[(2*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 1);
15745 pperm_bytes[(2*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 0);
15746 pperm_bytes[(2*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 1);
15747 }
15748
15749 for (i = 0; i < 16; i++)
15750 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
15751
15752 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
15753 emit_insn (gen_sse5_pperm_pack_v4si_v8hi (op0, op1, op2, x));
15754 break;
15755
15756 case V4SImode:
15757 for (i = 0; i < 2; i++)
15758 {
15759 pperm_bytes[(4*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 0);
15760 pperm_bytes[(4*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 1);
15761 pperm_bytes[(4*i)+2] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 2);
15762 pperm_bytes[(4*i)+3] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 3);
15763 pperm_bytes[(4*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 0);
15764 pperm_bytes[(4*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 1);
15765 pperm_bytes[(4*i)+10] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 2);
15766 pperm_bytes[(4*i)+11] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 3);
15767 }
15768
15769 for (i = 0; i < 16; i++)
15770 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
15771
15772 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
15773 emit_insn (gen_sse5_pperm_pack_v2di_v4si (op0, op1, op2, x));
15774 break;
15775
15776 default:
15777 gcc_unreachable ();
15778 }
15779
15780 return;
15781 }
15782
15783 /* Expand conditional increment or decrement using adc/sbb instructions.
15784 The default case using setcc followed by the conditional move can be
15785 done by generic code. */
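/* For example, dest = op2 + (a <u b) comes out as a compare followed by
an add-with-carry of zero (adc $0), and the corresponding conditional
decrement uses a subtract-with-borrow (sbb $0) instead. */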
15786 int
15787 ix86_expand_int_addcc (rtx operands[])
15788 {
15789 enum rtx_code code = GET_CODE (operands[1]);
15790 rtx compare_op;
15791 rtx val = const0_rtx;
15792 bool fpcmp = false;
15793 enum machine_mode mode = GET_MODE (operands[0]);
15794
15795 if (operands[3] != const1_rtx
15796 && operands[3] != constm1_rtx)
15797 return 0;
15798 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
15799 ix86_compare_op1, &compare_op))
15800 return 0;
15801 code = GET_CODE (compare_op);
15802
15803 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
15804 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
15805 {
15806 fpcmp = true;
15807 code = ix86_fp_compare_code_to_integer (code);
15808 }
15809
15810 if (code != LTU)
15811 {
15812 val = constm1_rtx;
15813 if (fpcmp)
15814 PUT_CODE (compare_op,
15815 reverse_condition_maybe_unordered
15816 (GET_CODE (compare_op)));
15817 else
15818 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
15819 }
15820 PUT_MODE (compare_op, mode);
15821
15822 /* Construct either adc or sbb insn. */
15823 if ((code == LTU) == (operands[3] == constm1_rtx))
15824 {
15825 switch (GET_MODE (operands[0]))
15826 {
15827 case QImode:
15828 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
15829 break;
15830 case HImode:
15831 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
15832 break;
15833 case SImode:
15834 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
15835 break;
15836 case DImode:
15837 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
15838 break;
15839 default:
15840 gcc_unreachable ();
15841 }
15842 }
15843 else
15844 {
15845 switch (GET_MODE (operands[0]))
15846 {
15847 case QImode:
15848 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
15849 break;
15850 case HImode:
15851 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
15852 break;
15853 case SImode:
15854 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
15855 break;
15856 case DImode:
15857 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
15858 break;
15859 default:
15860 gcc_unreachable ();
15861 }
15862 }
15863 return 1; /* DONE */
15864 }
15865
15866
15867 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
15868 works for floating point parameters and non-offsettable memories.
15869 For pushes, it returns just stack offsets; the values will be saved
15870 in the right order. Maximally four parts are generated. */
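/* For example, on a 32-bit target DImode and DFmode split into two
SImode parts, XFmode into three and TFmode into four; on a 64-bit
target TImode and TFmode split into two DImode parts, while XFmode
splits into a DImode part plus an SImode upper part. */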
15871
15872 static int
15873 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
15874 {
15875 int size;
15876
15877 if (!TARGET_64BIT)
15878 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
15879 else
15880 size = (GET_MODE_SIZE (mode) + 4) / 8;
15881
15882 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
15883 gcc_assert (size >= 2 && size <= 4);
15884
15885 /* Optimize constant pool reference to immediates. This is used by fp
15886 moves, that force all constants to memory to allow combining. */
15887 if (MEM_P (operand) && MEM_READONLY_P (operand))
15888 {
15889 rtx tmp = maybe_get_pool_constant (operand);
15890 if (tmp)
15891 operand = tmp;
15892 }
15893
15894 if (MEM_P (operand) && !offsettable_memref_p (operand))
15895 {
15896 /* The only non-offsettable memories we handle are pushes. */
15897 int ok = push_operand (operand, VOIDmode);
15898
15899 gcc_assert (ok);
15900
15901 operand = copy_rtx (operand);
15902 PUT_MODE (operand, Pmode);
15903 parts[0] = parts[1] = parts[2] = parts[3] = operand;
15904 return size;
15905 }
15906
15907 if (GET_CODE (operand) == CONST_VECTOR)
15908 {
15909 enum machine_mode imode = int_mode_for_mode (mode);
15910 /* Caution: if we looked through a constant pool memory above,
15911 the operand may actually have a different mode now. That's
15912 ok, since we want to pun this all the way back to an integer. */
15913 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
15914 gcc_assert (operand != NULL);
15915 mode = imode;
15916 }
15917
15918 if (!TARGET_64BIT)
15919 {
15920 if (mode == DImode)
15921 split_di (&operand, 1, &parts[0], &parts[1]);
15922 else
15923 {
15924 int i;
15925
15926 if (REG_P (operand))
15927 {
15928 gcc_assert (reload_completed);
15929 for (i = 0; i < size; i++)
15930 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
15931 }
15932 else if (offsettable_memref_p (operand))
15933 {
15934 operand = adjust_address (operand, SImode, 0);
15935 parts[0] = operand;
15936 for (i = 1; i < size; i++)
15937 parts[i] = adjust_address (operand, SImode, 4 * i);
15938 }
15939 else if (GET_CODE (operand) == CONST_DOUBLE)
15940 {
15941 REAL_VALUE_TYPE r;
15942 long l[4];
15943
15944 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
15945 switch (mode)
15946 {
15947 case TFmode:
15948 real_to_target (l, &r, mode);
15949 parts[3] = gen_int_mode (l[3], SImode);
15950 parts[2] = gen_int_mode (l[2], SImode);
15951 break;
15952 case XFmode:
15953 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
15954 parts[2] = gen_int_mode (l[2], SImode);
15955 break;
15956 case DFmode:
15957 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
15958 break;
15959 default:
15960 gcc_unreachable ();
15961 }
15962 parts[1] = gen_int_mode (l[1], SImode);
15963 parts[0] = gen_int_mode (l[0], SImode);
15964 }
15965 else
15966 gcc_unreachable ();
15967 }
15968 }
15969 else
15970 {
15971 if (mode == TImode)
15972 split_ti (&operand, 1, &parts[0], &parts[1]);
15973 if (mode == XFmode || mode == TFmode)
15974 {
15975 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
15976 if (REG_P (operand))
15977 {
15978 gcc_assert (reload_completed);
15979 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
15980 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
15981 }
15982 else if (offsettable_memref_p (operand))
15983 {
15984 operand = adjust_address (operand, DImode, 0);
15985 parts[0] = operand;
15986 parts[1] = adjust_address (operand, upper_mode, 8);
15987 }
15988 else if (GET_CODE (operand) == CONST_DOUBLE)
15989 {
15990 REAL_VALUE_TYPE r;
15991 long l[4];
15992
15993 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
15994 real_to_target (l, &r, mode);
15995
15996 /* Do not use shift by 32 to avoid warning on 32bit systems. */
15997 if (HOST_BITS_PER_WIDE_INT >= 64)
15998 parts[0]
15999 = gen_int_mode
16000 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
16001 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
16002 DImode);
16003 else
16004 parts[0] = immed_double_const (l[0], l[1], DImode);
16005
16006 if (upper_mode == SImode)
16007 parts[1] = gen_int_mode (l[2], SImode);
16008 else if (HOST_BITS_PER_WIDE_INT >= 64)
16009 parts[1]
16010 = gen_int_mode
16011 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
16012 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
16013 DImode);
16014 else
16015 parts[1] = immed_double_const (l[2], l[3], DImode);
16016 }
16017 else
16018 gcc_unreachable ();
16019 }
16020 }
16021
16022 return size;
16023 }
16024
16025 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
16026 Operands 2-5 are filled with the destination parts and operands 6-9
16027 with the corresponding source parts, in the order in which the moves
16028 are emitted; only as many of each as there are parts are used. */
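/* For example, a 32-bit DImode move becomes two SImode moves whose
order is chosen below so that no source part is overwritten before
it has been copied. */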
16029
16030 void
16031 ix86_split_long_move (rtx operands[])
16032 {
16033 rtx part[2][4];
16034 int nparts, i, j;
16035 int push = 0;
16036 int collisions = 0;
16037 enum machine_mode mode = GET_MODE (operands[0]);
16038 bool collisionparts[4];
16039
16040 /* The DFmode expanders may ask us to move a double.
16041 For a 64-bit target this is a single move. By hiding that fact
16042 here we simplify the i386.md splitters. */
16043 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
16044 {
16045 /* Optimize constant pool reference to immediates. This is used by
16046 fp moves, that force all constants to memory to allow combining. */
16047
16048 if (MEM_P (operands[1])
16049 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
16050 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
16051 operands[1] = get_pool_constant (XEXP (operands[1], 0));
16052 if (push_operand (operands[0], VOIDmode))
16053 {
16054 operands[0] = copy_rtx (operands[0]);
16055 PUT_MODE (operands[0], Pmode);
16056 }
16057 else
16058 operands[0] = gen_lowpart (DImode, operands[0]);
16059 operands[1] = gen_lowpart (DImode, operands[1]);
16060 emit_move_insn (operands[0], operands[1]);
16061 return;
16062 }
16063
16064 /* The only non-offsettable memory we handle is push. */
16065 if (push_operand (operands[0], VOIDmode))
16066 push = 1;
16067 else
16068 gcc_assert (!MEM_P (operands[0])
16069 || offsettable_memref_p (operands[0]));
16070
16071 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
16072 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
16073
16074 /* When emitting a push, take care of source operands that live on the stack. */
16075 if (push && MEM_P (operands[1])
16076 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
16077 for (i = 0; i < nparts - 1; i++)
16078 part[1][i] = change_address (part[1][i],
16079 GET_MODE (part[1][i]),
16080 XEXP (part[1][i + 1], 0));
16081
16082 /* We need to do the copy in the right order in case an address register
16083 used by the source overlaps the destination. */
16084 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
16085 {
16086 rtx tmp;
16087
16088 for (i = 0; i < nparts; i++)
16089 {
16090 collisionparts[i]
16091 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
16092 if (collisionparts[i])
16093 collisions++;
16094 }
16095
16096 /* Collision in the middle part can be handled by reordering. */
16097 if (collisions == 1 && nparts == 3 && collisionparts [1])
16098 {
16099 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
16100 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
16101 }
16102 else if (collisions == 1
16103 && nparts == 4
16104 && (collisionparts [1] || collisionparts [2]))
16105 {
16106 if (collisionparts [1])
16107 {
16108 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
16109 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
16110 }
16111 else
16112 {
16113 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
16114 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
16115 }
16116 }
16117
16118 /* If there are more collisions, we can't handle it by reordering.
16119 Do an lea to the last part and use only one colliding move. */
16120 else if (collisions > 1)
16121 {
16122 rtx base;
16123
16124 collisions = 1;
16125
16126 base = part[0][nparts - 1];
16127
16128 /* Handle the case when the last part isn't valid for lea.
16129 Happens in 64-bit mode storing the 12-byte XFmode. */
16130 if (GET_MODE (base) != Pmode)
16131 base = gen_rtx_REG (Pmode, REGNO (base));
16132
16133 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
16134 part[1][0] = replace_equiv_address (part[1][0], base);
16135 for (i = 1; i < nparts; i++)
16136 {
16137 tmp = plus_constant (base, UNITS_PER_WORD * i);
16138 part[1][i] = replace_equiv_address (part[1][i], tmp);
16139 }
16140 }
16141 }
16142
16143 if (push)
16144 {
16145 if (!TARGET_64BIT)
16146 {
16147 if (nparts == 3)
16148 {
16149 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
16150 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
16151 emit_move_insn (part[0][2], part[1][2]);
16152 }
16153 else if (nparts == 4)
16154 {
16155 emit_move_insn (part[0][3], part[1][3]);
16156 emit_move_insn (part[0][2], part[1][2]);
16157 }
16158 }
16159 else
16160 {
16161 /* In 64-bit mode we don't have a 32-bit push available. If this is a
16162 register, it is OK - we will just use the larger counterpart. We also
16163 retype the memory - it comes from an attempt to avoid a REX prefix on
16164 moving the second half of a TFmode value. */
16165 if (GET_MODE (part[1][1]) == SImode)
16166 {
16167 switch (GET_CODE (part[1][1]))
16168 {
16169 case MEM:
16170 part[1][1] = adjust_address (part[1][1], DImode, 0);
16171 break;
16172
16173 case REG:
16174 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
16175 break;
16176
16177 default:
16178 gcc_unreachable ();
16179 }
16180
16181 if (GET_MODE (part[1][0]) == SImode)
16182 part[1][0] = part[1][1];
16183 }
16184 }
16185 emit_move_insn (part[0][1], part[1][1]);
16186 emit_move_insn (part[0][0], part[1][0]);
16187 return;
16188 }
16189
16190 /* Choose correct order to not overwrite the source before it is copied. */
16191 if ((REG_P (part[0][0])
16192 && REG_P (part[1][1])
16193 && (REGNO (part[0][0]) == REGNO (part[1][1])
16194 || (nparts == 3
16195 && REGNO (part[0][0]) == REGNO (part[1][2]))
16196 || (nparts == 4
16197 && REGNO (part[0][0]) == REGNO (part[1][3]))))
16198 || (collisions > 0
16199 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
16200 {
16201 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
16202 {
16203 operands[2 + i] = part[0][j];
16204 operands[6 + i] = part[1][j];
16205 }
16206 }
16207 else
16208 {
16209 for (i = 0; i < nparts; i++)
16210 {
16211 operands[2 + i] = part[0][i];
16212 operands[6 + i] = part[1][i];
16213 }
16214 }
16215
16216 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
16217 if (optimize_insn_for_size_p ())
16218 {
16219 for (j = 0; j < nparts - 1; j++)
16220 if (CONST_INT_P (operands[6 + j])
16221 && operands[6 + j] != const0_rtx
16222 && REG_P (operands[2 + j]))
16223 for (i = j; i < nparts - 1; i++)
16224 if (CONST_INT_P (operands[7 + i])
16225 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
16226 operands[7 + i] = operands[2 + j];
16227 }
16228
16229 for (i = 0; i < nparts; i++)
16230 emit_move_insn (operands[2 + i], operands[6 + i]);
16231
16232 return;
16233 }
16234
16235 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
16236 left shift by a constant, either using a single shift or
16237 a sequence of add instructions. */
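/* For example, a shift left by 1 is always emitted as a single add of
the operand to itself; larger counts also use repeated adds when their
total cost stays below the constant-shift cost and we are not
optimizing for size. */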
16238
16239 static void
16240 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
16241 {
16242 if (count == 1)
16243 {
16244 emit_insn ((mode == DImode
16245 ? gen_addsi3
16246 : gen_adddi3) (operand, operand, operand));
16247 }
16248 else if (!optimize_insn_for_size_p ()
16249 && count * ix86_cost->add <= ix86_cost->shift_const)
16250 {
16251 int i;
16252 for (i=0; i<count; i++)
16253 {
16254 emit_insn ((mode == DImode
16255 ? gen_addsi3
16256 : gen_adddi3) (operand, operand, operand));
16257 }
16258 }
16259 else
16260 emit_insn ((mode == DImode
16261 ? gen_ashlsi3
16262 : gen_ashldi3) (operand, operand, GEN_INT (count)));
16263 }
16264
16265 void
16266 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
16267 {
16268 rtx low[2], high[2];
16269 int count;
16270 const int single_width = mode == DImode ? 32 : 64;
16271
16272 if (CONST_INT_P (operands[2]))
16273 {
16274 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
16275 count = INTVAL (operands[2]) & (single_width * 2 - 1);
16276
16277 if (count >= single_width)
16278 {
16279 emit_move_insn (high[0], low[1]);
16280 emit_move_insn (low[0], const0_rtx);
16281
16282 if (count > single_width)
16283 ix86_expand_ashl_const (high[0], count - single_width, mode);
16284 }
16285 else
16286 {
16287 if (!rtx_equal_p (operands[0], operands[1]))
16288 emit_move_insn (operands[0], operands[1]);
16289 emit_insn ((mode == DImode
16290 ? gen_x86_shld
16291 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
16292 ix86_expand_ashl_const (low[0], count, mode);
16293 }
16294 return;
16295 }
16296
16297 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
16298
16299 if (operands[1] == const1_rtx)
16300 {
16301 /* Assuming we've chosen QImode-capable registers, 1 << N
16302 can be done with two 32/64-bit shifts, no branches, no cmoves. */
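/* The EQ/NE stores below place the single 1 bit in either the low or
the high half, depending on bit 5 (resp. 6) of the shift count, and
the final shifts (whose count the hardware masks to 5 resp. 6 bits)
move it into place. */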
16303 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
16304 {
16305 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
16306
16307 ix86_expand_clear (low[0]);
16308 ix86_expand_clear (high[0]);
16309 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
16310
16311 d = gen_lowpart (QImode, low[0]);
16312 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
16313 s = gen_rtx_EQ (QImode, flags, const0_rtx);
16314 emit_insn (gen_rtx_SET (VOIDmode, d, s));
16315
16316 d = gen_lowpart (QImode, high[0]);
16317 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
16318 s = gen_rtx_NE (QImode, flags, const0_rtx);
16319 emit_insn (gen_rtx_SET (VOIDmode, d, s));
16320 }
16321
16322 /* Otherwise, we can get the same results by manually performing
16323 a bit extract operation on bit 5/6, and then performing the two
16324 shifts. The two methods of getting 0/1 into low/high are exactly
16325 the same size. Avoiding the shift in the bit extract case helps
16326 pentium4 a bit; no one else seems to care much either way. */
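/* Here high receives (count >> 5) & 1 (or (count >> 6) & 1 for
TImode), low receives the complementary bit, and the final shifts
below move the bit into place. */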
16327 else
16328 {
16329 rtx x;
16330
16331 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
16332 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
16333 else
16334 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
16335 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
16336
16337 emit_insn ((mode == DImode
16338 ? gen_lshrsi3
16339 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
16340 emit_insn ((mode == DImode
16341 ? gen_andsi3
16342 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
16343 emit_move_insn (low[0], high[0]);
16344 emit_insn ((mode == DImode
16345 ? gen_xorsi3
16346 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
16347 }
16348
16349 emit_insn ((mode == DImode
16350 ? gen_ashlsi3
16351 : gen_ashldi3) (low[0], low[0], operands[2]));
16352 emit_insn ((mode == DImode
16353 ? gen_ashlsi3
16354 : gen_ashldi3) (high[0], high[0], operands[2]));
16355 return;
16356 }
16357
16358 if (operands[1] == constm1_rtx)
16359 {
16360 /* For -1 << N, we can avoid the shld instruction, because we
16361 know that we're shifting 0...31/63 ones into a -1. */
16362 emit_move_insn (low[0], constm1_rtx);
16363 if (optimize_insn_for_size_p ())
16364 emit_move_insn (high[0], low[0]);
16365 else
16366 emit_move_insn (high[0], constm1_rtx);
16367 }
16368 else
16369 {
16370 if (!rtx_equal_p (operands[0], operands[1]))
16371 emit_move_insn (operands[0], operands[1]);
16372
16373 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
16374 emit_insn ((mode == DImode
16375 ? gen_x86_shld
16376 : gen_x86_64_shld) (high[0], low[0], operands[2]));
16377 }
16378
16379 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
16380
16381 if (TARGET_CMOVE && scratch)
16382 {
16383 ix86_expand_clear (scratch);
16384 emit_insn ((mode == DImode
16385 ? gen_x86_shift_adj_1
16386 : gen_x86_64_shift_adj_1) (high[0], low[0], operands[2],
16387 scratch));
16388 }
16389 else
16390 emit_insn ((mode == DImode
16391 ? gen_x86_shift_adj_2
16392 : gen_x86_64_shift_adj_2) (high[0], low[0], operands[2]));
16393 }
16394
16395 void
16396 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
16397 {
16398 rtx low[2], high[2];
16399 int count;
16400 const int single_width = mode == DImode ? 32 : 64;
16401
16402 if (CONST_INT_P (operands[2]))
16403 {
16404 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
16405 count = INTVAL (operands[2]) & (single_width * 2 - 1);
16406
16407 if (count == single_width * 2 - 1)
16408 {
16409 emit_move_insn (high[0], high[1]);
16410 emit_insn ((mode == DImode
16411 ? gen_ashrsi3
16412 : gen_ashrdi3) (high[0], high[0],
16413 GEN_INT (single_width - 1)));
16414 emit_move_insn (low[0], high[0]);
16415
16416 }
16417 else if (count >= single_width)
16418 {
16419 emit_move_insn (low[0], high[1]);
16420 emit_move_insn (high[0], low[0]);
16421 emit_insn ((mode == DImode
16422 ? gen_ashrsi3
16423 : gen_ashrdi3) (high[0], high[0],
16424 GEN_INT (single_width - 1)));
16425 if (count > single_width)
16426 emit_insn ((mode == DImode
16427 ? gen_ashrsi3
16428 : gen_ashrdi3) (low[0], low[0],
16429 GEN_INT (count - single_width)));
16430 }
16431 else
16432 {
16433 if (!rtx_equal_p (operands[0], operands[1]))
16434 emit_move_insn (operands[0], operands[1]);
16435 emit_insn ((mode == DImode
16436 ? gen_x86_shrd
16437 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
16438 emit_insn ((mode == DImode
16439 ? gen_ashrsi3
16440 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
16441 }
16442 }
16443 else
16444 {
16445 if (!rtx_equal_p (operands[0], operands[1]))
16446 emit_move_insn (operands[0], operands[1]);
16447
16448 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
16449
16450 emit_insn ((mode == DImode
16451 ? gen_x86_shrd
16452 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
16453 emit_insn ((mode == DImode
16454 ? gen_ashrsi3
16455 : gen_ashrdi3) (high[0], high[0], operands[2]));
16456
16457 if (TARGET_CMOVE && scratch)
16458 {
16459 emit_move_insn (scratch, high[0]);
16460 emit_insn ((mode == DImode
16461 ? gen_ashrsi3
16462 : gen_ashrdi3) (scratch, scratch,
16463 GEN_INT (single_width - 1)));
16464 emit_insn ((mode == DImode
16465 ? gen_x86_shift_adj_1
16466 : gen_x86_64_shift_adj_1) (low[0], high[0], operands[2],
16467 scratch));
16468 }
16469 else
16470 emit_insn ((mode == DImode
16471 ? gen_x86_shift_adj_3
16472 : gen_x86_64_shift_adj_3) (low[0], high[0], operands[2]));
16473 }
16474 }
16475
16476 void
16477 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
16478 {
16479 rtx low[2], high[2];
16480 int count;
16481 const int single_width = mode == DImode ? 32 : 64;
16482
16483 if (CONST_INT_P (operands[2]))
16484 {
16485 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
16486 count = INTVAL (operands[2]) & (single_width * 2 - 1);
16487
16488 if (count >= single_width)
16489 {
16490 emit_move_insn (low[0], high[1]);
16491 ix86_expand_clear (high[0]);
16492
16493 if (count > single_width)
16494 emit_insn ((mode == DImode
16495 ? gen_lshrsi3
16496 : gen_lshrdi3) (low[0], low[0],
16497 GEN_INT (count - single_width)));
16498 }
16499 else
16500 {
16501 if (!rtx_equal_p (operands[0], operands[1]))
16502 emit_move_insn (operands[0], operands[1]);
16503 emit_insn ((mode == DImode
16504 ? gen_x86_shrd
16505 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
16506 emit_insn ((mode == DImode
16507 ? gen_lshrsi3
16508 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
16509 }
16510 }
16511 else
16512 {
16513 if (!rtx_equal_p (operands[0], operands[1]))
16514 emit_move_insn (operands[0], operands[1]);
16515
16516 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
16517
16518 emit_insn ((mode == DImode
16519 ? gen_x86_shrd
16520 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
16521 emit_insn ((mode == DImode
16522 ? gen_lshrsi3
16523 : gen_lshrdi3) (high[0], high[0], operands[2]));
16524
16525 /* Heh. By reversing the arguments, we can reuse this pattern. */
16526 if (TARGET_CMOVE && scratch)
16527 {
16528 ix86_expand_clear (scratch);
16529 emit_insn ((mode == DImode
16530 ? gen_x86_shift_adj_1
16531 : gen_x86_64_shift_adj_1) (low[0], high[0], operands[2],
16532 scratch));
16533 }
16534 else
16535 emit_insn ((mode == DImode
16536 ? gen_x86_shift_adj_2
16537 : gen_x86_64_shift_adj_2) (low[0], high[0], operands[2]));
16538 }
16539 }
16540
16541 /* Predict the just-emitted jump instruction to be taken with probability PROB. */
16542 static void
16543 predict_jump (int prob)
16544 {
16545 rtx insn = get_last_insn ();
16546 gcc_assert (JUMP_P (insn));
16547 REG_NOTES (insn)
16548 = gen_rtx_EXPR_LIST (REG_BR_PROB,
16549 GEN_INT (prob),
16550 REG_NOTES (insn));
16551 }
16552
16553 /* Helper function for the string operations below. Test whether VARIABLE
16554 is aligned to VALUE bytes. If so, jump to the label. */
16555 static rtx
16556 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
16557 {
16558 rtx label = gen_label_rtx ();
16559 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
16560 if (GET_MODE (variable) == DImode)
16561 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
16562 else
16563 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
16564 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
16565 1, label);
16566 if (epilogue)
16567 predict_jump (REG_BR_PROB_BASE * 50 / 100);
16568 else
16569 predict_jump (REG_BR_PROB_BASE * 90 / 100);
16570 return label;
16571 }
16572
16573 /* Decrease COUNTREG by VALUE. */
16574 static void
16575 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
16576 {
16577 if (GET_MODE (countreg) == DImode)
16578 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
16579 else
16580 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
16581 }
16582
16583 /* Zero extend possibly SImode EXP to Pmode register. */
16584 rtx
16585 ix86_zero_extend_to_Pmode (rtx exp)
16586 {
16587 rtx r;
16588 if (GET_MODE (exp) == VOIDmode)
16589 return force_reg (Pmode, exp);
16590 if (GET_MODE (exp) == Pmode)
16591 return copy_to_mode_reg (Pmode, exp);
16592 r = gen_reg_rtx (Pmode);
16593 emit_insn (gen_zero_extendsidi2 (r, exp));
16594 return r;
16595 }
16596
16597 /* Divide COUNTREG by SCALE. */
16598 static rtx
16599 scale_counter (rtx countreg, int scale)
16600 {
16601 rtx sc;
16602 rtx piece_size_mask;
16603
16604 if (scale == 1)
16605 return countreg;
16606 if (CONST_INT_P (countreg))
16607 return GEN_INT (INTVAL (countreg) / scale);
16608 gcc_assert (REG_P (countreg));
16609
16610 piece_size_mask = GEN_INT (scale - 1);
16611 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
16612 GEN_INT (exact_log2 (scale)),
16613 NULL, 1, OPTAB_DIRECT);
16614 return sc;
16615 }
16616
16617 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
16618 DImode for constant loop counts. */
16619
16620 static enum machine_mode
16621 counter_mode (rtx count_exp)
16622 {
16623 if (GET_MODE (count_exp) != VOIDmode)
16624 return GET_MODE (count_exp);
16625 if (GET_CODE (count_exp) != CONST_INT)
16626 return Pmode;
16627 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
16628 return DImode;
16629 return SImode;
16630 }
16631
16632 /* When SRCPTR is non-NULL, output a simple loop that moves memory from
16633 pointer SRCPTR to DESTPTR in chunks of MODE, unrolled UNROLL times;
16634 the overall size is COUNT, specified in bytes. When SRCPTR is NULL,
16635 output the equivalent loop that sets memory to VALUE (assumed to be in MODE).
16636
16637 The size is rounded down to a whole number of chunks moved at once.
16638 SRCMEM and DESTMEM provide the MEM rtx used to supply proper aliasing info. */
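/* Schematically (an illustrative behavioral sketch only, not the emitted RTL),
   with chunk_size == GET_MODE_SIZE (mode) * unroll the generated code acts like

       size = count & -chunk_size;
       iter = 0;
       do {
         copy (or set) chunk_size bytes at dest + iter (and src + iter);
         iter += chunk_size;
       } while (iter < size);
       dest += iter;  src += iter;

   with an initial skip around the loop when size may be zero.  */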
16639
16640
16641 static void
16642 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
16643 rtx destptr, rtx srcptr, rtx value,
16644 rtx count, enum machine_mode mode, int unroll,
16645 int expected_size)
16646 {
16647 rtx out_label, top_label, iter, tmp;
16648 enum machine_mode iter_mode = counter_mode (count);
16649 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
16650 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
16651 rtx size;
16652 rtx x_addr;
16653 rtx y_addr;
16654 int i;
16655
16656 top_label = gen_label_rtx ();
16657 out_label = gen_label_rtx ();
16658 iter = gen_reg_rtx (iter_mode);
16659
16660 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
16661 NULL, 1, OPTAB_DIRECT);
16662 /* Those two should combine. */
16663 if (piece_size == const1_rtx)
16664 {
16665 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
16666 true, out_label);
16667 predict_jump (REG_BR_PROB_BASE * 10 / 100);
16668 }
16669 emit_move_insn (iter, const0_rtx);
16670
16671 emit_label (top_label);
16672
16673 tmp = convert_modes (Pmode, iter_mode, iter, true);
16674 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
16675 destmem = change_address (destmem, mode, x_addr);
16676
16677 if (srcmem)
16678 {
16679 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
16680 srcmem = change_address (srcmem, mode, y_addr);
16681
16682 /* When unrolling for chips that reorder memory reads and writes,
16683 we can save registers by using a single temporary.
16684 Using 4 temporaries is also overkill in 32-bit mode. */
16685 if (!TARGET_64BIT && 0)
16686 {
16687 for (i = 0; i < unroll; i++)
16688 {
16689 if (i)
16690 {
16691 destmem =
16692 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
16693 srcmem =
16694 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
16695 }
16696 emit_move_insn (destmem, srcmem);
16697 }
16698 }
16699 else
16700 {
16701 rtx tmpreg[4];
16702 gcc_assert (unroll <= 4);
16703 for (i = 0; i < unroll; i++)
16704 {
16705 tmpreg[i] = gen_reg_rtx (mode);
16706 if (i)
16707 {
16708 srcmem =
16709 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
16710 }
16711 emit_move_insn (tmpreg[i], srcmem);
16712 }
16713 for (i = 0; i < unroll; i++)
16714 {
16715 if (i)
16716 {
16717 destmem =
16718 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
16719 }
16720 emit_move_insn (destmem, tmpreg[i]);
16721 }
16722 }
16723 }
16724 else
16725 for (i = 0; i < unroll; i++)
16726 {
16727 if (i)
16728 destmem =
16729 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
16730 emit_move_insn (destmem, value);
16731 }
16732
16733 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
16734 true, OPTAB_LIB_WIDEN);
16735 if (tmp != iter)
16736 emit_move_insn (iter, tmp);
16737
16738 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
16739 true, top_label);
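/* Illustrative note: the loop-back branch just emitted is predicted below from
   the expected iteration count N = expected_size / (chunk size).  The formula
   gives roughly REG_BR_PROB_BASE - REG_BR_PROB_BASE / N, i.e. the backward jump
   is assumed taken about (N - 1) / N of the time; e.g. N == 8 gives ~87%.  */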
16740 if (expected_size != -1)
16741 {
16742 expected_size /= GET_MODE_SIZE (mode) * unroll;
16743 if (expected_size == 0)
16744 predict_jump (0);
16745 else if (expected_size > REG_BR_PROB_BASE)
16746 predict_jump (REG_BR_PROB_BASE - 1);
16747 else
16748 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
16749 }
16750 else
16751 predict_jump (REG_BR_PROB_BASE * 80 / 100);
16752 iter = ix86_zero_extend_to_Pmode (iter);
16753 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
16754 true, OPTAB_LIB_WIDEN);
16755 if (tmp != destptr)
16756 emit_move_insn (destptr, tmp);
16757 if (srcptr)
16758 {
16759 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
16760 true, OPTAB_LIB_WIDEN);
16761 if (tmp != srcptr)
16762 emit_move_insn (srcptr, tmp);
16763 }
16764 emit_label (out_label);
16765 }
16766
16767 /* Output a "rep; mov" instruction.
16768 Arguments have the same meaning as for the previous function. */
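/* Roughly speaking (illustrative), for SImode this expands to the equivalent of
   loading count / 4 into the count register and executing "rep movsl".  DESTEXP
   and SRCEXP built below describe the final values of the destination and source
   pointer registers after the copy (ptr + (count / 4) * 4), which the rep_mov
   pattern exposes for alias analysis and register allocation.  */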
16769 static void
16770 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
16771 rtx destptr, rtx srcptr,
16772 rtx count,
16773 enum machine_mode mode)
16774 {
16775 rtx destexp;
16776 rtx srcexp;
16777 rtx countreg;
16778
16779 /* If the size is known to be a multiple of 4, use the wider SImode "rep movs". */
16780 if (mode == QImode && CONST_INT_P (count)
16781 && !(INTVAL (count) & 3))
16782 mode = SImode;
16783
16784 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
16785 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
16786 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
16787 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
16788 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
16789 if (mode != QImode)
16790 {
16791 destexp = gen_rtx_ASHIFT (Pmode, countreg,
16792 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
16793 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
16794 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
16795 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
16796 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
16797 }
16798 else
16799 {
16800 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
16801 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
16802 }
16803 if (CONST_INT_P (count))
16804 {
16805 count = GEN_INT (INTVAL (count)
16806 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
16807 destmem = shallow_copy_rtx (destmem);
16808 srcmem = shallow_copy_rtx (srcmem);
16809 set_mem_size (destmem, count);
16810 set_mem_size (srcmem, count);
16811 }
16812 else
16813 {
16814 if (MEM_SIZE (destmem))
16815 set_mem_size (destmem, NULL_RTX);
16816 if (MEM_SIZE (srcmem))
16817 set_mem_size (srcmem, NULL_RTX);
16818 }
16819 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
16820 destexp, srcexp));
16821 }
16822
16823 /* Output a "rep; stos" instruction.
16824 Arguments have the same meaning as for the previous function. */
16825 static void
16826 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
16827 rtx count, enum machine_mode mode,
16828 rtx orig_value)
16829 {
16830 rtx destexp;
16831 rtx countreg;
16832
16833 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
16834 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
16835 value = force_reg (mode, gen_lowpart (mode, value));
16836 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
16837 if (mode != QImode)
16838 {
16839 destexp = gen_rtx_ASHIFT (Pmode, countreg,
16840 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
16841 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
16842 }
16843 else
16844 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
16845 if (orig_value == const0_rtx && CONST_INT_P (count))
16846 {
16847 count = GEN_INT (INTVAL (count)
16848 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
16849 destmem = shallow_copy_rtx (destmem);
16850 set_mem_size (destmem, count);
16851 }
16852 else if (MEM_SIZE (destmem))
16853 set_mem_size (destmem, NULL_RTX);
16854 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
16855 }
16856
16857 static void
16858 emit_strmov (rtx destmem, rtx srcmem,
16859 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
16860 {
16861 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
16862 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
16863 emit_insn (gen_strmov (destptr, dest, srcptr, src));
16864 }
16865
16866 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
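/* Illustrative note: for a constant COUNT the code below simply tests the low
   bits of the count.  E.g. countval == 13 (binary 1101) with max_size == 16
   emits one 8-byte move (a pair of 4-byte moves on 32-bit targets), one 4-byte
   move and one 1-byte move, at offsets 0, 8 and 12 respectively.  */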
16867 static void
16868 expand_movmem_epilogue (rtx destmem, rtx srcmem,
16869 rtx destptr, rtx srcptr, rtx count, int max_size)
16870 {
16871 rtx src, dest;
16872 if (CONST_INT_P (count))
16873 {
16874 HOST_WIDE_INT countval = INTVAL (count);
16875 int offset = 0;
16876
16877 if ((countval & 0x10) && max_size > 16)
16878 {
16879 if (TARGET_64BIT)
16880 {
16881 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
16882 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
16883 }
16884 else
16885 gcc_unreachable ();
16886 offset += 16;
16887 }
16888 if ((countval & 0x08) && max_size > 8)
16889 {
16890 if (TARGET_64BIT)
16891 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
16892 else
16893 {
16894 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
16895 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
16896 }
16897 offset += 8;
16898 }
16899 if ((countval & 0x04) && max_size > 4)
16900 {
16901 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
16902 offset += 4;
16903 }
16904 if ((countval & 0x02) && max_size > 2)
16905 {
16906 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
16907 offset += 2;
16908 }
16909 if ((countval & 0x01) && max_size > 1)
16910 {
16911 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
16912 offset += 1;
16913 }
16914 return;
16915 }
16916 if (max_size > 8)
16917 {
16918 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
16919 count, 1, OPTAB_DIRECT);
16920 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
16921 count, QImode, 1, 4);
16922 return;
16923 }
16924
16925 /* When single-instruction stringops are available, we can cheaply advance
16926 the dest and src pointers. Otherwise we save code size by maintaining an
16927 offset (zero is readily available from the preceding rep operation) and
16928 using x86 addressing modes. */
16929 if (TARGET_SINGLE_STRINGOP)
16930 {
16931 if (max_size > 4)
16932 {
16933 rtx label = ix86_expand_aligntest (count, 4, true);
16934 src = change_address (srcmem, SImode, srcptr);
16935 dest = change_address (destmem, SImode, destptr);
16936 emit_insn (gen_strmov (destptr, dest, srcptr, src));
16937 emit_label (label);
16938 LABEL_NUSES (label) = 1;
16939 }
16940 if (max_size > 2)
16941 {
16942 rtx label = ix86_expand_aligntest (count, 2, true);
16943 src = change_address (srcmem, HImode, srcptr);
16944 dest = change_address (destmem, HImode, destptr);
16945 emit_insn (gen_strmov (destptr, dest, srcptr, src));
16946 emit_label (label);
16947 LABEL_NUSES (label) = 1;
16948 }
16949 if (max_size > 1)
16950 {
16951 rtx label = ix86_expand_aligntest (count, 1, true);
16952 src = change_address (srcmem, QImode, srcptr);
16953 dest = change_address (destmem, QImode, destptr);
16954 emit_insn (gen_strmov (destptr, dest, srcptr, src));
16955 emit_label (label);
16956 LABEL_NUSES (label) = 1;
16957 }
16958 }
16959 else
16960 {
16961 rtx offset = force_reg (Pmode, const0_rtx);
16962 rtx tmp;
16963
16964 if (max_size > 4)
16965 {
16966 rtx label = ix86_expand_aligntest (count, 4, true);
16967 src = change_address (srcmem, SImode, srcptr);
16968 dest = change_address (destmem, SImode, destptr);
16969 emit_move_insn (dest, src);
16970 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
16971 true, OPTAB_LIB_WIDEN);
16972 if (tmp != offset)
16973 emit_move_insn (offset, tmp);
16974 emit_label (label);
16975 LABEL_NUSES (label) = 1;
16976 }
16977 if (max_size > 2)
16978 {
16979 rtx label = ix86_expand_aligntest (count, 2, true);
16980 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
16981 src = change_address (srcmem, HImode, tmp);
16982 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
16983 dest = change_address (destmem, HImode, tmp);
16984 emit_move_insn (dest, src);
16985 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
16986 true, OPTAB_LIB_WIDEN);
16987 if (tmp != offset)
16988 emit_move_insn (offset, tmp);
16989 emit_label (label);
16990 LABEL_NUSES (label) = 1;
16991 }
16992 if (max_size > 1)
16993 {
16994 rtx label = ix86_expand_aligntest (count, 1, true);
16995 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
16996 src = change_address (srcmem, QImode, tmp);
16997 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
16998 dest = change_address (destmem, QImode, tmp);
16999 emit_move_insn (dest, src);
17000 emit_label (label);
17001 LABEL_NUSES (label) = 1;
17002 }
17003 }
17004 }
17005
17006 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
17007 static void
17008 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
17009 rtx count, int max_size)
17010 {
17011 count =
17012 expand_simple_binop (counter_mode (count), AND, count,
17013 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
17014 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
17015 gen_lowpart (QImode, value), count, QImode,
17016 1, max_size / 2);
17017 }
17018
17019 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
17020 static void
17021 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
17022 {
17023 rtx dest;
17024
17025 if (CONST_INT_P (count))
17026 {
17027 HOST_WIDE_INT countval = INTVAL (count);
17028 int offset = 0;
17029
17030 if ((countval & 0x10) && max_size > 16)
17031 {
17032 if (TARGET_64BIT)
17033 {
17034 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
17035 emit_insn (gen_strset (destptr, dest, value));
17036 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
17037 emit_insn (gen_strset (destptr, dest, value));
17038 }
17039 else
17040 gcc_unreachable ();
17041 offset += 16;
17042 }
17043 if ((countval & 0x08) && max_size > 8)
17044 {
17045 if (TARGET_64BIT)
17046 {
17047 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
17048 emit_insn (gen_strset (destptr, dest, value));
17049 }
17050 else
17051 {
17052 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
17053 emit_insn (gen_strset (destptr, dest, value));
17054 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
17055 emit_insn (gen_strset (destptr, dest, value));
17056 }
17057 offset += 8;
17058 }
17059 if ((countval & 0x04) && max_size > 4)
17060 {
17061 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
17062 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
17063 offset += 4;
17064 }
17065 if ((countval & 0x02) && max_size > 2)
17066 {
17067 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
17068 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
17069 offset += 2;
17070 }
17071 if ((countval & 0x01) && max_size > 1)
17072 {
17073 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
17074 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
17075 offset += 1;
17076 }
17077 return;
17078 }
17079 if (max_size > 32)
17080 {
17081 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
17082 return;
17083 }
17084 if (max_size > 16)
17085 {
17086 rtx label = ix86_expand_aligntest (count, 16, true);
17087 if (TARGET_64BIT)
17088 {
17089 dest = change_address (destmem, DImode, destptr);
17090 emit_insn (gen_strset (destptr, dest, value));
17091 emit_insn (gen_strset (destptr, dest, value));
17092 }
17093 else
17094 {
17095 dest = change_address (destmem, SImode, destptr);
17096 emit_insn (gen_strset (destptr, dest, value));
17097 emit_insn (gen_strset (destptr, dest, value));
17098 emit_insn (gen_strset (destptr, dest, value));
17099 emit_insn (gen_strset (destptr, dest, value));
17100 }
17101 emit_label (label);
17102 LABEL_NUSES (label) = 1;
17103 }
17104 if (max_size > 8)
17105 {
17106 rtx label = ix86_expand_aligntest (count, 8, true);
17107 if (TARGET_64BIT)
17108 {
17109 dest = change_address (destmem, DImode, destptr);
17110 emit_insn (gen_strset (destptr, dest, value));
17111 }
17112 else
17113 {
17114 dest = change_address (destmem, SImode, destptr);
17115 emit_insn (gen_strset (destptr, dest, value));
17116 emit_insn (gen_strset (destptr, dest, value));
17117 }
17118 emit_label (label);
17119 LABEL_NUSES (label) = 1;
17120 }
17121 if (max_size > 4)
17122 {
17123 rtx label = ix86_expand_aligntest (count, 4, true);
17124 dest = change_address (destmem, SImode, destptr);
17125 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
17126 emit_label (label);
17127 LABEL_NUSES (label) = 1;
17128 }
17129 if (max_size > 2)
17130 {
17131 rtx label = ix86_expand_aligntest (count, 2, true);
17132 dest = change_address (destmem, HImode, destptr);
17133 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
17134 emit_label (label);
17135 LABEL_NUSES (label) = 1;
17136 }
17137 if (max_size > 1)
17138 {
17139 rtx label = ix86_expand_aligntest (count, 1, true);
17140 dest = change_address (destmem, QImode, destptr);
17141 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
17142 emit_label (label);
17143 LABEL_NUSES (label) = 1;
17144 }
17145 }
17146
17147 /* Copy enough from SRC to DEST to align DEST, known to be aligned to ALIGN,
17148 to DESIRED_ALIGNMENT. */
17149 static void
17150 expand_movmem_prologue (rtx destmem, rtx srcmem,
17151 rtx destptr, rtx srcptr, rtx count,
17152 int align, int desired_alignment)
17153 {
17154 if (align <= 1 && desired_alignment > 1)
17155 {
17156 rtx label = ix86_expand_aligntest (destptr, 1, false);
17157 srcmem = change_address (srcmem, QImode, srcptr);
17158 destmem = change_address (destmem, QImode, destptr);
17159 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
17160 ix86_adjust_counter (count, 1);
17161 emit_label (label);
17162 LABEL_NUSES (label) = 1;
17163 }
17164 if (align <= 2 && desired_alignment > 2)
17165 {
17166 rtx label = ix86_expand_aligntest (destptr, 2, false);
17167 srcmem = change_address (srcmem, HImode, srcptr);
17168 destmem = change_address (destmem, HImode, destptr);
17169 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
17170 ix86_adjust_counter (count, 2);
17171 emit_label (label);
17172 LABEL_NUSES (label) = 1;
17173 }
17174 if (align <= 4 && desired_alignment > 4)
17175 {
17176 rtx label = ix86_expand_aligntest (destptr, 4, false);
17177 srcmem = change_address (srcmem, SImode, srcptr);
17178 destmem = change_address (destmem, SImode, destptr);
17179 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
17180 ix86_adjust_counter (count, 4);
17181 emit_label (label);
17182 LABEL_NUSES (label) = 1;
17183 }
17184 gcc_assert (desired_alignment <= 8);
17185 }
17186
17187 /* Copy enough from SRC to DST to align DST to DESIRED_ALIGN.
17188 ALIGN_BYTES is how many bytes need to be copied. */
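/* Illustrative note: with desired_align == 8 and align_bytes == 7 this emits a
   1-byte, a 2-byte and a 4-byte copy (at offsets 0, 1 and 3), then re-derives
   the alignment and recorded size of the remaining BLKmode references from the
   bytes already copied.  */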
17189 static rtx
17190 expand_constant_movmem_prologue (rtx dst, rtx *srcp, rtx destreg, rtx srcreg,
17191 int desired_align, int align_bytes)
17192 {
17193 rtx src = *srcp;
17194 rtx src_size, dst_size;
17195 int off = 0;
17196 int src_align_bytes = get_mem_align_offset (src, desired_align * BITS_PER_UNIT);
17197 if (src_align_bytes >= 0)
17198 src_align_bytes = desired_align - src_align_bytes;
17199 src_size = MEM_SIZE (src);
17200 dst_size = MEM_SIZE (dst);
17201 if (align_bytes & 1)
17202 {
17203 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
17204 src = adjust_automodify_address_nv (src, QImode, srcreg, 0);
17205 off = 1;
17206 emit_insn (gen_strmov (destreg, dst, srcreg, src));
17207 }
17208 if (align_bytes & 2)
17209 {
17210 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
17211 src = adjust_automodify_address_nv (src, HImode, srcreg, off);
17212 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
17213 set_mem_align (dst, 2 * BITS_PER_UNIT);
17214 if (src_align_bytes >= 0
17215 && (src_align_bytes & 1) == (align_bytes & 1)
17216 && MEM_ALIGN (src) < 2 * BITS_PER_UNIT)
17217 set_mem_align (src, 2 * BITS_PER_UNIT);
17218 off = 2;
17219 emit_insn (gen_strmov (destreg, dst, srcreg, src));
17220 }
17221 if (align_bytes & 4)
17222 {
17223 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
17224 src = adjust_automodify_address_nv (src, SImode, srcreg, off);
17225 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
17226 set_mem_align (dst, 4 * BITS_PER_UNIT);
17227 if (src_align_bytes >= 0)
17228 {
17229 unsigned int src_align = 0;
17230 if ((src_align_bytes & 3) == (align_bytes & 3))
17231 src_align = 4;
17232 else if ((src_align_bytes & 1) == (align_bytes & 1))
17233 src_align = 2;
17234 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
17235 set_mem_align (src, src_align * BITS_PER_UNIT);
17236 }
17237 off = 4;
17238 emit_insn (gen_strmov (destreg, dst, srcreg, src));
17239 }
17240 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
17241 src = adjust_automodify_address_nv (src, BLKmode, srcreg, off);
17242 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
17243 set_mem_align (dst, desired_align * BITS_PER_UNIT);
17244 if (src_align_bytes >= 0)
17245 {
17246 unsigned int src_align = 0;
17247 if ((src_align_bytes & 7) == (align_bytes & 7))
17248 src_align = 8;
17249 else if ((src_align_bytes & 3) == (align_bytes & 3))
17250 src_align = 4;
17251 else if ((src_align_bytes & 1) == (align_bytes & 1))
17252 src_align = 2;
17253 if (src_align > (unsigned int) desired_align)
17254 src_align = desired_align;
17255 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
17256 set_mem_align (src, src_align * BITS_PER_UNIT);
17257 }
17258 if (dst_size)
17259 set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
17260 if (src_size)
17261 set_mem_size (src, GEN_INT (INTVAL (src_size) - align_bytes));
17262 *srcp = src;
17263 return dst;
17264 }
17265
17266 /* Store enough at DEST to align DEST, known to be aligned to ALIGN,
17267 to DESIRED_ALIGNMENT. */
17268 static void
17269 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
17270 int align, int desired_alignment)
17271 {
17272 if (align <= 1 && desired_alignment > 1)
17273 {
17274 rtx label = ix86_expand_aligntest (destptr, 1, false);
17275 destmem = change_address (destmem, QImode, destptr);
17276 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
17277 ix86_adjust_counter (count, 1);
17278 emit_label (label);
17279 LABEL_NUSES (label) = 1;
17280 }
17281 if (align <= 2 && desired_alignment > 2)
17282 {
17283 rtx label = ix86_expand_aligntest (destptr, 2, false);
17284 destmem = change_address (destmem, HImode, destptr);
17285 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
17286 ix86_adjust_counter (count, 2);
17287 emit_label (label);
17288 LABEL_NUSES (label) = 1;
17289 }
17290 if (align <= 4 && desired_alignment > 4)
17291 {
17292 rtx label = ix86_expand_aligntest (destptr, 4, false);
17293 destmem = change_address (destmem, SImode, destptr);
17294 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
17295 ix86_adjust_counter (count, 4);
17296 emit_label (label);
17297 LABEL_NUSES (label) = 1;
17298 }
17299 gcc_assert (desired_alignment <= 8);
17300 }
17301
17302 /* Store enough at DST to align DST, known to be aligned to ALIGN, to
17303 DESIRED_ALIGN. ALIGN_BYTES is how many bytes need to be stored. */
17304 static rtx
17305 expand_constant_setmem_prologue (rtx dst, rtx destreg, rtx value,
17306 int desired_align, int align_bytes)
17307 {
17308 int off = 0;
17309 rtx dst_size = MEM_SIZE (dst);
17310 if (align_bytes & 1)
17311 {
17312 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
17313 off = 1;
17314 emit_insn (gen_strset (destreg, dst,
17315 gen_lowpart (QImode, value)));
17316 }
17317 if (align_bytes & 2)
17318 {
17319 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
17320 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
17321 set_mem_align (dst, 2 * BITS_PER_UNIT);
17322 off = 2;
17323 emit_insn (gen_strset (destreg, dst,
17324 gen_lowpart (HImode, value)));
17325 }
17326 if (align_bytes & 4)
17327 {
17328 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
17329 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
17330 set_mem_align (dst, 4 * BITS_PER_UNIT);
17331 off = 4;
17332 emit_insn (gen_strset (destreg, dst,
17333 gen_lowpart (SImode, value)));
17334 }
17335 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
17336 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
17337 set_mem_align (dst, desired_align * BITS_PER_UNIT);
17338 if (dst_size)
17339 set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
17340 return dst;
17341 }
17342
17343 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
17344 static enum stringop_alg
17345 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
17346 int *dynamic_check)
17347 {
17348 const struct stringop_algs * algs;
17349 bool optimize_for_speed;
17350 /* Algorithms using the rep prefix want at least edi and ecx;
17351 additionally, memset wants eax and memcpy wants esi. Don't
17352 consider such algorithms if the user has appropriated those
17353 registers for their own purposes. */
17354 bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
17355 || (memset
17356 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
17357
17358 #define ALG_USABLE_P(alg) (rep_prefix_usable \
17359 || (alg != rep_prefix_1_byte \
17360 && alg != rep_prefix_4_byte \
17361 && alg != rep_prefix_8_byte))
17362 const struct processor_costs *cost;
17363
17364 /* Even if the string operation call is cold, we still might spend a lot
17365 of time processing large blocks. */
17366 if (optimize_function_for_size_p (cfun)
17367 || (optimize_insn_for_size_p ()
17368 && expected_size != -1 && expected_size < 256))
17369 optimize_for_speed = false;
17370 else
17371 optimize_for_speed = true;
17372
17373 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
17374
17375 *dynamic_check = -1;
17376 if (memset)
17377 algs = &cost->memset[TARGET_64BIT != 0];
17378 else
17379 algs = &cost->memcpy[TARGET_64BIT != 0];
17380 if (stringop_alg != no_stringop && ALG_USABLE_P (stringop_alg))
17381 return stringop_alg;
17382 /* rep; movq or rep; movl is the smallest variant. */
17383 else if (!optimize_for_speed)
17384 {
17385 if (!count || (count & 3))
17386 return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
17387 else
17388 return rep_prefix_usable ? rep_prefix_4_byte : loop;
17389 }
17390 /* Very tiny blocks are best handled via the loop; REP is expensive to set up.
17391 */
17392 else if (expected_size != -1 && expected_size < 4)
17393 return loop_1_byte;
17394 else if (expected_size != -1)
17395 {
17396 unsigned int i;
17397 enum stringop_alg alg = libcall;
17398 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
17399 {
17400 /* We get here if the algorithms that were not libcall-based
17401 were rep-prefix based and we are unable to use rep prefixes
17402 based on global register usage. Break out of the loop and
17403 use the heuristic below. */
17404 if (algs->size[i].max == 0)
17405 break;
17406 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
17407 {
17408 enum stringop_alg candidate = algs->size[i].alg;
17409
17410 if (candidate != libcall && ALG_USABLE_P (candidate))
17411 alg = candidate;
17412 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
17413 last non-libcall inline algorithm. */
17414 if (TARGET_INLINE_ALL_STRINGOPS)
17415 {
17416 /* When the current size is best to be copied by a libcall,
17417 but we are still forced to inline, run the heuristic below
17418 that will pick code for medium sized blocks. */
17419 if (alg != libcall)
17420 return alg;
17421 break;
17422 }
17423 else if (ALG_USABLE_P (candidate))
17424 return candidate;
17425 }
17426 }
17427 gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
17428 }
17429 /* When asked to inline the call anyway, try to pick a meaningful choice.
17430 We look for the maximal size of block that is faster to copy by hand and
17431 take blocks of at most that size, guessing that the average size will
17432 be roughly half of the block.
17433
17434 If this turns out to be bad, we might simply specify the preferred
17435 choice in ix86_costs. */
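/* Illustrative example (hypothetical cost table): if sizes up to 256 bytes are
   tabulated as unrolled_loop and everything larger as libcall, MAX becomes 256
   and we recurse as if the block were 128 bytes; with
   -minline-stringops-dynamically a runtime check against 256 then diverts
   larger blocks to the library call.  */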
17436 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
17437 && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
17438 {
17439 int max = -1;
17440 enum stringop_alg alg;
17441 int i;
17442 bool any_alg_usable_p = true;
17443
17444 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
17445 {
17446 enum stringop_alg candidate = algs->size[i].alg;
17447 any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);
17448
17449 if (candidate != libcall && candidate
17450 && ALG_USABLE_P (candidate))
17451 max = algs->size[i].max;
17452 }
17453 /* If there aren't any usable algorithms, then recursing on
17454 smaller sizes isn't going to find anything. Just return the
17455 simple byte-at-a-time copy loop. */
17456 if (!any_alg_usable_p)
17457 {
17458 /* Pick something reasonable. */
17459 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
17460 *dynamic_check = 128;
17461 return loop_1_byte;
17462 }
17463 if (max == -1)
17464 max = 4096;
17465 alg = decide_alg (count, max / 2, memset, dynamic_check);
17466 gcc_assert (*dynamic_check == -1);
17467 gcc_assert (alg != libcall);
17468 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
17469 *dynamic_check = max;
17470 return alg;
17471 }
17472 return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
17473 #undef ALG_USABLE_P
17474 }
17475
17476 /* Decide on alignment. We know that the operand is already aligned to ALIGN
17477 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
17478 static int
17479 decide_alignment (int align,
17480 enum stringop_alg alg,
17481 int expected_size)
17482 {
17483 int desired_align = 0;
17484 switch (alg)
17485 {
17486 case no_stringop:
17487 gcc_unreachable ();
17488 case loop:
17489 case unrolled_loop:
17490 desired_align = GET_MODE_SIZE (Pmode);
17491 break;
17492 case rep_prefix_8_byte:
17493 desired_align = 8;
17494 break;
17495 case rep_prefix_4_byte:
17496 /* PentiumPro has special logic triggering for 8-byte aligned blocks,
17497 copying a whole cacheline at once. */
17498 if (TARGET_PENTIUMPRO)
17499 desired_align = 8;
17500 else
17501 desired_align = 4;
17502 break;
17503 case rep_prefix_1_byte:
17504 /* PentiumPro has special logic triggering for 8-byte aligned blocks,
17505 copying a whole cacheline at once. */
17506 if (TARGET_PENTIUMPRO)
17507 desired_align = 8;
17508 else
17509 desired_align = 1;
17510 break;
17511 case loop_1_byte:
17512 desired_align = 1;
17513 break;
17514 case libcall:
17515 return 0;
17516 }
17517
17518 if (optimize_size)
17519 desired_align = 1;
17520 if (desired_align < align)
17521 desired_align = align;
17522 if (expected_size != -1 && expected_size < 4)
17523 desired_align = align;
17524 return desired_align;
17525 }
17526
17527 /* Return the smallest power of 2 greater than VAL. */
17528 static int
17529 smallest_pow2_greater_than (int val)
17530 {
17531 int ret = 1;
17532 while (ret <= val)
17533 ret <<= 1;
17534 return ret;
17535 }
17536
17537 /* Expand string move (memcpy) operation. Use i386 string operations when
17538 profitable. ix86_expand_setmem contains similar code. The code depends upon
17539 architecture, block size and alignment, but always has the same
17540 overall structure:
17541
17542 1) Prologue guard: Conditional that jumps up to the epilogue for small
17543 blocks that can be handled by the epilogue alone. This is faster but
17544 also needed for correctness, since the prologue assumes the block is
17545 larger than the desired alignment.
17546
17547 Optional dynamic check for size and libcall for large
17548 blocks is emitted here too, with -minline-stringops-dynamically.
17549
17550 2) Prologue: copy the first few bytes in order to get the destination
17551 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less than
17552 DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be copied.
17553 We emit either a jump tree on power-of-two sized blocks, or a byte loop.
17554
17555 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
17556 with specified algorithm.
17557
17558 4) Epilogue: code copying tail of the block that is too small to be
17559 handled by main body (or up to size guarded by prologue guard). */
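/* As a rough illustration (a behavioral sketch under the assumption of an
   unknown count, destination alignment 1 and the unrolled_loop algorithm on
   x86-64), the generated code acts like

       if (count < 32) goto epilogue;                       (step 1)
       copy 1, 2 and 4 bytes as needed to 8-align dest;     (step 2)
       copy count & -32 bytes in 4 x 8-byte chunks;         (step 3)
     epilogue:
       copy the remaining count & 31 bytes;                 (step 4)

   The exact thresholds and chunk sizes follow from decide_alg and
   decide_alignment above.  */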
17560
17561 int
17562 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
17563 rtx expected_align_exp, rtx expected_size_exp)
17564 {
17565 rtx destreg;
17566 rtx srcreg;
17567 rtx label = NULL;
17568 rtx tmp;
17569 rtx jump_around_label = NULL;
17570 HOST_WIDE_INT align = 1;
17571 unsigned HOST_WIDE_INT count = 0;
17572 HOST_WIDE_INT expected_size = -1;
17573 int size_needed = 0, epilogue_size_needed;
17574 int desired_align = 0, align_bytes = 0;
17575 enum stringop_alg alg;
17576 int dynamic_check;
17577 bool need_zero_guard = false;
17578
17579 if (CONST_INT_P (align_exp))
17580 align = INTVAL (align_exp);
17581 /* i386 can do misaligned access at reasonably increased cost. */
17582 if (CONST_INT_P (expected_align_exp)
17583 && INTVAL (expected_align_exp) > align)
17584 align = INTVAL (expected_align_exp);
17585 /* ALIGN is the minimum of destination and source alignment, but we care here
17586 just about destination alignment. */
17587 else if (MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
17588 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
17589
17590 if (CONST_INT_P (count_exp))
17591 count = expected_size = INTVAL (count_exp);
17592 if (CONST_INT_P (expected_size_exp) && count == 0)
17593 expected_size = INTVAL (expected_size_exp);
17594
17595 /* Make sure we don't need to care about overflow later on. */
17596 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
17597 return 0;
17598
17599 /* Step 0: Decide on preferred algorithm, desired alignment and
17600 size of chunks to be copied by main loop. */
17601
17602 alg = decide_alg (count, expected_size, false, &dynamic_check);
17603 desired_align = decide_alignment (align, alg, expected_size);
17604
17605 if (!TARGET_ALIGN_STRINGOPS)
17606 align = desired_align;
17607
17608 if (alg == libcall)
17609 return 0;
17610 gcc_assert (alg != no_stringop);
17611 if (!count)
17612 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
17613 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
17614 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
17615 switch (alg)
17616 {
17617 case libcall:
17618 case no_stringop:
17619 gcc_unreachable ();
17620 case loop:
17621 need_zero_guard = true;
17622 size_needed = GET_MODE_SIZE (Pmode);
17623 break;
17624 case unrolled_loop:
17625 need_zero_guard = true;
17626 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
17627 break;
17628 case rep_prefix_8_byte:
17629 size_needed = 8;
17630 break;
17631 case rep_prefix_4_byte:
17632 size_needed = 4;
17633 break;
17634 case rep_prefix_1_byte:
17635 size_needed = 1;
17636 break;
17637 case loop_1_byte:
17638 need_zero_guard = true;
17639 size_needed = 1;
17640 break;
17641 }
17642
17643 epilogue_size_needed = size_needed;
17644
17645 /* Step 1: Prologue guard. */
17646
17647 /* Alignment code needs count to be in register. */
17648 if (CONST_INT_P (count_exp) && desired_align > align)
17649 {
17650 if (INTVAL (count_exp) > desired_align
17651 && INTVAL (count_exp) > size_needed)
17652 {
17653 align_bytes
17654 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
17655 if (align_bytes <= 0)
17656 align_bytes = 0;
17657 else
17658 align_bytes = desired_align - align_bytes;
17659 }
17660 if (align_bytes == 0)
17661 count_exp = force_reg (counter_mode (count_exp), count_exp);
17662 }
17663 gcc_assert (desired_align >= 1 && align >= 1);
17664
17665 /* Ensure that alignment prologue won't copy past end of block. */
17666 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
17667 {
17668 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
17669 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
17670 Make sure it is power of 2. */
17671 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
17672
17673 if (count)
17674 {
17675 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
17676 {
17677 /* If main algorithm works on QImode, no epilogue is needed.
17678 For small sizes just don't align anything. */
17679 if (size_needed == 1)
17680 desired_align = align;
17681 else
17682 goto epilogue;
17683 }
17684 }
17685 else
17686 {
17687 label = gen_label_rtx ();
17688 emit_cmp_and_jump_insns (count_exp,
17689 GEN_INT (epilogue_size_needed),
17690 LTU, 0, counter_mode (count_exp), 1, label);
17691 if (expected_size == -1 || expected_size < epilogue_size_needed)
17692 predict_jump (REG_BR_PROB_BASE * 60 / 100);
17693 else
17694 predict_jump (REG_BR_PROB_BASE * 20 / 100);
17695 }
17696 }
17697
17698 /* Emit code to decide on runtime whether library call or inline should be
17699 used. */
17700 if (dynamic_check != -1)
17701 {
17702 if (CONST_INT_P (count_exp))
17703 {
17704 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
17705 {
17706 emit_block_move_via_libcall (dst, src, count_exp, false);
17707 count_exp = const0_rtx;
17708 goto epilogue;
17709 }
17710 }
17711 else
17712 {
17713 rtx hot_label = gen_label_rtx ();
17714 jump_around_label = gen_label_rtx ();
17715 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
17716 LEU, 0, GET_MODE (count_exp), 1, hot_label);
17717 predict_jump (REG_BR_PROB_BASE * 90 / 100);
17718 emit_block_move_via_libcall (dst, src, count_exp, false);
17719 emit_jump (jump_around_label);
17720 emit_label (hot_label);
17721 }
17722 }
17723
17724 /* Step 2: Alignment prologue. */
17725
17726 if (desired_align > align)
17727 {
17728 if (align_bytes == 0)
17729 {
17730 /* Except for the first move in the epilogue, we no longer know
17731 the constant offset in aliasing info. It does not seem worth
17732 the pain to maintain it for the first move, so throw away
17733 the info early. */
17734 src = change_address (src, BLKmode, srcreg);
17735 dst = change_address (dst, BLKmode, destreg);
17736 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
17737 desired_align);
17738 }
17739 else
17740 {
17741 /* If we know how many bytes need to be stored before dst is
17742 sufficiently aligned, maintain aliasing info accurately. */
17743 dst = expand_constant_movmem_prologue (dst, &src, destreg, srcreg,
17744 desired_align, align_bytes);
17745 count_exp = plus_constant (count_exp, -align_bytes);
17746 count -= align_bytes;
17747 }
17748 if (need_zero_guard
17749 && (count < (unsigned HOST_WIDE_INT) size_needed
17750 || (align_bytes == 0
17751 && count < ((unsigned HOST_WIDE_INT) size_needed
17752 + desired_align - align))))
17753 {
17754 /* It is possible that we copied enough so the main loop will not
17755 execute. */
17756 gcc_assert (size_needed > 1);
17757 if (label == NULL_RTX)
17758 label = gen_label_rtx ();
17759 emit_cmp_and_jump_insns (count_exp,
17760 GEN_INT (size_needed),
17761 LTU, 0, counter_mode (count_exp), 1, label);
17762 if (expected_size == -1
17763 || expected_size < (desired_align - align) / 2 + size_needed)
17764 predict_jump (REG_BR_PROB_BASE * 20 / 100);
17765 else
17766 predict_jump (REG_BR_PROB_BASE * 60 / 100);
17767 }
17768 }
17769 if (label && size_needed == 1)
17770 {
17771 emit_label (label);
17772 LABEL_NUSES (label) = 1;
17773 label = NULL;
17774 epilogue_size_needed = 1;
17775 }
17776 else if (label == NULL_RTX)
17777 epilogue_size_needed = size_needed;
17778
17779 /* Step 3: Main loop. */
17780
17781 switch (alg)
17782 {
17783 case libcall:
17784 case no_stringop:
17785 gcc_unreachable ();
17786 case loop_1_byte:
17787 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
17788 count_exp, QImode, 1, expected_size);
17789 break;
17790 case loop:
17791 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
17792 count_exp, Pmode, 1, expected_size);
17793 break;
17794 case unrolled_loop:
17795 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
17796 registers for 4 temporaries anyway. */
17797 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
17798 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
17799 expected_size);
17800 break;
17801 case rep_prefix_8_byte:
17802 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
17803 DImode);
17804 break;
17805 case rep_prefix_4_byte:
17806 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
17807 SImode);
17808 break;
17809 case rep_prefix_1_byte:
17810 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
17811 QImode);
17812 break;
17813 }
17814 /* Properly adjust the offsets of the src and dest memory for aliasing. */
17815 if (CONST_INT_P (count_exp))
17816 {
17817 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
17818 (count / size_needed) * size_needed);
17819 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
17820 (count / size_needed) * size_needed);
17821 }
17822 else
17823 {
17824 src = change_address (src, BLKmode, srcreg);
17825 dst = change_address (dst, BLKmode, destreg);
17826 }
17827
17828 /* Step 4: Epilogue to copy the remaining bytes. */
17829 epilogue:
17830 if (label)
17831 {
17832 /* When the main loop is done, COUNT_EXP might hold original count,
17833 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
17834 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
17835 bytes. Compensate if needed. */
17836
17837 if (size_needed < epilogue_size_needed)
17838 {
17839 tmp =
17840 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
17841 GEN_INT (size_needed - 1), count_exp, 1,
17842 OPTAB_DIRECT);
17843 if (tmp != count_exp)
17844 emit_move_insn (count_exp, tmp);
17845 }
17846 emit_label (label);
17847 LABEL_NUSES (label) = 1;
17848 }
17849
17850 if (count_exp != const0_rtx && epilogue_size_needed > 1)
17851 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
17852 epilogue_size_needed);
17853 if (jump_around_label)
17854 emit_label (jump_around_label);
17855 return 1;
17856 }
17857
17858 /* Helper function for memset. For QImode value 0xXY produce
17859 0xXYXYXYXY of the width specified by MODE. This is essentially
17860 a * 0x01010101, but we can do slightly better than
17861 synth_mult by unwinding the sequence by hand on CPUs with
17862 slow multiply. */
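/* Illustrative example: promoting 0xAB to SImode yields 0xABABABAB, either as
   reg * 0x01010101 when multiplication is cheap, or by unwinding the sequence
   reg |= reg << 8; reg |= reg << 16 (plus reg |= reg << 32 for DImode) when it
   is not.  The insv patterns below are just a cheaper form of the << 8 step.  */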
17863 static rtx
17864 promote_duplicated_reg (enum machine_mode mode, rtx val)
17865 {
17866 enum machine_mode valmode = GET_MODE (val);
17867 rtx tmp;
17868 int nops = mode == DImode ? 3 : 2;
17869
17870 gcc_assert (mode == SImode || mode == DImode);
17871 if (val == const0_rtx)
17872 return copy_to_mode_reg (mode, const0_rtx);
17873 if (CONST_INT_P (val))
17874 {
17875 HOST_WIDE_INT v = INTVAL (val) & 255;
17876
17877 v |= v << 8;
17878 v |= v << 16;
17879 if (mode == DImode)
17880 v |= (v << 16) << 16;
17881 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
17882 }
17883
17884 if (valmode == VOIDmode)
17885 valmode = QImode;
17886 if (valmode != QImode)
17887 val = gen_lowpart (QImode, val);
17888 if (mode == QImode)
17889 return val;
17890 if (!TARGET_PARTIAL_REG_STALL)
17891 nops--;
17892 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
17893 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
17894 <= (ix86_cost->shift_const + ix86_cost->add) * nops
17895 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
17896 {
17897 rtx reg = convert_modes (mode, QImode, val, true);
17898 tmp = promote_duplicated_reg (mode, const1_rtx);
17899 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
17900 OPTAB_DIRECT);
17901 }
17902 else
17903 {
17904 rtx reg = convert_modes (mode, QImode, val, true);
17905
17906 if (!TARGET_PARTIAL_REG_STALL)
17907 if (mode == SImode)
17908 emit_insn (gen_movsi_insv_1 (reg, reg));
17909 else
17910 emit_insn (gen_movdi_insv_1_rex64 (reg, reg));
17911 else
17912 {
17913 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
17914 NULL, 1, OPTAB_DIRECT);
17915 reg =
17916 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
17917 }
17918 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
17919 NULL, 1, OPTAB_DIRECT);
17920 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
17921 if (mode == SImode)
17922 return reg;
17923 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
17924 NULL, 1, OPTAB_DIRECT);
17925 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
17926 return reg;
17927 }
17928 }
17929
17930 /* Duplicate value VAL using promote_duplicated_reg into the maximal size that
17931 will be needed by the main loop copying SIZE_NEEDED chunks and by the prologue
17932 raising alignment from ALIGN to DESIRED_ALIGN. */
17933 static rtx
17934 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
17935 {
17936 rtx promoted_val;
17937
17938 if (TARGET_64BIT
17939 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
17940 promoted_val = promote_duplicated_reg (DImode, val);
17941 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
17942 promoted_val = promote_duplicated_reg (SImode, val);
17943 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
17944 promoted_val = promote_duplicated_reg (HImode, val);
17945 else
17946 promoted_val = val;
17947
17948 return promoted_val;
17949 }
17950
17951 /* Expand string set operation (memset). Use i386 string operations when
17952 profitable. See the ix86_expand_movmem comment for an explanation of the
17953 individual steps performed. */
17954 int
17955 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
17956 rtx expected_align_exp, rtx expected_size_exp)
17957 {
17958 rtx destreg;
17959 rtx label = NULL;
17960 rtx tmp;
17961 rtx jump_around_label = NULL;
17962 HOST_WIDE_INT align = 1;
17963 unsigned HOST_WIDE_INT count = 0;
17964 HOST_WIDE_INT expected_size = -1;
17965 int size_needed = 0, epilogue_size_needed;
17966 int desired_align = 0, align_bytes = 0;
17967 enum stringop_alg alg;
17968 rtx promoted_val = NULL;
17969 bool force_loopy_epilogue = false;
17970 int dynamic_check;
17971 bool need_zero_guard = false;
17972
17973 if (CONST_INT_P (align_exp))
17974 align = INTVAL (align_exp);
17975 /* i386 can do misaligned access at reasonably increased cost. */
17976 if (CONST_INT_P (expected_align_exp)
17977 && INTVAL (expected_align_exp) > align)
17978 align = INTVAL (expected_align_exp);
17979 if (CONST_INT_P (count_exp))
17980 count = expected_size = INTVAL (count_exp);
17981 if (CONST_INT_P (expected_size_exp) && count == 0)
17982 expected_size = INTVAL (expected_size_exp);
17983
17984 /* Make sure we don't need to care about overflow later on. */
17985 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
17986 return 0;
17987
17988 /* Step 0: Decide on preferred algorithm, desired alignment and
17989 size of chunks to be copied by main loop. */
17990
17991 alg = decide_alg (count, expected_size, true, &dynamic_check);
17992 desired_align = decide_alignment (align, alg, expected_size);
17993
17994 if (!TARGET_ALIGN_STRINGOPS)
17995 align = desired_align;
17996
17997 if (alg == libcall)
17998 return 0;
17999 gcc_assert (alg != no_stringop);
18000 if (!count)
18001 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
18002 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
18003 switch (alg)
18004 {
18005 case libcall:
18006 case no_stringop:
18007 gcc_unreachable ();
18008 case loop:
18009 need_zero_guard = true;
18010 size_needed = GET_MODE_SIZE (Pmode);
18011 break;
18012 case unrolled_loop:
18013 need_zero_guard = true;
18014 size_needed = GET_MODE_SIZE (Pmode) * 4;
18015 break;
18016 case rep_prefix_8_byte:
18017 size_needed = 8;
18018 break;
18019 case rep_prefix_4_byte:
18020 size_needed = 4;
18021 break;
18022 case rep_prefix_1_byte:
18023 size_needed = 1;
18024 break;
18025 case loop_1_byte:
18026 need_zero_guard = true;
18027 size_needed = 1;
18028 break;
18029 }
18030 epilogue_size_needed = size_needed;
18031
18032 /* Step 1: Prologue guard. */
18033
18034 /* Alignment code needs count to be in register. */
18035 if (CONST_INT_P (count_exp) && desired_align > align)
18036 {
18037 if (INTVAL (count_exp) > desired_align
18038 && INTVAL (count_exp) > size_needed)
18039 {
18040 align_bytes
18041 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
18042 if (align_bytes <= 0)
18043 align_bytes = 0;
18044 else
18045 align_bytes = desired_align - align_bytes;
18046 }
18047 if (align_bytes == 0)
18048 {
18049 enum machine_mode mode = SImode;
18050 if (TARGET_64BIT && (count & ~0xffffffff))
18051 mode = DImode;
18052 count_exp = force_reg (mode, count_exp);
18053 }
18054 }
18055 /* Do the cheap promotion to allow better CSE across the
18056 main loop and epilogue (i.e. one load of the big constant in
18057 front of all the code). */
18058 if (CONST_INT_P (val_exp))
18059 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
18060 desired_align, align);
18061 /* Ensure that alignment prologue won't copy past end of block. */
18062 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
18063 {
18064 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
18065 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
18066 Make sure it is power of 2. */
18067 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
18068
18069 /* To improve performance of small blocks, we jump around the VAL
18070 promoting code. This means that if the promoted VAL is not constant,
18071 we might not use it in the epilogue and have to use the byte
18072 loop variant. */
18073 if (epilogue_size_needed > 2 && !promoted_val)
18074 force_loopy_epilogue = true;
18075 if (count)
18076 {
18077 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
18078 {
18079 /* If main algorithm works on QImode, no epilogue is needed.
18080 For small sizes just don't align anything. */
18081 if (size_needed == 1)
18082 desired_align = align;
18083 else
18084 goto epilogue;
18085 }
18086 }
18087 else
18088 {
18089 label = gen_label_rtx ();
18090 emit_cmp_and_jump_insns (count_exp,
18091 GEN_INT (epilogue_size_needed),
18092 LTU, 0, counter_mode (count_exp), 1, label);
18093 if (expected_size == -1 || expected_size <= epilogue_size_needed)
18094 predict_jump (REG_BR_PROB_BASE * 60 / 100);
18095 else
18096 predict_jump (REG_BR_PROB_BASE * 20 / 100);
18097 }
18098 }
18099 if (dynamic_check != -1)
18100 {
18101 rtx hot_label = gen_label_rtx ();
18102 jump_around_label = gen_label_rtx ();
18103 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
18104 LEU, 0, counter_mode (count_exp), 1, hot_label);
18105 predict_jump (REG_BR_PROB_BASE * 90 / 100);
18106 set_storage_via_libcall (dst, count_exp, val_exp, false);
18107 emit_jump (jump_around_label);
18108 emit_label (hot_label);
18109 }
18110
18111 /* Step 2: Alignment prologue. */
18112
18113 /* Do the expensive promotion once we branched off the small blocks. */
18114 if (!promoted_val)
18115 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
18116 desired_align, align);
18117 gcc_assert (desired_align >= 1 && align >= 1);
18118
18119 if (desired_align > align)
18120 {
18121 if (align_bytes == 0)
18122 {
18123 /* Except for the first move in the epilogue, we no longer know
18124 the constant offset in aliasing info. It does not seem worth
18125 the pain to maintain it for the first move, so throw away
18126 the info early. */
18127 dst = change_address (dst, BLKmode, destreg);
18128 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
18129 desired_align);
18130 }
18131 else
18132 {
18133 /* If we know how many bytes need to be stored before dst is
18134 sufficiently aligned, maintain aliasing info accurately. */
18135 dst = expand_constant_setmem_prologue (dst, destreg, promoted_val,
18136 desired_align, align_bytes);
18137 count_exp = plus_constant (count_exp, -align_bytes);
18138 count -= align_bytes;
18139 }
18140 if (need_zero_guard
18141 && (count < (unsigned HOST_WIDE_INT) size_needed
18142 || (align_bytes == 0
18143 && count < ((unsigned HOST_WIDE_INT) size_needed
18144 + desired_align - align))))
18145 {
18146 /* It is possible that we copied enough so the main loop will not
18147 execute. */
18148 gcc_assert (size_needed > 1);
18149 if (label == NULL_RTX)
18150 label = gen_label_rtx ();
18151 emit_cmp_and_jump_insns (count_exp,
18152 GEN_INT (size_needed),
18153 LTU, 0, counter_mode (count_exp), 1, label);
18154 if (expected_size == -1
18155 || expected_size < (desired_align - align) / 2 + size_needed)
18156 predict_jump (REG_BR_PROB_BASE * 20 / 100);
18157 else
18158 predict_jump (REG_BR_PROB_BASE * 60 / 100);
18159 }
18160 }
18161 if (label && size_needed == 1)
18162 {
18163 emit_label (label);
18164 LABEL_NUSES (label) = 1;
18165 label = NULL;
18166 promoted_val = val_exp;
18167 epilogue_size_needed = 1;
18168 }
18169 else if (label == NULL_RTX)
18170 epilogue_size_needed = size_needed;
18171
18172 /* Step 3: Main loop. */
18173
18174 switch (alg)
18175 {
18176 case libcall:
18177 case no_stringop:
18178 gcc_unreachable ();
18179 case loop_1_byte:
18180 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
18181 count_exp, QImode, 1, expected_size);
18182 break;
18183 case loop:
18184 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
18185 count_exp, Pmode, 1, expected_size);
18186 break;
18187 case unrolled_loop:
18188 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
18189 count_exp, Pmode, 4, expected_size);
18190 break;
18191 case rep_prefix_8_byte:
18192 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
18193 DImode, val_exp);
18194 break;
18195 case rep_prefix_4_byte:
18196 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
18197 SImode, val_exp);
18198 break;
18199 case rep_prefix_1_byte:
18200 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
18201 QImode, val_exp);
18202 break;
18203 }
18204 /* Properly adjust the offset of the dest memory for aliasing. */
18205 if (CONST_INT_P (count_exp))
18206 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
18207 (count / size_needed) * size_needed);
18208 else
18209 dst = change_address (dst, BLKmode, destreg);
18210
18211 /* Step 4: Epilogue to copy the remaining bytes. */
18212
18213 if (label)
18214 {
18215 /* When the main loop is done, COUNT_EXP might hold original count,
18216 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
18217 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
18218 bytes. Compensate if needed. */
18219
18220 if (size_needed < epilogue_size_needed)
18221 {
18222 tmp =
18223 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
18224 GEN_INT (size_needed - 1), count_exp, 1,
18225 OPTAB_DIRECT);
18226 if (tmp != count_exp)
18227 emit_move_insn (count_exp, tmp);
18228 }
18229 emit_label (label);
18230 LABEL_NUSES (label) = 1;
18231 }
18232 epilogue:
18233 if (count_exp != const0_rtx && epilogue_size_needed > 1)
18234 {
18235 if (force_loopy_epilogue)
18236 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
18237 epilogue_size_needed);
18238 else
18239 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
18240 epilogue_size_needed);
18241 }
18242 if (jump_around_label)
18243 emit_label (jump_around_label);
18244 return 1;
18245 }
18246
18247 /* Expand the appropriate insns for doing strlen if not just doing
18248 repnz; scasb
18249
18250 out = result, initialized with the start address
18251 align_rtx = alignment of the address.
18252 scratch = scratch register, initialized with the start address when
18253 not aligned, otherwise undefined
18254
18255 This is just the body. It needs the initializations mentioned above and
18256 some address computing at the end. These things are done in i386.md. */
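/* The expansion below first tests up to three leading bytes one at a time
until OUT is 4-byte aligned, then scans the string a word at a time using
a zero-byte detection formula, and finally adjusts OUT so that it points
at the terminating zero byte. */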
18257
18258 static void
18259 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
18260 {
18261 int align;
18262 rtx tmp;
18263 rtx align_2_label = NULL_RTX;
18264 rtx align_3_label = NULL_RTX;
18265 rtx align_4_label = gen_label_rtx ();
18266 rtx end_0_label = gen_label_rtx ();
18267 rtx mem;
18268 rtx tmpreg = gen_reg_rtx (SImode);
18269 rtx scratch = gen_reg_rtx (SImode);
18270 rtx cmp;
18271
18272 align = 0;
18273 if (CONST_INT_P (align_rtx))
18274 align = INTVAL (align_rtx);
18275
18276 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
18277
18278 /* Is there a known alignment and is it less than 4? */
18279 if (align < 4)
18280 {
18281 rtx scratch1 = gen_reg_rtx (Pmode);
18282 emit_move_insn (scratch1, out);
18283 /* Is there a known alignment and is it not 2? */
18284 if (align != 2)
18285 {
18286 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
18287 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
18288
18289 /* Leave just the 3 lower bits. */
18290 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
18291 NULL_RTX, 0, OPTAB_WIDEN);
18292
18293 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
18294 Pmode, 1, align_4_label);
18295 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
18296 Pmode, 1, align_2_label);
18297 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
18298 Pmode, 1, align_3_label);
18299 }
18300 else
18301 {
/* Since the alignment is 2, we have to check 2 or 0 bytes;
check whether it is already aligned to a 4-byte boundary. */
18304
18305 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
18306 NULL_RTX, 0, OPTAB_WIDEN);
18307
18308 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
18309 Pmode, 1, align_4_label);
18310 }
18311
18312 mem = change_address (src, QImode, out);
18313
18314 /* Now compare the bytes. */
18315
/* Compare the first n unaligned bytes on a byte-by-byte basis. */
18317 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
18318 QImode, 1, end_0_label);
18319
18320 /* Increment the address. */
18321 emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
18322
18323 /* Not needed with an alignment of 2 */
18324 if (align != 2)
18325 {
18326 emit_label (align_2_label);
18327
18328 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
18329 end_0_label);
18330
18331 emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
18332
18333 emit_label (align_3_label);
18334 }
18335
18336 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
18337 end_0_label);
18338
18339 emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
18340 }
18341
/* Generate a loop that checks 4 bytes at a time. It is not a good idea
to align this loop; doing so only makes the program larger and does not
speed it up. */
18345 emit_label (align_4_label);
18346
18347 mem = change_address (src, SImode, out);
18348 emit_move_insn (scratch, mem);
18349 emit_insn ((*ix86_gen_add3) (out, out, GEN_INT (4)));
18350
/* This formula yields a nonzero result iff one of the bytes is zero.
This saves three branches inside the loop and many cycles. */
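/* The instructions below compute
(x - 0x01010101) & ~x & 0x80808080,
which is nonzero exactly when some byte of X is zero: subtracting 1 from
a zero byte borrows and sets its top bit, while the ~x term masks off
bytes whose top bit was already set. For example, x = 0x12003456 gives
0x10ff3355 & 0xedffcba9 & 0x80808080 = 0x00800000, flagging the zero
byte. */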
18353
18354 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
18355 emit_insn (gen_one_cmplsi2 (scratch, scratch));
18356 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
18357 emit_insn (gen_andsi3 (tmpreg, tmpreg,
18358 gen_int_mode (0x80808080, SImode)));
18359 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
18360 align_4_label);
18361
18362 if (TARGET_CMOVE)
18363 {
18364 rtx reg = gen_reg_rtx (SImode);
18365 rtx reg2 = gen_reg_rtx (Pmode);
18366 emit_move_insn (reg, tmpreg);
18367 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
18368
18369 /* If zero is not in the first two bytes, move two bytes forward. */
18370 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
18371 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
18372 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
18373 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
18374 gen_rtx_IF_THEN_ELSE (SImode, tmp,
18375 reg,
18376 tmpreg)));
18377 /* Emit lea manually to avoid clobbering of flags. */
18378 emit_insn (gen_rtx_SET (SImode, reg2,
18379 gen_rtx_PLUS (Pmode, out, const2_rtx)));
18380
18381 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
18382 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
18383 emit_insn (gen_rtx_SET (VOIDmode, out,
18384 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
18385 reg2,
18386 out)));
18387
18388 }
18389 else
18390 {
18391 rtx end_2_label = gen_label_rtx ();
18392 /* Is zero in the first two bytes? */
18393
18394 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
18395 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
18396 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
18397 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
18398 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
18399 pc_rtx);
18400 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
18401 JUMP_LABEL (tmp) = end_2_label;
18402
18403 /* Not in the first two. Move two bytes forward. */
18404 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
18405 emit_insn ((*ix86_gen_add3) (out, out, const2_rtx));
18406
18407 emit_label (end_2_label);
18408
18409 }
18410
18411 /* Avoid branch in fixing the byte. */
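/* At this point OUT is 4 (or, after the adjustment above, 6) bytes past
the start of the word that contained the zero byte, and the low byte of
TMPREG has its top bit set iff the first byte of the examined 16-bit
half was the zero byte. Adding that byte to itself moves the bit into
the carry flag, so the sbb below subtracts either 3 or 4 from OUT,
leaving it pointing at the terminating zero. */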
18412 tmpreg = gen_lowpart (QImode, tmpreg);
18413 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
18414 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
18415 emit_insn ((*ix86_gen_sub3_carry) (out, out, GEN_INT (3), cmp));
18416
18417 emit_label (end_0_label);
18418 }
18419
18420 /* Expand strlen. */
18421
18422 int
18423 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
18424 {
18425 rtx addr, scratch1, scratch2, scratch3, scratch4;
18426
/* The generic case of the strlen expander is long. Avoid expanding it
unless TARGET_INLINE_ALL_STRINGOPS. */
18429
18430 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
18431 && !TARGET_INLINE_ALL_STRINGOPS
18432 && !optimize_insn_for_size_p ()
18433 && (!CONST_INT_P (align) || INTVAL (align) < 4))
18434 return 0;
18435
18436 addr = force_reg (Pmode, XEXP (src, 0));
18437 scratch1 = gen_reg_rtx (Pmode);
18438
18439 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
18440 && !optimize_insn_for_size_p ())
18441 {
/* It seems that some optimizers do not combine a call like
foo (strlen (bar), strlen (bar));
when the move and the subtraction are done here. The length is then
calculated just once when these instructions are done inside
output_strlen_unroll(). But since &bar[strlen(bar)] is often used, and
this uses one fewer register for the lifetime of output_strlen_unroll(),
it is better this way. */
18449
18450 emit_move_insn (out, addr);
18451
18452 ix86_expand_strlensi_unroll_1 (out, src, align);
18453
18454 /* strlensi_unroll_1 returns the address of the zero at the end of
18455 the string, like memchr(), so compute the length by subtracting
18456 the start address. */
18457 emit_insn ((*ix86_gen_sub3) (out, out, addr));
18458 }
18459 else
18460 {
18461 rtx unspec;
18462
18463 /* Can't use this if the user has appropriated eax, ecx, or edi. */
18464 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
18465 return false;
18466
18467 scratch2 = gen_reg_rtx (Pmode);
18468 scratch3 = gen_reg_rtx (Pmode);
18469 scratch4 = force_reg (Pmode, constm1_rtx);
18470
18471 emit_move_insn (scratch3, addr);
18472 eoschar = force_reg (QImode, eoschar);
18473
18474 src = replace_equiv_address_nv (src, scratch3);
18475
18476 /* If .md starts supporting :P, this can be done in .md. */
18477 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
18478 scratch4), UNSPEC_SCAS);
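/* The scas sequence below starts the count register at -1 and decrements
it once per byte scanned, including the terminating zero, so it ends up
holding -(strlen + 2). Complementing that value gives strlen + 1, and
adding -1 then yields the length itself. */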
18479 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
18480 emit_insn ((*ix86_gen_one_cmpl2) (scratch2, scratch1));
18481 emit_insn ((*ix86_gen_add3) (out, scratch2, constm1_rtx));
18482 }
18483 return 1;
18484 }
18485
/* For a given symbol (function), construct code to compute the address of
its PLT entry in the large x86-64 PIC model. */
18488 rtx
18489 construct_plt_address (rtx symbol)
18490 {
18491 rtx tmp = gen_reg_rtx (Pmode);
18492 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
18493
18494 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
18495 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
18496
18497 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
18498 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
18499 return tmp;
18500 }
18501
18502 void
18503 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
18504 rtx callarg2,
18505 rtx pop, int sibcall)
18506 {
18507 rtx use = NULL, call;
18508 enum calling_abi function_call_abi;
18509
18510 if (callarg2 && INTVAL (callarg2) == -2)
18511 function_call_abi = MS_ABI;
18512 else
18513 function_call_abi = SYSV_ABI;
18514 if (pop == const0_rtx)
18515 pop = NULL;
18516 gcc_assert (!TARGET_64BIT || !pop);
18517
18518 if (TARGET_MACHO && !TARGET_64BIT)
18519 {
18520 #if TARGET_MACHO
18521 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
18522 fnaddr = machopic_indirect_call_target (fnaddr);
18523 #endif
18524 }
18525 else
18526 {
18527 /* Static functions and indirect calls don't need the pic register. */
18528 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
18529 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
18530 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
18531 use_reg (&use, pic_offset_table_rtx);
18532 }
18533
18534 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
18535 {
18536 rtx al = gen_rtx_REG (QImode, AX_REG);
18537 emit_move_insn (al, callarg2);
18538 use_reg (&use, al);
18539 }
18540
18541 if (ix86_cmodel == CM_LARGE_PIC
18542 && GET_CODE (fnaddr) == MEM
18543 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
18544 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
18545 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
18546 else if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
18547 {
18548 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
18549 fnaddr = gen_rtx_MEM (QImode, fnaddr);
18550 }
18551 if (sibcall && TARGET_64BIT
18552 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
18553 {
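/* R11 is call-clobbered and is not used for argument passing in either
ABI, so it is a safe register to hold the target address of an indirect
sibcall in 64-bit mode. */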
18554 rtx addr;
18555 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
18556 fnaddr = gen_rtx_REG (Pmode, R11_REG);
18557 emit_move_insn (fnaddr, addr);
18558 fnaddr = gen_rtx_MEM (QImode, fnaddr);
18559 }
18560
18561 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
18562 if (retval)
18563 call = gen_rtx_SET (VOIDmode, retval, call);
18564 if (pop)
18565 {
18566 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
18567 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
18568 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
18569 gcc_assert (ix86_cfun_abi () != MS_ABI || function_call_abi != SYSV_ABI);
18570 }
18571 /* We need to represent that SI and DI registers are clobbered
18572 by SYSV calls. */
18573 if (ix86_cfun_abi () == MS_ABI && function_call_abi == SYSV_ABI)
18574 {
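/* The numeric entries below appear to be, in i386.h's register numbering,
XMM6-XMM15: the SSE registers that the MS ABI treats as callee-saved but
that a SysV callee may clobber, together with SI and DI. */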
18575 static int clobbered_registers[] = {27, 28, 45, 46, 47, 48, 49, 50, 51,
18576 52, SI_REG, DI_REG};
18577 unsigned int i;
18578 rtx vec[ARRAY_SIZE (clobbered_registers) + 2];
18579 rtx unspec = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
18580 UNSPEC_MS_TO_SYSV_CALL);
18581
18582 vec[0] = call;
18583 vec[1] = unspec;
18584 for (i = 0; i < ARRAY_SIZE (clobbered_registers); i++)
18585 vec[i + 2] = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers[i])
18586 ? TImode : DImode,
18587 gen_rtx_REG
18588 (SSE_REGNO_P (clobbered_registers[i])
18589 ? TImode : DImode,
18590 clobbered_registers[i]));
18591
18592 call = gen_rtx_PARALLEL (VOIDmode,
18593 gen_rtvec_v (ARRAY_SIZE (clobbered_registers)
18594 + 2, vec));
18595 }
18596
18597 call = emit_call_insn (call);
18598 if (use)
18599 CALL_INSN_FUNCTION_USAGE (call) = use;
18600 }
18601
18602 \f
18603 /* Clear stack slot assignments remembered from previous functions.
18604 This is called from INIT_EXPANDERS once before RTL is emitted for each
18605 function. */
18606
18607 static struct machine_function *
18608 ix86_init_machine_status (void)
18609 {
18610 struct machine_function *f;
18611
18612 f = GGC_CNEW (struct machine_function);
18613 f->use_fast_prologue_epilogue_nregs = -1;
18614 f->tls_descriptor_call_expanded_p = 0;
18615 f->call_abi = DEFAULT_ABI;
18616
18617 return f;
18618 }
18619
18620 /* Return a MEM corresponding to a stack slot with mode MODE.
18621 Allocate a new slot if necessary.
18622
18623 The RTL for a function can have several slots available: N is
18624 which slot to use. */
18625
18626 rtx
18627 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
18628 {
18629 struct stack_local_entry *s;
18630
18631 gcc_assert (n < MAX_386_STACK_LOCALS);
18632
18633 /* Virtual slot is valid only before vregs are instantiated. */
18634 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
18635
18636 for (s = ix86_stack_locals; s; s = s->next)
18637 if (s->mode == mode && s->n == n)
18638 return copy_rtx (s->rtl);
18639
18640 s = (struct stack_local_entry *)
18641 ggc_alloc (sizeof (struct stack_local_entry));
18642 s->n = n;
18643 s->mode = mode;
18644 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
18645
18646 s->next = ix86_stack_locals;
18647 ix86_stack_locals = s;
18648 return s->rtl;
18649 }
18650
18651 /* Construct the SYMBOL_REF for the tls_get_addr function. */
18652
18653 static GTY(()) rtx ix86_tls_symbol;
18654 rtx
18655 ix86_tls_get_addr (void)
18656 {
18657
18658 if (!ix86_tls_symbol)
18659 {
18660 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
18661 (TARGET_ANY_GNU_TLS
18662 && !TARGET_64BIT)
18663 ? "___tls_get_addr"
18664 : "__tls_get_addr");
18665 }
18666
18667 return ix86_tls_symbol;
18668 }
18669
18670 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
18671
18672 static GTY(()) rtx ix86_tls_module_base_symbol;
18673 rtx
18674 ix86_tls_module_base (void)
18675 {
18676
18677 if (!ix86_tls_module_base_symbol)
18678 {
18679 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
18680 "_TLS_MODULE_BASE_");
18681 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
18682 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
18683 }
18684
18685 return ix86_tls_module_base_symbol;
18686 }
18687 \f
18688 /* Calculate the length of the memory address in the instruction
18689 encoding. Does not include the one-byte modrm, opcode, or prefix. */
18690
18691 int
18692 memory_address_length (rtx addr)
18693 {
18694 struct ix86_address parts;
18695 rtx base, index, disp;
18696 int len;
18697 int ok;
18698
18699 if (GET_CODE (addr) == PRE_DEC
18700 || GET_CODE (addr) == POST_INC
18701 || GET_CODE (addr) == PRE_MODIFY
18702 || GET_CODE (addr) == POST_MODIFY)
18703 return 0;
18704
18705 ok = ix86_decompose_address (addr, &parts);
18706 gcc_assert (ok);
18707
18708 if (parts.base && GET_CODE (parts.base) == SUBREG)
18709 parts.base = SUBREG_REG (parts.base);
18710 if (parts.index && GET_CODE (parts.index) == SUBREG)
18711 parts.index = SUBREG_REG (parts.index);
18712
18713 base = parts.base;
18714 index = parts.index;
18715 disp = parts.disp;
18716 len = 0;
18717
18718 /* Rule of thumb:
18719 - esp as the base always wants an index,
18720 - ebp as the base always wants a displacement. */
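/* LEN below counts only the bytes that follow the mandatory ModRM byte:
+1 for a SIB byte or an 8-bit displacement, +4 for a 32-bit
displacement. */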
18721
18722 /* Register Indirect. */
18723 if (base && !index && !disp)
18724 {
18725 /* esp (for its index) and ebp (for its displacement) need
18726 the two-byte modrm form. */
18727 if (addr == stack_pointer_rtx
18728 || addr == arg_pointer_rtx
18729 || addr == frame_pointer_rtx
18730 || addr == hard_frame_pointer_rtx)
18731 len = 1;
18732 }
18733
18734 /* Direct Addressing. */
18735 else if (disp && !base && !index)
18736 len = 4;
18737
18738 else
18739 {
18740 /* Find the length of the displacement constant. */
18741 if (disp)
18742 {
18743 if (base && satisfies_constraint_K (disp))
18744 len = 1;
18745 else
18746 len = 4;
18747 }
18748 /* ebp always wants a displacement. */
18749 else if (base == hard_frame_pointer_rtx)
18750 len = 1;
18751
18752 /* An index requires the two-byte modrm form.... */
18753 if (index
18754 /* ...like esp, which always wants an index. */
18755 || base == stack_pointer_rtx
18756 || base == arg_pointer_rtx
18757 || base == frame_pointer_rtx)
18758 len += 1;
18759 }
18760
18761 return len;
18762 }
18763
/* Compute the default value for the "length_immediate" attribute. When
SHORTFORM is set, expect that the insn has an 8-bit immediate
alternative. */
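/* For example, "addl $1000, %eax" carries a 4-byte immediate, while with
SHORTFORM an immediate satisfying constraint K (a signed 8-bit constant,
e.g. $100) is encoded in a single byte. */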
18766 int
18767 ix86_attr_length_immediate_default (rtx insn, int shortform)
18768 {
18769 int len = 0;
18770 int i;
18771 extract_insn_cached (insn);
18772 for (i = recog_data.n_operands - 1; i >= 0; --i)
18773 if (CONSTANT_P (recog_data.operand[i]))
18774 {
18775 gcc_assert (!len);
18776 if (shortform && satisfies_constraint_K (recog_data.operand[i]))
18777 len = 1;
18778 else
18779 {
18780 switch (get_attr_mode (insn))
18781 {
18782 case MODE_QI:
18783 len+=1;
18784 break;
18785 case MODE_HI:
18786 len+=2;
18787 break;
18788 case MODE_SI:
18789 len+=4;
18790 break;
18791 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
18792 case MODE_DI:
18793 len+=4;
18794 break;
18795 default:
18796 fatal_insn ("unknown insn mode", insn);
18797 }
18798 }
18799 }
18800 return len;
18801 }
18802 /* Compute default value for "length_address" attribute. */
18803 int
18804 ix86_attr_length_address_default (rtx insn)
18805 {
18806 int i;
18807
18808 if (get_attr_type (insn) == TYPE_LEA)
18809 {
18810 rtx set = PATTERN (insn);
18811
18812 if (GET_CODE (set) == PARALLEL)
18813 set = XVECEXP (set, 0, 0);
18814
18815 gcc_assert (GET_CODE (set) == SET);
18816
18817 return memory_address_length (SET_SRC (set));
18818 }
18819
18820 extract_insn_cached (insn);
18821 for (i = recog_data.n_operands - 1; i >= 0; --i)
if (MEM_P (recog_data.operand[i]))
return memory_address_length (XEXP (recog_data.operand[i], 0));
18827 return 0;
18828 }
18829
18830 /* Compute default value for "length_vex" attribute. It includes
18831 2 or 3 byte VEX prefix and 1 opcode byte. */
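/* The 2-byte form is the C5 prefix, which can only select the 0f opcode
map, cannot set VEX.W, and cannot extend the base or index register;
everything else needs the 3-byte C4 form, hence the 3 + 1 results
below. */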
18832
18833 int
18834 ix86_attr_length_vex_default (rtx insn, int has_0f_opcode,
18835 int has_vex_w)
18836 {
18837 int i;
18838
/* Only the 0f opcode map can use the 2-byte VEX prefix, and the VEX.W bit
requires the 3-byte VEX prefix. */
18841 if (!has_0f_opcode || has_vex_w)
18842 return 3 + 1;
18843
18844 /* We can always use 2 byte VEX prefix in 32bit. */
18845 if (!TARGET_64BIT)
18846 return 2 + 1;
18847
18848 extract_insn_cached (insn);
18849
18850 for (i = recog_data.n_operands - 1; i >= 0; --i)
18851 if (REG_P (recog_data.operand[i]))
18852 {
18853 /* REX.W bit uses 3 byte VEX prefix. */
18854 if (GET_MODE (recog_data.operand[i]) == DImode)
18855 return 3 + 1;
18856 }
18857 else
18858 {
18859 /* REX.X or REX.B bits use 3 byte VEX prefix. */
18860 if (MEM_P (recog_data.operand[i])
18861 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
18862 return 3 + 1;
18863 }
18864
18865 return 2 + 1;
18866 }
18867 \f
18868 /* Return the maximum number of instructions a cpu can issue. */
18869
18870 static int
18871 ix86_issue_rate (void)
18872 {
18873 switch (ix86_tune)
18874 {
18875 case PROCESSOR_PENTIUM:
18876 case PROCESSOR_K6:
18877 return 2;
18878
18879 case PROCESSOR_PENTIUMPRO:
18880 case PROCESSOR_PENTIUM4:
18881 case PROCESSOR_ATHLON:
18882 case PROCESSOR_K8:
18883 case PROCESSOR_AMDFAM10:
18884 case PROCESSOR_NOCONA:
18885 case PROCESSOR_GENERIC32:
18886 case PROCESSOR_GENERIC64:
18887 return 3;
18888
18889 case PROCESSOR_CORE2:
18890 return 4;
18891
18892 default:
18893 return 1;
18894 }
18895 }
18896
/* A subroutine of ix86_adjust_cost -- return true iff INSN reads the flags
set by DEP_INSN and reads nothing else set by DEP_INSN. */
18899
18900 static int
18901 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
18902 {
18903 rtx set, set2;
18904
18905 /* Simplify the test for uninteresting insns. */
18906 if (insn_type != TYPE_SETCC
18907 && insn_type != TYPE_ICMOV
18908 && insn_type != TYPE_FCMOV
18909 && insn_type != TYPE_IBR)
18910 return 0;
18911
18912 if ((set = single_set (dep_insn)) != 0)
18913 {
18914 set = SET_DEST (set);
18915 set2 = NULL_RTX;
18916 }
18917 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
18918 && XVECLEN (PATTERN (dep_insn), 0) == 2
18919 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
18920 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
18921 {
18922 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
18924 }
18925 else
18926 return 0;
18927
18928 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
18929 return 0;
18930
18931 /* This test is true if the dependent insn reads the flags but
18932 not any other potentially set register. */
18933 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
18934 return 0;
18935
18936 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
18937 return 0;
18938
18939 return 1;
18940 }
18941
18942 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
18943 address with operands set by DEP_INSN. */
18944
18945 static int
18946 ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
18947 {
18948 rtx addr;
18949
18950 if (insn_type == TYPE_LEA
18951 && TARGET_PENTIUM)
18952 {
18953 addr = PATTERN (insn);
18954
18955 if (GET_CODE (addr) == PARALLEL)
18956 addr = XVECEXP (addr, 0, 0);
18957
18958 gcc_assert (GET_CODE (addr) == SET);
18959
18960 addr = SET_SRC (addr);
18961 }
18962 else
18963 {
18964 int i;
18965 extract_insn_cached (insn);
18966 for (i = recog_data.n_operands - 1; i >= 0; --i)
18967 if (MEM_P (recog_data.operand[i]))
18968 {
18969 addr = XEXP (recog_data.operand[i], 0);
18970 goto found;
18971 }
18972 return 0;
18973 found:;
18974 }
18975
18976 return modified_in_p (addr, dep_insn);
18977 }
18978
18979 static int
18980 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
18981 {
18982 enum attr_type insn_type, dep_insn_type;
18983 enum attr_memory memory;
18984 rtx set, set2;
18985 int dep_insn_code_number;
18986
18987 /* Anti and output dependencies have zero cost on all CPUs. */
18988 if (REG_NOTE_KIND (link) != 0)
18989 return 0;
18990
18991 dep_insn_code_number = recog_memoized (dep_insn);
18992
18993 /* If we can't recognize the insns, we can't really do anything. */
18994 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
18995 return cost;
18996
18997 insn_type = get_attr_type (insn);
18998 dep_insn_type = get_attr_type (dep_insn);
18999
19000 switch (ix86_tune)
19001 {
19002 case PROCESSOR_PENTIUM:
19003 /* Address Generation Interlock adds a cycle of latency. */
19004 if (ix86_agi_dependent (insn, dep_insn, insn_type))
19005 cost += 1;
19006
19007 /* ??? Compares pair with jump/setcc. */
19008 if (ix86_flags_dependent (insn, dep_insn, insn_type))
19009 cost = 0;
19010
19011 /* Floating point stores require value to be ready one cycle earlier. */
19012 if (insn_type == TYPE_FMOV
19013 && get_attr_memory (insn) == MEMORY_STORE
19014 && !ix86_agi_dependent (insn, dep_insn, insn_type))
19015 cost += 1;
19016 break;
19017
19018 case PROCESSOR_PENTIUMPRO:
19019 memory = get_attr_memory (insn);
19020
19021 /* INT->FP conversion is expensive. */
19022 if (get_attr_fp_int_src (dep_insn))
19023 cost += 5;
19024
19025 /* There is one cycle extra latency between an FP op and a store. */
19026 if (insn_type == TYPE_FMOV
19027 && (set = single_set (dep_insn)) != NULL_RTX
19028 && (set2 = single_set (insn)) != NULL_RTX
19029 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
19030 && MEM_P (SET_DEST (set2)))
19031 cost += 1;
19032
19033 /* Show ability of reorder buffer to hide latency of load by executing
19034 in parallel with previous instruction in case
19035 previous instruction is not needed to compute the address. */
19036 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
19037 && !ix86_agi_dependent (insn, dep_insn, insn_type))
19038 {
/* Claim moves to take one cycle, as the core can issue one load
at a time and the next load can start a cycle later. */
19041 if (dep_insn_type == TYPE_IMOV
19042 || dep_insn_type == TYPE_FMOV)
19043 cost = 1;
19044 else if (cost > 1)
19045 cost--;
19046 }
19047 break;
19048
19049 case PROCESSOR_K6:
19050 memory = get_attr_memory (insn);
19051
19052 /* The esp dependency is resolved before the instruction is really
19053 finished. */
19054 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
19055 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
19056 return 1;
19057
19058 /* INT->FP conversion is expensive. */
19059 if (get_attr_fp_int_src (dep_insn))
19060 cost += 5;
19061
19062 /* Show ability of reorder buffer to hide latency of load by executing
19063 in parallel with previous instruction in case
19064 previous instruction is not needed to compute the address. */
19065 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
19066 && !ix86_agi_dependent (insn, dep_insn, insn_type))
19067 {
/* Claim moves to take one cycle, as the core can issue one load
at a time and the next load can start a cycle later. */
19070 if (dep_insn_type == TYPE_IMOV
19071 || dep_insn_type == TYPE_FMOV)
19072 cost = 1;
19073 else if (cost > 2)
19074 cost -= 2;
19075 else
19076 cost = 1;
19077 }
19078 break;
19079
19080 case PROCESSOR_ATHLON:
19081 case PROCESSOR_K8:
19082 case PROCESSOR_AMDFAM10:
19083 case PROCESSOR_GENERIC32:
19084 case PROCESSOR_GENERIC64:
19085 memory = get_attr_memory (insn);
19086
19087 /* Show ability of reorder buffer to hide latency of load by executing
19088 in parallel with previous instruction in case
19089 previous instruction is not needed to compute the address. */
19090 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
19091 && !ix86_agi_dependent (insn, dep_insn, insn_type))
19092 {
19093 enum attr_unit unit = get_attr_unit (insn);
19094 int loadcost = 3;
19095
/* Because of the difference between the length of the integer and
floating unit pipeline preparation stages, the memory operands
for floating point are cheaper.

??? For Athlon the difference is most probably 2. */
19101 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
19102 loadcost = 3;
19103 else
19104 loadcost = TARGET_ATHLON ? 2 : 0;
19105
19106 if (cost >= loadcost)
19107 cost -= loadcost;
19108 else
19109 cost = 0;
19110 }
19111
19112 default:
19113 break;
19114 }
19115
19116 return cost;
19117 }
19118
19119 /* How many alternative schedules to try. This should be as wide as the
19120 scheduling freedom in the DFA, but no wider. Making this value too
large results in extra work for the scheduler. */
19122
19123 static int
19124 ia32_multipass_dfa_lookahead (void)
19125 {
19126 switch (ix86_tune)
19127 {
19128 case PROCESSOR_PENTIUM:
19129 return 2;
19130
19131 case PROCESSOR_PENTIUMPRO:
19132 case PROCESSOR_K6:
19133 return 1;
19134
19135 default:
19136 return 0;
19137 }
19138 }
19139
19140 \f
19141 /* Compute the alignment given to a constant that is being placed in memory.
19142 EXP is the constant and ALIGN is the alignment that the object would
19143 ordinarily have.
19144 The value of this function is used instead of that alignment to align
19145 the object. */
19146
19147 int
19148 ix86_constant_alignment (tree exp, int align)
19149 {
19150 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
19151 || TREE_CODE (exp) == INTEGER_CST)
19152 {
19153 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
19154 return 64;
19155 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
19156 return 128;
19157 }
19158 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
19159 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
19160 return BITS_PER_WORD;
19161
19162 return align;
19163 }
19164
19165 /* Compute the alignment for a static variable.
19166 TYPE is the data type, and ALIGN is the alignment that
19167 the object would ordinarily have. The value of this function is used
19168 instead of that alignment to align the object. */
19169
19170 int
19171 ix86_data_alignment (tree type, int align)
19172 {
19173 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
19174
19175 if (AGGREGATE_TYPE_P (type)
19176 && TYPE_SIZE (type)
19177 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
19178 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
19179 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
19180 && align < max_align)
19181 align = max_align;
19182
/* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
to a 16-byte boundary. */
19185 if (TARGET_64BIT)
19186 {
19187 if (AGGREGATE_TYPE_P (type)
19188 && TYPE_SIZE (type)
19189 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
19190 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
19191 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
19192 return 128;
19193 }
19194
19195 if (TREE_CODE (type) == ARRAY_TYPE)
19196 {
19197 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
19198 return 64;
19199 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
19200 return 128;
19201 }
19202 else if (TREE_CODE (type) == COMPLEX_TYPE)
19203 {
19204
19205 if (TYPE_MODE (type) == DCmode && align < 64)
19206 return 64;
19207 if ((TYPE_MODE (type) == XCmode
19208 || TYPE_MODE (type) == TCmode) && align < 128)
19209 return 128;
19210 }
19211 else if ((TREE_CODE (type) == RECORD_TYPE
19212 || TREE_CODE (type) == UNION_TYPE
19213 || TREE_CODE (type) == QUAL_UNION_TYPE)
19214 && TYPE_FIELDS (type))
19215 {
19216 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
19217 return 64;
19218 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
19219 return 128;
19220 }
19221 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
19222 || TREE_CODE (type) == INTEGER_TYPE)
19223 {
19224 if (TYPE_MODE (type) == DFmode && align < 64)
19225 return 64;
19226 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
19227 return 128;
19228 }
19229
19230 return align;
19231 }
19232
19233 /* Compute the alignment for a local variable or a stack slot. TYPE is
19234 the data type, MODE is the widest mode available and ALIGN is the
19235 alignment that the object would ordinarily have. The value of this
19236 macro is used instead of that alignment to align the object. */
19237
19238 unsigned int
19239 ix86_local_alignment (tree type, enum machine_mode mode,
19240 unsigned int align)
19241 {
19242 /* If TYPE is NULL, we are allocating a stack slot for caller-save
19243 register in MODE. We will return the largest alignment of XF
19244 and DF. */
19245 if (!type)
19246 {
19247 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
19248 align = GET_MODE_ALIGNMENT (DFmode);
19249 return align;
19250 }
19251
/* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
to a 16-byte boundary. */
19254 if (TARGET_64BIT)
19255 {
19256 if (AGGREGATE_TYPE_P (type)
19257 && TYPE_SIZE (type)
19258 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
19259 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
19260 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
19261 return 128;
19262 }
19263 if (TREE_CODE (type) == ARRAY_TYPE)
19264 {
19265 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
19266 return 64;
19267 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
19268 return 128;
19269 }
19270 else if (TREE_CODE (type) == COMPLEX_TYPE)
19271 {
19272 if (TYPE_MODE (type) == DCmode && align < 64)
19273 return 64;
19274 if ((TYPE_MODE (type) == XCmode
19275 || TYPE_MODE (type) == TCmode) && align < 128)
19276 return 128;
19277 }
19278 else if ((TREE_CODE (type) == RECORD_TYPE
19279 || TREE_CODE (type) == UNION_TYPE
19280 || TREE_CODE (type) == QUAL_UNION_TYPE)
19281 && TYPE_FIELDS (type))
19282 {
19283 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
19284 return 64;
19285 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
19286 return 128;
19287 }
19288 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
19289 || TREE_CODE (type) == INTEGER_TYPE)
19290 {
19291
19292 if (TYPE_MODE (type) == DFmode && align < 64)
19293 return 64;
19294 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
19295 return 128;
19296 }
19297 return align;
19298 }
19299 \f
19300 /* Emit RTL insns to initialize the variable parts of a trampoline.
19301 FNADDR is an RTX for the address of the function's pure code.
19302 CXT is an RTX for the static chain value for the function. */
19303 void
19304 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
19305 {
19306 if (!TARGET_64BIT)
19307 {
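/* The 32-bit trampoline is 10 bytes: "movl $CXT, %ecx" (opcode 0xb9
followed by a 32-bit immediate) and "jmp FNADDR" (opcode 0xe9 followed
by a 32-bit displacement relative to the end of the jump). */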
19308 /* Compute offset from the end of the jmp to the target function. */
19309 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
19310 plus_constant (tramp, 10),
19311 NULL_RTX, 1, OPTAB_DIRECT);
19312 emit_move_insn (gen_rtx_MEM (QImode, tramp),
19313 gen_int_mode (0xb9, QImode));
19314 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
19315 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
19316 gen_int_mode (0xe9, QImode));
19317 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
19318 }
19319 else
19320 {
19321 int offset = 0;
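/* The HImode constants below are stored little-endian: 0xbb41 emits the
bytes 41 bb, a movl of a 32-bit immediate into %r11d; 0xbb49 and 0xba49
emit movabs of a 64-bit immediate into %r11 and %r10; and 0xff49
followed by 0xe3 emits "jmp *%r11". */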
/* Try to load the address using the shorter movl instead of movabs.
We may want to support movq for kernel mode, but the kernel does not
use trampolines at the moment. */
19325 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
19326 {
19327 fnaddr = copy_to_mode_reg (DImode, fnaddr);
19328 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
19329 gen_int_mode (0xbb41, HImode));
19330 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
19331 gen_lowpart (SImode, fnaddr));
19332 offset += 6;
19333 }
19334 else
19335 {
19336 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
19337 gen_int_mode (0xbb49, HImode));
19338 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
19339 fnaddr);
19340 offset += 10;
19341 }
19342 /* Load static chain using movabs to r10. */
19343 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
19344 gen_int_mode (0xba49, HImode));
19345 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
19346 cxt);
19347 offset += 10;
/* Jump through %r11. */
19349 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
19350 gen_int_mode (0xff49, HImode));
19351 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
19352 gen_int_mode (0xe3, QImode));
19353 offset += 3;
19354 gcc_assert (offset <= TRAMPOLINE_SIZE);
19355 }
19356
19357 #ifdef ENABLE_EXECUTE_STACK
19358 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
19359 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
19360 #endif
19361 }
19362 \f
19363 /* Codes for all the SSE/MMX builtins. */
19364 enum ix86_builtins
19365 {
19366 IX86_BUILTIN_ADDPS,
19367 IX86_BUILTIN_ADDSS,
19368 IX86_BUILTIN_DIVPS,
19369 IX86_BUILTIN_DIVSS,
19370 IX86_BUILTIN_MULPS,
19371 IX86_BUILTIN_MULSS,
19372 IX86_BUILTIN_SUBPS,
19373 IX86_BUILTIN_SUBSS,
19374
19375 IX86_BUILTIN_CMPEQPS,
19376 IX86_BUILTIN_CMPLTPS,
19377 IX86_BUILTIN_CMPLEPS,
19378 IX86_BUILTIN_CMPGTPS,
19379 IX86_BUILTIN_CMPGEPS,
19380 IX86_BUILTIN_CMPNEQPS,
19381 IX86_BUILTIN_CMPNLTPS,
19382 IX86_BUILTIN_CMPNLEPS,
19383 IX86_BUILTIN_CMPNGTPS,
19384 IX86_BUILTIN_CMPNGEPS,
19385 IX86_BUILTIN_CMPORDPS,
19386 IX86_BUILTIN_CMPUNORDPS,
19387 IX86_BUILTIN_CMPEQSS,
19388 IX86_BUILTIN_CMPLTSS,
19389 IX86_BUILTIN_CMPLESS,
19390 IX86_BUILTIN_CMPNEQSS,
19391 IX86_BUILTIN_CMPNLTSS,
19392 IX86_BUILTIN_CMPNLESS,
19393 IX86_BUILTIN_CMPNGTSS,
19394 IX86_BUILTIN_CMPNGESS,
19395 IX86_BUILTIN_CMPORDSS,
19396 IX86_BUILTIN_CMPUNORDSS,
19397
19398 IX86_BUILTIN_COMIEQSS,
19399 IX86_BUILTIN_COMILTSS,
19400 IX86_BUILTIN_COMILESS,
19401 IX86_BUILTIN_COMIGTSS,
19402 IX86_BUILTIN_COMIGESS,
19403 IX86_BUILTIN_COMINEQSS,
19404 IX86_BUILTIN_UCOMIEQSS,
19405 IX86_BUILTIN_UCOMILTSS,
19406 IX86_BUILTIN_UCOMILESS,
19407 IX86_BUILTIN_UCOMIGTSS,
19408 IX86_BUILTIN_UCOMIGESS,
19409 IX86_BUILTIN_UCOMINEQSS,
19410
19411 IX86_BUILTIN_CVTPI2PS,
19412 IX86_BUILTIN_CVTPS2PI,
19413 IX86_BUILTIN_CVTSI2SS,
19414 IX86_BUILTIN_CVTSI642SS,
19415 IX86_BUILTIN_CVTSS2SI,
19416 IX86_BUILTIN_CVTSS2SI64,
19417 IX86_BUILTIN_CVTTPS2PI,
19418 IX86_BUILTIN_CVTTSS2SI,
19419 IX86_BUILTIN_CVTTSS2SI64,
19420
19421 IX86_BUILTIN_MAXPS,
19422 IX86_BUILTIN_MAXSS,
19423 IX86_BUILTIN_MINPS,
19424 IX86_BUILTIN_MINSS,
19425
19426 IX86_BUILTIN_LOADUPS,
19427 IX86_BUILTIN_STOREUPS,
19428 IX86_BUILTIN_MOVSS,
19429
19430 IX86_BUILTIN_MOVHLPS,
19431 IX86_BUILTIN_MOVLHPS,
19432 IX86_BUILTIN_LOADHPS,
19433 IX86_BUILTIN_LOADLPS,
19434 IX86_BUILTIN_STOREHPS,
19435 IX86_BUILTIN_STORELPS,
19436
19437 IX86_BUILTIN_MASKMOVQ,
19438 IX86_BUILTIN_MOVMSKPS,
19439 IX86_BUILTIN_PMOVMSKB,
19440
19441 IX86_BUILTIN_MOVNTPS,
19442 IX86_BUILTIN_MOVNTQ,
19443
19444 IX86_BUILTIN_LOADDQU,
19445 IX86_BUILTIN_STOREDQU,
19446
19447 IX86_BUILTIN_PACKSSWB,
19448 IX86_BUILTIN_PACKSSDW,
19449 IX86_BUILTIN_PACKUSWB,
19450
19451 IX86_BUILTIN_PADDB,
19452 IX86_BUILTIN_PADDW,
19453 IX86_BUILTIN_PADDD,
19454 IX86_BUILTIN_PADDQ,
19455 IX86_BUILTIN_PADDSB,
19456 IX86_BUILTIN_PADDSW,
19457 IX86_BUILTIN_PADDUSB,
19458 IX86_BUILTIN_PADDUSW,
19459 IX86_BUILTIN_PSUBB,
19460 IX86_BUILTIN_PSUBW,
19461 IX86_BUILTIN_PSUBD,
19462 IX86_BUILTIN_PSUBQ,
19463 IX86_BUILTIN_PSUBSB,
19464 IX86_BUILTIN_PSUBSW,
19465 IX86_BUILTIN_PSUBUSB,
19466 IX86_BUILTIN_PSUBUSW,
19467
19468 IX86_BUILTIN_PAND,
19469 IX86_BUILTIN_PANDN,
19470 IX86_BUILTIN_POR,
19471 IX86_BUILTIN_PXOR,
19472
19473 IX86_BUILTIN_PAVGB,
19474 IX86_BUILTIN_PAVGW,
19475
19476 IX86_BUILTIN_PCMPEQB,
19477 IX86_BUILTIN_PCMPEQW,
19478 IX86_BUILTIN_PCMPEQD,
19479 IX86_BUILTIN_PCMPGTB,
19480 IX86_BUILTIN_PCMPGTW,
19481 IX86_BUILTIN_PCMPGTD,
19482
19483 IX86_BUILTIN_PMADDWD,
19484
19485 IX86_BUILTIN_PMAXSW,
19486 IX86_BUILTIN_PMAXUB,
19487 IX86_BUILTIN_PMINSW,
19488 IX86_BUILTIN_PMINUB,
19489
19490 IX86_BUILTIN_PMULHUW,
19491 IX86_BUILTIN_PMULHW,
19492 IX86_BUILTIN_PMULLW,
19493
19494 IX86_BUILTIN_PSADBW,
19495 IX86_BUILTIN_PSHUFW,
19496
19497 IX86_BUILTIN_PSLLW,
19498 IX86_BUILTIN_PSLLD,
19499 IX86_BUILTIN_PSLLQ,
19500 IX86_BUILTIN_PSRAW,
19501 IX86_BUILTIN_PSRAD,
19502 IX86_BUILTIN_PSRLW,
19503 IX86_BUILTIN_PSRLD,
19504 IX86_BUILTIN_PSRLQ,
19505 IX86_BUILTIN_PSLLWI,
19506 IX86_BUILTIN_PSLLDI,
19507 IX86_BUILTIN_PSLLQI,
19508 IX86_BUILTIN_PSRAWI,
19509 IX86_BUILTIN_PSRADI,
19510 IX86_BUILTIN_PSRLWI,
19511 IX86_BUILTIN_PSRLDI,
19512 IX86_BUILTIN_PSRLQI,
19513
19514 IX86_BUILTIN_PUNPCKHBW,
19515 IX86_BUILTIN_PUNPCKHWD,
19516 IX86_BUILTIN_PUNPCKHDQ,
19517 IX86_BUILTIN_PUNPCKLBW,
19518 IX86_BUILTIN_PUNPCKLWD,
19519 IX86_BUILTIN_PUNPCKLDQ,
19520
19521 IX86_BUILTIN_SHUFPS,
19522
19523 IX86_BUILTIN_RCPPS,
19524 IX86_BUILTIN_RCPSS,
19525 IX86_BUILTIN_RSQRTPS,
19526 IX86_BUILTIN_RSQRTPS_NR,
19527 IX86_BUILTIN_RSQRTSS,
19528 IX86_BUILTIN_RSQRTF,
19529 IX86_BUILTIN_SQRTPS,
19530 IX86_BUILTIN_SQRTPS_NR,
19531 IX86_BUILTIN_SQRTSS,
19532
19533 IX86_BUILTIN_UNPCKHPS,
19534 IX86_BUILTIN_UNPCKLPS,
19535
19536 IX86_BUILTIN_ANDPS,
19537 IX86_BUILTIN_ANDNPS,
19538 IX86_BUILTIN_ORPS,
19539 IX86_BUILTIN_XORPS,
19540
19541 IX86_BUILTIN_EMMS,
19542 IX86_BUILTIN_LDMXCSR,
19543 IX86_BUILTIN_STMXCSR,
19544 IX86_BUILTIN_SFENCE,
19545
19546 /* 3DNow! Original */
19547 IX86_BUILTIN_FEMMS,
19548 IX86_BUILTIN_PAVGUSB,
19549 IX86_BUILTIN_PF2ID,
19550 IX86_BUILTIN_PFACC,
19551 IX86_BUILTIN_PFADD,
19552 IX86_BUILTIN_PFCMPEQ,
19553 IX86_BUILTIN_PFCMPGE,
19554 IX86_BUILTIN_PFCMPGT,
19555 IX86_BUILTIN_PFMAX,
19556 IX86_BUILTIN_PFMIN,
19557 IX86_BUILTIN_PFMUL,
19558 IX86_BUILTIN_PFRCP,
19559 IX86_BUILTIN_PFRCPIT1,
19560 IX86_BUILTIN_PFRCPIT2,
19561 IX86_BUILTIN_PFRSQIT1,
19562 IX86_BUILTIN_PFRSQRT,
19563 IX86_BUILTIN_PFSUB,
19564 IX86_BUILTIN_PFSUBR,
19565 IX86_BUILTIN_PI2FD,
19566 IX86_BUILTIN_PMULHRW,
19567
19568 /* 3DNow! Athlon Extensions */
19569 IX86_BUILTIN_PF2IW,
19570 IX86_BUILTIN_PFNACC,
19571 IX86_BUILTIN_PFPNACC,
19572 IX86_BUILTIN_PI2FW,
19573 IX86_BUILTIN_PSWAPDSI,
19574 IX86_BUILTIN_PSWAPDSF,
19575
19576 /* SSE2 */
19577 IX86_BUILTIN_ADDPD,
19578 IX86_BUILTIN_ADDSD,
19579 IX86_BUILTIN_DIVPD,
19580 IX86_BUILTIN_DIVSD,
19581 IX86_BUILTIN_MULPD,
19582 IX86_BUILTIN_MULSD,
19583 IX86_BUILTIN_SUBPD,
19584 IX86_BUILTIN_SUBSD,
19585
19586 IX86_BUILTIN_CMPEQPD,
19587 IX86_BUILTIN_CMPLTPD,
19588 IX86_BUILTIN_CMPLEPD,
19589 IX86_BUILTIN_CMPGTPD,
19590 IX86_BUILTIN_CMPGEPD,
19591 IX86_BUILTIN_CMPNEQPD,
19592 IX86_BUILTIN_CMPNLTPD,
19593 IX86_BUILTIN_CMPNLEPD,
19594 IX86_BUILTIN_CMPNGTPD,
19595 IX86_BUILTIN_CMPNGEPD,
19596 IX86_BUILTIN_CMPORDPD,
19597 IX86_BUILTIN_CMPUNORDPD,
19598 IX86_BUILTIN_CMPEQSD,
19599 IX86_BUILTIN_CMPLTSD,
19600 IX86_BUILTIN_CMPLESD,
19601 IX86_BUILTIN_CMPNEQSD,
19602 IX86_BUILTIN_CMPNLTSD,
19603 IX86_BUILTIN_CMPNLESD,
19604 IX86_BUILTIN_CMPORDSD,
19605 IX86_BUILTIN_CMPUNORDSD,
19606
19607 IX86_BUILTIN_COMIEQSD,
19608 IX86_BUILTIN_COMILTSD,
19609 IX86_BUILTIN_COMILESD,
19610 IX86_BUILTIN_COMIGTSD,
19611 IX86_BUILTIN_COMIGESD,
19612 IX86_BUILTIN_COMINEQSD,
19613 IX86_BUILTIN_UCOMIEQSD,
19614 IX86_BUILTIN_UCOMILTSD,
19615 IX86_BUILTIN_UCOMILESD,
19616 IX86_BUILTIN_UCOMIGTSD,
19617 IX86_BUILTIN_UCOMIGESD,
19618 IX86_BUILTIN_UCOMINEQSD,
19619
19620 IX86_BUILTIN_MAXPD,
19621 IX86_BUILTIN_MAXSD,
19622 IX86_BUILTIN_MINPD,
19623 IX86_BUILTIN_MINSD,
19624
19625 IX86_BUILTIN_ANDPD,
19626 IX86_BUILTIN_ANDNPD,
19627 IX86_BUILTIN_ORPD,
19628 IX86_BUILTIN_XORPD,
19629
19630 IX86_BUILTIN_SQRTPD,
19631 IX86_BUILTIN_SQRTSD,
19632
19633 IX86_BUILTIN_UNPCKHPD,
19634 IX86_BUILTIN_UNPCKLPD,
19635
19636 IX86_BUILTIN_SHUFPD,
19637
19638 IX86_BUILTIN_LOADUPD,
19639 IX86_BUILTIN_STOREUPD,
19640 IX86_BUILTIN_MOVSD,
19641
19642 IX86_BUILTIN_LOADHPD,
19643 IX86_BUILTIN_LOADLPD,
19644
19645 IX86_BUILTIN_CVTDQ2PD,
19646 IX86_BUILTIN_CVTDQ2PS,
19647
19648 IX86_BUILTIN_CVTPD2DQ,
19649 IX86_BUILTIN_CVTPD2PI,
19650 IX86_BUILTIN_CVTPD2PS,
19651 IX86_BUILTIN_CVTTPD2DQ,
19652 IX86_BUILTIN_CVTTPD2PI,
19653
19654 IX86_BUILTIN_CVTPI2PD,
19655 IX86_BUILTIN_CVTSI2SD,
19656 IX86_BUILTIN_CVTSI642SD,
19657
19658 IX86_BUILTIN_CVTSD2SI,
19659 IX86_BUILTIN_CVTSD2SI64,
19660 IX86_BUILTIN_CVTSD2SS,
19661 IX86_BUILTIN_CVTSS2SD,
19662 IX86_BUILTIN_CVTTSD2SI,
19663 IX86_BUILTIN_CVTTSD2SI64,
19664
19665 IX86_BUILTIN_CVTPS2DQ,
19666 IX86_BUILTIN_CVTPS2PD,
19667 IX86_BUILTIN_CVTTPS2DQ,
19668
19669 IX86_BUILTIN_MOVNTI,
19670 IX86_BUILTIN_MOVNTPD,
19671 IX86_BUILTIN_MOVNTDQ,
19672
19673 IX86_BUILTIN_MOVQ128,
19674
19675 /* SSE2 MMX */
19676 IX86_BUILTIN_MASKMOVDQU,
19677 IX86_BUILTIN_MOVMSKPD,
19678 IX86_BUILTIN_PMOVMSKB128,
19679
19680 IX86_BUILTIN_PACKSSWB128,
19681 IX86_BUILTIN_PACKSSDW128,
19682 IX86_BUILTIN_PACKUSWB128,
19683
19684 IX86_BUILTIN_PADDB128,
19685 IX86_BUILTIN_PADDW128,
19686 IX86_BUILTIN_PADDD128,
19687 IX86_BUILTIN_PADDQ128,
19688 IX86_BUILTIN_PADDSB128,
19689 IX86_BUILTIN_PADDSW128,
19690 IX86_BUILTIN_PADDUSB128,
19691 IX86_BUILTIN_PADDUSW128,
19692 IX86_BUILTIN_PSUBB128,
19693 IX86_BUILTIN_PSUBW128,
19694 IX86_BUILTIN_PSUBD128,
19695 IX86_BUILTIN_PSUBQ128,
19696 IX86_BUILTIN_PSUBSB128,
19697 IX86_BUILTIN_PSUBSW128,
19698 IX86_BUILTIN_PSUBUSB128,
19699 IX86_BUILTIN_PSUBUSW128,
19700
19701 IX86_BUILTIN_PAND128,
19702 IX86_BUILTIN_PANDN128,
19703 IX86_BUILTIN_POR128,
19704 IX86_BUILTIN_PXOR128,
19705
19706 IX86_BUILTIN_PAVGB128,
19707 IX86_BUILTIN_PAVGW128,
19708
19709 IX86_BUILTIN_PCMPEQB128,
19710 IX86_BUILTIN_PCMPEQW128,
19711 IX86_BUILTIN_PCMPEQD128,
19712 IX86_BUILTIN_PCMPGTB128,
19713 IX86_BUILTIN_PCMPGTW128,
19714 IX86_BUILTIN_PCMPGTD128,
19715
19716 IX86_BUILTIN_PMADDWD128,
19717
19718 IX86_BUILTIN_PMAXSW128,
19719 IX86_BUILTIN_PMAXUB128,
19720 IX86_BUILTIN_PMINSW128,
19721 IX86_BUILTIN_PMINUB128,
19722
19723 IX86_BUILTIN_PMULUDQ,
19724 IX86_BUILTIN_PMULUDQ128,
19725 IX86_BUILTIN_PMULHUW128,
19726 IX86_BUILTIN_PMULHW128,
19727 IX86_BUILTIN_PMULLW128,
19728
19729 IX86_BUILTIN_PSADBW128,
19730 IX86_BUILTIN_PSHUFHW,
19731 IX86_BUILTIN_PSHUFLW,
19732 IX86_BUILTIN_PSHUFD,
19733
19734 IX86_BUILTIN_PSLLDQI128,
19735 IX86_BUILTIN_PSLLWI128,
19736 IX86_BUILTIN_PSLLDI128,
19737 IX86_BUILTIN_PSLLQI128,
19738 IX86_BUILTIN_PSRAWI128,
19739 IX86_BUILTIN_PSRADI128,
19740 IX86_BUILTIN_PSRLDQI128,
19741 IX86_BUILTIN_PSRLWI128,
19742 IX86_BUILTIN_PSRLDI128,
19743 IX86_BUILTIN_PSRLQI128,
19744
19745 IX86_BUILTIN_PSLLDQ128,
19746 IX86_BUILTIN_PSLLW128,
19747 IX86_BUILTIN_PSLLD128,
19748 IX86_BUILTIN_PSLLQ128,
19749 IX86_BUILTIN_PSRAW128,
19750 IX86_BUILTIN_PSRAD128,
19751 IX86_BUILTIN_PSRLW128,
19752 IX86_BUILTIN_PSRLD128,
19753 IX86_BUILTIN_PSRLQ128,
19754
19755 IX86_BUILTIN_PUNPCKHBW128,
19756 IX86_BUILTIN_PUNPCKHWD128,
19757 IX86_BUILTIN_PUNPCKHDQ128,
19758 IX86_BUILTIN_PUNPCKHQDQ128,
19759 IX86_BUILTIN_PUNPCKLBW128,
19760 IX86_BUILTIN_PUNPCKLWD128,
19761 IX86_BUILTIN_PUNPCKLDQ128,
19762 IX86_BUILTIN_PUNPCKLQDQ128,
19763
19764 IX86_BUILTIN_CLFLUSH,
19765 IX86_BUILTIN_MFENCE,
19766 IX86_BUILTIN_LFENCE,
19767
19768 /* SSE3. */
19769 IX86_BUILTIN_ADDSUBPS,
19770 IX86_BUILTIN_HADDPS,
19771 IX86_BUILTIN_HSUBPS,
19772 IX86_BUILTIN_MOVSHDUP,
19773 IX86_BUILTIN_MOVSLDUP,
19774 IX86_BUILTIN_ADDSUBPD,
19775 IX86_BUILTIN_HADDPD,
19776 IX86_BUILTIN_HSUBPD,
19777 IX86_BUILTIN_LDDQU,
19778
19779 IX86_BUILTIN_MONITOR,
19780 IX86_BUILTIN_MWAIT,
19781
19782 /* SSSE3. */
19783 IX86_BUILTIN_PHADDW,
19784 IX86_BUILTIN_PHADDD,
19785 IX86_BUILTIN_PHADDSW,
19786 IX86_BUILTIN_PHSUBW,
19787 IX86_BUILTIN_PHSUBD,
19788 IX86_BUILTIN_PHSUBSW,
19789 IX86_BUILTIN_PMADDUBSW,
19790 IX86_BUILTIN_PMULHRSW,
19791 IX86_BUILTIN_PSHUFB,
19792 IX86_BUILTIN_PSIGNB,
19793 IX86_BUILTIN_PSIGNW,
19794 IX86_BUILTIN_PSIGND,
19795 IX86_BUILTIN_PALIGNR,
19796 IX86_BUILTIN_PABSB,
19797 IX86_BUILTIN_PABSW,
19798 IX86_BUILTIN_PABSD,
19799
19800 IX86_BUILTIN_PHADDW128,
19801 IX86_BUILTIN_PHADDD128,
19802 IX86_BUILTIN_PHADDSW128,
19803 IX86_BUILTIN_PHSUBW128,
19804 IX86_BUILTIN_PHSUBD128,
19805 IX86_BUILTIN_PHSUBSW128,
19806 IX86_BUILTIN_PMADDUBSW128,
19807 IX86_BUILTIN_PMULHRSW128,
19808 IX86_BUILTIN_PSHUFB128,
19809 IX86_BUILTIN_PSIGNB128,
19810 IX86_BUILTIN_PSIGNW128,
19811 IX86_BUILTIN_PSIGND128,
19812 IX86_BUILTIN_PALIGNR128,
19813 IX86_BUILTIN_PABSB128,
19814 IX86_BUILTIN_PABSW128,
19815 IX86_BUILTIN_PABSD128,
19816
19817 /* AMDFAM10 - SSE4A New Instructions. */
19818 IX86_BUILTIN_MOVNTSD,
19819 IX86_BUILTIN_MOVNTSS,
19820 IX86_BUILTIN_EXTRQI,
19821 IX86_BUILTIN_EXTRQ,
19822 IX86_BUILTIN_INSERTQI,
19823 IX86_BUILTIN_INSERTQ,
19824
19825 /* SSE4.1. */
19826 IX86_BUILTIN_BLENDPD,
19827 IX86_BUILTIN_BLENDPS,
19828 IX86_BUILTIN_BLENDVPD,
19829 IX86_BUILTIN_BLENDVPS,
19830 IX86_BUILTIN_PBLENDVB128,
19831 IX86_BUILTIN_PBLENDW128,
19832
19833 IX86_BUILTIN_DPPD,
19834 IX86_BUILTIN_DPPS,
19835
19836 IX86_BUILTIN_INSERTPS128,
19837
19838 IX86_BUILTIN_MOVNTDQA,
19839 IX86_BUILTIN_MPSADBW128,
19840 IX86_BUILTIN_PACKUSDW128,
19841 IX86_BUILTIN_PCMPEQQ,
19842 IX86_BUILTIN_PHMINPOSUW128,
19843
19844 IX86_BUILTIN_PMAXSB128,
19845 IX86_BUILTIN_PMAXSD128,
19846 IX86_BUILTIN_PMAXUD128,
19847 IX86_BUILTIN_PMAXUW128,
19848
19849 IX86_BUILTIN_PMINSB128,
19850 IX86_BUILTIN_PMINSD128,
19851 IX86_BUILTIN_PMINUD128,
19852 IX86_BUILTIN_PMINUW128,
19853
19854 IX86_BUILTIN_PMOVSXBW128,
19855 IX86_BUILTIN_PMOVSXBD128,
19856 IX86_BUILTIN_PMOVSXBQ128,
19857 IX86_BUILTIN_PMOVSXWD128,
19858 IX86_BUILTIN_PMOVSXWQ128,
19859 IX86_BUILTIN_PMOVSXDQ128,
19860
19861 IX86_BUILTIN_PMOVZXBW128,
19862 IX86_BUILTIN_PMOVZXBD128,
19863 IX86_BUILTIN_PMOVZXBQ128,
19864 IX86_BUILTIN_PMOVZXWD128,
19865 IX86_BUILTIN_PMOVZXWQ128,
19866 IX86_BUILTIN_PMOVZXDQ128,
19867
19868 IX86_BUILTIN_PMULDQ128,
19869 IX86_BUILTIN_PMULLD128,
19870
19871 IX86_BUILTIN_ROUNDPD,
19872 IX86_BUILTIN_ROUNDPS,
19873 IX86_BUILTIN_ROUNDSD,
19874 IX86_BUILTIN_ROUNDSS,
19875
19876 IX86_BUILTIN_PTESTZ,
19877 IX86_BUILTIN_PTESTC,
19878 IX86_BUILTIN_PTESTNZC,
19879
19880 IX86_BUILTIN_VEC_INIT_V2SI,
19881 IX86_BUILTIN_VEC_INIT_V4HI,
19882 IX86_BUILTIN_VEC_INIT_V8QI,
19883 IX86_BUILTIN_VEC_EXT_V2DF,
19884 IX86_BUILTIN_VEC_EXT_V2DI,
19885 IX86_BUILTIN_VEC_EXT_V4SF,
19886 IX86_BUILTIN_VEC_EXT_V4SI,
19887 IX86_BUILTIN_VEC_EXT_V8HI,
19888 IX86_BUILTIN_VEC_EXT_V2SI,
19889 IX86_BUILTIN_VEC_EXT_V4HI,
19890 IX86_BUILTIN_VEC_EXT_V16QI,
19891 IX86_BUILTIN_VEC_SET_V2DI,
19892 IX86_BUILTIN_VEC_SET_V4SF,
19893 IX86_BUILTIN_VEC_SET_V4SI,
19894 IX86_BUILTIN_VEC_SET_V8HI,
19895 IX86_BUILTIN_VEC_SET_V4HI,
19896 IX86_BUILTIN_VEC_SET_V16QI,
19897
19898 IX86_BUILTIN_VEC_PACK_SFIX,
19899
19900 /* SSE4.2. */
19901 IX86_BUILTIN_CRC32QI,
19902 IX86_BUILTIN_CRC32HI,
19903 IX86_BUILTIN_CRC32SI,
19904 IX86_BUILTIN_CRC32DI,
19905
19906 IX86_BUILTIN_PCMPESTRI128,
19907 IX86_BUILTIN_PCMPESTRM128,
19908 IX86_BUILTIN_PCMPESTRA128,
19909 IX86_BUILTIN_PCMPESTRC128,
19910 IX86_BUILTIN_PCMPESTRO128,
19911 IX86_BUILTIN_PCMPESTRS128,
19912 IX86_BUILTIN_PCMPESTRZ128,
19913 IX86_BUILTIN_PCMPISTRI128,
19914 IX86_BUILTIN_PCMPISTRM128,
19915 IX86_BUILTIN_PCMPISTRA128,
19916 IX86_BUILTIN_PCMPISTRC128,
19917 IX86_BUILTIN_PCMPISTRO128,
19918 IX86_BUILTIN_PCMPISTRS128,
19919 IX86_BUILTIN_PCMPISTRZ128,
19920
19921 IX86_BUILTIN_PCMPGTQ,
19922
19923 /* AES instructions */
19924 IX86_BUILTIN_AESENC128,
19925 IX86_BUILTIN_AESENCLAST128,
19926 IX86_BUILTIN_AESDEC128,
19927 IX86_BUILTIN_AESDECLAST128,
19928 IX86_BUILTIN_AESIMC128,
19929 IX86_BUILTIN_AESKEYGENASSIST128,
19930
19931 /* PCLMUL instruction */
19932 IX86_BUILTIN_PCLMULQDQ128,
19933
19934 /* AVX */
19935 IX86_BUILTIN_ADDPD256,
19936 IX86_BUILTIN_ADDPS256,
19937 IX86_BUILTIN_ADDSUBPD256,
19938 IX86_BUILTIN_ADDSUBPS256,
19939 IX86_BUILTIN_ANDPD256,
19940 IX86_BUILTIN_ANDPS256,
19941 IX86_BUILTIN_ANDNPD256,
19942 IX86_BUILTIN_ANDNPS256,
19943 IX86_BUILTIN_BLENDPD256,
19944 IX86_BUILTIN_BLENDPS256,
19945 IX86_BUILTIN_BLENDVPD256,
19946 IX86_BUILTIN_BLENDVPS256,
19947 IX86_BUILTIN_DIVPD256,
19948 IX86_BUILTIN_DIVPS256,
19949 IX86_BUILTIN_DPPS256,
19950 IX86_BUILTIN_HADDPD256,
19951 IX86_BUILTIN_HADDPS256,
19952 IX86_BUILTIN_HSUBPD256,
19953 IX86_BUILTIN_HSUBPS256,
19954 IX86_BUILTIN_MAXPD256,
19955 IX86_BUILTIN_MAXPS256,
19956 IX86_BUILTIN_MINPD256,
19957 IX86_BUILTIN_MINPS256,
19958 IX86_BUILTIN_MULPD256,
19959 IX86_BUILTIN_MULPS256,
19960 IX86_BUILTIN_ORPD256,
19961 IX86_BUILTIN_ORPS256,
19962 IX86_BUILTIN_SHUFPD256,
19963 IX86_BUILTIN_SHUFPS256,
19964 IX86_BUILTIN_SUBPD256,
19965 IX86_BUILTIN_SUBPS256,
19966 IX86_BUILTIN_XORPD256,
19967 IX86_BUILTIN_XORPS256,
19968 IX86_BUILTIN_CMPSD,
19969 IX86_BUILTIN_CMPSS,
19970 IX86_BUILTIN_CMPPD,
19971 IX86_BUILTIN_CMPPS,
19972 IX86_BUILTIN_CMPPD256,
19973 IX86_BUILTIN_CMPPS256,
19974 IX86_BUILTIN_CVTDQ2PD256,
19975 IX86_BUILTIN_CVTDQ2PS256,
19976 IX86_BUILTIN_CVTPD2PS256,
19977 IX86_BUILTIN_CVTPS2DQ256,
19978 IX86_BUILTIN_CVTPS2PD256,
19979 IX86_BUILTIN_CVTTPD2DQ256,
19980 IX86_BUILTIN_CVTPD2DQ256,
19981 IX86_BUILTIN_CVTTPS2DQ256,
19982 IX86_BUILTIN_EXTRACTF128PD256,
19983 IX86_BUILTIN_EXTRACTF128PS256,
19984 IX86_BUILTIN_EXTRACTF128SI256,
19985 IX86_BUILTIN_VZEROALL,
19986 IX86_BUILTIN_VZEROUPPER,
19987 IX86_BUILTIN_VZEROUPPER_REX64,
19988 IX86_BUILTIN_VPERMILVARPD,
19989 IX86_BUILTIN_VPERMILVARPS,
19990 IX86_BUILTIN_VPERMILVARPD256,
19991 IX86_BUILTIN_VPERMILVARPS256,
19992 IX86_BUILTIN_VPERMILPD,
19993 IX86_BUILTIN_VPERMILPS,
19994 IX86_BUILTIN_VPERMILPD256,
19995 IX86_BUILTIN_VPERMILPS256,
19996 IX86_BUILTIN_VPERM2F128PD256,
19997 IX86_BUILTIN_VPERM2F128PS256,
19998 IX86_BUILTIN_VPERM2F128SI256,
19999 IX86_BUILTIN_VBROADCASTSS,
20000 IX86_BUILTIN_VBROADCASTSD256,
20001 IX86_BUILTIN_VBROADCASTSS256,
20002 IX86_BUILTIN_VBROADCASTPD256,
20003 IX86_BUILTIN_VBROADCASTPS256,
20004 IX86_BUILTIN_VINSERTF128PD256,
20005 IX86_BUILTIN_VINSERTF128PS256,
20006 IX86_BUILTIN_VINSERTF128SI256,
20007 IX86_BUILTIN_LOADUPD256,
20008 IX86_BUILTIN_LOADUPS256,
20009 IX86_BUILTIN_STOREUPD256,
20010 IX86_BUILTIN_STOREUPS256,
20011 IX86_BUILTIN_LDDQU256,
20012 IX86_BUILTIN_MOVNTDQ256,
20013 IX86_BUILTIN_MOVNTPD256,
20014 IX86_BUILTIN_MOVNTPS256,
20015 IX86_BUILTIN_LOADDQU256,
20016 IX86_BUILTIN_STOREDQU256,
20017 IX86_BUILTIN_MASKLOADPD,
20018 IX86_BUILTIN_MASKLOADPS,
20019 IX86_BUILTIN_MASKSTOREPD,
20020 IX86_BUILTIN_MASKSTOREPS,
20021 IX86_BUILTIN_MASKLOADPD256,
20022 IX86_BUILTIN_MASKLOADPS256,
20023 IX86_BUILTIN_MASKSTOREPD256,
20024 IX86_BUILTIN_MASKSTOREPS256,
20025 IX86_BUILTIN_MOVSHDUP256,
20026 IX86_BUILTIN_MOVSLDUP256,
20027 IX86_BUILTIN_MOVDDUP256,
20028
20029 IX86_BUILTIN_SQRTPD256,
20030 IX86_BUILTIN_SQRTPS256,
20031 IX86_BUILTIN_SQRTPS_NR256,
20032 IX86_BUILTIN_RSQRTPS256,
20033 IX86_BUILTIN_RSQRTPS_NR256,
20034
20035 IX86_BUILTIN_RCPPS256,
20036
20037 IX86_BUILTIN_ROUNDPD256,
20038 IX86_BUILTIN_ROUNDPS256,
20039
20040 IX86_BUILTIN_UNPCKHPD256,
20041 IX86_BUILTIN_UNPCKLPD256,
20042 IX86_BUILTIN_UNPCKHPS256,
20043 IX86_BUILTIN_UNPCKLPS256,
20044
20045 IX86_BUILTIN_SI256_SI,
20046 IX86_BUILTIN_PS256_PS,
20047 IX86_BUILTIN_PD256_PD,
20048 IX86_BUILTIN_SI_SI256,
20049 IX86_BUILTIN_PS_PS256,
20050 IX86_BUILTIN_PD_PD256,
20051
20052 IX86_BUILTIN_VTESTZPD,
20053 IX86_BUILTIN_VTESTCPD,
20054 IX86_BUILTIN_VTESTNZCPD,
20055 IX86_BUILTIN_VTESTZPS,
20056 IX86_BUILTIN_VTESTCPS,
20057 IX86_BUILTIN_VTESTNZCPS,
20058 IX86_BUILTIN_VTESTZPD256,
20059 IX86_BUILTIN_VTESTCPD256,
20060 IX86_BUILTIN_VTESTNZCPD256,
20061 IX86_BUILTIN_VTESTZPS256,
20062 IX86_BUILTIN_VTESTCPS256,
20063 IX86_BUILTIN_VTESTNZCPS256,
20064 IX86_BUILTIN_PTESTZ256,
20065 IX86_BUILTIN_PTESTC256,
20066 IX86_BUILTIN_PTESTNZC256,
20067
20068 IX86_BUILTIN_MOVMSKPD256,
20069 IX86_BUILTIN_MOVMSKPS256,
20070
20071 /* TFmode support builtins. */
20072 IX86_BUILTIN_INFQ,
20073 IX86_BUILTIN_FABSQ,
20074 IX86_BUILTIN_COPYSIGNQ,
20075
20076 /* SSE5 instructions */
20077 IX86_BUILTIN_FMADDSS,
20078 IX86_BUILTIN_FMADDSD,
20079 IX86_BUILTIN_FMADDPS,
20080 IX86_BUILTIN_FMADDPD,
20081 IX86_BUILTIN_FMSUBSS,
20082 IX86_BUILTIN_FMSUBSD,
20083 IX86_BUILTIN_FMSUBPS,
20084 IX86_BUILTIN_FMSUBPD,
20085 IX86_BUILTIN_FNMADDSS,
20086 IX86_BUILTIN_FNMADDSD,
20087 IX86_BUILTIN_FNMADDPS,
20088 IX86_BUILTIN_FNMADDPD,
20089 IX86_BUILTIN_FNMSUBSS,
20090 IX86_BUILTIN_FNMSUBSD,
20091 IX86_BUILTIN_FNMSUBPS,
20092 IX86_BUILTIN_FNMSUBPD,
20093 IX86_BUILTIN_PCMOV,
20094 IX86_BUILTIN_PCMOV_V2DI,
20095 IX86_BUILTIN_PCMOV_V4SI,
20096 IX86_BUILTIN_PCMOV_V8HI,
20097 IX86_BUILTIN_PCMOV_V16QI,
20098 IX86_BUILTIN_PCMOV_V4SF,
20099 IX86_BUILTIN_PCMOV_V2DF,
20100 IX86_BUILTIN_PPERM,
20101 IX86_BUILTIN_PERMPS,
20102 IX86_BUILTIN_PERMPD,
20103 IX86_BUILTIN_PMACSSWW,
20104 IX86_BUILTIN_PMACSWW,
20105 IX86_BUILTIN_PMACSSWD,
20106 IX86_BUILTIN_PMACSWD,
20107 IX86_BUILTIN_PMACSSDD,
20108 IX86_BUILTIN_PMACSDD,
20109 IX86_BUILTIN_PMACSSDQL,
20110 IX86_BUILTIN_PMACSSDQH,
20111 IX86_BUILTIN_PMACSDQL,
20112 IX86_BUILTIN_PMACSDQH,
20113 IX86_BUILTIN_PMADCSSWD,
20114 IX86_BUILTIN_PMADCSWD,
20115 IX86_BUILTIN_PHADDBW,
20116 IX86_BUILTIN_PHADDBD,
20117 IX86_BUILTIN_PHADDBQ,
20118 IX86_BUILTIN_PHADDWD,
20119 IX86_BUILTIN_PHADDWQ,
20120 IX86_BUILTIN_PHADDDQ,
20121 IX86_BUILTIN_PHADDUBW,
20122 IX86_BUILTIN_PHADDUBD,
20123 IX86_BUILTIN_PHADDUBQ,
20124 IX86_BUILTIN_PHADDUWD,
20125 IX86_BUILTIN_PHADDUWQ,
20126 IX86_BUILTIN_PHADDUDQ,
20127 IX86_BUILTIN_PHSUBBW,
20128 IX86_BUILTIN_PHSUBWD,
20129 IX86_BUILTIN_PHSUBDQ,
20130 IX86_BUILTIN_PROTB,
20131 IX86_BUILTIN_PROTW,
20132 IX86_BUILTIN_PROTD,
20133 IX86_BUILTIN_PROTQ,
20134 IX86_BUILTIN_PROTB_IMM,
20135 IX86_BUILTIN_PROTW_IMM,
20136 IX86_BUILTIN_PROTD_IMM,
20137 IX86_BUILTIN_PROTQ_IMM,
20138 IX86_BUILTIN_PSHLB,
20139 IX86_BUILTIN_PSHLW,
20140 IX86_BUILTIN_PSHLD,
20141 IX86_BUILTIN_PSHLQ,
20142 IX86_BUILTIN_PSHAB,
20143 IX86_BUILTIN_PSHAW,
20144 IX86_BUILTIN_PSHAD,
20145 IX86_BUILTIN_PSHAQ,
20146 IX86_BUILTIN_FRCZSS,
20147 IX86_BUILTIN_FRCZSD,
20148 IX86_BUILTIN_FRCZPS,
20149 IX86_BUILTIN_FRCZPD,
20150 IX86_BUILTIN_CVTPH2PS,
20151 IX86_BUILTIN_CVTPS2PH,
20152
20153 IX86_BUILTIN_COMEQSS,
20154 IX86_BUILTIN_COMNESS,
20155 IX86_BUILTIN_COMLTSS,
20156 IX86_BUILTIN_COMLESS,
20157 IX86_BUILTIN_COMGTSS,
20158 IX86_BUILTIN_COMGESS,
20159 IX86_BUILTIN_COMUEQSS,
20160 IX86_BUILTIN_COMUNESS,
20161 IX86_BUILTIN_COMULTSS,
20162 IX86_BUILTIN_COMULESS,
20163 IX86_BUILTIN_COMUGTSS,
20164 IX86_BUILTIN_COMUGESS,
20165 IX86_BUILTIN_COMORDSS,
20166 IX86_BUILTIN_COMUNORDSS,
20167 IX86_BUILTIN_COMFALSESS,
20168 IX86_BUILTIN_COMTRUESS,
20169
20170 IX86_BUILTIN_COMEQSD,
20171 IX86_BUILTIN_COMNESD,
20172 IX86_BUILTIN_COMLTSD,
20173 IX86_BUILTIN_COMLESD,
20174 IX86_BUILTIN_COMGTSD,
20175 IX86_BUILTIN_COMGESD,
20176 IX86_BUILTIN_COMUEQSD,
20177 IX86_BUILTIN_COMUNESD,
20178 IX86_BUILTIN_COMULTSD,
20179 IX86_BUILTIN_COMULESD,
20180 IX86_BUILTIN_COMUGTSD,
20181 IX86_BUILTIN_COMUGESD,
20182 IX86_BUILTIN_COMORDSD,
20183 IX86_BUILTIN_COMUNORDSD,
20184 IX86_BUILTIN_COMFALSESD,
20185 IX86_BUILTIN_COMTRUESD,
20186
20187 IX86_BUILTIN_COMEQPS,
20188 IX86_BUILTIN_COMNEPS,
20189 IX86_BUILTIN_COMLTPS,
20190 IX86_BUILTIN_COMLEPS,
20191 IX86_BUILTIN_COMGTPS,
20192 IX86_BUILTIN_COMGEPS,
20193 IX86_BUILTIN_COMUEQPS,
20194 IX86_BUILTIN_COMUNEPS,
20195 IX86_BUILTIN_COMULTPS,
20196 IX86_BUILTIN_COMULEPS,
20197 IX86_BUILTIN_COMUGTPS,
20198 IX86_BUILTIN_COMUGEPS,
20199 IX86_BUILTIN_COMORDPS,
20200 IX86_BUILTIN_COMUNORDPS,
20201 IX86_BUILTIN_COMFALSEPS,
20202 IX86_BUILTIN_COMTRUEPS,
20203
20204 IX86_BUILTIN_COMEQPD,
20205 IX86_BUILTIN_COMNEPD,
20206 IX86_BUILTIN_COMLTPD,
20207 IX86_BUILTIN_COMLEPD,
20208 IX86_BUILTIN_COMGTPD,
20209 IX86_BUILTIN_COMGEPD,
20210 IX86_BUILTIN_COMUEQPD,
20211 IX86_BUILTIN_COMUNEPD,
20212 IX86_BUILTIN_COMULTPD,
20213 IX86_BUILTIN_COMULEPD,
20214 IX86_BUILTIN_COMUGTPD,
20215 IX86_BUILTIN_COMUGEPD,
20216 IX86_BUILTIN_COMORDPD,
20217 IX86_BUILTIN_COMUNORDPD,
20218 IX86_BUILTIN_COMFALSEPD,
20219 IX86_BUILTIN_COMTRUEPD,
20220
20221 IX86_BUILTIN_PCOMEQUB,
20222 IX86_BUILTIN_PCOMNEUB,
20223 IX86_BUILTIN_PCOMLTUB,
20224 IX86_BUILTIN_PCOMLEUB,
20225 IX86_BUILTIN_PCOMGTUB,
20226 IX86_BUILTIN_PCOMGEUB,
20227 IX86_BUILTIN_PCOMFALSEUB,
20228 IX86_BUILTIN_PCOMTRUEUB,
20229 IX86_BUILTIN_PCOMEQUW,
20230 IX86_BUILTIN_PCOMNEUW,
20231 IX86_BUILTIN_PCOMLTUW,
20232 IX86_BUILTIN_PCOMLEUW,
20233 IX86_BUILTIN_PCOMGTUW,
20234 IX86_BUILTIN_PCOMGEUW,
20235 IX86_BUILTIN_PCOMFALSEUW,
20236 IX86_BUILTIN_PCOMTRUEUW,
20237 IX86_BUILTIN_PCOMEQUD,
20238 IX86_BUILTIN_PCOMNEUD,
20239 IX86_BUILTIN_PCOMLTUD,
20240 IX86_BUILTIN_PCOMLEUD,
20241 IX86_BUILTIN_PCOMGTUD,
20242 IX86_BUILTIN_PCOMGEUD,
20243 IX86_BUILTIN_PCOMFALSEUD,
20244 IX86_BUILTIN_PCOMTRUEUD,
20245 IX86_BUILTIN_PCOMEQUQ,
20246 IX86_BUILTIN_PCOMNEUQ,
20247 IX86_BUILTIN_PCOMLTUQ,
20248 IX86_BUILTIN_PCOMLEUQ,
20249 IX86_BUILTIN_PCOMGTUQ,
20250 IX86_BUILTIN_PCOMGEUQ,
20251 IX86_BUILTIN_PCOMFALSEUQ,
20252 IX86_BUILTIN_PCOMTRUEUQ,
20253
20254 IX86_BUILTIN_PCOMEQB,
20255 IX86_BUILTIN_PCOMNEB,
20256 IX86_BUILTIN_PCOMLTB,
20257 IX86_BUILTIN_PCOMLEB,
20258 IX86_BUILTIN_PCOMGTB,
20259 IX86_BUILTIN_PCOMGEB,
20260 IX86_BUILTIN_PCOMFALSEB,
20261 IX86_BUILTIN_PCOMTRUEB,
20262 IX86_BUILTIN_PCOMEQW,
20263 IX86_BUILTIN_PCOMNEW,
20264 IX86_BUILTIN_PCOMLTW,
20265 IX86_BUILTIN_PCOMLEW,
20266 IX86_BUILTIN_PCOMGTW,
20267 IX86_BUILTIN_PCOMGEW,
20268 IX86_BUILTIN_PCOMFALSEW,
20269 IX86_BUILTIN_PCOMTRUEW,
20270 IX86_BUILTIN_PCOMEQD,
20271 IX86_BUILTIN_PCOMNED,
20272 IX86_BUILTIN_PCOMLTD,
20273 IX86_BUILTIN_PCOMLED,
20274 IX86_BUILTIN_PCOMGTD,
20275 IX86_BUILTIN_PCOMGED,
20276 IX86_BUILTIN_PCOMFALSED,
20277 IX86_BUILTIN_PCOMTRUED,
20278 IX86_BUILTIN_PCOMEQQ,
20279 IX86_BUILTIN_PCOMNEQ,
20280 IX86_BUILTIN_PCOMLTQ,
20281 IX86_BUILTIN_PCOMLEQ,
20282 IX86_BUILTIN_PCOMGTQ,
20283 IX86_BUILTIN_PCOMGEQ,
20284 IX86_BUILTIN_PCOMFALSEQ,
20285 IX86_BUILTIN_PCOMTRUEQ,
20286
20287 IX86_BUILTIN_MAX
20288 };
20289
20290 /* Table for the ix86 builtin decls. */
20291 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
20292
20293 /* Table of all of the builtin functions that are possible with different ISAs
20294    but are waiting to be built until a function is declared to use that
20295    ISA.  */
20296 struct builtin_isa GTY(())
20297 {
20298 tree type; /* builtin type to use in the declaration */
20299 const char *name; /* function name */
20300 int isa; /* isa_flags this builtin is defined for */
20301 bool const_p; /* true if the declaration is constant */
20302 };
20303
20304 static GTY(()) struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
20305
20306
20307 /* Add an ix86 target builtin function with CODE, NAME and TYPE.  Save the MASK
20308  * of isa_flags this builtin is defined for in the ix86_builtins_isa array.
20309  * Stores the function decl in the ix86_builtins array.  Returns the function
20310  * decl, or NULL_TREE if the builtin was not added.
20311  *
20312  * If the front end has a special hook for builtin functions, delay adding
20313  * builtin functions that aren't in the current ISA until the ISA is changed
20314  * with function specific optimization.  Doing so can save about 300K for the
20315  * default compiler.  When the builtin is expanded, check at that time whether
20316  * it is valid.
20317  *
20318  * If the front end doesn't have a special hook, record all builtins, even
20319  * those that aren't in the current ISA, in case the user uses function
20320  * specific options for a different ISA, so that we don't get scope errors
20321  * if a builtin is added in the middle of a function scope.  */
20322
20323 static inline tree
20324 def_builtin (int mask, const char *name, tree type, enum ix86_builtins code)
20325 {
20326 tree decl = NULL_TREE;
20327
20328 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
20329 {
20330 ix86_builtins_isa[(int) code].isa = mask;
20331
20332 if ((mask & ix86_isa_flags) != 0
20333 || (lang_hooks.builtin_function
20334 == lang_hooks.builtin_function_ext_scope))
20336 {
20337 decl = add_builtin_function (name, type, code, BUILT_IN_MD, NULL,
20338 NULL_TREE);
20339 ix86_builtins[(int) code] = decl;
20340 ix86_builtins_isa[(int) code].type = NULL_TREE;
20341 }
20342 else
20343 {
20344 ix86_builtins[(int) code] = NULL_TREE;
20345 ix86_builtins_isa[(int) code].const_p = false;
20346 ix86_builtins_isa[(int) code].type = type;
20347 ix86_builtins_isa[(int) code].name = name;
20348 }
20349 }
20350
20351 return decl;
20352 }
20353
20354 /* Like def_builtin, but also marks the function decl "const". */
20355
20356 static inline tree
20357 def_builtin_const (int mask, const char *name, tree type,
20358 enum ix86_builtins code)
20359 {
20360 tree decl = def_builtin (mask, name, type, code);
20361 if (decl)
20362 TREE_READONLY (decl) = 1;
20363 else
20364 ix86_builtins_isa[(int) code].const_p = true;
20365
20366 return decl;
20367 }
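/* Illustrative sketch (editorial, not from the original source): a typical
   registration through the helpers above, using names that appear in the
   description tables later in this file.  The real call sites live in the
   ISA initialization code and may differ in detail:

     def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_addps",
                        v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ADDPS);

   If OPTION_MASK_ISA_SSE is not in ix86_isa_flags and the front end has a
   separate ext_scope hook, the decl is not created here; it is parked in
   ix86_builtins_isa and only materialized later by ix86_add_new_builtins.  */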
20368
20369 /* Add any new builtin functions for a given ISA that may not have been
20370    declared yet.  This saves a bit of space compared to adding all of the
20371    declarations to the tree up front, even the ones we never use.  */
20372
20373 static void
20374 ix86_add_new_builtins (int isa)
20375 {
20376 int i;
20377 tree decl;
20378
20379 for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
20380 {
20381 if ((ix86_builtins_isa[i].isa & isa) != 0
20382 && ix86_builtins_isa[i].type != NULL_TREE)
20383 {
20384 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
20385 ix86_builtins_isa[i].type,
20386 i, BUILT_IN_MD, NULL,
20387 NULL_TREE);
20388
20389 ix86_builtins[i] = decl;
20390 ix86_builtins_isa[i].type = NULL_TREE;
20391 if (ix86_builtins_isa[i].const_p)
20392 TREE_READONLY (decl) = 1;
20393 }
20394 }
20395 }
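/* Illustrative sketch (editorial, not from the original source): this
   function is meant to run when function specific options enable an ISA
   that was not enabled globally, roughly

     if ((ix86_isa_flags & ~orig_isa_flags) != 0)
       ix86_add_new_builtins (ix86_isa_flags);

   so that, for example, a function compiled with
   __attribute__((target ("sse4.2"))) can call SSE4.2 builtins that were
   deferred by def_builtin above even though the ISA was not given on the
   command line.  The call site shown is a paraphrase of the option handling
   code elsewhere in this file, not a verbatim quote.  */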
20396
20397 /* Bits for builtin_description.flag. */
20398
20399 /* Set when we don't support the comparison natively, and should
20400    swap the comparison operands in order to support it.  */
20401 #define BUILTIN_DESC_SWAP_OPERANDS 1
20402
20403 struct builtin_description
20404 {
20405 const unsigned int mask;
20406 const enum insn_code icode;
20407 const char *const name;
20408 const enum ix86_builtins code;
20409 const enum rtx_code comparison;
20410 const int flag;
20411 };
20412
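/* Editorial note (not part of the original source): each entry in the
   description tables below binds an ISA mask to an insn pattern and a user
   visible builtin.  Using the first bdesc_comi entry as an example:

     { OPTION_MASK_ISA_SSE,        only defined when SSE is enabled
       CODE_FOR_sse_comi,          insn pattern used when expanding the call
       "__builtin_ia32_comieq",    name exposed to the user
       IX86_BUILTIN_COMIEQSS,      index into ix86_builtins[]
       UNEQ,                       rtx comparison code (UNKNOWN when unused)
       0 }                         flag: BUILTIN_DESC_SWAP_OPERANDS, a
                                   function type enum, or a CC mode,
                                   depending on the table.  */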
20413 static const struct builtin_description bdesc_comi[] =
20414 {
20415 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
20416 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
20417 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
20418 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
20419 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
20420 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
20421 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
20422 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
20423 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
20424 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
20425 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
20426 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
20427 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
20428 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
20429 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
20430 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
20431 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
20432 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
20433 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
20434 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
20435 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
20436 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
20437 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
20438 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
20439 };
20440
20441 static const struct builtin_description bdesc_pcmpestr[] =
20442 {
20443 /* SSE4.2 */
20444 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
20445 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
20446 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
20447 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
20448 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
20449 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
20450 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
20451 };
20452
20453 static const struct builtin_description bdesc_pcmpistr[] =
20454 {
20455 /* SSE4.2 */
20456 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
20457 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
20458 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
20459 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
20460 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
20461 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
20462 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
20463 };
20464
20465 /* Special builtin types */
20466 enum ix86_special_builtin_type
20467 {
20468 SPECIAL_FTYPE_UNKNOWN,
20469 VOID_FTYPE_VOID,
20470 V32QI_FTYPE_PCCHAR,
20471 V16QI_FTYPE_PCCHAR,
20472 V8SF_FTYPE_PCV4SF,
20473 V8SF_FTYPE_PCFLOAT,
20474 V4DF_FTYPE_PCV2DF,
20475 V4DF_FTYPE_PCDOUBLE,
20476 V4SF_FTYPE_PCFLOAT,
20477 V2DF_FTYPE_PCDOUBLE,
20478 V8SF_FTYPE_PCV8SF_V8SF,
20479 V4DF_FTYPE_PCV4DF_V4DF,
20480 V4SF_FTYPE_V4SF_PCV2SF,
20481 V4SF_FTYPE_PCV4SF_V4SF,
20482 V2DF_FTYPE_V2DF_PCDOUBLE,
20483 V2DF_FTYPE_PCV2DF_V2DF,
20484 V2DI_FTYPE_PV2DI,
20485 VOID_FTYPE_PV2SF_V4SF,
20486 VOID_FTYPE_PV4DI_V4DI,
20487 VOID_FTYPE_PV2DI_V2DI,
20488 VOID_FTYPE_PCHAR_V32QI,
20489 VOID_FTYPE_PCHAR_V16QI,
20490 VOID_FTYPE_PFLOAT_V8SF,
20491 VOID_FTYPE_PFLOAT_V4SF,
20492 VOID_FTYPE_PDOUBLE_V4DF,
20493 VOID_FTYPE_PDOUBLE_V2DF,
20494 VOID_FTYPE_PDI_DI,
20495 VOID_FTYPE_PINT_INT,
20496 VOID_FTYPE_PV8SF_V8SF_V8SF,
20497 VOID_FTYPE_PV4DF_V4DF_V4DF,
20498 VOID_FTYPE_PV4SF_V4SF_V4SF,
20499 VOID_FTYPE_PV2DF_V2DF_V2DF
20500 };
20501
20502 /* Builtin types */
20503 enum ix86_builtin_type
20504 {
20505 FTYPE_UNKNOWN,
20506 FLOAT128_FTYPE_FLOAT128,
20507 FLOAT_FTYPE_FLOAT,
20508 FLOAT128_FTYPE_FLOAT128_FLOAT128,
20509 INT_FTYPE_V8SF_V8SF_PTEST,
20510 INT_FTYPE_V4DI_V4DI_PTEST,
20511 INT_FTYPE_V4DF_V4DF_PTEST,
20512 INT_FTYPE_V4SF_V4SF_PTEST,
20513 INT_FTYPE_V2DI_V2DI_PTEST,
20514 INT_FTYPE_V2DF_V2DF_PTEST,
20515 INT64_FTYPE_V4SF,
20516 INT64_FTYPE_V2DF,
20517 INT_FTYPE_V16QI,
20518 INT_FTYPE_V8QI,
20519 INT_FTYPE_V8SF,
20520 INT_FTYPE_V4DF,
20521 INT_FTYPE_V4SF,
20522 INT_FTYPE_V2DF,
20523 V16QI_FTYPE_V16QI,
20524 V8SI_FTYPE_V8SF,
20525 V8SI_FTYPE_V4SI,
20526 V8HI_FTYPE_V8HI,
20527 V8HI_FTYPE_V16QI,
20528 V8QI_FTYPE_V8QI,
20529 V8SF_FTYPE_V8SF,
20530 V8SF_FTYPE_V8SI,
20531 V8SF_FTYPE_V4SF,
20532 V4SI_FTYPE_V4SI,
20533 V4SI_FTYPE_V16QI,
20534 V4SI_FTYPE_V8SI,
20535 V4SI_FTYPE_V8HI,
20536 V4SI_FTYPE_V4DF,
20537 V4SI_FTYPE_V4SF,
20538 V4SI_FTYPE_V2DF,
20539 V4HI_FTYPE_V4HI,
20540 V4DF_FTYPE_V4DF,
20541 V4DF_FTYPE_V4SI,
20542 V4DF_FTYPE_V4SF,
20543 V4DF_FTYPE_V2DF,
20544 V4SF_FTYPE_V4DF,
20545 V4SF_FTYPE_V4SF,
20546 V4SF_FTYPE_V4SF_VEC_MERGE,
20547 V4SF_FTYPE_V8SF,
20548 V4SF_FTYPE_V4SI,
20549 V4SF_FTYPE_V2DF,
20550 V2DI_FTYPE_V2DI,
20551 V2DI_FTYPE_V16QI,
20552 V2DI_FTYPE_V8HI,
20553 V2DI_FTYPE_V4SI,
20554 V2DF_FTYPE_V2DF,
20555 V2DF_FTYPE_V2DF_VEC_MERGE,
20556 V2DF_FTYPE_V4SI,
20557 V2DF_FTYPE_V4DF,
20558 V2DF_FTYPE_V4SF,
20559 V2DF_FTYPE_V2SI,
20560 V2SI_FTYPE_V2SI,
20561 V2SI_FTYPE_V4SF,
20562 V2SI_FTYPE_V2SF,
20563 V2SI_FTYPE_V2DF,
20564 V2SF_FTYPE_V2SF,
20565 V2SF_FTYPE_V2SI,
20566 V16QI_FTYPE_V16QI_V16QI,
20567 V16QI_FTYPE_V8HI_V8HI,
20568 V8QI_FTYPE_V8QI_V8QI,
20569 V8QI_FTYPE_V4HI_V4HI,
20570 V8HI_FTYPE_V8HI_V8HI,
20571 V8HI_FTYPE_V8HI_V8HI_COUNT,
20572 V8HI_FTYPE_V16QI_V16QI,
20573 V8HI_FTYPE_V4SI_V4SI,
20574 V8HI_FTYPE_V8HI_SI_COUNT,
20575 V8SF_FTYPE_V8SF_V8SF,
20576 V8SF_FTYPE_V8SF_V8SI,
20577 V4SI_FTYPE_V4SI_V4SI,
20578 V4SI_FTYPE_V4SI_V4SI_COUNT,
20579 V4SI_FTYPE_V8HI_V8HI,
20580 V4SI_FTYPE_V4SF_V4SF,
20581 V4SI_FTYPE_V2DF_V2DF,
20582 V4SI_FTYPE_V4SI_SI_COUNT,
20583 V4HI_FTYPE_V4HI_V4HI,
20584 V4HI_FTYPE_V4HI_V4HI_COUNT,
20585 V4HI_FTYPE_V8QI_V8QI,
20586 V4HI_FTYPE_V2SI_V2SI,
20587 V4HI_FTYPE_V4HI_SI_COUNT,
20588 V4DF_FTYPE_V4DF_V4DF,
20589 V4DF_FTYPE_V4DF_V4DI,
20590 V4SF_FTYPE_V4SF_V4SF,
20591 V4SF_FTYPE_V4SF_V4SF_SWAP,
20592 V4SF_FTYPE_V4SF_V4SI,
20593 V4SF_FTYPE_V4SF_V2SI,
20594 V4SF_FTYPE_V4SF_V2DF,
20595 V4SF_FTYPE_V4SF_DI,
20596 V4SF_FTYPE_V4SF_SI,
20597 V2DI_FTYPE_V2DI_V2DI,
20598 V2DI_FTYPE_V2DI_V2DI_COUNT,
20599 V2DI_FTYPE_V16QI_V16QI,
20600 V2DI_FTYPE_V4SI_V4SI,
20601 V2DI_FTYPE_V2DI_V16QI,
20602 V2DI_FTYPE_V2DF_V2DF,
20603 V2DI_FTYPE_V2DI_SI_COUNT,
20604 V2SI_FTYPE_V2SI_V2SI,
20605 V2SI_FTYPE_V2SI_V2SI_COUNT,
20606 V2SI_FTYPE_V4HI_V4HI,
20607 V2SI_FTYPE_V2SF_V2SF,
20608 V2SI_FTYPE_V2SI_SI_COUNT,
20609 V2DF_FTYPE_V2DF_V2DF,
20610 V2DF_FTYPE_V2DF_V2DF_SWAP,
20611 V2DF_FTYPE_V2DF_V4SF,
20612 V2DF_FTYPE_V2DF_V2DI,
20613 V2DF_FTYPE_V2DF_DI,
20614 V2DF_FTYPE_V2DF_SI,
20615 V2SF_FTYPE_V2SF_V2SF,
20616 V1DI_FTYPE_V1DI_V1DI,
20617 V1DI_FTYPE_V1DI_V1DI_COUNT,
20618 V1DI_FTYPE_V8QI_V8QI,
20619 V1DI_FTYPE_V2SI_V2SI,
20620 V1DI_FTYPE_V1DI_SI_COUNT,
20621 UINT64_FTYPE_UINT64_UINT64,
20622 UINT_FTYPE_UINT_UINT,
20623 UINT_FTYPE_UINT_USHORT,
20624 UINT_FTYPE_UINT_UCHAR,
20625 V8HI_FTYPE_V8HI_INT,
20626 V4SI_FTYPE_V4SI_INT,
20627 V4HI_FTYPE_V4HI_INT,
20628 V8SF_FTYPE_V8SF_INT,
20629 V4SI_FTYPE_V8SI_INT,
20630 V4SF_FTYPE_V8SF_INT,
20631 V2DF_FTYPE_V4DF_INT,
20632 V4DF_FTYPE_V4DF_INT,
20633 V4SF_FTYPE_V4SF_INT,
20634 V2DI_FTYPE_V2DI_INT,
20635 V2DI2TI_FTYPE_V2DI_INT,
20636 V2DF_FTYPE_V2DF_INT,
20637 V16QI_FTYPE_V16QI_V16QI_V16QI,
20638 V8SF_FTYPE_V8SF_V8SF_V8SF,
20639 V4DF_FTYPE_V4DF_V4DF_V4DF,
20640 V4SF_FTYPE_V4SF_V4SF_V4SF,
20641 V2DF_FTYPE_V2DF_V2DF_V2DF,
20642 V16QI_FTYPE_V16QI_V16QI_INT,
20643 V8SI_FTYPE_V8SI_V8SI_INT,
20644 V8SI_FTYPE_V8SI_V4SI_INT,
20645 V8HI_FTYPE_V8HI_V8HI_INT,
20646 V8SF_FTYPE_V8SF_V8SF_INT,
20647 V8SF_FTYPE_V8SF_V4SF_INT,
20648 V4SI_FTYPE_V4SI_V4SI_INT,
20649 V4DF_FTYPE_V4DF_V4DF_INT,
20650 V4DF_FTYPE_V4DF_V2DF_INT,
20651 V4SF_FTYPE_V4SF_V4SF_INT,
20652 V2DI_FTYPE_V2DI_V2DI_INT,
20653 V2DI2TI_FTYPE_V2DI_V2DI_INT,
20654 V1DI2DI_FTYPE_V1DI_V1DI_INT,
20655 V2DF_FTYPE_V2DF_V2DF_INT,
20656 V2DI_FTYPE_V2DI_UINT_UINT,
20657 V2DI_FTYPE_V2DI_V2DI_UINT_UINT
20658 };
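/* Editorial note (not part of the original source): the enum names above
   encode a builtin signature as RETURNTYPE_FTYPE_ARGTYPES, so e.g.
   V4SF_FTYPE_V4SF_V4SF stands for "V4SF f (V4SF, V4SF)".  A sketch of how
   such a value would be mapped to a tree type, assuming the usual vector
   type nodes are built from their machine modes:

     tree V4SF_type_node = build_vector_type_for_mode (float_type_node,
                                                       V4SFmode);
     tree v4sf_ftype_v4sf_v4sf
       = build_function_type_list (V4SF_type_node, V4SF_type_node,
                                   V4SF_type_node, NULL_TREE);

   Suffixes such as _COUNT, _SWAP and _VEC_MERGE mark entries that need
   extra handling when the builtin is expanded (shift counts taken from a
   scalar, swapped comparison operands, merging the scalar result into the
   low element).  */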
20659
20660 /* Special builtins with variable number of arguments. */
20661 static const struct builtin_description bdesc_special_args[] =
20662 {
20663 /* MMX */
20664 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
20665
20666 /* 3DNow! */
20667 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
20668
20669 /* SSE */
20670 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
20671 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
20672 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
20673
20674 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
20675 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
20676 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
20677 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
20678
20679 /* SSE or 3DNow!A */
20680 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
20681 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntdi, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PDI_DI },
20682
20683 /* SSE2 */
20684 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
20685 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
20686 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
20687 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
20688 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
20689 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
20690 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntsi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
20691 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
20692 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
20693
20694 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
20695 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
20696
20697 /* SSE3 */
20698 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
20699
20700 /* SSE4.1 */
20701 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
20702
20703 /* SSE4A */
20704 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
20705 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
20706
20707 /* AVX */
20708 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
20709 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, 0, IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
20710 { OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_64BIT, CODE_FOR_avx_vzeroupper_rex64, 0, IX86_BUILTIN_VZEROUPPER_REX64, UNKNOWN, (int) VOID_FTYPE_VOID },
20711
20712 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastss, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
20713 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastsd256, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
20714 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastss256, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
20715 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_pd256, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
20716 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_ps256, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
20717
20718 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
20719 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
20720 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
20721 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
20722 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
20723 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
20724 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
20725
20726 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
20727 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
20728 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
20729
20730 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF },
20731 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF },
20732 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF },
20733 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF },
20734 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_V2DF },
20735 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_V4SF },
20736 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_V4DF },
20737 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_V8SF },
20738 };
20739
20740 /* Builtins with variable number of arguments. */
20741 static const struct builtin_description bdesc_args[] =
20742 {
20743 /* MMX */
20744 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20745 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20746 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
20747 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20748 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20749 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
20750
20751 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20752 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20753 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20754 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20755 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20756 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20757 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20758 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20759
20760 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20761 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20762
20763 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
20764 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
20765 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
20766 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
20767
20768 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20769 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20770 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
20771 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20772 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20773 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
20774
20775 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20776 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20777 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
20778 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20779 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20780 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
20781
20782 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
20783 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
20784 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
20785
20786 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
20787
20788 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
20789 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
20790 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
20791 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
20792 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
20793 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
20794
20795 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
20796 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
20797 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
20798 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
20799 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
20800 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
20801
20802 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
20803 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
20804 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
20805 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
20806
20807 /* 3DNow! */
20808 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
20809 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
20810 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
20811 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
20812
20813 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20814 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
20815 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
20816 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
20817 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
20818 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
20819 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
20820 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
20821 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
20822 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
20823 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
20824 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
20825 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
20826 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
20827 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20828
20829 /* 3DNow!A */
20830 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
20831 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
20832 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
20833 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
20834 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
20835 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
20836
20837 /* SSE */
20838 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
20839 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
20840 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
20841 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
20842 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
20843 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
20844 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
20845 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
20846 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
20847 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
20848 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
20849 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
20850
20851 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
20852
20853 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20854 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20855 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20856 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20857 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20858 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20859 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20860 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20861
20862 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
20863 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
20864 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
20865 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
20866 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
20867 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
20868 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
20869 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
20870 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
20871 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
20872 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
20873 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
20874 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
20875 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
20876 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
20877 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
20878 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
20879 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
20880 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
20881 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
20882 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
20883 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
20884
20885 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20886 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20887 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20888 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20889
20890 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20891 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20892 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20893 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20894
20895 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20896 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20897 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20898 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20899 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20900
20901 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
20902 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
20903 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },
20904
20905 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
20906
20907 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
20908 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
20909 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
20910
20911 /* SSE MMX or 3DNow!A */
20912 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20913 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20914 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20915
20916 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20917 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20918 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20919 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20920
20921 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
20922 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
20923
20924 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
20925
20926 /* SSE2 */
20927 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
20928
20929 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
20930 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
20931 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
20932 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
20933 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
20934
20935 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
20936 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
20937 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
20938 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
20939 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
20940
20941 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
20942
20943 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
20944 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
20945 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
20946 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
20947
20948 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
20949 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
20950 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
20951
20952 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20953 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20954 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20955 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20956 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20957 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20958 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20959 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20960
20961 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
20962 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
20963 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
20964 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
20965 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
20966 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
20967 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
20968 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
20969 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
20970 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
20971 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
20972 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
20973 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
20974 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
20975 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
20976 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
20977 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
20978 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
20979 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
20980 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
20981
20982 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20983 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20984 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20985 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20986
20987 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20988 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20989 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20990 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20991
20992 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20993 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpckhpd_exp, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20994 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpcklpd_exp, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20995
20996 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
20997
20998 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
20999 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21000 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21001 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21002 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21003 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21004 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21005 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21006
21007 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21008 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21009 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21010 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21011 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21012 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21013 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21014 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21015
21016 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21017   { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21018
21019 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21020 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21021 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21022 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21023
21024 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21025 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21026
21027 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21028 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21029 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21030 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21031 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21032 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21033
21034 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21035 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21036 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21037 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21038
21039 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21040 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21041 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21042 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21043 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21044 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21045 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21046 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21047
21048 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
21049 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
21050 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
21051
21052 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21053 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
21054
21055 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
21056 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
21057
21058 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
21059
21060 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
21061 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
21062 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
21063 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
21064
21065 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_INT },
21066 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
21067 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
21068 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
21069 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
21070 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
21071 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
21072
21073 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_INT },
21074 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
21075 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
21076 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
21077 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
21078 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
21079 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
21080
21081 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
21082 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
21083 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
21084 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
21085
21086 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
21087 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
21088 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
21089
21090 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
21091
21092 { OPTION_MASK_ISA_SSE2, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
21093 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
21094
21095   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
21096
21097 /* SSE2 MMX */
21098 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
21099 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
21100
21101 /* SSE3 */
21102   { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21103 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21104
21105 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21106 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21107 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21108 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21109 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21110 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21111
21112 /* SSSE3 */
21113 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
21114 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
21115 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
21116 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
21117 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
21118 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
21119
21120 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21121 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21122 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21123 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21124 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21125 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21126 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21127 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21128 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21129 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21130 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21131 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21132 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
21133 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
21134 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21135 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21136 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21137 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21138 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21139 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21140 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21141 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21142 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21143 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21144
21145 /* SSSE3. */
21146 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_V2DI_INT },
21147 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI2DI_FTYPE_V1DI_V1DI_INT },
21148
21149 /* SSE4.1 */
21150 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21151 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21152 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
21153 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
21154 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21155 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21156 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21157 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
21158 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
21159 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
21160
21161 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
21162 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
21163 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
21164 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
21165 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
21166 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
21167 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
21168 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
21169 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
21170 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
21171 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
21172 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
21173 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
21174
21175 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
21176 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21177 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21178 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21179 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21180 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21181 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21182 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21183 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21184 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21185 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
21186 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21187
21188 /* SSE4.1 and SSE5 */
21189 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
21190 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
21191 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21192 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21193
21194 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
21195 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
21196 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
21197
21198 /* SSE4.2 */
21199 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21200 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
21201 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
21202 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
21203 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
21204
21205 /* SSE4A */
21206 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
21207 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
21208 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
21209 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21210
21211 /* AES */
21212 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
21213 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
21214
21215 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21216 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21217 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21218 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21219
21220 /* PCLMUL */
21221 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
21222
21223 /* AVX */
21224 { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21225 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21226 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21227 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21228 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21229 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21230 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21231 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21232 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21233 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21234 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21235 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21236 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21237 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21238 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21239 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21240 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21241 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21242 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21243 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21244 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21245 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21246 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21247 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21248 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21249 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21250
21251 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
21252 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
21253 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
21254 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
21255
21256 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
21257 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
21258 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
21259 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
21260 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
21261 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
21262 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
21263 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpsdv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21264 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpssv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21265 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21266 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21267 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
21268 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
21269 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
21270 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
21271 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
21272 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2pd256, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
21273 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2ps256, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
21274 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
21275 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
21276 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
21277 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttpd2dq256, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
21278 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
21279 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttps2dq256, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
21280 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
21281 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
21282 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
21283 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
21284 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
21285 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
21286 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
21287 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
21288 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
21289 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
21290
21291 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
21292 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
21293 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
21294
21295 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
21296 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
21297 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
21298 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
21299 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
21300
21301 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
21302
21303 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
21304 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
21305
21306 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21307 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21308 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21309 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21310
21311 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
21312 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
21313 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
21314 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si_si256, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
21315 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps_ps256, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
21316 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd_pd256, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
21317
21318 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
21319 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
21320 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
21321 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
21322 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
21323 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
21324 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
21325 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
21326 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
21327 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
21328 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
21329 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
21330 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
21331 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
21332 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
21333
21334 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
21335 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
21336 };
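/* End of the two-operand/immediate builtin table.  Each row ties an ISA
   mask, the insn pattern to emit, the user-visible builtin name, its
   IX86_BUILTIN_* code, an optional comparison code, and a *_FTYPE_*
   signature; the builtin initialization and expansion routines elsewhere
   in this file walk this table, so adding a builtin normally needs only a
   new row plus its enum value and, if necessary, a new signature.

   Illustrative use (not part of this file): the __builtin_ia32_paddw128
   row above is what <emmintrin.h>'s _mm_add_epi16 expands through,
   roughly

       return (__m128i) __builtin_ia32_paddw128 ((__v8hi) __A, (__v8hi) __B);

   which the addv8hi3 pattern turns into a single paddw instruction.  */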
21337
21338 /* SSE5 */
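/* Classification of the operand signatures used by the SSE5
   multi-operand builtins in bdesc_multi_arg below.  The names roughly
   encode the operand count and vector element mode, plus a variant
   suffix: _IMM when the last operand is an immediate, _CMP for the
   vector compares, _TF for the always-true/always-false compare forms,
   _PH2PS/_PS2PH for the half-precision conversions, and two-mode names
   such as _HI_SI or _QI_HI when the result element mode differs from the
   input element mode (the widening horizontal adds, for example).  */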
21339 enum multi_arg_type {
21340 MULTI_ARG_UNKNOWN,
21341 MULTI_ARG_3_SF,
21342 MULTI_ARG_3_DF,
21343 MULTI_ARG_3_DI,
21344 MULTI_ARG_3_SI,
21345 MULTI_ARG_3_SI_DI,
21346 MULTI_ARG_3_HI,
21347 MULTI_ARG_3_HI_SI,
21348 MULTI_ARG_3_QI,
21349 MULTI_ARG_3_PERMPS,
21350 MULTI_ARG_3_PERMPD,
21351 MULTI_ARG_2_SF,
21352 MULTI_ARG_2_DF,
21353 MULTI_ARG_2_DI,
21354 MULTI_ARG_2_SI,
21355 MULTI_ARG_2_HI,
21356 MULTI_ARG_2_QI,
21357 MULTI_ARG_2_DI_IMM,
21358 MULTI_ARG_2_SI_IMM,
21359 MULTI_ARG_2_HI_IMM,
21360 MULTI_ARG_2_QI_IMM,
21361 MULTI_ARG_2_SF_CMP,
21362 MULTI_ARG_2_DF_CMP,
21363 MULTI_ARG_2_DI_CMP,
21364 MULTI_ARG_2_SI_CMP,
21365 MULTI_ARG_2_HI_CMP,
21366 MULTI_ARG_2_QI_CMP,
21367 MULTI_ARG_2_DI_TF,
21368 MULTI_ARG_2_SI_TF,
21369 MULTI_ARG_2_HI_TF,
21370 MULTI_ARG_2_QI_TF,
21371 MULTI_ARG_2_SF_TF,
21372 MULTI_ARG_2_DF_TF,
21373 MULTI_ARG_1_SF,
21374 MULTI_ARG_1_DF,
21375 MULTI_ARG_1_DI,
21376 MULTI_ARG_1_SI,
21377 MULTI_ARG_1_HI,
21378 MULTI_ARG_1_QI,
21379 MULTI_ARG_1_SI_DI,
21380 MULTI_ARG_1_HI_DI,
21381 MULTI_ARG_1_HI_SI,
21382 MULTI_ARG_1_QI_DI,
21383 MULTI_ARG_1_QI_SI,
21384 MULTI_ARG_1_QI_HI,
21385 MULTI_ARG_1_PH2PS,
21386 MULTI_ARG_1_PS2PH
21387 };
21388
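/* SSE5 builtins taking up to three full register operands.  The layout
   matches the bdesc_* tables above, except that the last field carries a
   multi_arg_type classifier rather than a *_FTYPE_* signature and the
   comparison field (0 for the non-compare rows) selects the comparison
   to generate for the com/pcom entries.  */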
21389 static const struct builtin_description bdesc_multi_arg[] =
21390 {
21391 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv4sf4, "__builtin_ia32_fmaddss", IX86_BUILTIN_FMADDSS, 0, (int)MULTI_ARG_3_SF },
21392 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv2df4, "__builtin_ia32_fmaddsd", IX86_BUILTIN_FMADDSD, 0, (int)MULTI_ARG_3_DF },
21393 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv4sf4, "__builtin_ia32_fmaddps", IX86_BUILTIN_FMADDPS, 0, (int)MULTI_ARG_3_SF },
21394 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv2df4, "__builtin_ia32_fmaddpd", IX86_BUILTIN_FMADDPD, 0, (int)MULTI_ARG_3_DF },
21395 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv4sf4, "__builtin_ia32_fmsubss", IX86_BUILTIN_FMSUBSS, 0, (int)MULTI_ARG_3_SF },
21396 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv2df4, "__builtin_ia32_fmsubsd", IX86_BUILTIN_FMSUBSD, 0, (int)MULTI_ARG_3_DF },
21397 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv4sf4, "__builtin_ia32_fmsubps", IX86_BUILTIN_FMSUBPS, 0, (int)MULTI_ARG_3_SF },
21398 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv2df4, "__builtin_ia32_fmsubpd", IX86_BUILTIN_FMSUBPD, 0, (int)MULTI_ARG_3_DF },
21399 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv4sf4, "__builtin_ia32_fnmaddss", IX86_BUILTIN_FNMADDSS, 0, (int)MULTI_ARG_3_SF },
21400 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv2df4, "__builtin_ia32_fnmaddsd", IX86_BUILTIN_FNMADDSD, 0, (int)MULTI_ARG_3_DF },
21401 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv4sf4, "__builtin_ia32_fnmaddps", IX86_BUILTIN_FNMADDPS, 0, (int)MULTI_ARG_3_SF },
21402 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv2df4, "__builtin_ia32_fnmaddpd", IX86_BUILTIN_FNMADDPD, 0, (int)MULTI_ARG_3_DF },
21403 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv4sf4, "__builtin_ia32_fnmsubss", IX86_BUILTIN_FNMSUBSS, 0, (int)MULTI_ARG_3_SF },
21404 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv2df4, "__builtin_ia32_fnmsubsd", IX86_BUILTIN_FNMSUBSD, 0, (int)MULTI_ARG_3_DF },
21405 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv4sf4, "__builtin_ia32_fnmsubps", IX86_BUILTIN_FNMSUBPS, 0, (int)MULTI_ARG_3_SF },
21406 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv2df4, "__builtin_ia32_fnmsubpd", IX86_BUILTIN_FNMSUBPD, 0, (int)MULTI_ARG_3_DF },
21407 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov", IX86_BUILTIN_PCMOV, 0, (int)MULTI_ARG_3_DI },
21408 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov_v2di", IX86_BUILTIN_PCMOV_V2DI, 0, (int)MULTI_ARG_3_DI },
21409 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4si, "__builtin_ia32_pcmov_v4si", IX86_BUILTIN_PCMOV_V4SI, 0, (int)MULTI_ARG_3_SI },
21410 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v8hi, "__builtin_ia32_pcmov_v8hi", IX86_BUILTIN_PCMOV_V8HI, 0, (int)MULTI_ARG_3_HI },
21411   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v16qi, "__builtin_ia32_pcmov_v16qi", IX86_BUILTIN_PCMOV_V16QI, 0, (int)MULTI_ARG_3_QI },
21412 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2df, "__builtin_ia32_pcmov_v2df", IX86_BUILTIN_PCMOV_V2DF, 0, (int)MULTI_ARG_3_DF },
21413 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4sf, "__builtin_ia32_pcmov_v4sf", IX86_BUILTIN_PCMOV_V4SF, 0, (int)MULTI_ARG_3_SF },
21414 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pperm, "__builtin_ia32_pperm", IX86_BUILTIN_PPERM, 0, (int)MULTI_ARG_3_QI },
21415 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv4sf, "__builtin_ia32_permps", IX86_BUILTIN_PERMPS, 0, (int)MULTI_ARG_3_PERMPS },
21416 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv2df, "__builtin_ia32_permpd", IX86_BUILTIN_PERMPD, 0, (int)MULTI_ARG_3_PERMPD },
21417 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssww, "__builtin_ia32_pmacssww", IX86_BUILTIN_PMACSSWW, 0, (int)MULTI_ARG_3_HI },
21418 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsww, "__builtin_ia32_pmacsww", IX86_BUILTIN_PMACSWW, 0, (int)MULTI_ARG_3_HI },
21419 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsswd, "__builtin_ia32_pmacsswd", IX86_BUILTIN_PMACSSWD, 0, (int)MULTI_ARG_3_HI_SI },
21420 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacswd, "__builtin_ia32_pmacswd", IX86_BUILTIN_PMACSWD, 0, (int)MULTI_ARG_3_HI_SI },
21421 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdd, "__builtin_ia32_pmacssdd", IX86_BUILTIN_PMACSSDD, 0, (int)MULTI_ARG_3_SI },
21422 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdd, "__builtin_ia32_pmacsdd", IX86_BUILTIN_PMACSDD, 0, (int)MULTI_ARG_3_SI },
21423 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdql, "__builtin_ia32_pmacssdql", IX86_BUILTIN_PMACSSDQL, 0, (int)MULTI_ARG_3_SI_DI },
21424 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdqh, "__builtin_ia32_pmacssdqh", IX86_BUILTIN_PMACSSDQH, 0, (int)MULTI_ARG_3_SI_DI },
21425 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdql, "__builtin_ia32_pmacsdql", IX86_BUILTIN_PMACSDQL, 0, (int)MULTI_ARG_3_SI_DI },
21426 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdqh, "__builtin_ia32_pmacsdqh", IX86_BUILTIN_PMACSDQH, 0, (int)MULTI_ARG_3_SI_DI },
21427 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcsswd, "__builtin_ia32_pmadcsswd", IX86_BUILTIN_PMADCSSWD, 0, (int)MULTI_ARG_3_HI_SI },
21428 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcswd, "__builtin_ia32_pmadcswd", IX86_BUILTIN_PMADCSWD, 0, (int)MULTI_ARG_3_HI_SI },
21429 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv2di3, "__builtin_ia32_protq", IX86_BUILTIN_PROTQ, 0, (int)MULTI_ARG_2_DI },
21430 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv4si3, "__builtin_ia32_protd", IX86_BUILTIN_PROTD, 0, (int)MULTI_ARG_2_SI },
21431 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv8hi3, "__builtin_ia32_protw", IX86_BUILTIN_PROTW, 0, (int)MULTI_ARG_2_HI },
21432 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv16qi3, "__builtin_ia32_protb", IX86_BUILTIN_PROTB, 0, (int)MULTI_ARG_2_QI },
21433 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv2di3, "__builtin_ia32_protqi", IX86_BUILTIN_PROTQ_IMM, 0, (int)MULTI_ARG_2_DI_IMM },
21434 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv4si3, "__builtin_ia32_protdi", IX86_BUILTIN_PROTD_IMM, 0, (int)MULTI_ARG_2_SI_IMM },
21435 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv8hi3, "__builtin_ia32_protwi", IX86_BUILTIN_PROTW_IMM, 0, (int)MULTI_ARG_2_HI_IMM },
21436 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv16qi3, "__builtin_ia32_protbi", IX86_BUILTIN_PROTB_IMM, 0, (int)MULTI_ARG_2_QI_IMM },
21437 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv2di3, "__builtin_ia32_pshaq", IX86_BUILTIN_PSHAQ, 0, (int)MULTI_ARG_2_DI },
21438 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv4si3, "__builtin_ia32_pshad", IX86_BUILTIN_PSHAD, 0, (int)MULTI_ARG_2_SI },
21439 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv8hi3, "__builtin_ia32_pshaw", IX86_BUILTIN_PSHAW, 0, (int)MULTI_ARG_2_HI },
21440 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv16qi3, "__builtin_ia32_pshab", IX86_BUILTIN_PSHAB, 0, (int)MULTI_ARG_2_QI },
21441 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv2di3, "__builtin_ia32_pshlq", IX86_BUILTIN_PSHLQ, 0, (int)MULTI_ARG_2_DI },
21442 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv4si3, "__builtin_ia32_pshld", IX86_BUILTIN_PSHLD, 0, (int)MULTI_ARG_2_SI },
21443 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv8hi3, "__builtin_ia32_pshlw", IX86_BUILTIN_PSHLW, 0, (int)MULTI_ARG_2_HI },
21444 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv16qi3, "__builtin_ia32_pshlb", IX86_BUILTIN_PSHLB, 0, (int)MULTI_ARG_2_QI },
21445 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv4sf2, "__builtin_ia32_frczss", IX86_BUILTIN_FRCZSS, 0, (int)MULTI_ARG_2_SF },
21446 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv2df2, "__builtin_ia32_frczsd", IX86_BUILTIN_FRCZSD, 0, (int)MULTI_ARG_2_DF },
21447 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv4sf2, "__builtin_ia32_frczps", IX86_BUILTIN_FRCZPS, 0, (int)MULTI_ARG_1_SF },
21448 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv2df2, "__builtin_ia32_frczpd", IX86_BUILTIN_FRCZPD, 0, (int)MULTI_ARG_1_DF },
21449 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtph2ps, "__builtin_ia32_cvtph2ps", IX86_BUILTIN_CVTPH2PS, 0, (int)MULTI_ARG_1_PH2PS },
21450 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtps2ph, "__builtin_ia32_cvtps2ph", IX86_BUILTIN_CVTPS2PH, 0, (int)MULTI_ARG_1_PS2PH },
21451 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbw, "__builtin_ia32_phaddbw", IX86_BUILTIN_PHADDBW, 0, (int)MULTI_ARG_1_QI_HI },
21452 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbd, "__builtin_ia32_phaddbd", IX86_BUILTIN_PHADDBD, 0, (int)MULTI_ARG_1_QI_SI },
21453 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbq, "__builtin_ia32_phaddbq", IX86_BUILTIN_PHADDBQ, 0, (int)MULTI_ARG_1_QI_DI },
21454 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwd, "__builtin_ia32_phaddwd", IX86_BUILTIN_PHADDWD, 0, (int)MULTI_ARG_1_HI_SI },
21455 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwq, "__builtin_ia32_phaddwq", IX86_BUILTIN_PHADDWQ, 0, (int)MULTI_ARG_1_HI_DI },
21456 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadddq, "__builtin_ia32_phadddq", IX86_BUILTIN_PHADDDQ, 0, (int)MULTI_ARG_1_SI_DI },
21457 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubw, "__builtin_ia32_phaddubw", IX86_BUILTIN_PHADDUBW, 0, (int)MULTI_ARG_1_QI_HI },
21458 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubd, "__builtin_ia32_phaddubd", IX86_BUILTIN_PHADDUBD, 0, (int)MULTI_ARG_1_QI_SI },
21459 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubq, "__builtin_ia32_phaddubq", IX86_BUILTIN_PHADDUBQ, 0, (int)MULTI_ARG_1_QI_DI },
21460 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwd, "__builtin_ia32_phadduwd", IX86_BUILTIN_PHADDUWD, 0, (int)MULTI_ARG_1_HI_SI },
21461 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwq, "__builtin_ia32_phadduwq", IX86_BUILTIN_PHADDUWQ, 0, (int)MULTI_ARG_1_HI_DI },
21462 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddudq, "__builtin_ia32_phaddudq", IX86_BUILTIN_PHADDUDQ, 0, (int)MULTI_ARG_1_SI_DI },
21463 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubbw, "__builtin_ia32_phsubbw", IX86_BUILTIN_PHSUBBW, 0, (int)MULTI_ARG_1_QI_HI },
21464 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubwd, "__builtin_ia32_phsubwd", IX86_BUILTIN_PHSUBWD, 0, (int)MULTI_ARG_1_HI_SI },
21465 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubdq, "__builtin_ia32_phsubdq", IX86_BUILTIN_PHSUBDQ, 0, (int)MULTI_ARG_1_SI_DI },
21466
21467 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comeqss", IX86_BUILTIN_COMEQSS, EQ, (int)MULTI_ARG_2_SF_CMP },
21468 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comness", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP },
21469 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comneqss", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP },
21470 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comltss", IX86_BUILTIN_COMLTSS, LT, (int)MULTI_ARG_2_SF_CMP },
21471 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comless", IX86_BUILTIN_COMLESS, LE, (int)MULTI_ARG_2_SF_CMP },
21472 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgtss", IX86_BUILTIN_COMGTSS, GT, (int)MULTI_ARG_2_SF_CMP },
21473 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgess", IX86_BUILTIN_COMGESS, GE, (int)MULTI_ARG_2_SF_CMP },
21474 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comueqss", IX86_BUILTIN_COMUEQSS, UNEQ, (int)MULTI_ARG_2_SF_CMP },
21475 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuness", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP },
21476 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuneqss", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP },
21477 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunltss", IX86_BUILTIN_COMULTSS, UNLT, (int)MULTI_ARG_2_SF_CMP },
21478 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunless", IX86_BUILTIN_COMULESS, UNLE, (int)MULTI_ARG_2_SF_CMP },
21479 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungtss", IX86_BUILTIN_COMUGTSS, UNGT, (int)MULTI_ARG_2_SF_CMP },
21480 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungess", IX86_BUILTIN_COMUGESS, UNGE, (int)MULTI_ARG_2_SF_CMP },
21481 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comordss", IX86_BUILTIN_COMORDSS, ORDERED, (int)MULTI_ARG_2_SF_CMP },
21482 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunordss", IX86_BUILTIN_COMUNORDSS, UNORDERED, (int)MULTI_ARG_2_SF_CMP },
21483
21484 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comeqsd", IX86_BUILTIN_COMEQSD, EQ, (int)MULTI_ARG_2_DF_CMP },
21485 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comnesd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP },
21486 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comneqsd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP },
21487 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comltsd", IX86_BUILTIN_COMLTSD, LT, (int)MULTI_ARG_2_DF_CMP },
21488 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comlesd", IX86_BUILTIN_COMLESD, LE, (int)MULTI_ARG_2_DF_CMP },
21489 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgtsd", IX86_BUILTIN_COMGTSD, GT, (int)MULTI_ARG_2_DF_CMP },
21490 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgesd", IX86_BUILTIN_COMGESD, GE, (int)MULTI_ARG_2_DF_CMP },
21491 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comueqsd", IX86_BUILTIN_COMUEQSD, UNEQ, (int)MULTI_ARG_2_DF_CMP },
21492 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunesd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP },
21493 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comuneqsd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP },
21494 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunltsd", IX86_BUILTIN_COMULTSD, UNLT, (int)MULTI_ARG_2_DF_CMP },
21495 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunlesd", IX86_BUILTIN_COMULESD, UNLE, (int)MULTI_ARG_2_DF_CMP },
21496 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungtsd", IX86_BUILTIN_COMUGTSD, UNGT, (int)MULTI_ARG_2_DF_CMP },
21497 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungesd", IX86_BUILTIN_COMUGESD, UNGE, (int)MULTI_ARG_2_DF_CMP },
21498 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comordsd", IX86_BUILTIN_COMORDSD, ORDERED, (int)MULTI_ARG_2_DF_CMP },
21499 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunordsd", IX86_BUILTIN_COMUNORDSD, UNORDERED, (int)MULTI_ARG_2_DF_CMP },
21500
21501 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comeqps", IX86_BUILTIN_COMEQPS, EQ, (int)MULTI_ARG_2_SF_CMP },
21502 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP },
21503 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneqps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP },
21504 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comltps", IX86_BUILTIN_COMLTPS, LT, (int)MULTI_ARG_2_SF_CMP },
21505 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comleps", IX86_BUILTIN_COMLEPS, LE, (int)MULTI_ARG_2_SF_CMP },
21506 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgtps", IX86_BUILTIN_COMGTPS, GT, (int)MULTI_ARG_2_SF_CMP },
21507 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgeps", IX86_BUILTIN_COMGEPS, GE, (int)MULTI_ARG_2_SF_CMP },
21508 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comueqps", IX86_BUILTIN_COMUEQPS, UNEQ, (int)MULTI_ARG_2_SF_CMP },
21509 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP },
21510 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneqps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP },
21511 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunltps", IX86_BUILTIN_COMULTPS, UNLT, (int)MULTI_ARG_2_SF_CMP },
21512 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunleps", IX86_BUILTIN_COMULEPS, UNLE, (int)MULTI_ARG_2_SF_CMP },
21513 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungtps", IX86_BUILTIN_COMUGTPS, UNGT, (int)MULTI_ARG_2_SF_CMP },
21514 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungeps", IX86_BUILTIN_COMUGEPS, UNGE, (int)MULTI_ARG_2_SF_CMP },
21515 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comordps", IX86_BUILTIN_COMORDPS, ORDERED, (int)MULTI_ARG_2_SF_CMP },
21516 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunordps", IX86_BUILTIN_COMUNORDPS, UNORDERED, (int)MULTI_ARG_2_SF_CMP },
21517
21518 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comeqpd", IX86_BUILTIN_COMEQPD, EQ, (int)MULTI_ARG_2_DF_CMP },
21519 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comnepd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP },
21520 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comneqpd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP },
21521 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comltpd", IX86_BUILTIN_COMLTPD, LT, (int)MULTI_ARG_2_DF_CMP },
21522 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comlepd", IX86_BUILTIN_COMLEPD, LE, (int)MULTI_ARG_2_DF_CMP },
21523 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgtpd", IX86_BUILTIN_COMGTPD, GT, (int)MULTI_ARG_2_DF_CMP },
21524 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgepd", IX86_BUILTIN_COMGEPD, GE, (int)MULTI_ARG_2_DF_CMP },
21525 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comueqpd", IX86_BUILTIN_COMUEQPD, UNEQ, (int)MULTI_ARG_2_DF_CMP },
21526 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunepd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP },
21527 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comuneqpd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP },
21528 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunltpd", IX86_BUILTIN_COMULTPD, UNLT, (int)MULTI_ARG_2_DF_CMP },
21529 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunlepd", IX86_BUILTIN_COMULEPD, UNLE, (int)MULTI_ARG_2_DF_CMP },
21530 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungtpd", IX86_BUILTIN_COMUGTPD, UNGT, (int)MULTI_ARG_2_DF_CMP },
21531 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungepd", IX86_BUILTIN_COMUGEPD, UNGE, (int)MULTI_ARG_2_DF_CMP },
21532 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comordpd", IX86_BUILTIN_COMORDPD, ORDERED, (int)MULTI_ARG_2_DF_CMP },
21533 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunordpd", IX86_BUILTIN_COMUNORDPD, UNORDERED, (int)MULTI_ARG_2_DF_CMP },
21534
21535 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomeqb", IX86_BUILTIN_PCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
21536 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
21537 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneqb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
21538 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomltb", IX86_BUILTIN_PCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
21539 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomleb", IX86_BUILTIN_PCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
21540 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgtb", IX86_BUILTIN_PCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
21541 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgeb", IX86_BUILTIN_PCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
21542
21543 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomeqw", IX86_BUILTIN_PCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
21544 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomnew", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
21545 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomneqw", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
21546 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomltw", IX86_BUILTIN_PCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
21547 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomlew", IX86_BUILTIN_PCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
21548 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgtw", IX86_BUILTIN_PCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
21549 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgew", IX86_BUILTIN_PCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
21550
21551 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomeqd", IX86_BUILTIN_PCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
21552 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomned", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
21553 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomneqd", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
21554 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomltd", IX86_BUILTIN_PCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
21555 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomled", IX86_BUILTIN_PCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
21556 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomgtd", IX86_BUILTIN_PCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
21557 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomged", IX86_BUILTIN_PCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
21558
21559 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomeqq", IX86_BUILTIN_PCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
21560 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
21561 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneqq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
21562 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomltq", IX86_BUILTIN_PCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
21563 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomleq", IX86_BUILTIN_PCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
21564 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgtq", IX86_BUILTIN_PCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
21565 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgeq", IX86_BUILTIN_PCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
21566
21567 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomequb", IX86_BUILTIN_PCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
21568 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomneub", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
21569 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomnequb", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
21570 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomltub", IX86_BUILTIN_PCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
21571 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomleub", IX86_BUILTIN_PCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
21572 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgtub", IX86_BUILTIN_PCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
21573 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgeub", IX86_BUILTIN_PCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
21574
21575 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomequw", IX86_BUILTIN_PCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
21576 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomneuw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
21577 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomnequw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
21578 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomltuw", IX86_BUILTIN_PCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
21579 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomleuw", IX86_BUILTIN_PCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
21580 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgtuw", IX86_BUILTIN_PCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
21581 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgeuw", IX86_BUILTIN_PCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
21582
21583 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomequd", IX86_BUILTIN_PCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
21584 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomneud", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
21585 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomnequd", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
21586 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomltud", IX86_BUILTIN_PCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
21587 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomleud", IX86_BUILTIN_PCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
21588 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgtud", IX86_BUILTIN_PCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
21589 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgeud", IX86_BUILTIN_PCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
21590
21591 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomequq", IX86_BUILTIN_PCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
21592 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomneuq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
21593 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomnequq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
21594 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomltuq", IX86_BUILTIN_PCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
21595 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomleuq", IX86_BUILTIN_PCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
21596 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgtuq", IX86_BUILTIN_PCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
21597 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgeuq", IX86_BUILTIN_PCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
21598
21599 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalsess", IX86_BUILTIN_COMFALSESS, COM_FALSE_S, (int)MULTI_ARG_2_SF_TF },
21600 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtruess", IX86_BUILTIN_COMTRUESS, COM_TRUE_S, (int)MULTI_ARG_2_SF_TF },
21601 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalseps", IX86_BUILTIN_COMFALSEPS, COM_FALSE_P, (int)MULTI_ARG_2_SF_TF },
21602 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtrueps", IX86_BUILTIN_COMTRUEPS, COM_TRUE_P, (int)MULTI_ARG_2_SF_TF },
21603 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsesd", IX86_BUILTIN_COMFALSESD, COM_FALSE_S, (int)MULTI_ARG_2_DF_TF },
21604 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruesd", IX86_BUILTIN_COMTRUESD, COM_TRUE_S, (int)MULTI_ARG_2_DF_TF },
21605 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsepd", IX86_BUILTIN_COMFALSEPD, COM_FALSE_P, (int)MULTI_ARG_2_DF_TF },
21606 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruepd", IX86_BUILTIN_COMTRUEPD, COM_TRUE_P, (int)MULTI_ARG_2_DF_TF },
21607
21608 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseb", IX86_BUILTIN_PCOMFALSEB, PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
21609 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalsew", IX86_BUILTIN_PCOMFALSEW, PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
21610 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalsed", IX86_BUILTIN_PCOMFALSED, PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
21611 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseq", IX86_BUILTIN_PCOMFALSEQ, PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
21612 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseub",IX86_BUILTIN_PCOMFALSEUB,PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
21613 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalseuw",IX86_BUILTIN_PCOMFALSEUW,PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
21614 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalseud",IX86_BUILTIN_PCOMFALSEUD,PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
21615 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseuq",IX86_BUILTIN_PCOMFALSEUQ,PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
21616
21617 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueb", IX86_BUILTIN_PCOMTRUEB, PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
21618 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtruew", IX86_BUILTIN_PCOMTRUEW, PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
21619 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrued", IX86_BUILTIN_PCOMTRUED, PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
21620 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueq", IX86_BUILTIN_PCOMTRUEQ, PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
21621 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueub", IX86_BUILTIN_PCOMTRUEUB, PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
21622 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtrueuw", IX86_BUILTIN_PCOMTRUEUW, PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
21623 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrueud", IX86_BUILTIN_PCOMTRUEUD, PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
21624 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueuq", IX86_BUILTIN_PCOMTRUEUQ, PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
21625 };
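
/* An illustrative reading of one entry in the table above: the row

     { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3,
       "__builtin_ia32_pcomltd", IX86_BUILTIN_PCOMLTD, LT,
       (int)MULTI_ARG_2_SI_CMP }

   (quoted verbatim from the table) ties the builtin name to its ISA mask,
   insn pattern, builtin code, RTX comparison code and argument layout, so
   a single generic multi-arg expander can presumably emit the right
   pattern for every SSE5 compare builtin.  Note that the "...neq..."
   spellings deliberately reuse the "...ne..." builtin codes, making them
   pure aliases.  */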
21626
21627 /* Set up all the MMX/SSE builtins, even builtins for instructions that are
21628 not in the current target ISA, so that the user can compile particular
21629 modules with target specific options that differ from the command line
21630 options. */
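/* As an illustration (a sketch, not code from this file), per-function
   target options mean that a unit compiled with plain -msse2 may still
   contain something like

     __m128i __attribute__((__target__("sse4.2")))
     cmpgt64 (__m128i a, __m128i b)
     {
       return (__m128i) __builtin_ia32_pcmpgtq ((__v2di) a, (__v2di) b);
     }

   so the SSE4.2 builtin has to be registered up front even though the
   command-line ISA does not enable it.  */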
21631 static void
21632 ix86_init_mmx_sse_builtins (void)
21633 {
21634 const struct builtin_description * d;
21635 size_t i;
21636
21637 tree V16QI_type_node = build_vector_type_for_mode (char_type_node, V16QImode);
21638 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
21639 tree V1DI_type_node
21640 = build_vector_type_for_mode (long_long_integer_type_node, V1DImode);
21641 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
21642 tree V2DI_type_node
21643 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
21644 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
21645 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
21646 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
21647 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
21648 tree V8QI_type_node = build_vector_type_for_mode (char_type_node, V8QImode);
21649 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
21650
21651 tree pchar_type_node = build_pointer_type (char_type_node);
21652 tree pcchar_type_node
21653 = build_pointer_type (build_type_variant (char_type_node, 1, 0));
21654 tree pfloat_type_node = build_pointer_type (float_type_node);
21655 tree pcfloat_type_node
21656 = build_pointer_type (build_type_variant (float_type_node, 1, 0));
21657 tree pv2sf_type_node = build_pointer_type (V2SF_type_node);
21658 tree pcv2sf_type_node
21659 = build_pointer_type (build_type_variant (V2SF_type_node, 1, 0));
21660 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
21661 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
21662
21663 /* Comparisons. */
21664 tree int_ftype_v4sf_v4sf
21665 = build_function_type_list (integer_type_node,
21666 V4SF_type_node, V4SF_type_node, NULL_TREE);
21667 tree v4si_ftype_v4sf_v4sf
21668 = build_function_type_list (V4SI_type_node,
21669 V4SF_type_node, V4SF_type_node, NULL_TREE);
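/* Each *_ftype_* tree built here is just a function prototype over the
   vector type nodes above; v4si_ftype_v4sf_v4sf, for instance,
   corresponds roughly to the C declaration

     __v4si f (__v4sf, __v4sf);

   (__v4si and __v4sf stand for the usual GCC vector typedefs; the
   prototype is shown only for illustration).  */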
21670 /* MMX/SSE/integer conversions. */
21671 tree int_ftype_v4sf
21672 = build_function_type_list (integer_type_node,
21673 V4SF_type_node, NULL_TREE);
21674 tree int64_ftype_v4sf
21675 = build_function_type_list (long_long_integer_type_node,
21676 V4SF_type_node, NULL_TREE);
21677 tree int_ftype_v8qi
21678 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
21679 tree v4sf_ftype_v4sf_int
21680 = build_function_type_list (V4SF_type_node,
21681 V4SF_type_node, integer_type_node, NULL_TREE);
21682 tree v4sf_ftype_v4sf_int64
21683 = build_function_type_list (V4SF_type_node,
21684 V4SF_type_node, long_long_integer_type_node,
21685 NULL_TREE);
21686 tree v4sf_ftype_v4sf_v2si
21687 = build_function_type_list (V4SF_type_node,
21688 V4SF_type_node, V2SI_type_node, NULL_TREE);
21689
21690 /* Miscellaneous. */
21691 tree v8qi_ftype_v4hi_v4hi
21692 = build_function_type_list (V8QI_type_node,
21693 V4HI_type_node, V4HI_type_node, NULL_TREE);
21694 tree v4hi_ftype_v2si_v2si
21695 = build_function_type_list (V4HI_type_node,
21696 V2SI_type_node, V2SI_type_node, NULL_TREE);
21697 tree v4sf_ftype_v4sf_v4sf_int
21698 = build_function_type_list (V4SF_type_node,
21699 V4SF_type_node, V4SF_type_node,
21700 integer_type_node, NULL_TREE);
21701 tree v2si_ftype_v4hi_v4hi
21702 = build_function_type_list (V2SI_type_node,
21703 V4HI_type_node, V4HI_type_node, NULL_TREE);
21704 tree v4hi_ftype_v4hi_int
21705 = build_function_type_list (V4HI_type_node,
21706 V4HI_type_node, integer_type_node, NULL_TREE);
21707 tree v2si_ftype_v2si_int
21708 = build_function_type_list (V2SI_type_node,
21709 V2SI_type_node, integer_type_node, NULL_TREE);
21710 tree v1di_ftype_v1di_int
21711 = build_function_type_list (V1DI_type_node,
21712 V1DI_type_node, integer_type_node, NULL_TREE);
21713
21714 tree void_ftype_void
21715 = build_function_type (void_type_node, void_list_node);
21716 tree void_ftype_unsigned
21717 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
21718 tree void_ftype_unsigned_unsigned
21719 = build_function_type_list (void_type_node, unsigned_type_node,
21720 unsigned_type_node, NULL_TREE);
21721 tree void_ftype_pcvoid_unsigned_unsigned
21722 = build_function_type_list (void_type_node, const_ptr_type_node,
21723 unsigned_type_node, unsigned_type_node,
21724 NULL_TREE);
21725 tree unsigned_ftype_void
21726 = build_function_type (unsigned_type_node, void_list_node);
21727 tree v2si_ftype_v4sf
21728 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
21729 /* Loads/stores. */
21730 tree void_ftype_v8qi_v8qi_pchar
21731 = build_function_type_list (void_type_node,
21732 V8QI_type_node, V8QI_type_node,
21733 pchar_type_node, NULL_TREE);
21734 tree v4sf_ftype_pcfloat
21735 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
21736 tree v4sf_ftype_v4sf_pcv2sf
21737 = build_function_type_list (V4SF_type_node,
21738 V4SF_type_node, pcv2sf_type_node, NULL_TREE);
21739 tree void_ftype_pv2sf_v4sf
21740 = build_function_type_list (void_type_node,
21741 pv2sf_type_node, V4SF_type_node, NULL_TREE);
21742 tree void_ftype_pfloat_v4sf
21743 = build_function_type_list (void_type_node,
21744 pfloat_type_node, V4SF_type_node, NULL_TREE);
21745 tree void_ftype_pdi_di
21746 = build_function_type_list (void_type_node,
21747 pdi_type_node, long_long_unsigned_type_node,
21748 NULL_TREE);
21749 tree void_ftype_pv2di_v2di
21750 = build_function_type_list (void_type_node,
21751 pv2di_type_node, V2DI_type_node, NULL_TREE);
21752 /* Normal vector unops. */
21753 tree v4sf_ftype_v4sf
21754 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
21755 tree v16qi_ftype_v16qi
21756 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
21757 tree v8hi_ftype_v8hi
21758 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
21759 tree v4si_ftype_v4si
21760 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
21761 tree v8qi_ftype_v8qi
21762 = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
21763 tree v4hi_ftype_v4hi
21764 = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
21765
21766 /* Normal vector binops. */
21767 tree v4sf_ftype_v4sf_v4sf
21768 = build_function_type_list (V4SF_type_node,
21769 V4SF_type_node, V4SF_type_node, NULL_TREE);
21770 tree v8qi_ftype_v8qi_v8qi
21771 = build_function_type_list (V8QI_type_node,
21772 V8QI_type_node, V8QI_type_node, NULL_TREE);
21773 tree v4hi_ftype_v4hi_v4hi
21774 = build_function_type_list (V4HI_type_node,
21775 V4HI_type_node, V4HI_type_node, NULL_TREE);
21776 tree v2si_ftype_v2si_v2si
21777 = build_function_type_list (V2SI_type_node,
21778 V2SI_type_node, V2SI_type_node, NULL_TREE);
21779 tree v1di_ftype_v1di_v1di
21780 = build_function_type_list (V1DI_type_node,
21781 V1DI_type_node, V1DI_type_node, NULL_TREE);
21782 tree v1di_ftype_v1di_v1di_int
21783 = build_function_type_list (V1DI_type_node,
21784 V1DI_type_node, V1DI_type_node,
21785 integer_type_node, NULL_TREE);
21786 tree v2si_ftype_v2sf
21787 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
21788 tree v2sf_ftype_v2si
21789 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
21790 tree v2si_ftype_v2si
21791 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
21792 tree v2sf_ftype_v2sf
21793 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
21794 tree v2sf_ftype_v2sf_v2sf
21795 = build_function_type_list (V2SF_type_node,
21796 V2SF_type_node, V2SF_type_node, NULL_TREE);
21797 tree v2si_ftype_v2sf_v2sf
21798 = build_function_type_list (V2SI_type_node,
21799 V2SF_type_node, V2SF_type_node, NULL_TREE);
21800 tree pint_type_node = build_pointer_type (integer_type_node);
21801 tree pdouble_type_node = build_pointer_type (double_type_node);
21802 tree pcdouble_type_node = build_pointer_type (
21803 build_type_variant (double_type_node, 1, 0));
21804 tree int_ftype_v2df_v2df
21805 = build_function_type_list (integer_type_node,
21806 V2DF_type_node, V2DF_type_node, NULL_TREE);
21807
21808 tree void_ftype_pcvoid
21809 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
21810 tree v4sf_ftype_v4si
21811 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
21812 tree v4si_ftype_v4sf
21813 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
21814 tree v2df_ftype_v4si
21815 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
21816 tree v4si_ftype_v2df
21817 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
21818 tree v4si_ftype_v2df_v2df
21819 = build_function_type_list (V4SI_type_node,
21820 V2DF_type_node, V2DF_type_node, NULL_TREE);
21821 tree v2si_ftype_v2df
21822 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
21823 tree v4sf_ftype_v2df
21824 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
21825 tree v2df_ftype_v2si
21826 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
21827 tree v2df_ftype_v4sf
21828 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
21829 tree int_ftype_v2df
21830 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
21831 tree int64_ftype_v2df
21832 = build_function_type_list (long_long_integer_type_node,
21833 V2DF_type_node, NULL_TREE);
21834 tree v2df_ftype_v2df_int
21835 = build_function_type_list (V2DF_type_node,
21836 V2DF_type_node, integer_type_node, NULL_TREE);
21837 tree v2df_ftype_v2df_int64
21838 = build_function_type_list (V2DF_type_node,
21839 V2DF_type_node, long_long_integer_type_node,
21840 NULL_TREE);
21841 tree v4sf_ftype_v4sf_v2df
21842 = build_function_type_list (V4SF_type_node,
21843 V4SF_type_node, V2DF_type_node, NULL_TREE);
21844 tree v2df_ftype_v2df_v4sf
21845 = build_function_type_list (V2DF_type_node,
21846 V2DF_type_node, V4SF_type_node, NULL_TREE);
21847 tree v2df_ftype_v2df_v2df_int
21848 = build_function_type_list (V2DF_type_node,
21849 V2DF_type_node, V2DF_type_node,
21850 integer_type_node,
21851 NULL_TREE);
21852 tree v2df_ftype_v2df_pcdouble
21853 = build_function_type_list (V2DF_type_node,
21854 V2DF_type_node, pcdouble_type_node, NULL_TREE);
21855 tree void_ftype_pdouble_v2df
21856 = build_function_type_list (void_type_node,
21857 pdouble_type_node, V2DF_type_node, NULL_TREE);
21858 tree void_ftype_pint_int
21859 = build_function_type_list (void_type_node,
21860 pint_type_node, integer_type_node, NULL_TREE);
21861 tree void_ftype_v16qi_v16qi_pchar
21862 = build_function_type_list (void_type_node,
21863 V16QI_type_node, V16QI_type_node,
21864 pchar_type_node, NULL_TREE);
21865 tree v2df_ftype_pcdouble
21866 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
21867 tree v2df_ftype_v2df_v2df
21868 = build_function_type_list (V2DF_type_node,
21869 V2DF_type_node, V2DF_type_node, NULL_TREE);
21870 tree v16qi_ftype_v16qi_v16qi
21871 = build_function_type_list (V16QI_type_node,
21872 V16QI_type_node, V16QI_type_node, NULL_TREE);
21873 tree v8hi_ftype_v8hi_v8hi
21874 = build_function_type_list (V8HI_type_node,
21875 V8HI_type_node, V8HI_type_node, NULL_TREE);
21876 tree v4si_ftype_v4si_v4si
21877 = build_function_type_list (V4SI_type_node,
21878 V4SI_type_node, V4SI_type_node, NULL_TREE);
21879 tree v2di_ftype_v2di_v2di
21880 = build_function_type_list (V2DI_type_node,
21881 V2DI_type_node, V2DI_type_node, NULL_TREE);
21882 tree v2di_ftype_v2df_v2df
21883 = build_function_type_list (V2DI_type_node,
21884 V2DF_type_node, V2DF_type_node, NULL_TREE);
21885 tree v2df_ftype_v2df
21886 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
21887 tree v2di_ftype_v2di_int
21888 = build_function_type_list (V2DI_type_node,
21889 V2DI_type_node, integer_type_node, NULL_TREE);
21890 tree v2di_ftype_v2di_v2di_int
21891 = build_function_type_list (V2DI_type_node, V2DI_type_node,
21892 V2DI_type_node, integer_type_node, NULL_TREE);
21893 tree v4si_ftype_v4si_int
21894 = build_function_type_list (V4SI_type_node,
21895 V4SI_type_node, integer_type_node, NULL_TREE);
21896 tree v8hi_ftype_v8hi_int
21897 = build_function_type_list (V8HI_type_node,
21898 V8HI_type_node, integer_type_node, NULL_TREE);
21899 tree v4si_ftype_v8hi_v8hi
21900 = build_function_type_list (V4SI_type_node,
21901 V8HI_type_node, V8HI_type_node, NULL_TREE);
21902 tree v1di_ftype_v8qi_v8qi
21903 = build_function_type_list (V1DI_type_node,
21904 V8QI_type_node, V8QI_type_node, NULL_TREE);
21905 tree v1di_ftype_v2si_v2si
21906 = build_function_type_list (V1DI_type_node,
21907 V2SI_type_node, V2SI_type_node, NULL_TREE);
21908 tree v2di_ftype_v16qi_v16qi
21909 = build_function_type_list (V2DI_type_node,
21910 V16QI_type_node, V16QI_type_node, NULL_TREE);
21911 tree v2di_ftype_v4si_v4si
21912 = build_function_type_list (V2DI_type_node,
21913 V4SI_type_node, V4SI_type_node, NULL_TREE);
21914 tree int_ftype_v16qi
21915 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
21916 tree v16qi_ftype_pcchar
21917 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
21918 tree void_ftype_pchar_v16qi
21919 = build_function_type_list (void_type_node,
21920 pchar_type_node, V16QI_type_node, NULL_TREE);
21921
21922 tree v2di_ftype_v2di_unsigned_unsigned
21923 = build_function_type_list (V2DI_type_node, V2DI_type_node,
21924 unsigned_type_node, unsigned_type_node,
21925 NULL_TREE);
21926 tree v2di_ftype_v2di_v2di_unsigned_unsigned
21927 = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node,
21928 unsigned_type_node, unsigned_type_node,
21929 NULL_TREE);
21930 tree v2di_ftype_v2di_v16qi
21931 = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node,
21932 NULL_TREE);
21933 tree v2df_ftype_v2df_v2df_v2df
21934 = build_function_type_list (V2DF_type_node,
21935 V2DF_type_node, V2DF_type_node,
21936 V2DF_type_node, NULL_TREE);
21937 tree v4sf_ftype_v4sf_v4sf_v4sf
21938 = build_function_type_list (V4SF_type_node,
21939 V4SF_type_node, V4SF_type_node,
21940 V4SF_type_node, NULL_TREE);
21941 tree v8hi_ftype_v16qi
21942 = build_function_type_list (V8HI_type_node, V16QI_type_node,
21943 NULL_TREE);
21944 tree v4si_ftype_v16qi
21945 = build_function_type_list (V4SI_type_node, V16QI_type_node,
21946 NULL_TREE);
21947 tree v2di_ftype_v16qi
21948 = build_function_type_list (V2DI_type_node, V16QI_type_node,
21949 NULL_TREE);
21950 tree v4si_ftype_v8hi
21951 = build_function_type_list (V4SI_type_node, V8HI_type_node,
21952 NULL_TREE);
21953 tree v2di_ftype_v8hi
21954 = build_function_type_list (V2DI_type_node, V8HI_type_node,
21955 NULL_TREE);
21956 tree v2di_ftype_v4si
21957 = build_function_type_list (V2DI_type_node, V4SI_type_node,
21958 NULL_TREE);
21959 tree v2di_ftype_pv2di
21960 = build_function_type_list (V2DI_type_node, pv2di_type_node,
21961 NULL_TREE);
21962 tree v16qi_ftype_v16qi_v16qi_int
21963 = build_function_type_list (V16QI_type_node, V16QI_type_node,
21964 V16QI_type_node, integer_type_node,
21965 NULL_TREE);
21966 tree v16qi_ftype_v16qi_v16qi_v16qi
21967 = build_function_type_list (V16QI_type_node, V16QI_type_node,
21968 V16QI_type_node, V16QI_type_node,
21969 NULL_TREE);
21970 tree v8hi_ftype_v8hi_v8hi_int
21971 = build_function_type_list (V8HI_type_node, V8HI_type_node,
21972 V8HI_type_node, integer_type_node,
21973 NULL_TREE);
21974 tree v4si_ftype_v4si_v4si_int
21975 = build_function_type_list (V4SI_type_node, V4SI_type_node,
21976 V4SI_type_node, integer_type_node,
21977 NULL_TREE);
21978 tree int_ftype_v2di_v2di
21979 = build_function_type_list (integer_type_node,
21980 V2DI_type_node, V2DI_type_node,
21981 NULL_TREE);
21982 tree int_ftype_v16qi_int_v16qi_int_int
21983 = build_function_type_list (integer_type_node,
21984 V16QI_type_node,
21985 integer_type_node,
21986 V16QI_type_node,
21987 integer_type_node,
21988 integer_type_node,
21989 NULL_TREE);
21990 tree v16qi_ftype_v16qi_int_v16qi_int_int
21991 = build_function_type_list (V16QI_type_node,
21992 V16QI_type_node,
21993 integer_type_node,
21994 V16QI_type_node,
21995 integer_type_node,
21996 integer_type_node,
21997 NULL_TREE);
21998 tree int_ftype_v16qi_v16qi_int
21999 = build_function_type_list (integer_type_node,
22000 V16QI_type_node,
22001 V16QI_type_node,
22002 integer_type_node,
22003 NULL_TREE);
22004
22005 /* SSE5 instructions. */
22006 tree v2di_ftype_v2di_v2di_v2di
22007 = build_function_type_list (V2DI_type_node,
22008 V2DI_type_node,
22009 V2DI_type_node,
22010 V2DI_type_node,
22011 NULL_TREE);
22012
22013 tree v4si_ftype_v4si_v4si_v4si
22014 = build_function_type_list (V4SI_type_node,
22015 V4SI_type_node,
22016 V4SI_type_node,
22017 V4SI_type_node,
22018 NULL_TREE);
22019
22020 tree v4si_ftype_v4si_v4si_v2di
22021 = build_function_type_list (V4SI_type_node,
22022 V4SI_type_node,
22023 V4SI_type_node,
22024 V2DI_type_node,
22025 NULL_TREE);
22026
22027 tree v8hi_ftype_v8hi_v8hi_v8hi
22028 = build_function_type_list (V8HI_type_node,
22029 V8HI_type_node,
22030 V8HI_type_node,
22031 V8HI_type_node,
22032 NULL_TREE);
22033
22034 tree v8hi_ftype_v8hi_v8hi_v4si
22035 = build_function_type_list (V8HI_type_node,
22036 V8HI_type_node,
22037 V8HI_type_node,
22038 V4SI_type_node,
22039 NULL_TREE);
22040
22041 tree v2df_ftype_v2df_v2df_v16qi
22042 = build_function_type_list (V2DF_type_node,
22043 V2DF_type_node,
22044 V2DF_type_node,
22045 V16QI_type_node,
22046 NULL_TREE);
22047
22048 tree v4sf_ftype_v4sf_v4sf_v16qi
22049 = build_function_type_list (V4SF_type_node,
22050 V4SF_type_node,
22051 V4SF_type_node,
22052 V16QI_type_node,
22053 NULL_TREE);
22054
22055 tree v2di_ftype_v2di_si
22056 = build_function_type_list (V2DI_type_node,
22057 V2DI_type_node,
22058 integer_type_node,
22059 NULL_TREE);
22060
22061 tree v4si_ftype_v4si_si
22062 = build_function_type_list (V4SI_type_node,
22063 V4SI_type_node,
22064 integer_type_node,
22065 NULL_TREE);
22066
22067 tree v8hi_ftype_v8hi_si
22068 = build_function_type_list (V8HI_type_node,
22069 V8HI_type_node,
22070 integer_type_node,
22071 NULL_TREE);
22072
22073 tree v16qi_ftype_v16qi_si
22074 = build_function_type_list (V16QI_type_node,
22075 V16QI_type_node,
22076 integer_type_node,
22077 NULL_TREE);
22078 tree v4sf_ftype_v4hi
22079 = build_function_type_list (V4SF_type_node,
22080 V4HI_type_node,
22081 NULL_TREE);
22082
22083 tree v4hi_ftype_v4sf
22084 = build_function_type_list (V4HI_type_node,
22085 V4SF_type_node,
22086 NULL_TREE);
22087
22088 tree v2di_ftype_v2di
22089 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
22090
22091 tree v16qi_ftype_v8hi_v8hi
22092 = build_function_type_list (V16QI_type_node,
22093 V8HI_type_node, V8HI_type_node,
22094 NULL_TREE);
22095 tree v8hi_ftype_v4si_v4si
22096 = build_function_type_list (V8HI_type_node,
22097 V4SI_type_node, V4SI_type_node,
22098 NULL_TREE);
22099 tree v8hi_ftype_v16qi_v16qi
22100 = build_function_type_list (V8HI_type_node,
22101 V16QI_type_node, V16QI_type_node,
22102 NULL_TREE);
22103 tree v4hi_ftype_v8qi_v8qi
22104 = build_function_type_list (V4HI_type_node,
22105 V8QI_type_node, V8QI_type_node,
22106 NULL_TREE);
22107 tree unsigned_ftype_unsigned_uchar
22108 = build_function_type_list (unsigned_type_node,
22109 unsigned_type_node,
22110 unsigned_char_type_node,
22111 NULL_TREE);
22112 tree unsigned_ftype_unsigned_ushort
22113 = build_function_type_list (unsigned_type_node,
22114 unsigned_type_node,
22115 short_unsigned_type_node,
22116 NULL_TREE);
22117 tree unsigned_ftype_unsigned_unsigned
22118 = build_function_type_list (unsigned_type_node,
22119 unsigned_type_node,
22120 unsigned_type_node,
22121 NULL_TREE);
22122 tree uint64_ftype_uint64_uint64
22123 = build_function_type_list (long_long_unsigned_type_node,
22124 long_long_unsigned_type_node,
22125 long_long_unsigned_type_node,
22126 NULL_TREE);
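/* These scalar signatures line up with builtins like the SSE4.2 CRC32
   family; for example (prototypes shown for illustration only)

     unsigned int __builtin_ia32_crc32qi (unsigned int, unsigned char);
     unsigned long long __builtin_ia32_crc32di (unsigned long long,
                                                unsigned long long);

   which are registered through the generic argument tables handled
   below.  */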
22127 tree float_ftype_float
22128 = build_function_type_list (float_type_node,
22129 float_type_node,
22130 NULL_TREE);
22131
22132 /* AVX builtins. */
22133 tree V32QI_type_node = build_vector_type_for_mode (char_type_node,
22134 V32QImode);
22135 tree V8SI_type_node = build_vector_type_for_mode (intSI_type_node,
22136 V8SImode);
22137 tree V8SF_type_node = build_vector_type_for_mode (float_type_node,
22138 V8SFmode);
22139 tree V4DI_type_node = build_vector_type_for_mode (long_long_integer_type_node,
22140 V4DImode);
22141 tree V4DF_type_node = build_vector_type_for_mode (double_type_node,
22142 V4DFmode);
22143 tree v8sf_ftype_v8sf
22144 = build_function_type_list (V8SF_type_node,
22145 V8SF_type_node,
22146 NULL_TREE);
22147 tree v8si_ftype_v8sf
22148 = build_function_type_list (V8SI_type_node,
22149 V8SF_type_node,
22150 NULL_TREE);
22151 tree v8sf_ftype_v8si
22152 = build_function_type_list (V8SF_type_node,
22153 V8SI_type_node,
22154 NULL_TREE);
22155 tree v4si_ftype_v4df
22156 = build_function_type_list (V4SI_type_node,
22157 V4DF_type_node,
22158 NULL_TREE);
22159 tree v4df_ftype_v4df
22160 = build_function_type_list (V4DF_type_node,
22161 V4DF_type_node,
22162 NULL_TREE);
22163 tree v4df_ftype_v4si
22164 = build_function_type_list (V4DF_type_node,
22165 V4SI_type_node,
22166 NULL_TREE);
22167 tree v4df_ftype_v4sf
22168 = build_function_type_list (V4DF_type_node,
22169 V4SF_type_node,
22170 NULL_TREE);
22171 tree v4sf_ftype_v4df
22172 = build_function_type_list (V4SF_type_node,
22173 V4DF_type_node,
22174 NULL_TREE);
22175 tree v8sf_ftype_v8sf_v8sf
22176 = build_function_type_list (V8SF_type_node,
22177 V8SF_type_node, V8SF_type_node,
22178 NULL_TREE);
22179 tree v4df_ftype_v4df_v4df
22180 = build_function_type_list (V4DF_type_node,
22181 V4DF_type_node, V4DF_type_node,
22182 NULL_TREE);
22183 tree v8sf_ftype_v8sf_int
22184 = build_function_type_list (V8SF_type_node,
22185 V8SF_type_node, integer_type_node,
22186 NULL_TREE);
22187 tree v4si_ftype_v8si_int
22188 = build_function_type_list (V4SI_type_node,
22189 V8SI_type_node, integer_type_node,
22190 NULL_TREE);
22191 tree v4df_ftype_v4df_int
22192 = build_function_type_list (V4DF_type_node,
22193 V4DF_type_node, integer_type_node,
22194 NULL_TREE);
22195 tree v4sf_ftype_v8sf_int
22196 = build_function_type_list (V4SF_type_node,
22197 V8SF_type_node, integer_type_node,
22198 NULL_TREE);
22199 tree v2df_ftype_v4df_int
22200 = build_function_type_list (V2DF_type_node,
22201 V4DF_type_node, integer_type_node,
22202 NULL_TREE);
22203 tree v8sf_ftype_v8sf_v8sf_int
22204 = build_function_type_list (V8SF_type_node,
22205 V8SF_type_node, V8SF_type_node,
22206 integer_type_node,
22207 NULL_TREE);
22208 tree v8sf_ftype_v8sf_v8sf_v8sf
22209 = build_function_type_list (V8SF_type_node,
22210 V8SF_type_node, V8SF_type_node,
22211 V8SF_type_node,
22212 NULL_TREE);
22213 tree v4df_ftype_v4df_v4df_v4df
22214 = build_function_type_list (V4DF_type_node,
22215 V4DF_type_node, V4DF_type_node,
22216 V4DF_type_node,
22217 NULL_TREE);
22218 tree v8si_ftype_v8si_v8si_int
22219 = build_function_type_list (V8SI_type_node,
22220 V8SI_type_node, V8SI_type_node,
22221 integer_type_node,
22222 NULL_TREE);
22223 tree v4df_ftype_v4df_v4df_int
22224 = build_function_type_list (V4DF_type_node,
22225 V4DF_type_node, V4DF_type_node,
22226 integer_type_node,
22227 NULL_TREE);
22228 tree v8sf_ftype_pcfloat
22229 = build_function_type_list (V8SF_type_node,
22230 pcfloat_type_node,
22231 NULL_TREE);
22232 tree v4df_ftype_pcdouble
22233 = build_function_type_list (V4DF_type_node,
22234 pcdouble_type_node,
22235 NULL_TREE);
22236 tree pcv4sf_type_node
22237 = build_pointer_type (build_type_variant (V4SF_type_node, 1, 0));
22238 tree pcv2df_type_node
22239 = build_pointer_type (build_type_variant (V2DF_type_node, 1, 0));
22240 tree v8sf_ftype_pcv4sf
22241 = build_function_type_list (V8SF_type_node,
22242 pcv4sf_type_node,
22243 NULL_TREE);
22244 tree v4df_ftype_pcv2df
22245 = build_function_type_list (V4DF_type_node,
22246 pcv2df_type_node,
22247 NULL_TREE);
22248 tree v32qi_ftype_pcchar
22249 = build_function_type_list (V32QI_type_node,
22250 pcchar_type_node,
22251 NULL_TREE);
22252 tree void_ftype_pchar_v32qi
22253 = build_function_type_list (void_type_node,
22254 pchar_type_node, V32QI_type_node,
22255 NULL_TREE);
22256 tree v8si_ftype_v8si_v4si_int
22257 = build_function_type_list (V8SI_type_node,
22258 V8SI_type_node, V4SI_type_node,
22259 integer_type_node,
22260 NULL_TREE);
22261 tree pv4di_type_node = build_pointer_type (V4DI_type_node);
22262 tree void_ftype_pv4di_v4di
22263 = build_function_type_list (void_type_node,
22264 pv4di_type_node, V4DI_type_node,
22265 NULL_TREE);
22266 tree v8sf_ftype_v8sf_v4sf_int
22267 = build_function_type_list (V8SF_type_node,
22268 V8SF_type_node, V4SF_type_node,
22269 integer_type_node,
22270 NULL_TREE);
22271 tree v4df_ftype_v4df_v2df_int
22272 = build_function_type_list (V4DF_type_node,
22273 V4DF_type_node, V2DF_type_node,
22274 integer_type_node,
22275 NULL_TREE);
22276 tree void_ftype_pfloat_v8sf
22277 = build_function_type_list (void_type_node,
22278 pfloat_type_node, V8SF_type_node,
22279 NULL_TREE);
22280 tree void_ftype_pdouble_v4df
22281 = build_function_type_list (void_type_node,
22282 pdouble_type_node, V4DF_type_node,
22283 NULL_TREE);
22284 tree pv8sf_type_node = build_pointer_type (V8SF_type_node);
22285 tree pv4sf_type_node = build_pointer_type (V4SF_type_node);
22286 tree pv4df_type_node = build_pointer_type (V4DF_type_node);
22287 tree pv2df_type_node = build_pointer_type (V2DF_type_node);
22288 tree pcv8sf_type_node
22289 = build_pointer_type (build_type_variant (V8SF_type_node, 1, 0));
22290 tree pcv4df_type_node
22291 = build_pointer_type (build_type_variant (V4DF_type_node, 1, 0));
22292 tree v8sf_ftype_pcv8sf_v8sf
22293 = build_function_type_list (V8SF_type_node,
22294 pcv8sf_type_node, V8SF_type_node,
22295 NULL_TREE);
22296 tree v4df_ftype_pcv4df_v4df
22297 = build_function_type_list (V4DF_type_node,
22298 pcv4df_type_node, V4DF_type_node,
22299 NULL_TREE);
22300 tree v4sf_ftype_pcv4sf_v4sf
22301 = build_function_type_list (V4SF_type_node,
22302 pcv4sf_type_node, V4SF_type_node,
22303 NULL_TREE);
22304 tree v2df_ftype_pcv2df_v2df
22305 = build_function_type_list (V2DF_type_node,
22306 pcv2df_type_node, V2DF_type_node,
22307 NULL_TREE);
22308 tree void_ftype_pv8sf_v8sf_v8sf
22309 = build_function_type_list (void_type_node,
22310 pv8sf_type_node, V8SF_type_node,
22311 V8SF_type_node,
22312 NULL_TREE);
22313 tree void_ftype_pv4df_v4df_v4df
22314 = build_function_type_list (void_type_node,
22315 pv4df_type_node, V4DF_type_node,
22316 V4DF_type_node,
22317 NULL_TREE);
22318 tree void_ftype_pv4sf_v4sf_v4sf
22319 = build_function_type_list (void_type_node,
22320 pv4sf_type_node, V4SF_type_node,
22321 V4SF_type_node,
22322 NULL_TREE);
22323 tree void_ftype_pv2df_v2df_v2df
22324 = build_function_type_list (void_type_node,
22325 pv2df_type_node, V2DF_type_node,
22326 V2DF_type_node,
22327 NULL_TREE);
22328 tree v4df_ftype_v2df
22329 = build_function_type_list (V4DF_type_node,
22330 V2DF_type_node,
22331 NULL_TREE);
22332 tree v8sf_ftype_v4sf
22333 = build_function_type_list (V8SF_type_node,
22334 V4SF_type_node,
22335 NULL_TREE);
22336 tree v8si_ftype_v4si
22337 = build_function_type_list (V8SI_type_node,
22338 V4SI_type_node,
22339 NULL_TREE);
22340 tree v2df_ftype_v4df
22341 = build_function_type_list (V2DF_type_node,
22342 V4DF_type_node,
22343 NULL_TREE);
22344 tree v4sf_ftype_v8sf
22345 = build_function_type_list (V4SF_type_node,
22346 V8SF_type_node,
22347 NULL_TREE);
22348 tree v4si_ftype_v8si
22349 = build_function_type_list (V4SI_type_node,
22350 V8SI_type_node,
22351 NULL_TREE);
22352 tree int_ftype_v4df
22353 = build_function_type_list (integer_type_node,
22354 V4DF_type_node,
22355 NULL_TREE);
22356 tree int_ftype_v8sf
22357 = build_function_type_list (integer_type_node,
22358 V8SF_type_node,
22359 NULL_TREE);
22360 tree int_ftype_v8sf_v8sf
22361 = build_function_type_list (integer_type_node,
22362 V8SF_type_node, V8SF_type_node,
22363 NULL_TREE);
22364 tree int_ftype_v4di_v4di
22365 = build_function_type_list (integer_type_node,
22366 V4DI_type_node, V4DI_type_node,
22367 NULL_TREE);
22368 tree int_ftype_v4df_v4df
22369 = build_function_type_list (integer_type_node,
22370 V4DF_type_node, V4DF_type_node,
22371 NULL_TREE);
22372 tree v8sf_ftype_v8sf_v8si
22373 = build_function_type_list (V8SF_type_node,
22374 V8SF_type_node, V8SI_type_node,
22375 NULL_TREE);
22376 tree v4df_ftype_v4df_v4di
22377 = build_function_type_list (V4DF_type_node,
22378 V4DF_type_node, V4DI_type_node,
22379 NULL_TREE);
22380 tree v4sf_ftype_v4sf_v4si
22381 = build_function_type_list (V4SF_type_node,
22382 V4SF_type_node, V4SI_type_node, NULL_TREE);
22383 tree v2df_ftype_v2df_v2di
22384 = build_function_type_list (V2DF_type_node,
22385 V2DF_type_node, V2DI_type_node, NULL_TREE);
22386
22387 tree ftype;
22388
22389 /* Add all special builtins with variable number of operands. */
22390 for (i = 0, d = bdesc_special_args;
22391 i < ARRAY_SIZE (bdesc_special_args);
22392 i++, d++)
22393 {
22394 tree type;
22395
22396 if (d->name == 0)
22397 continue;
22398
22399 switch ((enum ix86_special_builtin_type) d->flag)
22400 {
22401 case VOID_FTYPE_VOID:
22402 type = void_ftype_void;
22403 break;
22404 case V32QI_FTYPE_PCCHAR:
22405 type = v32qi_ftype_pcchar;
22406 break;
22407 case V16QI_FTYPE_PCCHAR:
22408 type = v16qi_ftype_pcchar;
22409 break;
22410 case V8SF_FTYPE_PCV4SF:
22411 type = v8sf_ftype_pcv4sf;
22412 break;
22413 case V8SF_FTYPE_PCFLOAT:
22414 type = v8sf_ftype_pcfloat;
22415 break;
22416 case V4DF_FTYPE_PCV2DF:
22417 type = v4df_ftype_pcv2df;
22418 break;
22419 case V4DF_FTYPE_PCDOUBLE:
22420 type = v4df_ftype_pcdouble;
22421 break;
22422 case V4SF_FTYPE_PCFLOAT:
22423 type = v4sf_ftype_pcfloat;
22424 break;
22425 case V2DI_FTYPE_PV2DI:
22426 type = v2di_ftype_pv2di;
22427 break;
22428 case V2DF_FTYPE_PCDOUBLE:
22429 type = v2df_ftype_pcdouble;
22430 break;
22431 case V8SF_FTYPE_PCV8SF_V8SF:
22432 type = v8sf_ftype_pcv8sf_v8sf;
22433 break;
22434 case V4DF_FTYPE_PCV4DF_V4DF:
22435 type = v4df_ftype_pcv4df_v4df;
22436 break;
22437 case V4SF_FTYPE_V4SF_PCV2SF:
22438 type = v4sf_ftype_v4sf_pcv2sf;
22439 break;
22440 case V4SF_FTYPE_PCV4SF_V4SF:
22441 type = v4sf_ftype_pcv4sf_v4sf;
22442 break;
22443 case V2DF_FTYPE_V2DF_PCDOUBLE:
22444 type = v2df_ftype_v2df_pcdouble;
22445 break;
22446 case V2DF_FTYPE_PCV2DF_V2DF:
22447 type = v2df_ftype_pcv2df_v2df;
22448 break;
22449 case VOID_FTYPE_PV2SF_V4SF:
22450 type = void_ftype_pv2sf_v4sf;
22451 break;
22452 case VOID_FTYPE_PV4DI_V4DI:
22453 type = void_ftype_pv4di_v4di;
22454 break;
22455 case VOID_FTYPE_PV2DI_V2DI:
22456 type = void_ftype_pv2di_v2di;
22457 break;
22458 case VOID_FTYPE_PCHAR_V32QI:
22459 type = void_ftype_pchar_v32qi;
22460 break;
22461 case VOID_FTYPE_PCHAR_V16QI:
22462 type = void_ftype_pchar_v16qi;
22463 break;
22464 case VOID_FTYPE_PFLOAT_V8SF:
22465 type = void_ftype_pfloat_v8sf;
22466 break;
22467 case VOID_FTYPE_PFLOAT_V4SF:
22468 type = void_ftype_pfloat_v4sf;
22469 break;
22470 case VOID_FTYPE_PDOUBLE_V4DF:
22471 type = void_ftype_pdouble_v4df;
22472 break;
22473 case VOID_FTYPE_PDOUBLE_V2DF:
22474 type = void_ftype_pdouble_v2df;
22475 break;
22476 case VOID_FTYPE_PDI_DI:
22477 type = void_ftype_pdi_di;
22478 break;
22479 case VOID_FTYPE_PINT_INT:
22480 type = void_ftype_pint_int;
22481 break;
22482 case VOID_FTYPE_PV8SF_V8SF_V8SF:
22483 type = void_ftype_pv8sf_v8sf_v8sf;
22484 break;
22485 case VOID_FTYPE_PV4DF_V4DF_V4DF:
22486 type = void_ftype_pv4df_v4df_v4df;
22487 break;
22488 case VOID_FTYPE_PV4SF_V4SF_V4SF:
22489 type = void_ftype_pv4sf_v4sf_v4sf;
22490 break;
22491 case VOID_FTYPE_PV2DF_V2DF_V2DF:
22492 type = void_ftype_pv2df_v2df_v2df;
22493 break;
22494 default:
22495 gcc_unreachable ();
22496 }
22497
22498 def_builtin (d->mask, d->name, type, d->code);
22499 }
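/* At this point every "special" builtin (loads, stores and other
   side-effecting operations) has been registered; def_builtin records
   the name, prototype and IX86_BUILTIN_* code together with d->mask,
   and use of a builtin whose ISA is not enabled is presumably rejected
   later, when the builtin is expanded.  */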
22500
22501 /* Add all builtins with variable number of operands. */
22502 for (i = 0, d = bdesc_args;
22503 i < ARRAY_SIZE (bdesc_args);
22504 i++, d++)
22505 {
22506 tree type;
22507
22508 if (d->name == 0)
22509 continue;
22510
22511 switch ((enum ix86_builtin_type) d->flag)
22512 {
22513 case FLOAT_FTYPE_FLOAT:
22514 type = float_ftype_float;
22515 break;
22516 case INT_FTYPE_V8SF_V8SF_PTEST:
22517 type = int_ftype_v8sf_v8sf;
22518 break;
22519 case INT_FTYPE_V4DI_V4DI_PTEST:
22520 type = int_ftype_v4di_v4di;
22521 break;
22522 case INT_FTYPE_V4DF_V4DF_PTEST:
22523 type = int_ftype_v4df_v4df;
22524 break;
22525 case INT_FTYPE_V4SF_V4SF_PTEST:
22526 type = int_ftype_v4sf_v4sf;
22527 break;
22528 case INT_FTYPE_V2DI_V2DI_PTEST:
22529 type = int_ftype_v2di_v2di;
22530 break;
22531 case INT_FTYPE_V2DF_V2DF_PTEST:
22532 type = int_ftype_v2df_v2df;
22533 break;
22534 case INT64_FTYPE_V4SF:
22535 type = int64_ftype_v4sf;
22536 break;
22537 case INT64_FTYPE_V2DF:
22538 type = int64_ftype_v2df;
22539 break;
22540 case INT_FTYPE_V16QI:
22541 type = int_ftype_v16qi;
22542 break;
22543 case INT_FTYPE_V8QI:
22544 type = int_ftype_v8qi;
22545 break;
22546 case INT_FTYPE_V8SF:
22547 type = int_ftype_v8sf;
22548 break;
22549 case INT_FTYPE_V4DF:
22550 type = int_ftype_v4df;
22551 break;
22552 case INT_FTYPE_V4SF:
22553 type = int_ftype_v4sf;
22554 break;
22555 case INT_FTYPE_V2DF:
22556 type = int_ftype_v2df;
22557 break;
22558 case V16QI_FTYPE_V16QI:
22559 type = v16qi_ftype_v16qi;
22560 break;
22561 case V8SI_FTYPE_V8SF:
22562 type = v8si_ftype_v8sf;
22563 break;
22564 case V8SI_FTYPE_V4SI:
22565 type = v8si_ftype_v4si;
22566 break;
22567 case V8HI_FTYPE_V8HI:
22568 type = v8hi_ftype_v8hi;
22569 break;
22570 case V8HI_FTYPE_V16QI:
22571 type = v8hi_ftype_v16qi;
22572 break;
22573 case V8QI_FTYPE_V8QI:
22574 type = v8qi_ftype_v8qi;
22575 break;
22576 case V8SF_FTYPE_V8SF:
22577 type = v8sf_ftype_v8sf;
22578 break;
22579 case V8SF_FTYPE_V8SI:
22580 type = v8sf_ftype_v8si;
22581 break;
22582 case V8SF_FTYPE_V4SF:
22583 type = v8sf_ftype_v4sf;
22584 break;
22585 case V4SI_FTYPE_V4DF:
22586 type = v4si_ftype_v4df;
22587 break;
22588 case V4SI_FTYPE_V4SI:
22589 type = v4si_ftype_v4si;
22590 break;
22591 case V4SI_FTYPE_V16QI:
22592 type = v4si_ftype_v16qi;
22593 break;
22594 case V4SI_FTYPE_V8SI:
22595 type = v4si_ftype_v8si;
22596 break;
22597 case V4SI_FTYPE_V8HI:
22598 type = v4si_ftype_v8hi;
22599 break;
22600 case V4SI_FTYPE_V4SF:
22601 type = v4si_ftype_v4sf;
22602 break;
22603 case V4SI_FTYPE_V2DF:
22604 type = v4si_ftype_v2df;
22605 break;
22606 case V4HI_FTYPE_V4HI:
22607 type = v4hi_ftype_v4hi;
22608 break;
22609 case V4DF_FTYPE_V4DF:
22610 type = v4df_ftype_v4df;
22611 break;
22612 case V4DF_FTYPE_V4SI:
22613 type = v4df_ftype_v4si;
22614 break;
22615 case V4DF_FTYPE_V4SF:
22616 type = v4df_ftype_v4sf;
22617 break;
22618 case V4DF_FTYPE_V2DF:
22619 type = v4df_ftype_v2df;
22620 break;
22621 case V4SF_FTYPE_V4SF:
22622 case V4SF_FTYPE_V4SF_VEC_MERGE:
22623 type = v4sf_ftype_v4sf;
22624 break;
22625 case V4SF_FTYPE_V8SF:
22626 type = v4sf_ftype_v8sf;
22627 break;
22628 case V4SF_FTYPE_V4SI:
22629 type = v4sf_ftype_v4si;
22630 break;
22631 case V4SF_FTYPE_V4DF:
22632 type = v4sf_ftype_v4df;
22633 break;
22634 case V4SF_FTYPE_V2DF:
22635 type = v4sf_ftype_v2df;
22636 break;
22637 case V2DI_FTYPE_V2DI:
22638 type = v2di_ftype_v2di;
22639 break;
22640 case V2DI_FTYPE_V16QI:
22641 type = v2di_ftype_v16qi;
22642 break;
22643 case V2DI_FTYPE_V8HI:
22644 type = v2di_ftype_v8hi;
22645 break;
22646 case V2DI_FTYPE_V4SI:
22647 type = v2di_ftype_v4si;
22648 break;
22649 case V2SI_FTYPE_V2SI:
22650 type = v2si_ftype_v2si;
22651 break;
22652 case V2SI_FTYPE_V4SF:
22653 type = v2si_ftype_v4sf;
22654 break;
22655 case V2SI_FTYPE_V2DF:
22656 type = v2si_ftype_v2df;
22657 break;
22658 case V2SI_FTYPE_V2SF:
22659 type = v2si_ftype_v2sf;
22660 break;
22661 case V2DF_FTYPE_V4DF:
22662 type = v2df_ftype_v4df;
22663 break;
22664 case V2DF_FTYPE_V4SF:
22665 type = v2df_ftype_v4sf;
22666 break;
22667 case V2DF_FTYPE_V2DF:
22668 case V2DF_FTYPE_V2DF_VEC_MERGE:
22669 type = v2df_ftype_v2df;
22670 break;
22671 case V2DF_FTYPE_V2SI:
22672 type = v2df_ftype_v2si;
22673 break;
22674 case V2DF_FTYPE_V4SI:
22675 type = v2df_ftype_v4si;
22676 break;
22677 case V2SF_FTYPE_V2SF:
22678 type = v2sf_ftype_v2sf;
22679 break;
22680 case V2SF_FTYPE_V2SI:
22681 type = v2sf_ftype_v2si;
22682 break;
22683 case V16QI_FTYPE_V16QI_V16QI:
22684 type = v16qi_ftype_v16qi_v16qi;
22685 break;
22686 case V16QI_FTYPE_V8HI_V8HI:
22687 type = v16qi_ftype_v8hi_v8hi;
22688 break;
22689 case V8QI_FTYPE_V8QI_V8QI:
22690 type = v8qi_ftype_v8qi_v8qi;
22691 break;
22692 case V8QI_FTYPE_V4HI_V4HI:
22693 type = v8qi_ftype_v4hi_v4hi;
22694 break;
22695 case V8HI_FTYPE_V8HI_V8HI:
22696 case V8HI_FTYPE_V8HI_V8HI_COUNT:
22697 type = v8hi_ftype_v8hi_v8hi;
22698 break;
22699 case V8HI_FTYPE_V16QI_V16QI:
22700 type = v8hi_ftype_v16qi_v16qi;
22701 break;
22702 case V8HI_FTYPE_V4SI_V4SI:
22703 type = v8hi_ftype_v4si_v4si;
22704 break;
22705 case V8HI_FTYPE_V8HI_SI_COUNT:
22706 type = v8hi_ftype_v8hi_int;
22707 break;
22708 case V8SF_FTYPE_V8SF_V8SF:
22709 type = v8sf_ftype_v8sf_v8sf;
22710 break;
22711 case V8SF_FTYPE_V8SF_V8SI:
22712 type = v8sf_ftype_v8sf_v8si;
22713 break;
22714 case V4SI_FTYPE_V4SI_V4SI:
22715 case V4SI_FTYPE_V4SI_V4SI_COUNT:
22716 type = v4si_ftype_v4si_v4si;
22717 break;
22718 case V4SI_FTYPE_V8HI_V8HI:
22719 type = v4si_ftype_v8hi_v8hi;
22720 break;
22721 case V4SI_FTYPE_V4SF_V4SF:
22722 type = v4si_ftype_v4sf_v4sf;
22723 break;
22724 case V4SI_FTYPE_V2DF_V2DF:
22725 type = v4si_ftype_v2df_v2df;
22726 break;
22727 case V4SI_FTYPE_V4SI_SI_COUNT:
22728 type = v4si_ftype_v4si_int;
22729 break;
22730 case V4HI_FTYPE_V4HI_V4HI:
22731 case V4HI_FTYPE_V4HI_V4HI_COUNT:
22732 type = v4hi_ftype_v4hi_v4hi;
22733 break;
22734 case V4HI_FTYPE_V8QI_V8QI:
22735 type = v4hi_ftype_v8qi_v8qi;
22736 break;
22737 case V4HI_FTYPE_V2SI_V2SI:
22738 type = v4hi_ftype_v2si_v2si;
22739 break;
22740 case V4HI_FTYPE_V4HI_SI_COUNT:
22741 type = v4hi_ftype_v4hi_int;
22742 break;
22743 case V4DF_FTYPE_V4DF_V4DF:
22744 type = v4df_ftype_v4df_v4df;
22745 break;
22746 case V4DF_FTYPE_V4DF_V4DI:
22747 type = v4df_ftype_v4df_v4di;
22748 break;
22749 case V4SF_FTYPE_V4SF_V4SF:
22750 case V4SF_FTYPE_V4SF_V4SF_SWAP:
22751 type = v4sf_ftype_v4sf_v4sf;
22752 break;
22753 case V4SF_FTYPE_V4SF_V4SI:
22754 type = v4sf_ftype_v4sf_v4si;
22755 break;
22756 case V4SF_FTYPE_V4SF_V2SI:
22757 type = v4sf_ftype_v4sf_v2si;
22758 break;
22759 case V4SF_FTYPE_V4SF_V2DF:
22760 type = v4sf_ftype_v4sf_v2df;
22761 break;
22762 case V4SF_FTYPE_V4SF_DI:
22763 type = v4sf_ftype_v4sf_int64;
22764 break;
22765 case V4SF_FTYPE_V4SF_SI:
22766 type = v4sf_ftype_v4sf_int;
22767 break;
22768 case V2DI_FTYPE_V2DI_V2DI:
22769 case V2DI_FTYPE_V2DI_V2DI_COUNT:
22770 type = v2di_ftype_v2di_v2di;
22771 break;
22772 case V2DI_FTYPE_V16QI_V16QI:
22773 type = v2di_ftype_v16qi_v16qi;
22774 break;
22775 case V2DI_FTYPE_V4SI_V4SI:
22776 type = v2di_ftype_v4si_v4si;
22777 break;
22778 case V2DI_FTYPE_V2DI_V16QI:
22779 type = v2di_ftype_v2di_v16qi;
22780 break;
22781 case V2DI_FTYPE_V2DF_V2DF:
22782 type = v2di_ftype_v2df_v2df;
22783 break;
22784 case V2DI_FTYPE_V2DI_SI_COUNT:
22785 type = v2di_ftype_v2di_int;
22786 break;
22787 case V2SI_FTYPE_V2SI_V2SI:
22788 case V2SI_FTYPE_V2SI_V2SI_COUNT:
22789 type = v2si_ftype_v2si_v2si;
22790 break;
22791 case V2SI_FTYPE_V4HI_V4HI:
22792 type = v2si_ftype_v4hi_v4hi;
22793 break;
22794 case V2SI_FTYPE_V2SF_V2SF:
22795 type = v2si_ftype_v2sf_v2sf;
22796 break;
22797 case V2SI_FTYPE_V2SI_SI_COUNT:
22798 type = v2si_ftype_v2si_int;
22799 break;
22800 case V2DF_FTYPE_V2DF_V2DF:
22801 case V2DF_FTYPE_V2DF_V2DF_SWAP:
22802 type = v2df_ftype_v2df_v2df;
22803 break;
22804 case V2DF_FTYPE_V2DF_V4SF:
22805 type = v2df_ftype_v2df_v4sf;
22806 break;
22807 case V2DF_FTYPE_V2DF_V2DI:
22808 type = v2df_ftype_v2df_v2di;
22809 break;
22810 case V2DF_FTYPE_V2DF_DI:
22811 type = v2df_ftype_v2df_int64;
22812 break;
22813 case V2DF_FTYPE_V2DF_SI:
22814 type = v2df_ftype_v2df_int;
22815 break;
22816 case V2SF_FTYPE_V2SF_V2SF:
22817 type = v2sf_ftype_v2sf_v2sf;
22818 break;
22819 case V1DI_FTYPE_V1DI_V1DI:
22820 case V1DI_FTYPE_V1DI_V1DI_COUNT:
22821 type = v1di_ftype_v1di_v1di;
22822 break;
22823 case V1DI_FTYPE_V8QI_V8QI:
22824 type = v1di_ftype_v8qi_v8qi;
22825 break;
22826 case V1DI_FTYPE_V2SI_V2SI:
22827 type = v1di_ftype_v2si_v2si;
22828 break;
22829 case V1DI_FTYPE_V1DI_SI_COUNT:
22830 type = v1di_ftype_v1di_int;
22831 break;
22832 case UINT64_FTYPE_UINT64_UINT64:
22833 type = uint64_ftype_uint64_uint64;
22834 break;
22835 case UINT_FTYPE_UINT_UINT:
22836 type = unsigned_ftype_unsigned_unsigned;
22837 break;
22838 case UINT_FTYPE_UINT_USHORT:
22839 type = unsigned_ftype_unsigned_ushort;
22840 break;
22841 case UINT_FTYPE_UINT_UCHAR:
22842 type = unsigned_ftype_unsigned_uchar;
22843 break;
22844 case V8HI_FTYPE_V8HI_INT:
22845 type = v8hi_ftype_v8hi_int;
22846 break;
22847 case V8SF_FTYPE_V8SF_INT:
22848 type = v8sf_ftype_v8sf_int;
22849 break;
22850 case V4SI_FTYPE_V4SI_INT:
22851 type = v4si_ftype_v4si_int;
22852 break;
22853 case V4SI_FTYPE_V8SI_INT:
22854 type = v4si_ftype_v8si_int;
22855 break;
22856 case V4HI_FTYPE_V4HI_INT:
22857 type = v4hi_ftype_v4hi_int;
22858 break;
22859 case V4DF_FTYPE_V4DF_INT:
22860 type = v4df_ftype_v4df_int;
22861 break;
22862 case V4SF_FTYPE_V4SF_INT:
22863 type = v4sf_ftype_v4sf_int;
22864 break;
22865 case V4SF_FTYPE_V8SF_INT:
22866 type = v4sf_ftype_v8sf_int;
22867 break;
22868 case V2DI_FTYPE_V2DI_INT:
22869 case V2DI2TI_FTYPE_V2DI_INT:
22870 type = v2di_ftype_v2di_int;
22871 break;
22872 case V2DF_FTYPE_V2DF_INT:
22873 type = v2df_ftype_v2df_int;
22874 break;
22875 case V2DF_FTYPE_V4DF_INT:
22876 type = v2df_ftype_v4df_int;
22877 break;
22878 case V16QI_FTYPE_V16QI_V16QI_V16QI:
22879 type = v16qi_ftype_v16qi_v16qi_v16qi;
22880 break;
22881 case V8SF_FTYPE_V8SF_V8SF_V8SF:
22882 type = v8sf_ftype_v8sf_v8sf_v8sf;
22883 break;
22884 case V4DF_FTYPE_V4DF_V4DF_V4DF:
22885 type = v4df_ftype_v4df_v4df_v4df;
22886 break;
22887 case V4SF_FTYPE_V4SF_V4SF_V4SF:
22888 type = v4sf_ftype_v4sf_v4sf_v4sf;
22889 break;
22890 case V2DF_FTYPE_V2DF_V2DF_V2DF:
22891 type = v2df_ftype_v2df_v2df_v2df;
22892 break;
22893 case V16QI_FTYPE_V16QI_V16QI_INT:
22894 type = v16qi_ftype_v16qi_v16qi_int;
22895 break;
22896 case V8SI_FTYPE_V8SI_V8SI_INT:
22897 type = v8si_ftype_v8si_v8si_int;
22898 break;
22899 case V8SI_FTYPE_V8SI_V4SI_INT:
22900 type = v8si_ftype_v8si_v4si_int;
22901 break;
22902 case V8HI_FTYPE_V8HI_V8HI_INT:
22903 type = v8hi_ftype_v8hi_v8hi_int;
22904 break;
22905 case V8SF_FTYPE_V8SF_V8SF_INT:
22906 type = v8sf_ftype_v8sf_v8sf_int;
22907 break;
22908 case V8SF_FTYPE_V8SF_V4SF_INT:
22909 type = v8sf_ftype_v8sf_v4sf_int;
22910 break;
22911 case V4SI_FTYPE_V4SI_V4SI_INT:
22912 type = v4si_ftype_v4si_v4si_int;
22913 break;
22914 case V4DF_FTYPE_V4DF_V4DF_INT:
22915 type = v4df_ftype_v4df_v4df_int;
22916 break;
22917 case V4DF_FTYPE_V4DF_V2DF_INT:
22918 type = v4df_ftype_v4df_v2df_int;
22919 break;
22920 case V4SF_FTYPE_V4SF_V4SF_INT:
22921 type = v4sf_ftype_v4sf_v4sf_int;
22922 break;
22923 case V2DI_FTYPE_V2DI_V2DI_INT:
22924 case V2DI2TI_FTYPE_V2DI_V2DI_INT:
22925 type = v2di_ftype_v2di_v2di_int;
22926 break;
22927 case V2DF_FTYPE_V2DF_V2DF_INT:
22928 type = v2df_ftype_v2df_v2df_int;
22929 break;
22930 case V2DI_FTYPE_V2DI_UINT_UINT:
22931 type = v2di_ftype_v2di_unsigned_unsigned;
22932 break;
22933 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
22934 type = v2di_ftype_v2di_v2di_unsigned_unsigned;
22935 break;
22936 case V1DI2DI_FTYPE_V1DI_V1DI_INT:
22937 type = v1di_ftype_v1di_v1di_int;
22938 break;
22939 default:
22940 gcc_unreachable ();
22941 }
22942
22943 def_builtin_const (d->mask, d->name, type, d->code);
22944 }
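/* As an illustration, each bdesc_args entry pairs an insn pattern with a
   builtin name and one of the *_FTYPE_* codes decoded above; an entry along
   the lines of
     { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128",
       IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI }
   (a representative sketch of the table layout, not a quotation of it) is
   registered here as a const builtin taking and returning V8HImode
   vectors.  */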
22945
22946 /* pcmpestr[im] insns. */
22947 for (i = 0, d = bdesc_pcmpestr;
22948 i < ARRAY_SIZE (bdesc_pcmpestr);
22949 i++, d++)
22950 {
22951 if (d->code == IX86_BUILTIN_PCMPESTRM128)
22952 ftype = v16qi_ftype_v16qi_int_v16qi_int_int;
22953 else
22954 ftype = int_ftype_v16qi_int_v16qi_int_int;
22955 def_builtin_const (d->mask, d->name, ftype, d->code);
22956 }
22957
22958 /* pcmpistr[im] insns. */
22959 for (i = 0, d = bdesc_pcmpistr;
22960 i < ARRAY_SIZE (bdesc_pcmpistr);
22961 i++, d++)
22962 {
22963 if (d->code == IX86_BUILTIN_PCMPISTRM128)
22964 ftype = v16qi_ftype_v16qi_v16qi_int;
22965 else
22966 ftype = int_ftype_v16qi_v16qi_int;
22967 def_builtin_const (d->mask, d->name, ftype, d->code);
22968 }
22969
22970 /* comi/ucomi insns. */
22971 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
22972 if (d->mask == OPTION_MASK_ISA_SSE2)
22973 def_builtin_const (d->mask, d->name, int_ftype_v2df_v2df, d->code);
22974 else
22975 def_builtin_const (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
22976
22977 /* SSE */
22978 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
22979 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
22980
22981 /* SSE or 3DNow!A */
22982 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
22983
22984 /* SSE2 */
22985 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
22986
22987 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
22988 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
22989
22990 /* SSE3. */
22991 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor", void_ftype_pcvoid_unsigned_unsigned, IX86_BUILTIN_MONITOR);
22992 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait", void_ftype_unsigned_unsigned, IX86_BUILTIN_MWAIT);
22993
22994 /* AES */
22995 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENC128);
22996 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENCLAST128);
22997 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDEC128);
22998 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDECLAST128);
22999 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128", v2di_ftype_v2di, IX86_BUILTIN_AESIMC128);
23000 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128", v2di_ftype_v2di_int, IX86_BUILTIN_AESKEYGENASSIST128);
23001
23002 /* PCLMUL */
23003 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128", v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PCLMULQDQ128);
23004
23005 /* AVX */
23006 def_builtin (OPTION_MASK_ISA_AVX, "__builtin_ia32_vzeroupper", void_ftype_void,
23007 TARGET_64BIT ? IX86_BUILTIN_VZEROUPPER_REX64 : IX86_BUILTIN_VZEROUPPER);
23008
23009 /* Access to the vec_init patterns. */
23010 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
23011 integer_type_node, NULL_TREE);
23012 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si", ftype, IX86_BUILTIN_VEC_INIT_V2SI);
23013
23014 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
23015 short_integer_type_node,
23016 short_integer_type_node,
23017 short_integer_type_node, NULL_TREE);
23018 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi", ftype, IX86_BUILTIN_VEC_INIT_V4HI);
23019
23020 ftype = build_function_type_list (V8QI_type_node, char_type_node,
23021 char_type_node, char_type_node,
23022 char_type_node, char_type_node,
23023 char_type_node, char_type_node,
23024 char_type_node, NULL_TREE);
23025 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi", ftype, IX86_BUILTIN_VEC_INIT_V8QI);
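/* Illustrative note: these vec_init builtins are the hooks that the
   <mmintrin.h>-style wrappers call; a wrapper along the lines of

     __m64 _mm_set_pi32 (int __i1, int __i0)
     { return (__m64) __builtin_ia32_vec_init_v2si (__i0, __i1); }

   builds a V2SI value without requiring MMX vec_init patterns in mmx.md.
   The exact header implementation is not part of this file.  */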
23026
23027 /* Access to the vec_extract patterns. */
23028 ftype = build_function_type_list (double_type_node, V2DF_type_node,
23029 integer_type_node, NULL_TREE);
23030 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df", ftype, IX86_BUILTIN_VEC_EXT_V2DF);
23031
23032 ftype = build_function_type_list (long_long_integer_type_node,
23033 V2DI_type_node, integer_type_node,
23034 NULL_TREE);
23035 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di", ftype, IX86_BUILTIN_VEC_EXT_V2DI);
23036
23037 ftype = build_function_type_list (float_type_node, V4SF_type_node,
23038 integer_type_node, NULL_TREE);
23039 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf", ftype, IX86_BUILTIN_VEC_EXT_V4SF);
23040
23041 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
23042 integer_type_node, NULL_TREE);
23043 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si", ftype, IX86_BUILTIN_VEC_EXT_V4SI);
23044
23045 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
23046 integer_type_node, NULL_TREE);
23047 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi", ftype, IX86_BUILTIN_VEC_EXT_V8HI);
23048
23049 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
23050 integer_type_node, NULL_TREE);
23051 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_ext_v4hi", ftype, IX86_BUILTIN_VEC_EXT_V4HI);
23052
23053 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
23054 integer_type_node, NULL_TREE);
23055 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si", ftype, IX86_BUILTIN_VEC_EXT_V2SI);
23056
23057 ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
23058 integer_type_node, NULL_TREE);
23059 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi", ftype, IX86_BUILTIN_VEC_EXT_V16QI);
23060
23061 /* Access to the vec_set patterns. */
23062 ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
23063 intDI_type_node,
23064 integer_type_node, NULL_TREE);
23065 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_vec_set_v2di", ftype, IX86_BUILTIN_VEC_SET_V2DI);
23066
23067 ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
23068 float_type_node,
23069 integer_type_node, NULL_TREE);
23070 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf", ftype, IX86_BUILTIN_VEC_SET_V4SF);
23071
23072 ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
23073 intSI_type_node,
23074 integer_type_node, NULL_TREE);
23075 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si", ftype, IX86_BUILTIN_VEC_SET_V4SI);
23076
23077 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
23078 intHI_type_node,
23079 integer_type_node, NULL_TREE);
23080 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi", ftype, IX86_BUILTIN_VEC_SET_V8HI);
23081
23082 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
23083 intHI_type_node,
23084 integer_type_node, NULL_TREE);
23085 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_set_v4hi", ftype, IX86_BUILTIN_VEC_SET_V4HI);
23086
23087 ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
23088 intQI_type_node,
23089 integer_type_node, NULL_TREE);
23090 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi", ftype, IX86_BUILTIN_VEC_SET_V16QI);
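/* Illustrative note: the vec_ext/vec_set builtins above play the same role
   for element access; e.g. an <emmintrin.h>-style _mm_insert_epi16 can be
   written as a cast around __builtin_ia32_vec_set_v8hi (v, x, n), and
   _mm_extract_epi16 around __builtin_ia32_vec_ext_v8hi (v, n).  The header
   spellings here are assumptions, not taken from this file.  */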
23091
23092 /* Add SSE5 multi-arg instructions. */
23093 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
23094 {
23095 tree mtype = NULL_TREE;
23096
23097 if (d->name == 0)
23098 continue;
23099
23100 switch ((enum multi_arg_type)d->flag)
23101 {
23102 case MULTI_ARG_3_SF: mtype = v4sf_ftype_v4sf_v4sf_v4sf; break;
23103 case MULTI_ARG_3_DF: mtype = v2df_ftype_v2df_v2df_v2df; break;
23104 case MULTI_ARG_3_DI: mtype = v2di_ftype_v2di_v2di_v2di; break;
23105 case MULTI_ARG_3_SI: mtype = v4si_ftype_v4si_v4si_v4si; break;
23106 case MULTI_ARG_3_SI_DI: mtype = v4si_ftype_v4si_v4si_v2di; break;
23107 case MULTI_ARG_3_HI: mtype = v8hi_ftype_v8hi_v8hi_v8hi; break;
23108 case MULTI_ARG_3_HI_SI: mtype = v8hi_ftype_v8hi_v8hi_v4si; break;
23109 case MULTI_ARG_3_QI: mtype = v16qi_ftype_v16qi_v16qi_v16qi; break;
23110 case MULTI_ARG_3_PERMPS: mtype = v4sf_ftype_v4sf_v4sf_v16qi; break;
23111 case MULTI_ARG_3_PERMPD: mtype = v2df_ftype_v2df_v2df_v16qi; break;
23112 case MULTI_ARG_2_SF: mtype = v4sf_ftype_v4sf_v4sf; break;
23113 case MULTI_ARG_2_DF: mtype = v2df_ftype_v2df_v2df; break;
23114 case MULTI_ARG_2_DI: mtype = v2di_ftype_v2di_v2di; break;
23115 case MULTI_ARG_2_SI: mtype = v4si_ftype_v4si_v4si; break;
23116 case MULTI_ARG_2_HI: mtype = v8hi_ftype_v8hi_v8hi; break;
23117 case MULTI_ARG_2_QI: mtype = v16qi_ftype_v16qi_v16qi; break;
23118 case MULTI_ARG_2_DI_IMM: mtype = v2di_ftype_v2di_si; break;
23119 case MULTI_ARG_2_SI_IMM: mtype = v4si_ftype_v4si_si; break;
23120 case MULTI_ARG_2_HI_IMM: mtype = v8hi_ftype_v8hi_si; break;
23121 case MULTI_ARG_2_QI_IMM: mtype = v16qi_ftype_v16qi_si; break;
23122 case MULTI_ARG_2_SF_CMP: mtype = v4sf_ftype_v4sf_v4sf; break;
23123 case MULTI_ARG_2_DF_CMP: mtype = v2df_ftype_v2df_v2df; break;
23124 case MULTI_ARG_2_DI_CMP: mtype = v2di_ftype_v2di_v2di; break;
23125 case MULTI_ARG_2_SI_CMP: mtype = v4si_ftype_v4si_v4si; break;
23126 case MULTI_ARG_2_HI_CMP: mtype = v8hi_ftype_v8hi_v8hi; break;
23127 case MULTI_ARG_2_QI_CMP: mtype = v16qi_ftype_v16qi_v16qi; break;
23128 case MULTI_ARG_2_SF_TF: mtype = v4sf_ftype_v4sf_v4sf; break;
23129 case MULTI_ARG_2_DF_TF: mtype = v2df_ftype_v2df_v2df; break;
23130 case MULTI_ARG_2_DI_TF: mtype = v2di_ftype_v2di_v2di; break;
23131 case MULTI_ARG_2_SI_TF: mtype = v4si_ftype_v4si_v4si; break;
23132 case MULTI_ARG_2_HI_TF: mtype = v8hi_ftype_v8hi_v8hi; break;
23133 case MULTI_ARG_2_QI_TF: mtype = v16qi_ftype_v16qi_v16qi; break;
23134 case MULTI_ARG_1_SF: mtype = v4sf_ftype_v4sf; break;
23135 case MULTI_ARG_1_DF: mtype = v2df_ftype_v2df; break;
23136 case MULTI_ARG_1_DI: mtype = v2di_ftype_v2di; break;
23137 case MULTI_ARG_1_SI: mtype = v4si_ftype_v4si; break;
23138 case MULTI_ARG_1_HI: mtype = v8hi_ftype_v8hi; break;
23139 case MULTI_ARG_1_QI: mtype = v16qi_ftype_v16qi; break;
23140 case MULTI_ARG_1_SI_DI: mtype = v2di_ftype_v4si; break;
23141 case MULTI_ARG_1_HI_DI: mtype = v2di_ftype_v8hi; break;
23142 case MULTI_ARG_1_HI_SI: mtype = v4si_ftype_v8hi; break;
23143 case MULTI_ARG_1_QI_DI: mtype = v2di_ftype_v16qi; break;
23144 case MULTI_ARG_1_QI_SI: mtype = v4si_ftype_v16qi; break;
23145 case MULTI_ARG_1_QI_HI: mtype = v8hi_ftype_v16qi; break;
23146 case MULTI_ARG_1_PH2PS: mtype = v4sf_ftype_v4hi; break;
23147 case MULTI_ARG_1_PS2PH: mtype = v4hi_ftype_v4sf; break;
23148 case MULTI_ARG_UNKNOWN:
23149 default:
23150 gcc_unreachable ();
23151 }
23152
23153 if (mtype)
23154 def_builtin_const (d->mask, d->name, mtype, d->code);
23155 }
23156 }
23157
23158 /* Internal method for ix86_init_builtins. */
23159
23160 static void
23161 ix86_init_builtins_va_builtins_abi (void)
23162 {
23163 tree ms_va_ref, sysv_va_ref;
23164 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
23165 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
23166 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
23167 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
23168
23169 if (!TARGET_64BIT)
23170 return;
23171 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
23172 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
23173 ms_va_ref = build_reference_type (ms_va_list_type_node);
23174 sysv_va_ref =
23175 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
23176
23177 fnvoid_va_end_ms =
23178 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
23179 fnvoid_va_start_ms =
23180 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
23181 fnvoid_va_end_sysv =
23182 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
23183 fnvoid_va_start_sysv =
23184 build_varargs_function_type_list (void_type_node, sysv_va_ref,
23185 NULL_TREE);
23186 fnvoid_va_copy_ms =
23187 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
23188 NULL_TREE);
23189 fnvoid_va_copy_sysv =
23190 build_function_type_list (void_type_node, sysv_va_ref,
23191 sysv_va_ref, NULL_TREE);
23192
23193 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
23194 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
23195 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
23196 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
23197 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
23198 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
23199 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
23200 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
23201 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
23202 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
23203 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
23204 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
23205 }
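/* Illustrative usage of the builtins registered above (a sketch, assuming
   the __builtin_ms_va_list type name registered for ms_va_list_type_node
   elsewhere in this file):

     int sum_ms (int n, ...) __attribute__ ((ms_abi));
     int sum_ms (int n, ...)
     {
       __builtin_ms_va_list ap;
       int i, s = 0;
       __builtin_ms_va_start (ap, n);
       for (i = 0; i < n; i++)
         s += __builtin_va_arg (ap, int);
       __builtin_ms_va_end (ap);
       return s;
     }

   This lets 64-bit code mix the ms_abi and sysv_abi calling conventions
   while still walking variadic arguments with the matching va_list
   flavor.  */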
23206
23207 static void
23208 ix86_init_builtins (void)
23209 {
23210 tree float128_type_node = make_node (REAL_TYPE);
23211 tree ftype, decl;
23212
23213 /* The __float80 type. */
23214 if (TYPE_MODE (long_double_type_node) == XFmode)
23215 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
23216 "__float80");
23217 else
23218 {
23219 /* The __float80 type. */
23220 tree float80_type_node = make_node (REAL_TYPE);
23221
23222 TYPE_PRECISION (float80_type_node) = 80;
23223 layout_type (float80_type_node);
23224 (*lang_hooks.types.register_builtin_type) (float80_type_node,
23225 "__float80");
23226 }
23227
23228 /* The __float128 type. */
23229 TYPE_PRECISION (float128_type_node) = 128;
23230 layout_type (float128_type_node);
23231 (*lang_hooks.types.register_builtin_type) (float128_type_node,
23232 "__float128");
23233
23234 /* TFmode support builtins. */
23235 ftype = build_function_type (float128_type_node, void_list_node);
23236 decl = add_builtin_function ("__builtin_infq", ftype,
23237 IX86_BUILTIN_INFQ, BUILT_IN_MD,
23238 NULL, NULL_TREE);
23239 ix86_builtins[(int) IX86_BUILTIN_INFQ] = decl;
23240
23241 /* We will expand them to a normal call if SSE2 isn't available, since
23242 they are used by libgcc. */
23243 ftype = build_function_type_list (float128_type_node,
23244 float128_type_node,
23245 NULL_TREE);
23246 decl = add_builtin_function ("__builtin_fabsq", ftype,
23247 IX86_BUILTIN_FABSQ, BUILT_IN_MD,
23248 "__fabstf2", NULL_TREE);
23249 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = decl;
23250 TREE_READONLY (decl) = 1;
23251
23252 ftype = build_function_type_list (float128_type_node,
23253 float128_type_node,
23254 float128_type_node,
23255 NULL_TREE);
23256 decl = add_builtin_function ("__builtin_copysignq", ftype,
23257 IX86_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
23258 "__copysigntf3", NULL_TREE);
23259 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = decl;
23260 TREE_READONLY (decl) = 1;
23261
23262 ix86_init_mmx_sse_builtins ();
23263 if (TARGET_64BIT)
23264 ix86_init_builtins_va_builtins_abi ();
23265 }
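/* Illustrative usage of the TFmode builtins registered above (a sketch,
   not part of this file):

     __float128 inf = __builtin_infq ();
     __float128 mag = __builtin_fabsq (x);
     __float128 sgn = __builtin_copysignq (mag, y);

   When they are not expanded inline, __builtin_fabsq and
   __builtin_copysignq fall back to the libgcc entry points __fabstf2 and
   __copysigntf3 named in the add_builtin_function calls.  */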
23266
23267 /* Errors in the source file can cause expand_expr to return const0_rtx
23268 where we expect a vector. To avoid crashing, use one of the vector
23269 clear instructions. */
23270 static rtx
23271 safe_vector_operand (rtx x, enum machine_mode mode)
23272 {
23273 if (x == const0_rtx)
23274 x = CONST0_RTX (mode);
23275 return x;
23276 }
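/* For example, a call such as __builtin_ia32_addps (a, b) where `a' failed
   to parse is expanded with const0_rtx in place of the vector argument;
   substituting CONST0_RTX (mode) here hands the expanders below a zero of
   the right vector mode instead of a scalar zero, so they do not build an
   ill-formed insn.  (The builtin name is only an illustration.)  */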
23277
23278 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
23279
23280 static rtx
23281 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
23282 {
23283 rtx pat;
23284 tree arg0 = CALL_EXPR_ARG (exp, 0);
23285 tree arg1 = CALL_EXPR_ARG (exp, 1);
23286 rtx op0 = expand_normal (arg0);
23287 rtx op1 = expand_normal (arg1);
23288 enum machine_mode tmode = insn_data[icode].operand[0].mode;
23289 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
23290 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
23291
23292 if (VECTOR_MODE_P (mode0))
23293 op0 = safe_vector_operand (op0, mode0);
23294 if (VECTOR_MODE_P (mode1))
23295 op1 = safe_vector_operand (op1, mode1);
23296
23297 if (optimize || !target
23298 || GET_MODE (target) != tmode
23299 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
23300 target = gen_reg_rtx (tmode);
23301
23302 if (GET_MODE (op1) == SImode && mode1 == TImode)
23303 {
23304 rtx x = gen_reg_rtx (V4SImode);
23305 emit_insn (gen_sse2_loadd (x, op1));
23306 op1 = gen_lowpart (TImode, x);
23307 }
23308
23309 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
23310 op0 = copy_to_mode_reg (mode0, op0);
23311 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
23312 op1 = copy_to_mode_reg (mode1, op1);
23313
23314 pat = GEN_FCN (icode) (target, op0, op1);
23315 if (! pat)
23316 return 0;
23317
23318 emit_insn (pat);
23319
23320 return target;
23321 }
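/* Sketch of the result for a simple two-operand builtin: with an add
   pattern the emitted insn is essentially
     (set (reg:V8HI target) (plus:V8HI (reg:V8HI op0) (reg:V8HI op1)))
   (the mode is only illustrative).  The SImode/TImode special case above
   handles builtins whose second insn operand is TImode while the builtin
   argument is a plain int: the 32-bit value is loaded into a vector
   register with sse2_loadd and then viewed as TImode.  */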
23322
23323 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
23324
23325 static rtx
23326 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
23327 enum multi_arg_type m_type,
23328 enum insn_code sub_code)
23329 {
23330 rtx pat;
23331 int i;
23332 int nargs;
23333 bool comparison_p = false;
23334 bool tf_p = false;
23335 bool last_arg_constant = false;
23336 int num_memory = 0;
23337 struct {
23338 rtx op;
23339 enum machine_mode mode;
23340 } args[4];
23341
23342 enum machine_mode tmode = insn_data[icode].operand[0].mode;
23343
23344 switch (m_type)
23345 {
23346 case MULTI_ARG_3_SF:
23347 case MULTI_ARG_3_DF:
23348 case MULTI_ARG_3_DI:
23349 case MULTI_ARG_3_SI:
23350 case MULTI_ARG_3_SI_DI:
23351 case MULTI_ARG_3_HI:
23352 case MULTI_ARG_3_HI_SI:
23353 case MULTI_ARG_3_QI:
23354 case MULTI_ARG_3_PERMPS:
23355 case MULTI_ARG_3_PERMPD:
23356 nargs = 3;
23357 break;
23358
23359 case MULTI_ARG_2_SF:
23360 case MULTI_ARG_2_DF:
23361 case MULTI_ARG_2_DI:
23362 case MULTI_ARG_2_SI:
23363 case MULTI_ARG_2_HI:
23364 case MULTI_ARG_2_QI:
23365 nargs = 2;
23366 break;
23367
23368 case MULTI_ARG_2_DI_IMM:
23369 case MULTI_ARG_2_SI_IMM:
23370 case MULTI_ARG_2_HI_IMM:
23371 case MULTI_ARG_2_QI_IMM:
23372 nargs = 2;
23373 last_arg_constant = true;
23374 break;
23375
23376 case MULTI_ARG_1_SF:
23377 case MULTI_ARG_1_DF:
23378 case MULTI_ARG_1_DI:
23379 case MULTI_ARG_1_SI:
23380 case MULTI_ARG_1_HI:
23381 case MULTI_ARG_1_QI:
23382 case MULTI_ARG_1_SI_DI:
23383 case MULTI_ARG_1_HI_DI:
23384 case MULTI_ARG_1_HI_SI:
23385 case MULTI_ARG_1_QI_DI:
23386 case MULTI_ARG_1_QI_SI:
23387 case MULTI_ARG_1_QI_HI:
23388 case MULTI_ARG_1_PH2PS:
23389 case MULTI_ARG_1_PS2PH:
23390 nargs = 1;
23391 break;
23392
23393 case MULTI_ARG_2_SF_CMP:
23394 case MULTI_ARG_2_DF_CMP:
23395 case MULTI_ARG_2_DI_CMP:
23396 case MULTI_ARG_2_SI_CMP:
23397 case MULTI_ARG_2_HI_CMP:
23398 case MULTI_ARG_2_QI_CMP:
23399 nargs = 2;
23400 comparison_p = true;
23401 break;
23402
23403 case MULTI_ARG_2_SF_TF:
23404 case MULTI_ARG_2_DF_TF:
23405 case MULTI_ARG_2_DI_TF:
23406 case MULTI_ARG_2_SI_TF:
23407 case MULTI_ARG_2_HI_TF:
23408 case MULTI_ARG_2_QI_TF:
23409 nargs = 2;
23410 tf_p = true;
23411 break;
23412
23413 case MULTI_ARG_UNKNOWN:
23414 default:
23415 gcc_unreachable ();
23416 }
23417
23418 if (optimize || !target
23419 || GET_MODE (target) != tmode
23420 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
23421 target = gen_reg_rtx (tmode);
23422
23423 gcc_assert (nargs <= 4);
23424
23425 for (i = 0; i < nargs; i++)
23426 {
23427 tree arg = CALL_EXPR_ARG (exp, i);
23428 rtx op = expand_normal (arg);
23429 int adjust = (comparison_p) ? 1 : 0;
23430 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
23431
23432 if (last_arg_constant && i == nargs-1)
23433 {
23434 if (GET_CODE (op) != CONST_INT)
23435 {
23436 error ("last argument must be an immediate");
23437 return gen_reg_rtx (tmode);
23438 }
23439 }
23440 else
23441 {
23442 if (VECTOR_MODE_P (mode))
23443 op = safe_vector_operand (op, mode);
23444
23445 /* If we aren't optimizing, only allow one memory operand to be
23446 generated. */
23447 if (memory_operand (op, mode))
23448 num_memory++;
23449
23450 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
23451
23452 if (optimize
23453 || ! (*insn_data[icode].operand[i+adjust+1].predicate) (op, mode)
23454 || num_memory > 1)
23455 op = force_reg (mode, op);
23456 }
23457
23458 args[i].op = op;
23459 args[i].mode = mode;
23460 }
23461
23462 switch (nargs)
23463 {
23464 case 1:
23465 pat = GEN_FCN (icode) (target, args[0].op);
23466 break;
23467
23468 case 2:
23469 if (tf_p)
23470 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
23471 GEN_INT ((int)sub_code));
23472 else if (! comparison_p)
23473 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
23474 else
23475 {
23476 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
23477 args[0].op,
23478 args[1].op);
23479
23480 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
23481 }
23482 break;
23483
23484 case 3:
23485 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
23486 break;
23487
23488 default:
23489 gcc_unreachable ();
23490 }
23491
23492 if (! pat)
23493 return 0;
23494
23495 emit_insn (pat);
23496 return target;
23497 }
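/* Sketch of the control flow above for representative SSE5 descriptors:
   a MULTI_ARG_3_* fused operation takes three operands, each forced into a
   register when optimizing and with at most one left in memory otherwise;
   a MULTI_ARG_2_*_CMP descriptor passes the rtx comparison code from
   sub_code as an extra operand built with gen_rtx_fmt_ee; and the *_TF
   ("test flag") variants pass it as an integer constant instead.  */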
23498
23499 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
23500 insns with vec_merge. */
23501
23502 static rtx
23503 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
23504 rtx target)
23505 {
23506 rtx pat;
23507 tree arg0 = CALL_EXPR_ARG (exp, 0);
23508 rtx op1, op0 = expand_normal (arg0);
23509 enum machine_mode tmode = insn_data[icode].operand[0].mode;
23510 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
23511
23512 if (optimize || !target
23513 || GET_MODE (target) != tmode
23514 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
23515 target = gen_reg_rtx (tmode);
23516
23517 if (VECTOR_MODE_P (mode0))
23518 op0 = safe_vector_operand (op0, mode0);
23519
23520 if ((optimize && !register_operand (op0, mode0))
23521 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
23522 op0 = copy_to_mode_reg (mode0, op0);
23523
23524 op1 = op0;
23525 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
23526 op1 = copy_to_mode_reg (mode0, op1);
23527
23528 pat = GEN_FCN (icode) (target, op0, op1);
23529 if (! pat)
23530 return 0;
23531 emit_insn (pat);
23532 return target;
23533 }
23534
23535 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
23536
23537 static rtx
23538 ix86_expand_sse_compare (const struct builtin_description *d,
23539 tree exp, rtx target, bool swap)
23540 {
23541 rtx pat;
23542 tree arg0 = CALL_EXPR_ARG (exp, 0);
23543 tree arg1 = CALL_EXPR_ARG (exp, 1);
23544 rtx op0 = expand_normal (arg0);
23545 rtx op1 = expand_normal (arg1);
23546 rtx op2;
23547 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
23548 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
23549 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
23550 enum rtx_code comparison = d->comparison;
23551
23552 if (VECTOR_MODE_P (mode0))
23553 op0 = safe_vector_operand (op0, mode0);
23554 if (VECTOR_MODE_P (mode1))
23555 op1 = safe_vector_operand (op1, mode1);
23556
23557 /* Swap operands if we have a comparison that isn't available in
23558 hardware. */
23559 if (swap)
23560 {
23561 rtx tmp = gen_reg_rtx (mode1);
23562 emit_move_insn (tmp, op1);
23563 op1 = op0;
23564 op0 = tmp;
23565 }
23566
23567 if (optimize || !target
23568 || GET_MODE (target) != tmode
23569 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
23570 target = gen_reg_rtx (tmode);
23571
23572 if ((optimize && !register_operand (op0, mode0))
23573 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
23574 op0 = copy_to_mode_reg (mode0, op0);
23575 if ((optimize && !register_operand (op1, mode1))
23576 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
23577 op1 = copy_to_mode_reg (mode1, op1);
23578
23579 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
23580 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
23581 if (! pat)
23582 return 0;
23583 emit_insn (pat);
23584 return target;
23585 }
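/* The swap path exists because the SSE compare encodings only provide one
   ordering for some predicates; for instance a greater-than builtin is
   implemented by swapping the operands and emitting the corresponding
   less-than comparison code, which is why op1 is first copied into a
   fresh register of mode1 above.  */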
23586
23587 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
23588
23589 static rtx
23590 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
23591 rtx target)
23592 {
23593 rtx pat;
23594 tree arg0 = CALL_EXPR_ARG (exp, 0);
23595 tree arg1 = CALL_EXPR_ARG (exp, 1);
23596 rtx op0 = expand_normal (arg0);
23597 rtx op1 = expand_normal (arg1);
23598 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
23599 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
23600 enum rtx_code comparison = d->comparison;
23601
23602 if (VECTOR_MODE_P (mode0))
23603 op0 = safe_vector_operand (op0, mode0);
23604 if (VECTOR_MODE_P (mode1))
23605 op1 = safe_vector_operand (op1, mode1);
23606
23607 /* Swap operands if we have a comparison that isn't available in
23608 hardware. */
23609 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
23610 {
23611 rtx tmp = op1;
23612 op1 = op0;
23613 op0 = tmp;
23614 }
23615
23616 target = gen_reg_rtx (SImode);
23617 emit_move_insn (target, const0_rtx);
23618 target = gen_rtx_SUBREG (QImode, target, 0);
23619
23620 if ((optimize && !register_operand (op0, mode0))
23621 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
23622 op0 = copy_to_mode_reg (mode0, op0);
23623 if ((optimize && !register_operand (op1, mode1))
23624 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
23625 op1 = copy_to_mode_reg (mode1, op1);
23626
23627 pat = GEN_FCN (d->icode) (op0, op1);
23628 if (! pat)
23629 return 0;
23630 emit_insn (pat);
23631 emit_insn (gen_rtx_SET (VOIDmode,
23632 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
23633 gen_rtx_fmt_ee (comparison, QImode,
23634 SET_DEST (pat),
23635 const0_rtx)));
23636
23637 return SUBREG_REG (target);
23638 }
23639
23640 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
23641
23642 static rtx
23643 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
23644 rtx target)
23645 {
23646 rtx pat;
23647 tree arg0 = CALL_EXPR_ARG (exp, 0);
23648 tree arg1 = CALL_EXPR_ARG (exp, 1);
23649 rtx op0 = expand_normal (arg0);
23650 rtx op1 = expand_normal (arg1);
23651 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
23652 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
23653 enum rtx_code comparison = d->comparison;
23654
23655 if (VECTOR_MODE_P (mode0))
23656 op0 = safe_vector_operand (op0, mode0);
23657 if (VECTOR_MODE_P (mode1))
23658 op1 = safe_vector_operand (op1, mode1);
23659
23660 target = gen_reg_rtx (SImode);
23661 emit_move_insn (target, const0_rtx);
23662 target = gen_rtx_SUBREG (QImode, target, 0);
23663
23664 if ((optimize && !register_operand (op0, mode0))
23665 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
23666 op0 = copy_to_mode_reg (mode0, op0);
23667 if ((optimize && !register_operand (op1, mode1))
23668 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
23669 op1 = copy_to_mode_reg (mode1, op1);
23670
23671 pat = GEN_FCN (d->icode) (op0, op1);
23672 if (! pat)
23673 return 0;
23674 emit_insn (pat);
23675 emit_insn (gen_rtx_SET (VOIDmode,
23676 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
23677 gen_rtx_fmt_ee (comparison, QImode,
23678 SET_DEST (pat),
23679 const0_rtx)));
23680
23681 return SUBREG_REG (target);
23682 }
23683
23684 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
23685
23686 static rtx
23687 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
23688 tree exp, rtx target)
23689 {
23690 rtx pat;
23691 tree arg0 = CALL_EXPR_ARG (exp, 0);
23692 tree arg1 = CALL_EXPR_ARG (exp, 1);
23693 tree arg2 = CALL_EXPR_ARG (exp, 2);
23694 tree arg3 = CALL_EXPR_ARG (exp, 3);
23695 tree arg4 = CALL_EXPR_ARG (exp, 4);
23696 rtx scratch0, scratch1;
23697 rtx op0 = expand_normal (arg0);
23698 rtx op1 = expand_normal (arg1);
23699 rtx op2 = expand_normal (arg2);
23700 rtx op3 = expand_normal (arg3);
23701 rtx op4 = expand_normal (arg4);
23702 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
23703
23704 tmode0 = insn_data[d->icode].operand[0].mode;
23705 tmode1 = insn_data[d->icode].operand[1].mode;
23706 modev2 = insn_data[d->icode].operand[2].mode;
23707 modei3 = insn_data[d->icode].operand[3].mode;
23708 modev4 = insn_data[d->icode].operand[4].mode;
23709 modei5 = insn_data[d->icode].operand[5].mode;
23710 modeimm = insn_data[d->icode].operand[6].mode;
23711
23712 if (VECTOR_MODE_P (modev2))
23713 op0 = safe_vector_operand (op0, modev2);
23714 if (VECTOR_MODE_P (modev4))
23715 op2 = safe_vector_operand (op2, modev4);
23716
23717 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
23718 op0 = copy_to_mode_reg (modev2, op0);
23719 if (! (*insn_data[d->icode].operand[3].predicate) (op1, modei3))
23720 op1 = copy_to_mode_reg (modei3, op1);
23721 if ((optimize && !register_operand (op2, modev4))
23722 || !(*insn_data[d->icode].operand[4].predicate) (op2, modev4))
23723 op2 = copy_to_mode_reg (modev4, op2);
23724 if (! (*insn_data[d->icode].operand[5].predicate) (op3, modei5))
23725 op3 = copy_to_mode_reg (modei5, op3);
23726
23727 if (! (*insn_data[d->icode].operand[6].predicate) (op4, modeimm))
23728 {
23729 error ("the fifth argument must be a 8-bit immediate");
23730 return const0_rtx;
23731 }
23732
23733 if (d->code == IX86_BUILTIN_PCMPESTRI128)
23734 {
23735 if (optimize || !target
23736 || GET_MODE (target) != tmode0
23737 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
23738 target = gen_reg_rtx (tmode0);
23739
23740 scratch1 = gen_reg_rtx (tmode1);
23741
23742 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
23743 }
23744 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
23745 {
23746 if (optimize || !target
23747 || GET_MODE (target) != tmode1
23748 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
23749 target = gen_reg_rtx (tmode1);
23750
23751 scratch0 = gen_reg_rtx (tmode0);
23752
23753 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
23754 }
23755 else
23756 {
23757 gcc_assert (d->flag);
23758
23759 scratch0 = gen_reg_rtx (tmode0);
23760 scratch1 = gen_reg_rtx (tmode1);
23761
23762 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
23763 }
23764
23765 if (! pat)
23766 return 0;
23767
23768 emit_insn (pat);
23769
23770 if (d->flag)
23771 {
23772 target = gen_reg_rtx (SImode);
23773 emit_move_insn (target, const0_rtx);
23774 target = gen_rtx_SUBREG (QImode, target, 0);
23775
23776 emit_insn
23777 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
23778 gen_rtx_fmt_ee (EQ, QImode,
23779 gen_rtx_REG ((enum machine_mode) d->flag,
23780 FLAGS_REG),
23781 const0_rtx)));
23782 return SUBREG_REG (target);
23783 }
23784 else
23785 return target;
23786 }
23787
23788
23789 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
23790
23791 static rtx
23792 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
23793 tree exp, rtx target)
23794 {
23795 rtx pat;
23796 tree arg0 = CALL_EXPR_ARG (exp, 0);
23797 tree arg1 = CALL_EXPR_ARG (exp, 1);
23798 tree arg2 = CALL_EXPR_ARG (exp, 2);
23799 rtx scratch0, scratch1;
23800 rtx op0 = expand_normal (arg0);
23801 rtx op1 = expand_normal (arg1);
23802 rtx op2 = expand_normal (arg2);
23803 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
23804
23805 tmode0 = insn_data[d->icode].operand[0].mode;
23806 tmode1 = insn_data[d->icode].operand[1].mode;
23807 modev2 = insn_data[d->icode].operand[2].mode;
23808 modev3 = insn_data[d->icode].operand[3].mode;
23809 modeimm = insn_data[d->icode].operand[4].mode;
23810
23811 if (VECTOR_MODE_P (modev2))
23812 op0 = safe_vector_operand (op0, modev2);
23813 if (VECTOR_MODE_P (modev3))
23814 op1 = safe_vector_operand (op1, modev3);
23815
23816 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
23817 op0 = copy_to_mode_reg (modev2, op0);
23818 if ((optimize && !register_operand (op1, modev3))
23819 || !(*insn_data[d->icode].operand[3].predicate) (op1, modev3))
23820 op1 = copy_to_mode_reg (modev3, op1);
23821
23822 if (! (*insn_data[d->icode].operand[4].predicate) (op2, modeimm))
23823 {
23824 error ("the third argument must be a 8-bit immediate");
23825 return const0_rtx;
23826 }
23827
23828 if (d->code == IX86_BUILTIN_PCMPISTRI128)
23829 {
23830 if (optimize || !target
23831 || GET_MODE (target) != tmode0
23832 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
23833 target = gen_reg_rtx (tmode0);
23834
23835 scratch1 = gen_reg_rtx (tmode1);
23836
23837 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
23838 }
23839 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
23840 {
23841 if (optimize || !target
23842 || GET_MODE (target) != tmode1
23843 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
23844 target = gen_reg_rtx (tmode1);
23845
23846 scratch0 = gen_reg_rtx (tmode0);
23847
23848 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
23849 }
23850 else
23851 {
23852 gcc_assert (d->flag);
23853
23854 scratch0 = gen_reg_rtx (tmode0);
23855 scratch1 = gen_reg_rtx (tmode1);
23856
23857 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
23858 }
23859
23860 if (! pat)
23861 return 0;
23862
23863 emit_insn (pat);
23864
23865 if (d->flag)
23866 {
23867 target = gen_reg_rtx (SImode);
23868 emit_move_insn (target, const0_rtx);
23869 target = gen_rtx_SUBREG (QImode, target, 0);
23870
23871 emit_insn
23872 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
23873 gen_rtx_fmt_ee (EQ, QImode,
23874 gen_rtx_REG ((enum machine_mode) d->flag,
23875 FLAGS_REG),
23876 const0_rtx)));
23877 return SUBREG_REG (target);
23878 }
23879 else
23880 return target;
23881 }
23882
23883 /* Subroutine of ix86_expand_builtin to take care of insns with
23884 variable number of operands. */
23885
23886 static rtx
23887 ix86_expand_args_builtin (const struct builtin_description *d,
23888 tree exp, rtx target)
23889 {
23890 rtx pat, real_target;
23891 unsigned int i, nargs;
23892 unsigned int nargs_constant = 0;
23893 int num_memory = 0;
23894 struct
23895 {
23896 rtx op;
23897 enum machine_mode mode;
23898 } args[4];
23899 bool last_arg_count = false;
23900 enum insn_code icode = d->icode;
23901 const struct insn_data *insn_p = &insn_data[icode];
23902 enum machine_mode tmode = insn_p->operand[0].mode;
23903 enum machine_mode rmode = VOIDmode;
23904 bool swap = false;
23905 enum rtx_code comparison = d->comparison;
23906
23907 switch ((enum ix86_builtin_type) d->flag)
23908 {
23909 case INT_FTYPE_V8SF_V8SF_PTEST:
23910 case INT_FTYPE_V4DI_V4DI_PTEST:
23911 case INT_FTYPE_V4DF_V4DF_PTEST:
23912 case INT_FTYPE_V4SF_V4SF_PTEST:
23913 case INT_FTYPE_V2DI_V2DI_PTEST:
23914 case INT_FTYPE_V2DF_V2DF_PTEST:
23915 return ix86_expand_sse_ptest (d, exp, target);
23916 case FLOAT128_FTYPE_FLOAT128:
23917 case FLOAT_FTYPE_FLOAT:
23918 case INT64_FTYPE_V4SF:
23919 case INT64_FTYPE_V2DF:
23920 case INT_FTYPE_V16QI:
23921 case INT_FTYPE_V8QI:
23922 case INT_FTYPE_V8SF:
23923 case INT_FTYPE_V4DF:
23924 case INT_FTYPE_V4SF:
23925 case INT_FTYPE_V2DF:
23926 case V16QI_FTYPE_V16QI:
23927 case V8SI_FTYPE_V8SF:
23928 case V8SI_FTYPE_V4SI:
23929 case V8HI_FTYPE_V8HI:
23930 case V8HI_FTYPE_V16QI:
23931 case V8QI_FTYPE_V8QI:
23932 case V8SF_FTYPE_V8SF:
23933 case V8SF_FTYPE_V8SI:
23934 case V8SF_FTYPE_V4SF:
23935 case V4SI_FTYPE_V4SI:
23936 case V4SI_FTYPE_V16QI:
23937 case V4SI_FTYPE_V4SF:
23938 case V4SI_FTYPE_V8SI:
23939 case V4SI_FTYPE_V8HI:
23940 case V4SI_FTYPE_V4DF:
23941 case V4SI_FTYPE_V2DF:
23942 case V4HI_FTYPE_V4HI:
23943 case V4DF_FTYPE_V4DF:
23944 case V4DF_FTYPE_V4SI:
23945 case V4DF_FTYPE_V4SF:
23946 case V4DF_FTYPE_V2DF:
23947 case V4SF_FTYPE_V4SF:
23948 case V4SF_FTYPE_V4SI:
23949 case V4SF_FTYPE_V8SF:
23950 case V4SF_FTYPE_V4DF:
23951 case V4SF_FTYPE_V2DF:
23952 case V2DI_FTYPE_V2DI:
23953 case V2DI_FTYPE_V16QI:
23954 case V2DI_FTYPE_V8HI:
23955 case V2DI_FTYPE_V4SI:
23956 case V2DF_FTYPE_V2DF:
23957 case V2DF_FTYPE_V4SI:
23958 case V2DF_FTYPE_V4DF:
23959 case V2DF_FTYPE_V4SF:
23960 case V2DF_FTYPE_V2SI:
23961 case V2SI_FTYPE_V2SI:
23962 case V2SI_FTYPE_V4SF:
23963 case V2SI_FTYPE_V2SF:
23964 case V2SI_FTYPE_V2DF:
23965 case V2SF_FTYPE_V2SF:
23966 case V2SF_FTYPE_V2SI:
23967 nargs = 1;
23968 break;
23969 case V4SF_FTYPE_V4SF_VEC_MERGE:
23970 case V2DF_FTYPE_V2DF_VEC_MERGE:
23971 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
23972 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
23973 case V16QI_FTYPE_V16QI_V16QI:
23974 case V16QI_FTYPE_V8HI_V8HI:
23975 case V8QI_FTYPE_V8QI_V8QI:
23976 case V8QI_FTYPE_V4HI_V4HI:
23977 case V8HI_FTYPE_V8HI_V8HI:
23978 case V8HI_FTYPE_V16QI_V16QI:
23979 case V8HI_FTYPE_V4SI_V4SI:
23980 case V8SF_FTYPE_V8SF_V8SF:
23981 case V8SF_FTYPE_V8SF_V8SI:
23982 case V4SI_FTYPE_V4SI_V4SI:
23983 case V4SI_FTYPE_V8HI_V8HI:
23984 case V4SI_FTYPE_V4SF_V4SF:
23985 case V4SI_FTYPE_V2DF_V2DF:
23986 case V4HI_FTYPE_V4HI_V4HI:
23987 case V4HI_FTYPE_V8QI_V8QI:
23988 case V4HI_FTYPE_V2SI_V2SI:
23989 case V4DF_FTYPE_V4DF_V4DF:
23990 case V4DF_FTYPE_V4DF_V4DI:
23991 case V4SF_FTYPE_V4SF_V4SF:
23992 case V4SF_FTYPE_V4SF_V4SI:
23993 case V4SF_FTYPE_V4SF_V2SI:
23994 case V4SF_FTYPE_V4SF_V2DF:
23995 case V4SF_FTYPE_V4SF_DI:
23996 case V4SF_FTYPE_V4SF_SI:
23997 case V2DI_FTYPE_V2DI_V2DI:
23998 case V2DI_FTYPE_V16QI_V16QI:
23999 case V2DI_FTYPE_V4SI_V4SI:
24000 case V2DI_FTYPE_V2DI_V16QI:
24001 case V2DI_FTYPE_V2DF_V2DF:
24002 case V2SI_FTYPE_V2SI_V2SI:
24003 case V2SI_FTYPE_V4HI_V4HI:
24004 case V2SI_FTYPE_V2SF_V2SF:
24005 case V2DF_FTYPE_V2DF_V2DF:
24006 case V2DF_FTYPE_V2DF_V4SF:
24007 case V2DF_FTYPE_V2DF_V2DI:
24008 case V2DF_FTYPE_V2DF_DI:
24009 case V2DF_FTYPE_V2DF_SI:
24010 case V2SF_FTYPE_V2SF_V2SF:
24011 case V1DI_FTYPE_V1DI_V1DI:
24012 case V1DI_FTYPE_V8QI_V8QI:
24013 case V1DI_FTYPE_V2SI_V2SI:
24014 if (comparison == UNKNOWN)
24015 return ix86_expand_binop_builtin (icode, exp, target);
24016 nargs = 2;
24017 break;
24018 case V4SF_FTYPE_V4SF_V4SF_SWAP:
24019 case V2DF_FTYPE_V2DF_V2DF_SWAP:
24020 gcc_assert (comparison != UNKNOWN);
24021 nargs = 2;
24022 swap = true;
24023 break;
24024 case V8HI_FTYPE_V8HI_V8HI_COUNT:
24025 case V8HI_FTYPE_V8HI_SI_COUNT:
24026 case V4SI_FTYPE_V4SI_V4SI_COUNT:
24027 case V4SI_FTYPE_V4SI_SI_COUNT:
24028 case V4HI_FTYPE_V4HI_V4HI_COUNT:
24029 case V4HI_FTYPE_V4HI_SI_COUNT:
24030 case V2DI_FTYPE_V2DI_V2DI_COUNT:
24031 case V2DI_FTYPE_V2DI_SI_COUNT:
24032 case V2SI_FTYPE_V2SI_V2SI_COUNT:
24033 case V2SI_FTYPE_V2SI_SI_COUNT:
24034 case V1DI_FTYPE_V1DI_V1DI_COUNT:
24035 case V1DI_FTYPE_V1DI_SI_COUNT:
24036 nargs = 2;
24037 last_arg_count = true;
24038 break;
24039 case UINT64_FTYPE_UINT64_UINT64:
24040 case UINT_FTYPE_UINT_UINT:
24041 case UINT_FTYPE_UINT_USHORT:
24042 case UINT_FTYPE_UINT_UCHAR:
24043 nargs = 2;
24044 break;
24045 case V2DI2TI_FTYPE_V2DI_INT:
24046 nargs = 2;
24047 rmode = V2DImode;
24048 nargs_constant = 1;
24049 break;
24050 case V8HI_FTYPE_V8HI_INT:
24051 case V8SF_FTYPE_V8SF_INT:
24052 case V4SI_FTYPE_V4SI_INT:
24053 case V4SI_FTYPE_V8SI_INT:
24054 case V4HI_FTYPE_V4HI_INT:
24055 case V4DF_FTYPE_V4DF_INT:
24056 case V4SF_FTYPE_V4SF_INT:
24057 case V4SF_FTYPE_V8SF_INT:
24058 case V2DI_FTYPE_V2DI_INT:
24059 case V2DF_FTYPE_V2DF_INT:
24060 case V2DF_FTYPE_V4DF_INT:
24061 nargs = 2;
24062 nargs_constant = 1;
24063 break;
24064 case V16QI_FTYPE_V16QI_V16QI_V16QI:
24065 case V8SF_FTYPE_V8SF_V8SF_V8SF:
24066 case V4DF_FTYPE_V4DF_V4DF_V4DF:
24067 case V4SF_FTYPE_V4SF_V4SF_V4SF:
24068 case V2DF_FTYPE_V2DF_V2DF_V2DF:
24069 nargs = 3;
24070 break;
24071 case V16QI_FTYPE_V16QI_V16QI_INT:
24072 case V8HI_FTYPE_V8HI_V8HI_INT:
24073 case V8SI_FTYPE_V8SI_V8SI_INT:
24074 case V8SI_FTYPE_V8SI_V4SI_INT:
24075 case V8SF_FTYPE_V8SF_V8SF_INT:
24076 case V8SF_FTYPE_V8SF_V4SF_INT:
24077 case V4SI_FTYPE_V4SI_V4SI_INT:
24078 case V4DF_FTYPE_V4DF_V4DF_INT:
24079 case V4DF_FTYPE_V4DF_V2DF_INT:
24080 case V4SF_FTYPE_V4SF_V4SF_INT:
24081 case V2DI_FTYPE_V2DI_V2DI_INT:
24082 case V2DF_FTYPE_V2DF_V2DF_INT:
24083 nargs = 3;
24084 nargs_constant = 1;
24085 break;
24086 case V2DI2TI_FTYPE_V2DI_V2DI_INT:
24087 nargs = 3;
24088 rmode = V2DImode;
24089 nargs_constant = 1;
24090 break;
24091 case V1DI2DI_FTYPE_V1DI_V1DI_INT:
24092 nargs = 3;
24093 rmode = DImode;
24094 nargs_constant = 1;
24095 break;
24096 case V2DI_FTYPE_V2DI_UINT_UINT:
24097 nargs = 3;
24098 nargs_constant = 2;
24099 break;
24100 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
24101 nargs = 4;
24102 nargs_constant = 2;
24103 break;
24104 default:
24105 gcc_unreachable ();
24106 }
24107
24108 gcc_assert (nargs <= ARRAY_SIZE (args));
24109
24110 if (comparison != UNKNOWN)
24111 {
24112 gcc_assert (nargs == 2);
24113 return ix86_expand_sse_compare (d, exp, target, swap);
24114 }
24115
24116 if (rmode == VOIDmode || rmode == tmode)
24117 {
24118 if (optimize
24119 || target == 0
24120 || GET_MODE (target) != tmode
24121 || ! (*insn_p->operand[0].predicate) (target, tmode))
24122 target = gen_reg_rtx (tmode);
24123 real_target = target;
24124 }
24125 else
24126 {
24127 target = gen_reg_rtx (rmode);
24128 real_target = simplify_gen_subreg (tmode, target, rmode, 0);
24129 }
24130
24131 for (i = 0; i < nargs; i++)
24132 {
24133 tree arg = CALL_EXPR_ARG (exp, i);
24134 rtx op = expand_normal (arg);
24135 enum machine_mode mode = insn_p->operand[i + 1].mode;
24136 bool match = (*insn_p->operand[i + 1].predicate) (op, mode);
24137
24138 if (last_arg_count && (i + 1) == nargs)
24139 {
24140 /* SIMD shift insns take either an 8-bit immediate or a
24141 register as the count, but the builtin functions take an int.
24142 If the count operand doesn't match, put it in a register. */
24143 if (!match)
24144 {
24145 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
24146 if (!(*insn_p->operand[i + 1].predicate) (op, mode))
24147 op = copy_to_reg (op);
24148 }
24149 }
24150 else if ((nargs - i) <= nargs_constant)
24151 {
24152 if (!match)
24153 switch (icode)
24154 {
24155 case CODE_FOR_sse4_1_roundpd:
24156 case CODE_FOR_sse4_1_roundps:
24157 case CODE_FOR_sse4_1_roundsd:
24158 case CODE_FOR_sse4_1_roundss:
24159 case CODE_FOR_sse4_1_blendps:
24160 case CODE_FOR_avx_blendpd256:
24161 case CODE_FOR_avx_vpermilv4df:
24162 case CODE_FOR_avx_roundpd256:
24163 case CODE_FOR_avx_roundps256:
24164 error ("the last argument must be a 4-bit immediate");
24165 return const0_rtx;
24166
24167 case CODE_FOR_sse4_1_blendpd:
24168 case CODE_FOR_avx_vpermilv2df:
24169 error ("the last argument must be a 2-bit immediate");
24170 return const0_rtx;
24171
24172 case CODE_FOR_avx_vextractf128v4df:
24173 case CODE_FOR_avx_vextractf128v8sf:
24174 case CODE_FOR_avx_vextractf128v8si:
24175 case CODE_FOR_avx_vinsertf128v4df:
24176 case CODE_FOR_avx_vinsertf128v8sf:
24177 case CODE_FOR_avx_vinsertf128v8si:
24178 error ("the last argument must be a 1-bit immediate");
24179 return const0_rtx;
24180
24181 case CODE_FOR_avx_cmpsdv2df3:
24182 case CODE_FOR_avx_cmpssv4sf3:
24183 case CODE_FOR_avx_cmppdv2df3:
24184 case CODE_FOR_avx_cmppsv4sf3:
24185 case CODE_FOR_avx_cmppdv4df3:
24186 case CODE_FOR_avx_cmppsv8sf3:
24187 error ("the last argument must be a 5-bit immediate");
24188 return const0_rtx;
24189
24190 default:
24191 switch (nargs_constant)
24192 {
24193 case 2:
24194 if ((nargs - i) == nargs_constant)
24195 {
24196 error ("the next to last argument must be an 8-bit immediate");
24197 break;
24198 }
24199 case 1:
24200 error ("the last argument must be an 8-bit immediate");
24201 break;
24202 default:
24203 gcc_unreachable ();
24204 }
24205 return const0_rtx;
24206 }
24207 }
24208 else
24209 {
24210 if (VECTOR_MODE_P (mode))
24211 op = safe_vector_operand (op, mode);
24212
24213 /* If we aren't optimizing, only allow one memory operand to
24214 be generated. */
24215 if (memory_operand (op, mode))
24216 num_memory++;
24217
24218 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
24219 {
24220 if (optimize || !match || num_memory > 1)
24221 op = copy_to_mode_reg (mode, op);
24222 }
24223 else
24224 {
24225 op = copy_to_reg (op);
24226 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
24227 }
24228 }
24229
24230 args[i].op = op;
24231 args[i].mode = mode;
24232 }
24233
24234 switch (nargs)
24235 {
24236 case 1:
24237 pat = GEN_FCN (icode) (real_target, args[0].op);
24238 break;
24239 case 2:
24240 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
24241 break;
24242 case 3:
24243 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
24244 args[2].op);
24245 break;
24246 case 4:
24247 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
24248 args[2].op, args[3].op);
24249 break;
24250 default:
24251 gcc_unreachable ();
24252 }
24253
24254 if (! pat)
24255 return 0;
24256
24257 emit_insn (pat);
24258 return target;
24259 }
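/* Note on the rmode handling above: for the V2DI2TI-style descriptors the
   insn pattern computes a TImode value, but at the C level the builtin
   returns a V2DI vector, so the result register is allocated in rmode
   (V2DImode) and the pattern writes to a TImode view of it created with
   simplify_gen_subreg (real_target).  */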
24260
24261 /* Subroutine of ix86_expand_builtin to take care of special insns
24262 with variable number of operands. */
24263
24264 static rtx
24265 ix86_expand_special_args_builtin (const struct builtin_description *d,
24266 tree exp, rtx target)
24267 {
24268 tree arg;
24269 rtx pat, op;
24270 unsigned int i, nargs, arg_adjust, memory;
24271 struct
24272 {
24273 rtx op;
24274 enum machine_mode mode;
24275 } args[2];
24276 enum insn_code icode = d->icode;
24277 bool last_arg_constant = false;
24278 const struct insn_data *insn_p = &insn_data[icode];
24279 enum machine_mode tmode = insn_p->operand[0].mode;
24280 enum { load, store } klass;
24281
24282 switch ((enum ix86_special_builtin_type) d->flag)
24283 {
24284 case VOID_FTYPE_VOID:
24285 emit_insn (GEN_FCN (icode) (target));
24286 return 0;
24287 case V2DI_FTYPE_PV2DI:
24288 case V32QI_FTYPE_PCCHAR:
24289 case V16QI_FTYPE_PCCHAR:
24290 case V8SF_FTYPE_PCV4SF:
24291 case V8SF_FTYPE_PCFLOAT:
24292 case V4SF_FTYPE_PCFLOAT:
24293 case V4DF_FTYPE_PCV2DF:
24294 case V4DF_FTYPE_PCDOUBLE:
24295 case V2DF_FTYPE_PCDOUBLE:
24296 nargs = 1;
24297 klass = load;
24298 memory = 0;
24299 break;
24300 case VOID_FTYPE_PV2SF_V4SF:
24301 case VOID_FTYPE_PV4DI_V4DI:
24302 case VOID_FTYPE_PV2DI_V2DI:
24303 case VOID_FTYPE_PCHAR_V32QI:
24304 case VOID_FTYPE_PCHAR_V16QI:
24305 case VOID_FTYPE_PFLOAT_V8SF:
24306 case VOID_FTYPE_PFLOAT_V4SF:
24307 case VOID_FTYPE_PDOUBLE_V4DF:
24308 case VOID_FTYPE_PDOUBLE_V2DF:
24309 case VOID_FTYPE_PDI_DI:
24310 case VOID_FTYPE_PINT_INT:
24311 nargs = 1;
24312 klass = store;
24313 /* Reserve memory operand for target. */
24314 memory = ARRAY_SIZE (args);
24315 break;
24316 case V4SF_FTYPE_V4SF_PCV2SF:
24317 case V2DF_FTYPE_V2DF_PCDOUBLE:
24318 nargs = 2;
24319 klass = load;
24320 memory = 1;
24321 break;
24322 case V8SF_FTYPE_PCV8SF_V8SF:
24323 case V4DF_FTYPE_PCV4DF_V4DF:
24324 case V4SF_FTYPE_PCV4SF_V4SF:
24325 case V2DF_FTYPE_PCV2DF_V2DF:
24326 nargs = 2;
24327 klass = load;
24328 memory = 0;
24329 break;
24330 case VOID_FTYPE_PV8SF_V8SF_V8SF:
24331 case VOID_FTYPE_PV4DF_V4DF_V4DF:
24332 case VOID_FTYPE_PV4SF_V4SF_V4SF:
24333 case VOID_FTYPE_PV2DF_V2DF_V2DF:
24334 nargs = 2;
24335 klass = store;
24336 /* Reserve memory operand for target. */
24337 memory = ARRAY_SIZE (args);
24338 break;
24339 default:
24340 gcc_unreachable ();
24341 }
24342
24343 gcc_assert (nargs <= ARRAY_SIZE (args));
24344
24345 if (klass == store)
24346 {
24347 arg = CALL_EXPR_ARG (exp, 0);
24348 op = expand_normal (arg);
24349 gcc_assert (target == 0);
24350 target = gen_rtx_MEM (tmode, copy_to_mode_reg (Pmode, op));
24351 arg_adjust = 1;
24352 }
24353 else
24354 {
24355 arg_adjust = 0;
24356 if (optimize
24357 || target == 0
24358 || GET_MODE (target) != tmode
24359 || ! (*insn_p->operand[0].predicate) (target, tmode))
24360 target = gen_reg_rtx (tmode);
24361 }
24362
24363 for (i = 0; i < nargs; i++)
24364 {
24365 enum machine_mode mode = insn_p->operand[i + 1].mode;
24366 bool match;
24367
24368 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
24369 op = expand_normal (arg);
24370 match = (*insn_p->operand[i + 1].predicate) (op, mode);
24371
24372 if (last_arg_constant && (i + 1) == nargs)
24373 {
24374 if (!match)
24375 switch (icode)
24376 {
24377 default:
24378 error ("the last argument must be an 8-bit immediate");
24379 return const0_rtx;
24380 }
24381 }
24382 else
24383 {
24384 if (i == memory)
24385 {
24386 /* This must be the memory operand. */
24387 op = gen_rtx_MEM (mode, copy_to_mode_reg (Pmode, op));
24388 gcc_assert (GET_MODE (op) == mode
24389 || GET_MODE (op) == VOIDmode);
24390 }
24391 else
24392 {
24393 /* This must be a register. */
24394 if (VECTOR_MODE_P (mode))
24395 op = safe_vector_operand (op, mode);
24396
24397 gcc_assert (GET_MODE (op) == mode
24398 || GET_MODE (op) == VOIDmode);
24399 op = copy_to_mode_reg (mode, op);
24400 }
24401 }
24402
24403 args[i].op = op;
24404 args[i].mode = mode;
24405 }
24406
24407 switch (nargs)
24408 {
24409 case 1:
24410 pat = GEN_FCN (icode) (target, args[0].op);
24411 break;
24412 case 2:
24413 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
24414 break;
24415 default:
24416 gcc_unreachable ();
24417 }
24418
24419 if (! pat)
24420 return 0;
24421 emit_insn (pat);
24422 return klass == store ? 0 : target;
24423 }
24424
24425 /* Return the integer constant in ARG. Constrain it to be in the range
24426 of the subparts of VEC_TYPE; issue an error if not. */
24427
24428 static int
24429 get_element_number (tree vec_type, tree arg)
24430 {
24431 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
24432
24433 if (!host_integerp (arg, 1)
24434 || (elt = tree_low_cst (arg, 1), elt > max))
24435 {
24436 error ("selector must be an integer constant in the range 0..%wi", max);
24437 return 0;
24438 }
24439
24440 return elt;
24441 }
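/* For example, with a V4SI vector argument TYPE_VECTOR_SUBPARTS is 4, so a
   selector of 4, a negative selector, or a non-constant selector reports
   the error, and 0 is returned so that expansion can continue.  */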
24442
24443 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
24444 ix86_expand_vector_init. We DO have language-level syntax for this, in
24445 the form of (type){ init-list }. Except that since we can't place emms
24446 instructions from inside the compiler, we can't allow the use of MMX
24447 registers unless the user explicitly asks for it. So we do *not* define
24448 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
24449 we have builtins invoked by mmintrin.h that give us license to emit
24450 these sorts of instructions. */
24451
24452 static rtx
24453 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
24454 {
24455 enum machine_mode tmode = TYPE_MODE (type);
24456 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
24457 int i, n_elt = GET_MODE_NUNITS (tmode);
24458 rtvec v = rtvec_alloc (n_elt);
24459
24460 gcc_assert (VECTOR_MODE_P (tmode));
24461 gcc_assert (call_expr_nargs (exp) == n_elt);
24462
24463 for (i = 0; i < n_elt; ++i)
24464 {
24465 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
24466 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
24467 }
24468
24469 if (!target || !register_operand (target, tmode))
24470 target = gen_reg_rtx (tmode);
24471
24472 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
24473 return target;
24474 }
24475
24476 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
24477 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
24478 had a language-level syntax for referencing vector elements. */
24479
24480 static rtx
24481 ix86_expand_vec_ext_builtin (tree exp, rtx target)
24482 {
24483 enum machine_mode tmode, mode0;
24484 tree arg0, arg1;
24485 int elt;
24486 rtx op0;
24487
24488 arg0 = CALL_EXPR_ARG (exp, 0);
24489 arg1 = CALL_EXPR_ARG (exp, 1);
24490
24491 op0 = expand_normal (arg0);
24492 elt = get_element_number (TREE_TYPE (arg0), arg1);
24493
24494 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
24495 mode0 = TYPE_MODE (TREE_TYPE (arg0));
24496 gcc_assert (VECTOR_MODE_P (mode0));
24497
24498 op0 = force_reg (mode0, op0);
24499
24500 if (optimize || !target || !register_operand (target, tmode))
24501 target = gen_reg_rtx (tmode);
24502
24503 ix86_expand_vector_extract (true, target, op0, elt);
24504
24505 return target;
24506 }
24507
24508 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
24509 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
24510 a language-level syntax for referencing vector elements. */
24511
24512 static rtx
24513 ix86_expand_vec_set_builtin (tree exp)
24514 {
24515 enum machine_mode tmode, mode1;
24516 tree arg0, arg1, arg2;
24517 int elt;
24518 rtx op0, op1, target;
24519
24520 arg0 = CALL_EXPR_ARG (exp, 0);
24521 arg1 = CALL_EXPR_ARG (exp, 1);
24522 arg2 = CALL_EXPR_ARG (exp, 2);
24523
24524 tmode = TYPE_MODE (TREE_TYPE (arg0));
24525 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
24526 gcc_assert (VECTOR_MODE_P (tmode));
24527
24528 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
24529 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
24530 elt = get_element_number (TREE_TYPE (arg0), arg2);
24531
24532 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
24533 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
24534
24535 op0 = force_reg (tmode, op0);
24536 op1 = force_reg (mode1, op1);
24537
24538 /* OP0 is the source of these builtin functions and shouldn't be
24539 modified. Create a copy, use it and return it as target. */
24540 target = gen_reg_rtx (tmode);
24541 emit_move_insn (target, op0);
24542 ix86_expand_vector_set (true, target, op1, elt);
24543
24544 return target;
24545 }
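/* For example, __builtin_ia32_vec_set_v8hi (v, x, 3) copies V into a fresh
   pseudo, stores X into element 3 of the copy via ix86_expand_vector_set,
   and returns the copy, leaving the original vector operand unchanged as
   the comment above requires.  */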
24546
24547 /* Expand an expression EXP that calls a built-in function,
24548 with result going to TARGET if that's convenient
24549 (and in mode MODE if that's convenient).
24550 SUBTARGET may be used as the target for computing one of EXP's operands.
24551 IGNORE is nonzero if the value is to be ignored. */
24552
24553 static rtx
24554 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
24555 enum machine_mode mode ATTRIBUTE_UNUSED,
24556 int ignore ATTRIBUTE_UNUSED)
24557 {
24558 const struct builtin_description *d;
24559 size_t i;
24560 enum insn_code icode;
24561 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
24562 tree arg0, arg1, arg2;
24563 rtx op0, op1, op2, pat;
24564 enum machine_mode mode0, mode1, mode2;
24565 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
24566
24567 /* Determine whether the builtin function is available under the current ISA.
24568 Originally the builtin was not created if it wasn't applicable to the
24569 current ISA based on the command-line switches. With function-specific
24570 options, we need to check, in the context of the function making the
24571 call, whether it is supported. */
24572 if (ix86_builtins_isa[fcode].isa
24573 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
24574 {
24575 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
24576 NULL, NULL, false);
24577
24578 if (!opts)
24579 error ("%qE needs unknown isa option", fndecl);
24580 else
24581 {
24582 gcc_assert (opts != NULL);
24583 error ("%qE needs isa option %s", fndecl, opts);
24584 free (opts);
24585 }
24586 return const0_rtx;
24587 }
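
/* A sketch of the diagnostic this check produces (the flag spelling
   comes from ix86_target_string and is illustrative): if a function
   whose effective ISA lacks SSE2 ends up expanding an SSE2-only
   builtin, e.g.

       void flush (void) { __builtin_ia32_lfence (); }

   compiled without -msse2 and without __attribute__((target ("sse2"))),
   the expansion is rejected with something like

       error: '__builtin_ia32_lfence' needs isa option -msse2

   and const0_rtx is returned so that expansion can continue.  */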
24588
24589 switch (fcode)
24590 {
24591 case IX86_BUILTIN_MASKMOVQ:
24592 case IX86_BUILTIN_MASKMOVDQU:
24593 icode = (fcode == IX86_BUILTIN_MASKMOVQ
24594 ? CODE_FOR_mmx_maskmovq
24595 : CODE_FOR_sse2_maskmovdqu);
24596 /* Note the arg order is different from the operand order. */
24597 arg1 = CALL_EXPR_ARG (exp, 0);
24598 arg2 = CALL_EXPR_ARG (exp, 1);
24599 arg0 = CALL_EXPR_ARG (exp, 2);
24600 op0 = expand_normal (arg0);
24601 op1 = expand_normal (arg1);
24602 op2 = expand_normal (arg2);
24603 mode0 = insn_data[icode].operand[0].mode;
24604 mode1 = insn_data[icode].operand[1].mode;
24605 mode2 = insn_data[icode].operand[2].mode;
24606
24607 op0 = force_reg (Pmode, op0);
24608 op0 = gen_rtx_MEM (mode1, op0);
24609
24610 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
24611 op0 = copy_to_mode_reg (mode0, op0);
24612 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
24613 op1 = copy_to_mode_reg (mode1, op1);
24614 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
24615 op2 = copy_to_mode_reg (mode2, op2);
24616 pat = GEN_FCN (icode) (op0, op1, op2);
24617 if (! pat)
24618 return 0;
24619 emit_insn (pat);
24620 return 0;
24621
24622 case IX86_BUILTIN_LDMXCSR:
24623 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
24624 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
24625 emit_move_insn (target, op0);
24626 emit_insn (gen_sse_ldmxcsr (target));
24627 return 0;
24628
24629 case IX86_BUILTIN_STMXCSR:
24630 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
24631 emit_insn (gen_sse_stmxcsr (target));
24632 return copy_to_mode_reg (SImode, target);
24633
24634 case IX86_BUILTIN_CLFLUSH:
24635 arg0 = CALL_EXPR_ARG (exp, 0);
24636 op0 = expand_normal (arg0);
24637 icode = CODE_FOR_sse2_clflush;
24638 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
24639 op0 = copy_to_mode_reg (Pmode, op0);
24640
24641 emit_insn (gen_sse2_clflush (op0));
24642 return 0;
24643
24644 case IX86_BUILTIN_MONITOR:
24645 arg0 = CALL_EXPR_ARG (exp, 0);
24646 arg1 = CALL_EXPR_ARG (exp, 1);
24647 arg2 = CALL_EXPR_ARG (exp, 2);
24648 op0 = expand_normal (arg0);
24649 op1 = expand_normal (arg1);
24650 op2 = expand_normal (arg2);
24651 if (!REG_P (op0))
24652 op0 = copy_to_mode_reg (Pmode, op0);
24653 if (!REG_P (op1))
24654 op1 = copy_to_mode_reg (SImode, op1);
24655 if (!REG_P (op2))
24656 op2 = copy_to_mode_reg (SImode, op2);
24657 emit_insn ((*ix86_gen_monitor) (op0, op1, op2));
24658 return 0;
24659
24660 case IX86_BUILTIN_MWAIT:
24661 arg0 = CALL_EXPR_ARG (exp, 0);
24662 arg1 = CALL_EXPR_ARG (exp, 1);
24663 op0 = expand_normal (arg0);
24664 op1 = expand_normal (arg1);
24665 if (!REG_P (op0))
24666 op0 = copy_to_mode_reg (SImode, op0);
24667 if (!REG_P (op1))
24668 op1 = copy_to_mode_reg (SImode, op1);
24669 emit_insn (gen_sse3_mwait (op0, op1));
24670 return 0;
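
/* Usage sketch for the MONITOR/MWAIT cases above (the builtin names are
   the ones the <pmmintrin.h> wrappers rely on; the loop itself is
   illustrative):

       static volatile int flag;

       void
       wait_for_flag (void)
       {
         while (!flag)
           {
             __builtin_ia32_monitor ((void *) &flag, 0, 0);
             if (!flag)
               __builtin_ia32_mwait (0, 0);
           }
       }

   All operands are forced into registers above, matching the fixed
   register operands of the underlying monitor/mwait insns.  */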
24671
24672 case IX86_BUILTIN_VEC_INIT_V2SI:
24673 case IX86_BUILTIN_VEC_INIT_V4HI:
24674 case IX86_BUILTIN_VEC_INIT_V8QI:
24675 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
24676
24677 case IX86_BUILTIN_VEC_EXT_V2DF:
24678 case IX86_BUILTIN_VEC_EXT_V2DI:
24679 case IX86_BUILTIN_VEC_EXT_V4SF:
24680 case IX86_BUILTIN_VEC_EXT_V4SI:
24681 case IX86_BUILTIN_VEC_EXT_V8HI:
24682 case IX86_BUILTIN_VEC_EXT_V2SI:
24683 case IX86_BUILTIN_VEC_EXT_V4HI:
24684 case IX86_BUILTIN_VEC_EXT_V16QI:
24685 return ix86_expand_vec_ext_builtin (exp, target);
24686
24687 case IX86_BUILTIN_VEC_SET_V2DI:
24688 case IX86_BUILTIN_VEC_SET_V4SF:
24689 case IX86_BUILTIN_VEC_SET_V4SI:
24690 case IX86_BUILTIN_VEC_SET_V8HI:
24691 case IX86_BUILTIN_VEC_SET_V4HI:
24692 case IX86_BUILTIN_VEC_SET_V16QI:
24693 return ix86_expand_vec_set_builtin (exp);
24694
24695 case IX86_BUILTIN_INFQ:
24696 {
24697 REAL_VALUE_TYPE inf;
24698 rtx tmp;
24699
24700 real_inf (&inf);
24701 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
24702
24703 tmp = validize_mem (force_const_mem (mode, tmp));
24704
24705 if (target == 0)
24706 target = gen_reg_rtx (mode);
24707
24708 emit_move_insn (target, tmp);
24709 return target;
24710 }
24711
24712 default:
24713 break;
24714 }
24715
24716 for (i = 0, d = bdesc_special_args;
24717 i < ARRAY_SIZE (bdesc_special_args);
24718 i++, d++)
24719 if (d->code == fcode)
24720 return ix86_expand_special_args_builtin (d, exp, target);
24721
24722 for (i = 0, d = bdesc_args;
24723 i < ARRAY_SIZE (bdesc_args);
24724 i++, d++)
24725 if (d->code == fcode)
24726 switch (fcode)
24727 {
24728 case IX86_BUILTIN_FABSQ:
24729 case IX86_BUILTIN_COPYSIGNQ:
24730 if (!TARGET_SSE2)
24731 /* Emit a normal call if SSE2 isn't available. */
24732 return expand_call (exp, target, ignore);
24733 default:
24734 return ix86_expand_args_builtin (d, exp, target);
24735 }
24736
24737 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
24738 if (d->code == fcode)
24739 return ix86_expand_sse_comi (d, exp, target);
24740
24741 for (i = 0, d = bdesc_pcmpestr;
24742 i < ARRAY_SIZE (bdesc_pcmpestr);
24743 i++, d++)
24744 if (d->code == fcode)
24745 return ix86_expand_sse_pcmpestr (d, exp, target);
24746
24747 for (i = 0, d = bdesc_pcmpistr;
24748 i < ARRAY_SIZE (bdesc_pcmpistr);
24749 i++, d++)
24750 if (d->code == fcode)
24751 return ix86_expand_sse_pcmpistr (d, exp, target);
24752
24753 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
24754 if (d->code == fcode)
24755 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
24756 (enum multi_arg_type)d->flag,
24757 d->comparison);
24758
24759 gcc_unreachable ();
24760 }
24761
24762 /* Returns a function decl for a vectorized version of the builtin function
24763 with builtin function code FN and the result vector type TYPE, or NULL_TREE
24764 if it is not available. */
24765
24766 static tree
24767 ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
24768 tree type_in)
24769 {
24770 enum machine_mode in_mode, out_mode;
24771 int in_n, out_n;
24772
24773 if (TREE_CODE (type_out) != VECTOR_TYPE
24774 || TREE_CODE (type_in) != VECTOR_TYPE)
24775 return NULL_TREE;
24776
24777 out_mode = TYPE_MODE (TREE_TYPE (type_out));
24778 out_n = TYPE_VECTOR_SUBPARTS (type_out);
24779 in_mode = TYPE_MODE (TREE_TYPE (type_in));
24780 in_n = TYPE_VECTOR_SUBPARTS (type_in);
24781
24782 switch (fn)
24783 {
24784 case BUILT_IN_SQRT:
24785 if (out_mode == DFmode && out_n == 2
24786 && in_mode == DFmode && in_n == 2)
24787 return ix86_builtins[IX86_BUILTIN_SQRTPD];
24788 break;
24789
24790 case BUILT_IN_SQRTF:
24791 if (out_mode == SFmode && out_n == 4
24792 && in_mode == SFmode && in_n == 4)
24793 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
24794 break;
24795
24796 case BUILT_IN_LRINT:
24797 if (out_mode == SImode && out_n == 4
24798 && in_mode == DFmode && in_n == 2)
24799 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
24800 break;
24801
24802 case BUILT_IN_LRINTF:
24803 if (out_mode == SImode && out_n == 4
24804 && in_mode == SFmode && in_n == 4)
24805 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
24806 break;
24807
24808 default:
24809 ;
24810 }
24811
24812 /* Dispatch to a handler for a vectorization library. */
24813 if (ix86_veclib_handler)
24814 return (*ix86_veclib_handler)(fn, type_out, type_in);
24815
24816 return NULL_TREE;
24817 }
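
/* A worked sketch of what this hook enables (loop and options are
   illustrative): with -O3 -msse2 -fno-math-errno the vectorizer asks
   for a V2DF replacement of sqrt and receives IX86_BUILTIN_SQRTPD, so

       void
       vsqrt (double *a, const double *b, int n)
       {
         int i;
         for (i = 0; i < n; i++)
           a[i] = __builtin_sqrt (b[i]);
       }

   is vectorized using sqrtpd; the lrint/lrintf cases similarly map
   rounding conversions onto the packed conversion builtins.  */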
24818
24819 /* Handler for an SVML-style interface to
24820 a library with vectorized intrinsics. */
24821
24822 static tree
24823 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
24824 {
24825 char name[20];
24826 tree fntype, new_fndecl, args;
24827 unsigned arity;
24828 const char *bname;
24829 enum machine_mode el_mode, in_mode;
24830 int n, in_n;
24831
24832 /* SVML is suitable for unsafe math only. */
24833 if (!flag_unsafe_math_optimizations)
24834 return NULL_TREE;
24835
24836 el_mode = TYPE_MODE (TREE_TYPE (type_out));
24837 n = TYPE_VECTOR_SUBPARTS (type_out);
24838 in_mode = TYPE_MODE (TREE_TYPE (type_in));
24839 in_n = TYPE_VECTOR_SUBPARTS (type_in);
24840 if (el_mode != in_mode
24841 || n != in_n)
24842 return NULL_TREE;
24843
24844 switch (fn)
24845 {
24846 case BUILT_IN_EXP:
24847 case BUILT_IN_LOG:
24848 case BUILT_IN_LOG10:
24849 case BUILT_IN_POW:
24850 case BUILT_IN_TANH:
24851 case BUILT_IN_TAN:
24852 case BUILT_IN_ATAN:
24853 case BUILT_IN_ATAN2:
24854 case BUILT_IN_ATANH:
24855 case BUILT_IN_CBRT:
24856 case BUILT_IN_SINH:
24857 case BUILT_IN_SIN:
24858 case BUILT_IN_ASINH:
24859 case BUILT_IN_ASIN:
24860 case BUILT_IN_COSH:
24861 case BUILT_IN_COS:
24862 case BUILT_IN_ACOSH:
24863 case BUILT_IN_ACOS:
24864 if (el_mode != DFmode || n != 2)
24865 return NULL_TREE;
24866 break;
24867
24868 case BUILT_IN_EXPF:
24869 case BUILT_IN_LOGF:
24870 case BUILT_IN_LOG10F:
24871 case BUILT_IN_POWF:
24872 case BUILT_IN_TANHF:
24873 case BUILT_IN_TANF:
24874 case BUILT_IN_ATANF:
24875 case BUILT_IN_ATAN2F:
24876 case BUILT_IN_ATANHF:
24877 case BUILT_IN_CBRTF:
24878 case BUILT_IN_SINHF:
24879 case BUILT_IN_SINF:
24880 case BUILT_IN_ASINHF:
24881 case BUILT_IN_ASINF:
24882 case BUILT_IN_COSHF:
24883 case BUILT_IN_COSF:
24884 case BUILT_IN_ACOSHF:
24885 case BUILT_IN_ACOSF:
24886 if (el_mode != SFmode || n != 4)
24887 return NULL_TREE;
24888 break;
24889
24890 default:
24891 return NULL_TREE;
24892 }
24893
24894 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
24895
24896 if (fn == BUILT_IN_LOGF)
24897 strcpy (name, "vmlsLn4");
24898 else if (fn == BUILT_IN_LOG)
24899 strcpy (name, "vmldLn2");
24900 else if (n == 4)
24901 {
24902 sprintf (name, "vmls%s", bname+10);
24903 name[strlen (name)-1] = '4';
24904 }
24905 else
24906 sprintf (name, "vmld%s2", bname+10);
24907
24908 /* Uppercase the first letter of the function name. */
24909 name[4] &= ~0x20;
24910
24911 arity = 0;
24912 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
24913 args = TREE_CHAIN (args))
24914 arity++;
24915
24916 if (arity == 1)
24917 fntype = build_function_type_list (type_out, type_in, NULL);
24918 else
24919 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
24920
24921 /* Build a function declaration for the vectorized function. */
24922 new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype);
24923 TREE_PUBLIC (new_fndecl) = 1;
24924 DECL_EXTERNAL (new_fndecl) = 1;
24925 DECL_IS_NOVOPS (new_fndecl) = 1;
24926 TREE_READONLY (new_fndecl) = 1;
24927
24928 return new_fndecl;
24929 }
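
/* A worked example of the mangling above (derived from the code; the
   option spelling -mveclibabi=svml is the only extra assumption):
   BUILT_IN_SINF over V4SF starts from "__builtin_sinf" and becomes
   "vmlssinf" -> "vmlssin4" -> "vmlsSin4", while BUILT_IN_SIN over V2DF
   becomes "vmldSin2"; log and logf are special-cased to "vmldLn2" and
   "vmlsLn4".  The resulting decl is external, NOVOPS and readonly, so
   the vectorizer may treat the SVML routine as a pure function.  */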
24930
24931 /* Handler for an ACML-style interface to
24932 a library with vectorized intrinsics. */
24933
24934 static tree
24935 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
24936 {
24937 char name[20] = "__vr.._";
24938 tree fntype, new_fndecl, args;
24939 unsigned arity;
24940 const char *bname;
24941 enum machine_mode el_mode, in_mode;
24942 int n, in_n;
24943
24944 /* ACML is 64-bit only and suitable for unsafe math only, as
24945 it does not correctly support parts of IEEE arithmetic with the
24946 required precision, such as denormals. */
24947 if (!TARGET_64BIT
24948 || !flag_unsafe_math_optimizations)
24949 return NULL_TREE;
24950
24951 el_mode = TYPE_MODE (TREE_TYPE (type_out));
24952 n = TYPE_VECTOR_SUBPARTS (type_out);
24953 in_mode = TYPE_MODE (TREE_TYPE (type_in));
24954 in_n = TYPE_VECTOR_SUBPARTS (type_in);
24955 if (el_mode != in_mode
24956 || n != in_n)
24957 return NULL_TREE;
24958
24959 switch (fn)
24960 {
24961 case BUILT_IN_SIN:
24962 case BUILT_IN_COS:
24963 case BUILT_IN_EXP:
24964 case BUILT_IN_LOG:
24965 case BUILT_IN_LOG2:
24966 case BUILT_IN_LOG10:
24967 name[4] = 'd';
24968 name[5] = '2';
24969 if (el_mode != DFmode
24970 || n != 2)
24971 return NULL_TREE;
24972 break;
24973
24974 case BUILT_IN_SINF:
24975 case BUILT_IN_COSF:
24976 case BUILT_IN_EXPF:
24977 case BUILT_IN_POWF:
24978 case BUILT_IN_LOGF:
24979 case BUILT_IN_LOG2F:
24980 case BUILT_IN_LOG10F:
24981 name[4] = 's';
24982 name[5] = '4';
24983 if (el_mode != SFmode
24984 || n != 4)
24985 return NULL_TREE;
24986 break;
24987
24988 default:
24989 return NULL_TREE;
24990 }
24991
24992 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
24993 sprintf (name + 7, "%s", bname+10);
24994
24995 arity = 0;
24996 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
24997 args = TREE_CHAIN (args))
24998 arity++;
24999
25000 if (arity == 1)
25001 fntype = build_function_type_list (type_out, type_in, NULL);
25002 else
25003 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
25004
25005 /* Build a function declaration for the vectorized function. */
25006 new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype);
25007 TREE_PUBLIC (new_fndecl) = 1;
25008 DECL_EXTERNAL (new_fndecl) = 1;
25009 DECL_IS_NOVOPS (new_fndecl) = 1;
25010 TREE_READONLY (new_fndecl) = 1;
25011
25012 return new_fndecl;
25013 }
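
/* A worked example for the ACML mangling (the option spelling
   -mveclibabi=acml is the only extra assumption): the "__vr.._"
   template is filled in as 'd'/'2' or 's'/'4' and the base name with
   its "__builtin_" prefix stripped is appended at offset 7, so
   BUILT_IN_SIN over V2DF becomes "__vrd2_sin" and BUILT_IN_SINF over
   V4SF becomes "__vrs4_sinf".  */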
25014
25015
25016 /* Returns a decl of a function that implements conversion of an integer vector
25017 into a floating-point vector, or vice-versa. TYPE is the type of the integer
25018 side of the conversion.
25019 Return NULL_TREE if it is not available. */
25020
25021 static tree
25022 ix86_vectorize_builtin_conversion (unsigned int code, tree type)
25023 {
25024 if (TREE_CODE (type) != VECTOR_TYPE)
25025 return NULL_TREE;
25026
25027 switch (code)
25028 {
25029 case FLOAT_EXPR:
25030 switch (TYPE_MODE (type))
25031 {
25032 case V4SImode:
25033 return ix86_builtins[IX86_BUILTIN_CVTDQ2PS];
25034 default:
25035 return NULL_TREE;
25036 }
25037
25038 case FIX_TRUNC_EXPR:
25039 switch (TYPE_MODE (type))
25040 {
25041 case V4SImode:
25042 return ix86_builtins[IX86_BUILTIN_CVTTPS2DQ];
25043 default:
25044 return NULL_TREE;
25045 }
25046 default:
25047 return NULL_TREE;
25048
25049 }
25050 }
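
/* Usage sketch (loop and options illustrative): the vectorizer queries
   this hook for int<->float conversions, so with -O3 -msse2

       void
       itof (float *f, const int *n, int c)
       {
         int i;
         for (i = 0; i < c; i++)
           f[i] = (float) n[i];
       }

   is vectorized through IX86_BUILTIN_CVTDQ2PS (cvtdq2ps), and the
   truncating float->int direction uses IX86_BUILTIN_CVTTPS2DQ
   (cvttps2dq).  Only the V4SImode integer side is handled; everything
   else returns NULL_TREE.  */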
25051
25052 /* Returns a decl of a target-specific builtin that implements
25053 the reciprocal of the function, or NULL_TREE if not available. */
25054
25055 static tree
25056 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
25057 bool sqrt ATTRIBUTE_UNUSED)
25058 {
25059 if (! (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
25060 && flag_finite_math_only && !flag_trapping_math
25061 && flag_unsafe_math_optimizations))
25062 return NULL_TREE;
25063
25064 if (md_fn)
25065 /* Machine dependent builtins. */
25066 switch (fn)
25067 {
25068 /* Vectorized version of sqrt to rsqrt conversion. */
25069 case IX86_BUILTIN_SQRTPS_NR:
25070 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
25071
25072 default:
25073 return NULL_TREE;
25074 }
25075 else
25076 /* Normal builtins. */
25077 switch (fn)
25078 {
25079 /* Sqrt to rsqrt conversion. */
25080 case BUILT_IN_SQRTF:
25081 return ix86_builtins[IX86_BUILTIN_RSQRTF];
25082
25083 default:
25084 return NULL_TREE;
25085 }
25086 }
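
/* Usage sketch (the flag set mirrors the condition above; the function
   is illustrative): with -mfpmath=sse -mrecip -ffast-math and speed
   optimization, a scalar

       float rs (float x) { return 1.0f / __builtin_sqrtf (x); }

   has its sqrtf call replaced by IX86_BUILTIN_RSQRTF, i.e. rsqrtss
   followed by a Newton-Raphson correction step, and the vectorized
   SQRTPS_NR form is rewritten to RSQRTPS_NR in the same way.  */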
25087
25088 /* Store OPERAND to the memory after reload is completed. This means
25089 that we can't easily use assign_stack_local. */
25090 rtx
25091 ix86_force_to_memory (enum machine_mode mode, rtx operand)
25092 {
25093 rtx result;
25094
25095 gcc_assert (reload_completed);
25096 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE)
25097 {
25098 result = gen_rtx_MEM (mode,
25099 gen_rtx_PLUS (Pmode,
25100 stack_pointer_rtx,
25101 GEN_INT (-RED_ZONE_SIZE)));
25102 emit_move_insn (result, operand);
25103 }
25104 else if ((TARGET_64BIT_MS_ABI || !TARGET_RED_ZONE) && TARGET_64BIT)
25105 {
25106 switch (mode)
25107 {
25108 case HImode:
25109 case SImode:
25110 operand = gen_lowpart (DImode, operand);
25111 /* FALLTHRU */
25112 case DImode:
25113 emit_insn (
25114 gen_rtx_SET (VOIDmode,
25115 gen_rtx_MEM (DImode,
25116 gen_rtx_PRE_DEC (DImode,
25117 stack_pointer_rtx)),
25118 operand));
25119 break;
25120 default:
25121 gcc_unreachable ();
25122 }
25123 result = gen_rtx_MEM (mode, stack_pointer_rtx);
25124 }
25125 else
25126 {
25127 switch (mode)
25128 {
25129 case DImode:
25130 {
25131 rtx operands[2];
25132 split_di (&operand, 1, operands, operands + 1);
25133 emit_insn (
25134 gen_rtx_SET (VOIDmode,
25135 gen_rtx_MEM (SImode,
25136 gen_rtx_PRE_DEC (Pmode,
25137 stack_pointer_rtx)),
25138 operands[1]));
25139 emit_insn (
25140 gen_rtx_SET (VOIDmode,
25141 gen_rtx_MEM (SImode,
25142 gen_rtx_PRE_DEC (Pmode,
25143 stack_pointer_rtx)),
25144 operands[0]));
25145 }
25146 break;
25147 case HImode:
25148 /* Store HImodes as SImodes. */
25149 operand = gen_lowpart (SImode, operand);
25150 /* FALLTHRU */
25151 case SImode:
25152 emit_insn (
25153 gen_rtx_SET (VOIDmode,
25154 gen_rtx_MEM (GET_MODE (operand),
25155 gen_rtx_PRE_DEC (SImode,
25156 stack_pointer_rtx)),
25157 operand));
25158 break;
25159 default:
25160 gcc_unreachable ();
25161 }
25162 result = gen_rtx_MEM (mode, stack_pointer_rtx);
25163 }
25164 return result;
25165 }
25166
25167 /* Free the operand from memory. */
25168 void
25169 ix86_free_from_memory (enum machine_mode mode)
25170 {
25171 if (!TARGET_RED_ZONE || TARGET_64BIT_MS_ABI)
25172 {
25173 int size;
25174
25175 if (mode == DImode || TARGET_64BIT)
25176 size = 8;
25177 else
25178 size = 4;
25179 /* Use LEA to deallocate stack space. In peephole2 it will be converted
25180 to a pop or add instruction if registers are available. */
25181 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
25182 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
25183 GEN_INT (size))));
25184 }
25185 }
25186
25187 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
25188 QImode must go into class Q_REGS.
25189 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
25190 movdf to do mem-to-mem moves through integer regs. */
25191 enum reg_class
25192 ix86_preferred_reload_class (rtx x, enum reg_class regclass)
25193 {
25194 enum machine_mode mode = GET_MODE (x);
25195
25196 /* We're only allowed to return a subclass of CLASS. Many of the
25197 following checks fail for NO_REGS, so eliminate that early. */
25198 if (regclass == NO_REGS)
25199 return NO_REGS;
25200
25201 /* All classes can load zeros. */
25202 if (x == CONST0_RTX (mode))
25203 return regclass;
25204
25205 /* Force constants into memory if we are loading a (nonzero) constant into
25206 an MMX or SSE register. This is because there are no MMX/SSE instructions
25207 to load from a constant. */
25208 if (CONSTANT_P (x)
25209 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
25210 return NO_REGS;
25211
25212 /* Prefer SSE regs only, if we can use them for math. */
25213 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
25214 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
25215
25216 /* Floating-point constants need more complex checks. */
25217 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
25218 {
25219 /* General regs can load everything. */
25220 if (reg_class_subset_p (regclass, GENERAL_REGS))
25221 return regclass;
25222
25223 /* Floats can load 0 and 1 plus some others. Note that we eliminated
25224 zero above. We only want to wind up preferring 80387 registers if
25225 we plan on doing computation with them. */
25226 if (TARGET_80387
25227 && standard_80387_constant_p (x))
25228 {
25229 /* Limit class to non-sse. */
25230 if (regclass == FLOAT_SSE_REGS)
25231 return FLOAT_REGS;
25232 if (regclass == FP_TOP_SSE_REGS)
25233 return FP_TOP_REG;
25234 if (regclass == FP_SECOND_SSE_REGS)
25235 return FP_SECOND_REG;
25236 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
25237 return regclass;
25238 }
25239
25240 return NO_REGS;
25241 }
25242
25243 /* Generally when we see PLUS here, it's the function invariant
25244 (plus soft-fp const_int). Which can only be computed into general
25245 regs. */
25246 if (GET_CODE (x) == PLUS)
25247 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
25248
25249 /* QImode constants are easy to load, but non-constant QImode data
25250 must go into Q_REGS. */
25251 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
25252 {
25253 if (reg_class_subset_p (regclass, Q_REGS))
25254 return regclass;
25255 if (reg_class_subset_p (Q_REGS, regclass))
25256 return Q_REGS;
25257 return NO_REGS;
25258 }
25259
25260 return regclass;
25261 }
25262
25263 /* Discourage putting floating-point values in SSE registers unless
25264 SSE math is being used, and likewise for the 387 registers. */
25265 enum reg_class
25266 ix86_preferred_output_reload_class (rtx x, enum reg_class regclass)
25267 {
25268 enum machine_mode mode = GET_MODE (x);
25269
25270 /* Restrict the output reload class to the register bank that we are doing
25271 math on. If we would like not to return a subset of CLASS, reject this
25272 alternative: if reload cannot do this, it will still use its choice. */
25274 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
25275 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
25276
25277 if (X87_FLOAT_MODE_P (mode))
25278 {
25279 if (regclass == FP_TOP_SSE_REGS)
25280 return FP_TOP_REG;
25281 else if (regclass == FP_SECOND_SSE_REGS)
25282 return FP_SECOND_REG;
25283 else
25284 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
25285 }
25286
25287 return regclass;
25288 }
25289
25290 static enum reg_class
25291 ix86_secondary_reload (bool in_p, rtx x, enum reg_class rclass,
25292 enum machine_mode mode,
25293 secondary_reload_info *sri ATTRIBUTE_UNUSED)
25294 {
25295 /* QImode spills from non-QI registers require an
25296 intermediate register on 32-bit targets. */
25297 if (!in_p && mode == QImode && !TARGET_64BIT
25298 && (rclass == GENERAL_REGS
25299 || rclass == LEGACY_REGS
25300 || rclass == INDEX_REGS))
25301 {
25302 int regno;
25303
25304 if (REG_P (x))
25305 regno = REGNO (x);
25306 else
25307 regno = -1;
25308
25309 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
25310 regno = true_regnum (x);
25311
25312 /* Return Q_REGS if the operand is in memory. */
25313 if (regno == -1)
25314 return Q_REGS;
25315 }
25316
25317 return NO_REGS;
25318 }
25319
25320 /* If we are copying between general and FP registers, we need a memory
25321 location. The same is true for SSE and MMX registers.
25322
25323 To keep register_move_cost fast, this inline variant is provided.
25324 
25325 The macro can't work reliably when one of the CLASSES is a class containing
25326 registers from multiple units (SSE, MMX, integer). We avoid this by never
25327 combining those units in a single alternative in the machine description.
25328 Ensure that this constraint holds to avoid unexpected surprises.
25329
25330 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
25331 enforce these sanity checks. */
25332
25333 static inline int
25334 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
25335 enum machine_mode mode, int strict)
25336 {
25337 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
25338 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
25339 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
25340 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
25341 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
25342 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
25343 {
25344 gcc_assert (!strict);
25345 return true;
25346 }
25347
25348 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
25349 return true;
25350
25351 /* ??? This is a lie. We do have moves between mmx/general, and between
25352 mmx/sse2. But by saying we need secondary memory we discourage the
25353 register allocator from using the mmx registers unless needed. */
25354 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
25355 return true;
25356
25357 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
25358 {
25359 /* SSE1 doesn't have any direct moves from other classes. */
25360 if (!TARGET_SSE2)
25361 return true;
25362
25363 /* If the target says that inter-unit moves are more expensive
25364 than moving through memory, then don't generate them. */
25365 if (!TARGET_INTER_UNIT_MOVES)
25366 return true;
25367
25368 /* Between SSE and general, we have moves no larger than word size. */
25369 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
25370 return true;
25371 }
25372
25373 return false;
25374 }
25375
25376 int
25377 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
25378 enum machine_mode mode, int strict)
25379 {
25380 return inline_secondary_memory_needed (class1, class2, mode, strict);
25381 }
25382
25383 /* Return true if the registers in CLASS cannot represent the change from
25384 modes FROM to TO. */
25385
25386 bool
25387 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
25388 enum reg_class regclass)
25389 {
25390 if (from == to)
25391 return false;
25392
25393 /* x87 registers can't do subreg at all, as all values are reformatted
25394 to extended precision. */
25395 if (MAYBE_FLOAT_CLASS_P (regclass))
25396 return true;
25397
25398 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
25399 {
25400 /* Vector registers do not support QI or HImode loads. If we don't
25401 disallow a change to these modes, reload will assume it's ok to
25402 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
25403 the vec_dupv4hi pattern. */
25404 if (GET_MODE_SIZE (from) < 4)
25405 return true;
25406
25407 /* Vector registers do not support subreg with nonzero offsets, which
25408 are otherwise valid for integer registers. Since we can't see
25409 whether we have a nonzero offset from here, prohibit all
25410 nonparadoxical subregs changing size. */
25411 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
25412 return true;
25413 }
25414
25415 return false;
25416 }
25417
25418 /* Return the cost of moving data of mode M between a
25419 register and memory. A value of 2 is the default; this cost is
25420 relative to those in `REGISTER_MOVE_COST'.
25421
25422 This function is used extensively by register_move_cost, which is used to
25423 build tables at startup, so it is kept inline. When IN is 2, return the
25424 maximum of the in and out move costs.
25425
25426 If moving between registers and memory is more expensive than
25427 between two registers, you should define this macro to express the
25428 relative cost.
25429
25430 Also model the increased cost of moving QImode values in registers
25431 outside the Q_REGS classes.
25432 */
25433 static inline int
25434 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
25435 int in)
25436 {
25437 int cost;
25438 if (FLOAT_CLASS_P (regclass))
25439 {
25440 int index;
25441 switch (mode)
25442 {
25443 case SFmode:
25444 index = 0;
25445 break;
25446 case DFmode:
25447 index = 1;
25448 break;
25449 case XFmode:
25450 index = 2;
25451 break;
25452 default:
25453 return 100;
25454 }
25455 if (in == 2)
25456 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
25457 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
25458 }
25459 if (SSE_CLASS_P (regclass))
25460 {
25461 int index;
25462 switch (GET_MODE_SIZE (mode))
25463 {
25464 case 4:
25465 index = 0;
25466 break;
25467 case 8:
25468 index = 1;
25469 break;
25470 case 16:
25471 index = 2;
25472 break;
25473 default:
25474 return 100;
25475 }
25476 if (in == 2)
25477 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
25478 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
25479 }
25480 if (MMX_CLASS_P (regclass))
25481 {
25482 int index;
25483 switch (GET_MODE_SIZE (mode))
25484 {
25485 case 4:
25486 index = 0;
25487 break;
25488 case 8:
25489 index = 1;
25490 break;
25491 default:
25492 return 100;
25493 }
24494 if (in == 2)
25495 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
25496 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
25497 }
25498 switch (GET_MODE_SIZE (mode))
25499 {
25500 case 1:
25501 if (Q_CLASS_P (regclass) || TARGET_64BIT)
25502 {
25503 if (!in)
25504 return ix86_cost->int_store[0];
25505 if (TARGET_PARTIAL_REG_DEPENDENCY
25506 && optimize_function_for_speed_p (cfun))
25507 cost = ix86_cost->movzbl_load;
25508 else
25509 cost = ix86_cost->int_load[0];
25510 if (in == 2)
25511 return MAX (cost, ix86_cost->int_store[0]);
25512 return cost;
25513 }
25514 else
25515 {
25516 if (in == 2)
25517 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
25518 if (in)
25519 return ix86_cost->movzbl_load;
25520 else
25521 return ix86_cost->int_store[0] + 4;
25522 }
25523 break;
25524 case 2:
25525 if (in == 2)
25526 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
25527 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
25528 default:
25529 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
25530 if (mode == TFmode)
25531 mode = XFmode;
25532 if (in == 2)
25533 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
25534 else if (in)
25535 cost = ix86_cost->int_load[2];
25536 else
25537 cost = ix86_cost->int_store[2];
25538 return (cost * (((int) GET_MODE_SIZE (mode)
25539 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
25540 }
25541 }
25542
25543 int
25544 ix86_memory_move_cost (enum machine_mode mode, enum reg_class regclass, int in)
25545 {
25546 return inline_memory_move_cost (mode, regclass, in);
25547 }
25548
25549
25550 /* Return the cost of moving data from a register in class CLASS1 to
25551 one in class CLASS2.
25552
25553 It is not required that the cost always equal 2 when FROM is the same as TO;
25554 on some machines it is expensive to move between registers if they are not
25555 general registers. */
25556
25557 int
25558 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
25559 enum reg_class class2)
25560 {
25561 /* In case we require secondary memory, compute cost of the store followed
25562 by load. In order to avoid bad register allocation choices, we need
25563 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
25564
25565 if (inline_secondary_memory_needed (class1, class2, mode, 0))
25566 {
25567 int cost = 1;
25568
25569 cost += inline_memory_move_cost (mode, class1, 2);
25570 cost += inline_memory_move_cost (mode, class2, 2);
25571
25572 /* In the case of copying from a general purpose register we may emit
25573 multiple stores followed by a single load, causing a memory size
25574 mismatch stall. Count this as an arbitrarily high cost of 20. */
25575 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
25576 cost += 20;
25577
25578 /* In the case of FP/MMX moves, the registers actually overlap, and we
25579 have to switch modes in order to treat them differently. */
25580 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
25581 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
25582 cost += 20;
25583
25584 return cost;
25585 }
25586
25587 /* Moves between SSE/MMX and integer unit are expensive. */
25588 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
25589 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
25590
25591 /* ??? By keeping the returned value relatively high, we limit the number
25592 of moves between integer and MMX/SSE registers for all targets.
25593 Additionally, a high value prevents problems with x86_modes_tieable_p (),
25594 where integer modes in MMX/SSE registers are not tieable
25595 because of missing QImode and HImode moves to, from or between
25596 MMX/SSE registers. */
25597 return MAX (8, ix86_cost->mmxsse_to_integer);
25598
25599 if (MAYBE_FLOAT_CLASS_P (class1))
25600 return ix86_cost->fp_move;
25601 if (MAYBE_SSE_CLASS_P (class1))
25602 return ix86_cost->sse_move;
25603 if (MAYBE_MMX_CLASS_P (class1))
25604 return ix86_cost->mmx_move;
25605 return 2;
25606 }
25607
25608 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
25609
25610 bool
25611 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
25612 {
25613 /* Flags registers, and only flags registers, can hold CCmode values. */
25614 if (CC_REGNO_P (regno))
25615 return GET_MODE_CLASS (mode) == MODE_CC;
25616 if (GET_MODE_CLASS (mode) == MODE_CC
25617 || GET_MODE_CLASS (mode) == MODE_RANDOM
25618 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
25619 return 0;
25620 if (FP_REGNO_P (regno))
25621 return VALID_FP_MODE_P (mode);
25622 if (SSE_REGNO_P (regno))
25623 {
25624 /* We implement the move patterns for all vector modes into and
25625 out of SSE registers, even when no operation instructions
25626 are available. OImode move is available only when AVX is
25627 enabled. */
25628 return ((TARGET_AVX && mode == OImode)
25629 || VALID_AVX256_REG_MODE (mode)
25630 || VALID_SSE_REG_MODE (mode)
25631 || VALID_SSE2_REG_MODE (mode)
25632 || VALID_MMX_REG_MODE (mode)
25633 || VALID_MMX_REG_MODE_3DNOW (mode));
25634 }
25635 if (MMX_REGNO_P (regno))
25636 {
25637 /* We implement the move patterns for 3DNOW modes even in MMX mode,
25638 so if the register is available at all, then we can move data of
25639 the given mode into or out of it. */
25640 return (VALID_MMX_REG_MODE (mode)
25641 || VALID_MMX_REG_MODE_3DNOW (mode));
25642 }
25643
25644 if (mode == QImode)
25645 {
25646 /* Take care with QImode values: they can be in non-QI regs,
25647 but then they cause partial register stalls. */
25648 if (regno < 4 || TARGET_64BIT)
25649 return 1;
25650 if (!TARGET_PARTIAL_REG_STALL)
25651 return 1;
25652 return reload_in_progress || reload_completed;
25653 }
25654 /* We handle both integers and floats in the general purpose registers. */
25655 else if (VALID_INT_MODE_P (mode))
25656 return 1;
25657 else if (VALID_FP_MODE_P (mode))
25658 return 1;
25659 else if (VALID_DFP_MODE_P (mode))
25660 return 1;
25661 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
25662 on to use that value in smaller contexts, this can easily force a
25663 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
25664 supporting DImode, allow it. */
25665 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
25666 return 1;
25667
25668 return 0;
25669 }
25670
25671 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
25672 tieable integer mode. */
25673
25674 static bool
25675 ix86_tieable_integer_mode_p (enum machine_mode mode)
25676 {
25677 switch (mode)
25678 {
25679 case HImode:
25680 case SImode:
25681 return true;
25682
25683 case QImode:
25684 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
25685
25686 case DImode:
25687 return TARGET_64BIT;
25688
25689 default:
25690 return false;
25691 }
25692 }
25693
25694 /* Return true if MODE1 is accessible in a register that can hold MODE2
25695 without copying. That is, all register classes that can hold MODE2
25696 can also hold MODE1. */
25697
25698 bool
25699 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
25700 {
25701 if (mode1 == mode2)
25702 return true;
25703
25704 if (ix86_tieable_integer_mode_p (mode1)
25705 && ix86_tieable_integer_mode_p (mode2))
25706 return true;
25707
25708 /* MODE2 being XFmode implies fp stack or general regs, which means we
25709 can tie any smaller floating point modes to it. Note that we do not
25710 tie this with TFmode. */
25711 if (mode2 == XFmode)
25712 return mode1 == SFmode || mode1 == DFmode;
25713
25714 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
25715 that we can tie it with SFmode. */
25716 if (mode2 == DFmode)
25717 return mode1 == SFmode;
25718
25719 /* If MODE2 is only appropriate for an SSE register, then tie with
25720 any other mode acceptable to SSE registers. */
25721 if (GET_MODE_SIZE (mode2) == 16
25722 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
25723 return (GET_MODE_SIZE (mode1) == 16
25724 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
25725
25726 /* If MODE2 is appropriate for an MMX register, then tie
25727 with any other mode acceptable to MMX registers. */
25728 if (GET_MODE_SIZE (mode2) == 8
25729 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
25730 return (GET_MODE_SIZE (mode1) == 8
25731 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
25732
25733 return false;
25734 }
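
/* A few concrete consequences of the rules above (illustrative):
   SFmode ties with DFmode and with XFmode, since anything that can
   hold the wider FP mode can hold the narrower one; V4SFmode ties with
   V2DImode, both being 16-byte modes valid in SSE registers; but
   SImode does not tie with DImode on 32-bit targets, where DImode
   needs a register pair.  */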
25735
25736 /* Compute a (partial) cost for rtx X. Return true if the complete
25737 cost has been computed, and false if subexpressions should be
25738 scanned. In either case, *TOTAL contains the cost result. */
25739
25740 static bool
25741 ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total, bool speed)
25742 {
25743 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
25744 enum machine_mode mode = GET_MODE (x);
25745 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
25746
25747 switch (code)
25748 {
25749 case CONST_INT:
25750 case CONST:
25751 case LABEL_REF:
25752 case SYMBOL_REF:
25753 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
25754 *total = 3;
25755 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
25756 *total = 2;
25757 else if (flag_pic && SYMBOLIC_CONST (x)
25758 && (!TARGET_64BIT
24759 || (GET_CODE (x) != LABEL_REF
25760 && (GET_CODE (x) != SYMBOL_REF
25761 || !SYMBOL_REF_LOCAL_P (x)))))
25762 *total = 1;
25763 else
25764 *total = 0;
25765 return true;
25766
25767 case CONST_DOUBLE:
25768 if (mode == VOIDmode)
25769 *total = 0;
25770 else
25771 switch (standard_80387_constant_p (x))
25772 {
25773 case 1: /* 0.0 */
25774 *total = 1;
25775 break;
25776 default: /* Other constants */
25777 *total = 2;
25778 break;
25779 case 0:
25780 case -1:
25781 /* Start with (MEM (SYMBOL_REF)), since that's where
25782 it'll probably end up. Add a penalty for size. */
25783 *total = (COSTS_N_INSNS (1)
25784 + (flag_pic != 0 && !TARGET_64BIT)
25785 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
25786 break;
25787 }
25788 return true;
25789
25790 case ZERO_EXTEND:
25791 /* Zero extension is often completely free on x86_64, so make
25792 it as cheap as possible. */
25793 if (TARGET_64BIT && mode == DImode
25794 && GET_MODE (XEXP (x, 0)) == SImode)
25795 *total = 1;
25796 else if (TARGET_ZERO_EXTEND_WITH_AND)
25797 *total = cost->add;
25798 else
25799 *total = cost->movzx;
25800 return false;
25801
25802 case SIGN_EXTEND:
25803 *total = cost->movsx;
25804 return false;
25805
25806 case ASHIFT:
25807 if (CONST_INT_P (XEXP (x, 1))
25808 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
25809 {
25810 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
25811 if (value == 1)
25812 {
25813 *total = cost->add;
25814 return false;
25815 }
25816 if ((value == 2 || value == 3)
25817 && cost->lea <= cost->shift_const)
25818 {
25819 *total = cost->lea;
25820 return false;
25821 }
25822 }
25823 /* FALLTHRU */
25824
25825 case ROTATE:
25826 case ASHIFTRT:
25827 case LSHIFTRT:
25828 case ROTATERT:
25829 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
25830 {
25831 if (CONST_INT_P (XEXP (x, 1)))
25832 {
25833 if (INTVAL (XEXP (x, 1)) > 32)
25834 *total = cost->shift_const + COSTS_N_INSNS (2);
25835 else
25836 *total = cost->shift_const * 2;
25837 }
25838 else
25839 {
25840 if (GET_CODE (XEXP (x, 1)) == AND)
25841 *total = cost->shift_var * 2;
25842 else
25843 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
25844 }
25845 }
25846 else
25847 {
25848 if (CONST_INT_P (XEXP (x, 1)))
25849 *total = cost->shift_const;
25850 else
25851 *total = cost->shift_var;
25852 }
25853 return false;
25854
25855 case MULT:
25856 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
25857 {
25858 /* ??? SSE scalar cost should be used here. */
25859 *total = cost->fmul;
25860 return false;
25861 }
25862 else if (X87_FLOAT_MODE_P (mode))
25863 {
25864 *total = cost->fmul;
25865 return false;
25866 }
25867 else if (FLOAT_MODE_P (mode))
25868 {
25869 /* ??? SSE vector cost should be used here. */
25870 *total = cost->fmul;
25871 return false;
25872 }
25873 else
25874 {
25875 rtx op0 = XEXP (x, 0);
25876 rtx op1 = XEXP (x, 1);
25877 int nbits;
25878 if (CONST_INT_P (XEXP (x, 1)))
25879 {
25880 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
25881 for (nbits = 0; value != 0; value &= value - 1)
25882 nbits++;
25883 }
25884 else
25885 /* This is arbitrary. */
25886 nbits = 7;
25887
25888 /* Compute costs correctly for widening multiplication. */
25889 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
25890 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
25891 == GET_MODE_SIZE (mode))
25892 {
25893 int is_mulwiden = 0;
25894 enum machine_mode inner_mode = GET_MODE (op0);
25895
25896 if (GET_CODE (op0) == GET_CODE (op1))
25897 is_mulwiden = 1, op1 = XEXP (op1, 0);
25898 else if (CONST_INT_P (op1))
25899 {
25900 if (GET_CODE (op0) == SIGN_EXTEND)
25901 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
25902 == INTVAL (op1);
25903 else
25904 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
25905 }
25906
25907 if (is_mulwiden)
25908 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
25909 }
25910
25911 *total = (cost->mult_init[MODE_INDEX (mode)]
25912 + nbits * cost->mult_bit
25913 + rtx_cost (op0, outer_code, speed) + rtx_cost (op1, outer_code, speed));
25914
25915 return true;
25916 }
25917
25918 case DIV:
25919 case UDIV:
25920 case MOD:
25921 case UMOD:
25922 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
25923 /* ??? SSE cost should be used here. */
25924 *total = cost->fdiv;
25925 else if (X87_FLOAT_MODE_P (mode))
25926 *total = cost->fdiv;
25927 else if (FLOAT_MODE_P (mode))
25928 /* ??? SSE vector cost should be used here. */
25929 *total = cost->fdiv;
25930 else
25931 *total = cost->divide[MODE_INDEX (mode)];
25932 return false;
25933
25934 case PLUS:
25935 if (GET_MODE_CLASS (mode) == MODE_INT
25936 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
25937 {
25938 if (GET_CODE (XEXP (x, 0)) == PLUS
25939 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
25940 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
25941 && CONSTANT_P (XEXP (x, 1)))
25942 {
25943 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
25944 if (val == 2 || val == 4 || val == 8)
25945 {
25946 *total = cost->lea;
25947 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
25948 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
25949 outer_code, speed);
25950 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
25951 return true;
25952 }
25953 }
25954 else if (GET_CODE (XEXP (x, 0)) == MULT
25955 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
25956 {
25957 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
25958 if (val == 2 || val == 4 || val == 8)
25959 {
25960 *total = cost->lea;
25961 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
25962 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
25963 return true;
25964 }
25965 }
25966 else if (GET_CODE (XEXP (x, 0)) == PLUS)
25967 {
25968 *total = cost->lea;
25969 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
25970 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
25971 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
25972 return true;
25973 }
25974 }
25975 /* FALLTHRU */
25976
25977 case MINUS:
25978 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
25979 {
25980 /* ??? SSE cost should be used here. */
25981 *total = cost->fadd;
25982 return false;
25983 }
25984 else if (X87_FLOAT_MODE_P (mode))
25985 {
25986 *total = cost->fadd;
25987 return false;
25988 }
25989 else if (FLOAT_MODE_P (mode))
25990 {
25991 /* ??? SSE vector cost should be used here. */
25992 *total = cost->fadd;
25993 return false;
25994 }
25995 /* FALLTHRU */
25996
25997 case AND:
25998 case IOR:
25999 case XOR:
26000 if (!TARGET_64BIT && mode == DImode)
26001 {
26002 *total = (cost->add * 2
26003 + (rtx_cost (XEXP (x, 0), outer_code, speed)
26004 << (GET_MODE (XEXP (x, 0)) != DImode))
26005 + (rtx_cost (XEXP (x, 1), outer_code, speed)
26006 << (GET_MODE (XEXP (x, 1)) != DImode)));
26007 return true;
26008 }
26009 /* FALLTHRU */
26010
26011 case NEG:
26012 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26013 {
26014 /* ??? SSE cost should be used here. */
26015 *total = cost->fchs;
26016 return false;
26017 }
26018 else if (X87_FLOAT_MODE_P (mode))
26019 {
26020 *total = cost->fchs;
26021 return false;
26022 }
26023 else if (FLOAT_MODE_P (mode))
26024 {
26025 /* ??? SSE vector cost should be used here. */
26026 *total = cost->fchs;
26027 return false;
26028 }
26029 /* FALLTHRU */
26030
26031 case NOT:
26032 if (!TARGET_64BIT && mode == DImode)
26033 *total = cost->add * 2;
26034 else
26035 *total = cost->add;
26036 return false;
26037
26038 case COMPARE:
26039 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
26040 && XEXP (XEXP (x, 0), 1) == const1_rtx
26041 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
26042 && XEXP (x, 1) == const0_rtx)
26043 {
26044 /* This kind of construct is implemented using test[bwl].
26045 Treat it as if we had an AND. */
26046 *total = (cost->add
26047 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed)
26048 + rtx_cost (const1_rtx, outer_code, speed));
26049 return true;
26050 }
26051 return false;
26052
26053 case FLOAT_EXTEND:
26054 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
26055 *total = 0;
26056 return false;
26057
26058 case ABS:
26059 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26060 /* ??? SSE cost should be used here. */
26061 *total = cost->fabs;
26062 else if (X87_FLOAT_MODE_P (mode))
26063 *total = cost->fabs;
26064 else if (FLOAT_MODE_P (mode))
26065 /* ??? SSE vector cost should be used here. */
26066 *total = cost->fabs;
26067 return false;
26068
26069 case SQRT:
26070 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26071 /* ??? SSE cost should be used here. */
26072 *total = cost->fsqrt;
26073 else if (X87_FLOAT_MODE_P (mode))
26074 *total = cost->fsqrt;
26075 else if (FLOAT_MODE_P (mode))
26076 /* ??? SSE vector cost should be used here. */
26077 *total = cost->fsqrt;
26078 return false;
26079
26080 case UNSPEC:
26081 if (XINT (x, 1) == UNSPEC_TP)
26082 *total = 0;
26083 return false;
26084
26085 default:
26086 return false;
26087 }
26088 }
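
/* A worked example for the PLUS and ASHIFT handling in ix86_rtx_costs
   above (the RTL shape is illustrative): an address-like expression

       (plus (plus (mult (reg) (const_int 4)) (reg)) (const_int 12))

   i.e. base + index*4 + 12, is costed as a single cost->lea plus the
   costs of its operands, and a shift by 1 is costed as cost->add,
   which steers expansion and combine toward lea and add where those
   are cheaper than explicit shifts or multiplies.  */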
26089
26090 #if TARGET_MACHO
26091
26092 static int current_machopic_label_num;
26093
26094 /* Given a symbol name and its associated stub, write out the
26095 definition of the stub. */
26096
26097 void
26098 machopic_output_stub (FILE *file, const char *symb, const char *stub)
26099 {
26100 unsigned int length;
26101 char *binder_name, *symbol_name, lazy_ptr_name[32];
26102 int label = ++current_machopic_label_num;
26103
26104 /* For 64-bit we shouldn't get here. */
26105 gcc_assert (!TARGET_64BIT);
26106
26107 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
26108 symb = (*targetm.strip_name_encoding) (symb);
26109
26110 length = strlen (stub);
26111 binder_name = XALLOCAVEC (char, length + 32);
26112 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
26113
26114 length = strlen (symb);
26115 symbol_name = XALLOCAVEC (char, length + 32);
26116 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
26117
26118 sprintf (lazy_ptr_name, "L%d$lz", label);
26119
26120 if (MACHOPIC_PURE)
26121 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
26122 else
26123 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
26124
26125 fprintf (file, "%s:\n", stub);
26126 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
26127
26128 if (MACHOPIC_PURE)
26129 {
26130 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
26131 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
26132 fprintf (file, "\tjmp\t*%%edx\n");
26133 }
26134 else
26135 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
26136
26137 fprintf (file, "%s:\n", binder_name);
26138
26139 if (MACHOPIC_PURE)
26140 {
26141 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
26142 fprintf (file, "\tpushl\t%%eax\n");
26143 }
26144 else
26145 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
26146
26147 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
26148
26149 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
26150 fprintf (file, "%s:\n", lazy_ptr_name);
26151 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
26152 fprintf (file, "\t.long %s\n", binder_name);
26153 }
26154
26155 void
26156 darwin_x86_file_end (void)
26157 {
26158 darwin_file_end ();
26159 ix86_file_end ();
26160 }
26161 #endif /* TARGET_MACHO */
26162
26163 /* Order the registers for register allocator. */
26164
26165 void
26166 x86_order_regs_for_local_alloc (void)
26167 {
26168 int pos = 0;
26169 int i;
26170
26171 /* First allocate the local general purpose registers. */
26172 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
26173 if (GENERAL_REGNO_P (i) && call_used_regs[i])
26174 reg_alloc_order [pos++] = i;
26175
26176 /* Global general purpose registers. */
26177 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
26178 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
26179 reg_alloc_order [pos++] = i;
26180
26181 /* x87 registers come first in case we are doing FP math
26182 using them. */
26183 if (!TARGET_SSE_MATH)
26184 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
26185 reg_alloc_order [pos++] = i;
26186
26187 /* SSE registers. */
26188 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
26189 reg_alloc_order [pos++] = i;
26190 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
26191 reg_alloc_order [pos++] = i;
26192
26193 /* x87 registers. */
26194 if (TARGET_SSE_MATH)
26195 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
26196 reg_alloc_order [pos++] = i;
26197
26198 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
26199 reg_alloc_order [pos++] = i;
26200
26201 /* Initialize the rest of the array, as we do not allocate some
26202 registers at all. */
26203 while (pos < FIRST_PSEUDO_REGISTER)
26204 reg_alloc_order [pos++] = 0;
26205 }
26206
26207 /* Handle a "ms_abi" or "sysv" attribute; arguments as in
26208 struct attribute_spec.handler. */
26209 static tree
26210 ix86_handle_abi_attribute (tree *node, tree name,
26211 tree args ATTRIBUTE_UNUSED,
26212 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
26213 {
26214 if (TREE_CODE (*node) != FUNCTION_TYPE
26215 && TREE_CODE (*node) != METHOD_TYPE
26216 && TREE_CODE (*node) != FIELD_DECL
26217 && TREE_CODE (*node) != TYPE_DECL)
26218 {
26219 warning (OPT_Wattributes, "%qs attribute only applies to functions",
26220 IDENTIFIER_POINTER (name));
26221 *no_add_attrs = true;
26222 return NULL_TREE;
26223 }
26224 if (!TARGET_64BIT)
26225 {
26226 warning (OPT_Wattributes, "%qs attribute only available for 64-bit",
26227 IDENTIFIER_POINTER (name));
26228 *no_add_attrs = true;
26229 return NULL_TREE;
26230 }
26231
26232 /* The ms_abi and sysv_abi attributes are mutually exclusive. */
26233 if (is_attribute_p ("ms_abi", name))
26234 {
26235 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
26236 {
26237 error ("ms_abi and sysv_abi attributes are not compatible");
26238 }
26239
26240 return NULL_TREE;
26241 }
26242 else if (is_attribute_p ("sysv_abi", name))
26243 {
26244 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
26245 {
26246 error ("ms_abi and sysv_abi attributes are not compatible");
26247 }
26248
26249 return NULL_TREE;
26250 }
26251
26252 return NULL_TREE;
26253 }
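
/* Usage sketch for the attributes handled above (declarations are
   illustrative): on x86-64 the calling convention can be selected per
   function type, e.g.

       void __attribute__((ms_abi))   win_callback (void *ctx);
       void __attribute__((sysv_abi)) sysv_callback (void *ctx);

   while combining both attributes on one type, or using either one in
   32-bit mode, is diagnosed by the checks above.  */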
26254
26255 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
26256 struct attribute_spec.handler. */
26257 static tree
26258 ix86_handle_struct_attribute (tree *node, tree name,
26259 tree args ATTRIBUTE_UNUSED,
26260 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
26261 {
26262 tree *type = NULL;
26263 if (DECL_P (*node))
26264 {
26265 if (TREE_CODE (*node) == TYPE_DECL)
26266 type = &TREE_TYPE (*node);
26267 }
26268 else
26269 type = node;
26270
26271 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
26272 || TREE_CODE (*type) == UNION_TYPE)))
26273 {
26274 warning (OPT_Wattributes, "%qs attribute ignored",
26275 IDENTIFIER_POINTER (name));
26276 *no_add_attrs = true;
26277 }
26278
26279 else if ((is_attribute_p ("ms_struct", name)
26280 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
26281 || ((is_attribute_p ("gcc_struct", name)
26282 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
26283 {
26284 warning (OPT_Wattributes, "%qs incompatible attribute ignored",
26285 IDENTIFIER_POINTER (name));
26286 *no_add_attrs = true;
26287 }
26288
26289 return NULL_TREE;
26290 }
26291
26292 static bool
26293 ix86_ms_bitfield_layout_p (const_tree record_type)
26294 {
26295 return (TARGET_MS_BITFIELD_LAYOUT &&
26296 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
26297 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
26298 }
26299
26300 /* Returns an expression indicating where the this parameter is
26301 located on entry to the FUNCTION. */
26302
26303 static rtx
26304 x86_this_parameter (tree function)
26305 {
26306 tree type = TREE_TYPE (function);
26307 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
26308 int nregs;
26309
26310 if (TARGET_64BIT)
26311 {
26312 const int *parm_regs;
26313
26314 if (ix86_function_type_abi (type) == MS_ABI)
26315 parm_regs = x86_64_ms_abi_int_parameter_registers;
26316 else
26317 parm_regs = x86_64_int_parameter_registers;
26318 return gen_rtx_REG (DImode, parm_regs[aggr]);
26319 }
26320
26321 nregs = ix86_function_regparm (type, function);
26322
26323 if (nregs > 0 && !stdarg_p (type))
26324 {
26325 int regno;
26326
26327 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
26328 regno = aggr ? DX_REG : CX_REG;
26329 else
26330 {
26331 regno = AX_REG;
26332 if (aggr)
26333 {
26334 regno = DX_REG;
26335 if (nregs == 1)
26336 return gen_rtx_MEM (SImode,
26337 plus_constant (stack_pointer_rtx, 4));
26338 }
26339 }
26340 return gen_rtx_REG (SImode, regno);
26341 }
26342
26343 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
26344 }
26345
26346 /* Determine whether x86_output_mi_thunk can succeed. */
26347
26348 static bool
26349 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
26350 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
26351 HOST_WIDE_INT vcall_offset, const_tree function)
26352 {
26353 /* 64-bit can handle anything. */
26354 if (TARGET_64BIT)
26355 return true;
26356
26357 /* For 32-bit, everything's fine if we have one free register. */
26358 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
26359 return true;
26360
26361 /* Need a free register for vcall_offset. */
26362 if (vcall_offset)
26363 return false;
26364
26365 /* Need a free register for GOT references. */
26366 if (flag_pic && !(*targetm.binds_local_p) (function))
26367 return false;
26368
26369 /* Otherwise ok. */
26370 return true;
26371 }
26372
26373 /* Output the assembler code for a thunk function. THUNK_DECL is the
26374 declaration for the thunk function itself, FUNCTION is the decl for
26375 the target function. DELTA is an immediate constant offset to be
26376 added to THIS. If VCALL_OFFSET is nonzero, the word at
26377 *(*this + vcall_offset) should be added to THIS. */
26378
26379 static void
26380 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
26381 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
26382 HOST_WIDE_INT vcall_offset, tree function)
26383 {
26384 rtx xops[3];
26385 rtx this_param = x86_this_parameter (function);
26386 rtx this_reg, tmp;
26387
26388 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
26389 pull it in now and let DELTA benefit. */
26390 if (REG_P (this_param))
26391 this_reg = this_param;
26392 else if (vcall_offset)
26393 {
26394 /* Put the this parameter into %eax. */
26395 xops[0] = this_param;
26396 xops[1] = this_reg = gen_rtx_REG (Pmode, AX_REG);
26397 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
26398 }
26399 else
26400 this_reg = NULL_RTX;
26401
26402 /* Adjust the this parameter by a fixed constant. */
26403 if (delta)
26404 {
26405 xops[0] = GEN_INT (delta);
26406 xops[1] = this_reg ? this_reg : this_param;
26407 if (TARGET_64BIT)
26408 {
26409 if (!x86_64_general_operand (xops[0], DImode))
26410 {
26411 tmp = gen_rtx_REG (DImode, R10_REG);
26412 xops[1] = tmp;
26413 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
26414 xops[0] = tmp;
26415 xops[1] = this_param;
26416 }
26417 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
26418 }
26419 else
26420 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
26421 }
26422
26423 /* Adjust the this parameter by a value stored in the vtable. */
26424 if (vcall_offset)
26425 {
26426 if (TARGET_64BIT)
26427 tmp = gen_rtx_REG (DImode, R10_REG);
26428 else
26429 {
26430 int tmp_regno = CX_REG;
26431 if (lookup_attribute ("fastcall",
26432 TYPE_ATTRIBUTES (TREE_TYPE (function))))
26433 tmp_regno = AX_REG;
26434 tmp = gen_rtx_REG (SImode, tmp_regno);
26435 }
26436
26437 xops[0] = gen_rtx_MEM (Pmode, this_reg);
26438 xops[1] = tmp;
26439 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
26440
26441 /* Adjust the this parameter. */
26442 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
26443 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
26444 {
26445 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
26446 xops[0] = GEN_INT (vcall_offset);
26447 xops[1] = tmp2;
26448 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
26449 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
26450 }
26451 xops[1] = this_reg;
26452 output_asm_insn ("add%z1\t{%0, %1|%1, %0}", xops);
26453 }
26454
26455 /* If necessary, drop THIS back to its stack slot. */
26456 if (this_reg && this_reg != this_param)
26457 {
26458 xops[0] = this_reg;
26459 xops[1] = this_param;
26460 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
26461 }
26462
26463 xops[0] = XEXP (DECL_RTL (function), 0);
26464 if (TARGET_64BIT)
26465 {
26466 if (!flag_pic || (*targetm.binds_local_p) (function))
26467 output_asm_insn ("jmp\t%P0", xops);
26468 /* All thunks should be in the same object as their target,
26469 and thus binds_local_p should be true. */
26470 else if (TARGET_64BIT && cfun->machine->call_abi == MS_ABI)
26471 gcc_unreachable ();
26472 else
26473 {
26474 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
26475 tmp = gen_rtx_CONST (Pmode, tmp);
26476 tmp = gen_rtx_MEM (QImode, tmp);
26477 xops[0] = tmp;
26478 output_asm_insn ("jmp\t%A0", xops);
26479 }
26480 }
26481 else
26482 {
26483 if (!flag_pic || (*targetm.binds_local_p) (function))
26484 output_asm_insn ("jmp\t%P0", xops);
26485 else
26486 #if TARGET_MACHO
26487 if (TARGET_MACHO)
26488 {
26489 rtx sym_ref = XEXP (DECL_RTL (function), 0);
26490 tmp = (gen_rtx_SYMBOL_REF
26491 (Pmode,
26492 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
26493 tmp = gen_rtx_MEM (QImode, tmp);
26494 xops[0] = tmp;
26495 output_asm_insn ("jmp\t%0", xops);
26496 }
26497 else
26498 #endif /* TARGET_MACHO */
26499 {
26500 tmp = gen_rtx_REG (SImode, CX_REG);
26501 output_set_got (tmp, NULL_RTX);
26502
26503 xops[1] = tmp;
26504 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
26505 output_asm_insn ("jmp\t{*}%1", xops);
26506 }
26507 }
26508 }
26509
26510 static void
26511 x86_file_start (void)
26512 {
26513 default_file_start ();
26514 #if TARGET_MACHO
26515 darwin_file_start ();
26516 #endif
26517 if (X86_FILE_START_VERSION_DIRECTIVE)
26518 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
26519 if (X86_FILE_START_FLTUSED)
26520 fputs ("\t.global\t__fltused\n", asm_out_file);
26521 if (ix86_asm_dialect == ASM_INTEL)
26522 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
26523 }
26524
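/* Worker for the ADJUST_FIELD_ALIGN target macro: on 32-bit targets
   without -malign-double, the traditional i386 ABI caps the alignment
   of double, integer and complex integer fields at 32 bits.  */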
26525 int
26526 x86_field_alignment (tree field, int computed)
26527 {
26528 enum machine_mode mode;
26529 tree type = TREE_TYPE (field);
26530
26531 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
26532 return computed;
26533 mode = TYPE_MODE (strip_array_types (type));
26534 if (mode == DFmode || mode == DCmode
26535 || GET_MODE_CLASS (mode) == MODE_INT
26536 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
26537 return MIN (32, computed);
26538 return computed;
26539 }
26540
26541 /* Output assembler code to FILE to increment profiler label # LABELNO
26542 for profiling a function entry. */
26543 void
26544 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
26545 {
26546 if (TARGET_64BIT)
26547 {
26548 #ifndef NO_PROFILE_COUNTERS
26549 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
26550 #endif
26551
26552 if (DEFAULT_ABI == SYSV_ABI && flag_pic)
26553 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
26554 else
26555 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
26556 }
26557 else if (flag_pic)
26558 {
26559 #ifndef NO_PROFILE_COUNTERS
26560 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
26561 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
26562 #endif
26563 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
26564 }
26565 else
26566 {
26567 #ifndef NO_PROFILE_COUNTERS
26568 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
26569 PROFILE_COUNT_REGISTER);
26570 #endif
26571 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
26572 }
26573 }
26574
26575 /* We don't have exact information about the insn sizes, but we may assume
26576 quite safely that we are informed about all 1-byte insns and about memory
26577 address sizes. This is enough to eliminate unnecessary padding in
26578 99% of cases. */
26579
26580 static int
26581 min_insn_size (rtx insn)
26582 {
26583 int l = 0;
26584
26585 if (!INSN_P (insn) || !active_insn_p (insn))
26586 return 0;
26587
26588 /* Discard alignments we've emitted and jump tables. */
26589 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
26590 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
26591 return 0;
26592 if (JUMP_P (insn)
26593 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
26594 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
26595 return 0;
26596
26597 /* Important case - calls are always 5 bytes.
26598 It is common to have many calls in a row. */
26599 if (CALL_P (insn)
26600 && symbolic_reference_mentioned_p (PATTERN (insn))
26601 && !SIBLING_CALL_P (insn))
26602 return 5;
26603 if (get_attr_length (insn) <= 1)
26604 return 1;
26605
26606 /* For normal instructions we may rely on the sizes of addresses
26607 and the presence of a symbol to require 4 bytes of encoding.
26608 This is not the case for jumps, where references are PC-relative. */
26609 if (!JUMP_P (insn))
26610 {
26611 l = get_attr_length_address (insn);
26612 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
26613 l = 4;
26614 }
26615 if (l)
26616 return 1+l;
26617 else
26618 return 2;
26619 }
26620
26621 /* The AMD K8 core mispredicts jumps when there are more than 3 jumps in a
26622 16-byte window. */
26623
26624 static void
26625 ix86_avoid_jump_misspredicts (void)
26626 {
26627 rtx insn, start = get_insns ();
26628 int nbytes = 0, njumps = 0;
26629 int isjump = 0;
26630
26631 /* Look for all minimal intervals of instructions containing 4 jumps.
26632 The intervals are bounded by START and INSN. NBYTES is the total
26633 size of the instructions in the interval, including INSN and not
26634 including START. When NBYTES is smaller than 16 bytes, it is possible
26635 that the end of START and the end of INSN land in the same 16-byte window.
26636
26637 The smallest offset in the window at which INSN can start is the case
26638 where START ends at offset 0. The offset of INSN is then NBYTES -
26639 sizeof (INSN). We add a p2align to the 16-byte window with maxskip
26640 17 - NBYTES + sizeof (INSN). */
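/* As an illustrative example: if the interval from just after START up to
   and including the current jump totals NBYTES = 12 bytes and the current
   jump itself is estimated at 2 bytes, the code below requests
   15 - 12 + 2 = 5 bytes of padding, pushing the fourth jump out of the
   shared 16-byte window.  */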
26641 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
26642 {
26643
26644 nbytes += min_insn_size (insn);
26645 if (dump_file)
26646 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
26647 INSN_UID (insn), min_insn_size (insn));
26648 if ((JUMP_P (insn)
26649 && GET_CODE (PATTERN (insn)) != ADDR_VEC
26650 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
26651 || CALL_P (insn))
26652 njumps++;
26653 else
26654 continue;
26655
26656 while (njumps > 3)
26657 {
26658 start = NEXT_INSN (start);
26659 if ((JUMP_P (start)
26660 && GET_CODE (PATTERN (start)) != ADDR_VEC
26661 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
26662 || CALL_P (start))
26663 njumps--, isjump = 1;
26664 else
26665 isjump = 0;
26666 nbytes -= min_insn_size (start);
26667 }
26668 gcc_assert (njumps >= 0);
26669 if (dump_file)
26670 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
26671 INSN_UID (start), INSN_UID (insn), nbytes);
26672
26673 if (njumps == 3 && isjump && nbytes < 16)
26674 {
26675 int padsize = 15 - nbytes + min_insn_size (insn);
26676
26677 if (dump_file)
26678 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
26679 INSN_UID (insn), padsize);
26680 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
26681 }
26682 }
26683 }
26684
26685 /* The AMD Athlon works faster when RET is not the destination of a
26686 conditional jump or directly preceded by another jump instruction.
26687 We avoid the penalty by inserting a NOP just before the RET
26688 instruction in such cases. */
26689 static void
26690 ix86_pad_returns (void)
26691 {
26692 edge e;
26693 edge_iterator ei;
26694
26695 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
26696 {
26697 basic_block bb = e->src;
26698 rtx ret = BB_END (bb);
26699 rtx prev;
26700 bool replace = false;
26701
26702 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
26703 || optimize_bb_for_size_p (bb))
26704 continue;
26705 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
26706 if (active_insn_p (prev) || LABEL_P (prev))
26707 break;
26708 if (prev && LABEL_P (prev))
26709 {
26710 edge e;
26711 edge_iterator ei;
26712
26713 FOR_EACH_EDGE (e, ei, bb->preds)
26714 if (EDGE_FREQUENCY (e) && e->src->index >= 0
26715 && !(e->flags & EDGE_FALLTHRU))
26716 replace = true;
26717 }
26718 if (!replace)
26719 {
26720 prev = prev_active_insn (ret);
26721 if (prev
26722 && ((JUMP_P (prev) && any_condjump_p (prev))
26723 || CALL_P (prev)))
26724 replace = true;
26725 /* Empty functions get a branch mispredict even when the jump destination
26726 is not visible to us. */
26727 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
26728 replace = true;
26729 }
26730 if (replace)
26731 {
26732 emit_insn_before (gen_return_internal_long (), ret);
26733 delete_insn (ret);
26734 }
26735 }
26736 }
26737
26738 /* Implement machine specific optimizations. We implement padding of returns
26739 for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte window. */
26740 static void
26741 ix86_reorg (void)
26742 {
26743 if (TARGET_PAD_RETURNS && optimize
26744 && optimize_function_for_speed_p (cfun))
26745 ix86_pad_returns ();
26746 if (TARGET_FOUR_JUMP_LIMIT && optimize
26747 && optimize_function_for_speed_p (cfun))
26748 ix86_avoid_jump_misspredicts ();
26749 }
26750
26751 /* Return nonzero when a QImode register that must be represented via a REX
26752 prefix is used. */
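/* On x86-64, the byte forms of hard registers 0-3 (ax, dx, cx, bx) can be
   encoded without a REX prefix, while the byte forms of si, di, bp, sp and
   of r8-r15 cannot, hence the simple REGNO >= 4 test below.  */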
26753 bool
26754 x86_extended_QIreg_mentioned_p (rtx insn)
26755 {
26756 int i;
26757 extract_insn_cached (insn);
26758 for (i = 0; i < recog_data.n_operands; i++)
26759 if (REG_P (recog_data.operand[i])
26760 && REGNO (recog_data.operand[i]) >= 4)
26761 return true;
26762 return false;
26763 }
26764
26765 /* Return nonzero when P points to a register encoded via a REX prefix.
26766 Called via for_each_rtx. */
26767 static int
26768 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
26769 {
26770 unsigned int regno;
26771 if (!REG_P (*p))
26772 return 0;
26773 regno = REGNO (*p);
26774 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
26775 }
26776
26777 /* Return true when INSN mentions a register that must be encoded using a
26778 REX prefix. */
26779 bool
26780 x86_extended_reg_mentioned_p (rtx insn)
26781 {
26782 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
26783 extended_reg_mentioned_1, NULL);
26784 }
26785
26786 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
26787 optabs would emit if we didn't have TFmode patterns. */
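/* In outline: when the input is non-negative as a signed value, a plain
   signed conversion is already exact.  Otherwise the value is shifted
   right by one, the dropped low bit is OR'd back in so the final rounding
   is unaffected, the signed conversion is applied to the halved value,
   and the result is doubled.  */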
26788
26789 void
26790 x86_emit_floatuns (rtx operands[2])
26791 {
26792 rtx neglab, donelab, i0, i1, f0, in, out;
26793 enum machine_mode mode, inmode;
26794
26795 inmode = GET_MODE (operands[1]);
26796 gcc_assert (inmode == SImode || inmode == DImode);
26797
26798 out = operands[0];
26799 in = force_reg (inmode, operands[1]);
26800 mode = GET_MODE (out);
26801 neglab = gen_label_rtx ();
26802 donelab = gen_label_rtx ();
26803 f0 = gen_reg_rtx (mode);
26804
26805 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
26806
26807 expand_float (out, in, 0);
26808
26809 emit_jump_insn (gen_jump (donelab));
26810 emit_barrier ();
26811
26812 emit_label (neglab);
26813
26814 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
26815 1, OPTAB_DIRECT);
26816 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
26817 1, OPTAB_DIRECT);
26818 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
26819
26820 expand_float (f0, i0, 0);
26821
26822 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
26823
26824 emit_label (donelab);
26825 }
26826 \f
26827 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
26828 with all elements equal to VAR. Return true if successful. */
26829
26830 static bool
26831 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
26832 rtx target, rtx val)
26833 {
26834 enum machine_mode hmode, smode, wsmode, wvmode;
26835 rtx x;
26836
26837 switch (mode)
26838 {
26839 case V2SImode:
26840 case V2SFmode:
26841 if (!mmx_ok)
26842 return false;
26843 /* FALLTHRU */
26844
26845 case V2DFmode:
26846 case V2DImode:
26847 case V4SFmode:
26848 case V4SImode:
26849 val = force_reg (GET_MODE_INNER (mode), val);
26850 x = gen_rtx_VEC_DUPLICATE (mode, val);
26851 emit_insn (gen_rtx_SET (VOIDmode, target, x));
26852 return true;
26853
26854 case V4HImode:
26855 if (!mmx_ok)
26856 return false;
26857 if (TARGET_SSE || TARGET_3DNOW_A)
26858 {
26859 val = gen_lowpart (SImode, val);
26860 x = gen_rtx_TRUNCATE (HImode, val);
26861 x = gen_rtx_VEC_DUPLICATE (mode, x);
26862 emit_insn (gen_rtx_SET (VOIDmode, target, x));
26863 return true;
26864 }
26865 else
26866 {
26867 smode = HImode;
26868 wsmode = SImode;
26869 wvmode = V2SImode;
26870 goto widen;
26871 }
26872
26873 case V8QImode:
26874 if (!mmx_ok)
26875 return false;
26876 smode = QImode;
26877 wsmode = HImode;
26878 wvmode = V4HImode;
26879 goto widen;
26880 case V8HImode:
26881 if (TARGET_SSE2)
26882 {
26883 rtx tmp1, tmp2;
26884 /* Extend HImode to SImode using a paradoxical SUBREG. */
26885 tmp1 = gen_reg_rtx (SImode);
26886 emit_move_insn (tmp1, gen_lowpart (SImode, val));
26887 /* Insert the SImode value as low element of V4SImode vector. */
26888 tmp2 = gen_reg_rtx (V4SImode);
26889 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
26890 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
26891 CONST0_RTX (V4SImode),
26892 const1_rtx);
26893 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
26894 /* Cast the V4SImode vector back to a V8HImode vector. */
26895 tmp1 = gen_reg_rtx (V8HImode);
26896 emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
26897 /* Duplicate the low short through the whole low SImode word. */
26898 emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
26899 /* Cast the V8HImode vector back to a V4SImode vector. */
26900 tmp2 = gen_reg_rtx (V4SImode);
26901 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
26902 /* Replicate the low element of the V4SImode vector. */
26903 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
26904 /* Cast the V4SImode vector back to V8HImode, and store in target. */
26905 emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
26906 return true;
26907 }
26908 smode = HImode;
26909 wsmode = SImode;
26910 wvmode = V4SImode;
26911 goto widen;
26912 case V16QImode:
26913 if (TARGET_SSE2)
26914 {
26915 rtx tmp1, tmp2;
26916 /* Extend QImode to SImode using a paradoxical SUBREG. */
26917 tmp1 = gen_reg_rtx (SImode);
26918 emit_move_insn (tmp1, gen_lowpart (SImode, val));
26919 /* Insert the SImode value as low element of V4SImode vector. */
26920 tmp2 = gen_reg_rtx (V4SImode);
26921 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
26922 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
26923 CONST0_RTX (V4SImode),
26924 const1_rtx);
26925 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
26926 /* Cast the V4SImode vector back to a V16QImode vector. */
26927 tmp1 = gen_reg_rtx (V16QImode);
26928 emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
26929 /* Duplicate the low byte through the whole low SImode word. */
26930 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
26931 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
26932 /* Cast the V16QImode vector back to a V4SImode vector. */
26933 tmp2 = gen_reg_rtx (V4SImode);
26934 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
26935 /* Replicate the low element of the V4SImode vector. */
26936 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
26937 /* Cast the V4SImode vector back to V16QImode, and store in target. */
26938 emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
26939 return true;
26940 }
26941 smode = QImode;
26942 wsmode = HImode;
26943 wvmode = V8HImode;
26944 goto widen;
26945 widen:
26946 /* Replicate the value once into the next wider mode and recurse. */
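/* For example, in the V8QImode case a byte B first becomes the HImode
   value (B << 8) | B, and the recursive call then broadcasts that HImode
   value across a V4HImode vector before the final lowpart move back to
   V8QImode.  */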
26947 val = convert_modes (wsmode, smode, val, true);
26948 x = expand_simple_binop (wsmode, ASHIFT, val,
26949 GEN_INT (GET_MODE_BITSIZE (smode)),
26950 NULL_RTX, 1, OPTAB_LIB_WIDEN);
26951 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
26952
26953 x = gen_reg_rtx (wvmode);
26954 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
26955 gcc_unreachable ();
26956 emit_move_insn (target, gen_lowpart (mode, x));
26957 return true;
26958
26959 case V4DFmode:
26960 hmode = V2DFmode;
26961 goto half;
26962 case V4DImode:
26963 hmode = V2DImode;
26964 goto half;
26965 case V8SFmode:
26966 hmode = V4SFmode;
26967 goto half;
26968 case V8SImode:
26969 hmode = V4SImode;
26970 goto half;
26971 case V16HImode:
26972 hmode = V8HImode;
26973 goto half;
26974 case V32QImode:
26975 hmode = V16QImode;
26976 goto half;
26977 half:
26978 {
26979 rtx tmp = gen_reg_rtx (hmode);
26980 ix86_expand_vector_init_duplicate (mmx_ok, hmode, tmp, val);
26981 emit_insn (gen_rtx_SET (VOIDmode, target,
26982 gen_rtx_VEC_CONCAT (mode, tmp, tmp)));
26983 }
26984 return true;
26985
26986 default:
26987 return false;
26988 }
26989 }
26990
26991 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
26992 whose ONE_VAR element is VAR, and other elements are zero. Return true
26993 if successful. */
26994
26995 static bool
26996 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
26997 rtx target, rtx var, int one_var)
26998 {
26999 enum machine_mode vsimode;
27000 rtx new_target;
27001 rtx x, tmp;
27002 bool use_vector_set = false;
27003
27004 switch (mode)
27005 {
27006 case V2DImode:
27007 /* For SSE4.1, we normally use vector set. But if the second
27008 element is zero and inter-unit moves are OK, we use movq
27009 instead. */
27010 use_vector_set = (TARGET_64BIT
27011 && TARGET_SSE4_1
27012 && !(TARGET_INTER_UNIT_MOVES
27013 && one_var == 0));
27014 break;
27015 case V16QImode:
27016 case V4SImode:
27017 case V4SFmode:
27018 use_vector_set = TARGET_SSE4_1;
27019 break;
27020 case V8HImode:
27021 use_vector_set = TARGET_SSE2;
27022 break;
27023 case V4HImode:
27024 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
27025 break;
27026 case V32QImode:
27027 case V16HImode:
27028 case V8SImode:
27029 case V8SFmode:
27030 case V4DImode:
27031 case V4DFmode:
27032 use_vector_set = TARGET_AVX;
27033 break;
27034 default:
27035 break;
27036 }
27037
27038 if (use_vector_set)
27039 {
27040 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
27041 var = force_reg (GET_MODE_INNER (mode), var);
27042 ix86_expand_vector_set (mmx_ok, target, var, one_var);
27043 return true;
27044 }
27045
27046 switch (mode)
27047 {
27048 case V2SFmode:
27049 case V2SImode:
27050 if (!mmx_ok)
27051 return false;
27052 /* FALLTHRU */
27053
27054 case V2DFmode:
27055 case V2DImode:
27056 if (one_var != 0)
27057 return false;
27058 var = force_reg (GET_MODE_INNER (mode), var);
27059 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
27060 emit_insn (gen_rtx_SET (VOIDmode, target, x));
27061 return true;
27062
27063 case V4SFmode:
27064 case V4SImode:
27065 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
27066 new_target = gen_reg_rtx (mode);
27067 else
27068 new_target = target;
27069 var = force_reg (GET_MODE_INNER (mode), var);
27070 x = gen_rtx_VEC_DUPLICATE (mode, var);
27071 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
27072 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
27073 if (one_var != 0)
27074 {
27075 /* We need to shuffle the value to the correct position, so
27076 create a new pseudo to store the intermediate result. */
27077
27078 /* With SSE2, we can use the integer shuffle insns. */
27079 if (mode != V4SFmode && TARGET_SSE2)
27080 {
27081 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
27082 GEN_INT (1),
27083 GEN_INT (one_var == 1 ? 0 : 1),
27084 GEN_INT (one_var == 2 ? 0 : 1),
27085 GEN_INT (one_var == 3 ? 0 : 1)));
27086 if (target != new_target)
27087 emit_move_insn (target, new_target);
27088 return true;
27089 }
27090
27091 /* Otherwise convert the intermediate result to V4SFmode and
27092 use the SSE1 shuffle instructions. */
27093 if (mode != V4SFmode)
27094 {
27095 tmp = gen_reg_rtx (V4SFmode);
27096 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
27097 }
27098 else
27099 tmp = new_target;
27100
27101 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
27102 GEN_INT (1),
27103 GEN_INT (one_var == 1 ? 0 : 1),
27104 GEN_INT (one_var == 2 ? 0+4 : 1+4),
27105 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
27106
27107 if (mode != V4SFmode)
27108 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
27109 else if (tmp != target)
27110 emit_move_insn (target, tmp);
27111 }
27112 else if (target != new_target)
27113 emit_move_insn (target, new_target);
27114 return true;
27115
27116 case V8HImode:
27117 case V16QImode:
27118 vsimode = V4SImode;
27119 goto widen;
27120 case V4HImode:
27121 case V8QImode:
27122 if (!mmx_ok)
27123 return false;
27124 vsimode = V2SImode;
27125 goto widen;
27126 widen:
27127 if (one_var != 0)
27128 return false;
27129
27130 /* Zero extend the variable element to SImode and recurse. */
27131 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
27132
27133 x = gen_reg_rtx (vsimode);
27134 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
27135 var, one_var))
27136 gcc_unreachable ();
27137
27138 emit_move_insn (target, gen_lowpart (mode, x));
27139 return true;
27140
27141 default:
27142 return false;
27143 }
27144 }
27145
27146 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
27147 consisting of the values in VALS. It is known that all elements
27148 except ONE_VAR are constants. Return true if successful. */
27149
27150 static bool
27151 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
27152 rtx target, rtx vals, int one_var)
27153 {
27154 rtx var = XVECEXP (vals, 0, one_var);
27155 enum machine_mode wmode;
27156 rtx const_vec, x;
27157
27158 const_vec = copy_rtx (vals);
27159 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
27160 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
27161
27162 switch (mode)
27163 {
27164 case V2DFmode:
27165 case V2DImode:
27166 case V2SFmode:
27167 case V2SImode:
27168 /* For the two element vectors, it's just as easy to use
27169 the general case. */
27170 return false;
27171
27172 case V4DFmode:
27173 case V4DImode:
27174 case V8SFmode:
27175 case V8SImode:
27176 case V16HImode:
27177 case V32QImode:
27178 case V4SFmode:
27179 case V4SImode:
27180 case V8HImode:
27181 case V4HImode:
27182 break;
27183
27184 case V16QImode:
27185 if (TARGET_SSE4_1)
27186 break;
27187 wmode = V8HImode;
27188 goto widen;
27189 case V8QImode:
27190 wmode = V4HImode;
27191 goto widen;
27192 widen:
27193 /* There's no way to set one QImode entry easily. Combine
27194 the variable value with its adjacent constant value, and
27195 promote to an HImode set. */
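/* For example (V8QImode, ONE_VAR = 3): the variable byte is shifted into
   the high half of an HImode value, the constant byte at index 2 is OR'd
   into the low half, and the combined value is then written into HImode
   element 1 of the V4HImode view of the vector.  */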
27196 x = XVECEXP (vals, 0, one_var ^ 1);
27197 if (one_var & 1)
27198 {
27199 var = convert_modes (HImode, QImode, var, true);
27200 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
27201 NULL_RTX, 1, OPTAB_LIB_WIDEN);
27202 x = GEN_INT (INTVAL (x) & 0xff);
27203 }
27204 else
27205 {
27206 var = convert_modes (HImode, QImode, var, true);
27207 x = gen_int_mode (INTVAL (x) << 8, HImode);
27208 }
27209 if (x != const0_rtx)
27210 var = expand_simple_binop (HImode, IOR, var, x, var,
27211 1, OPTAB_LIB_WIDEN);
27212
27213 x = gen_reg_rtx (wmode);
27214 emit_move_insn (x, gen_lowpart (wmode, const_vec));
27215 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
27216
27217 emit_move_insn (target, gen_lowpart (mode, x));
27218 return true;
27219
27220 default:
27221 return false;
27222 }
27223
27224 emit_move_insn (target, const_vec);
27225 ix86_expand_vector_set (mmx_ok, target, var, one_var);
27226 return true;
27227 }
27228
27229 /* A subroutine of ix86_expand_vector_init_general. Use vector
27230 concatenate to handle the most general case: all values variable,
27231 and none identical. */
27232
27233 static void
27234 ix86_expand_vector_init_concat (enum machine_mode mode,
27235 rtx target, rtx *ops, int n)
27236 {
27237 enum machine_mode cmode, hmode = VOIDmode;
27238 rtx first[8], second[4];
27239 rtvec v;
27240 int i, j;
27241
27242 switch (n)
27243 {
27244 case 2:
27245 switch (mode)
27246 {
27247 case V8SImode:
27248 cmode = V4SImode;
27249 break;
27250 case V8SFmode:
27251 cmode = V4SFmode;
27252 break;
27253 case V4DImode:
27254 cmode = V2DImode;
27255 break;
27256 case V4DFmode:
27257 cmode = V2DFmode;
27258 break;
27259 case V4SImode:
27260 cmode = V2SImode;
27261 break;
27262 case V4SFmode:
27263 cmode = V2SFmode;
27264 break;
27265 case V2DImode:
27266 cmode = DImode;
27267 break;
27268 case V2SImode:
27269 cmode = SImode;
27270 break;
27271 case V2DFmode:
27272 cmode = DFmode;
27273 break;
27274 case V2SFmode:
27275 cmode = SFmode;
27276 break;
27277 default:
27278 gcc_unreachable ();
27279 }
27280
27281 if (!register_operand (ops[1], cmode))
27282 ops[1] = force_reg (cmode, ops[1]);
27283 if (!register_operand (ops[0], cmode))
27284 ops[0] = force_reg (cmode, ops[0]);
27285 emit_insn (gen_rtx_SET (VOIDmode, target,
27286 gen_rtx_VEC_CONCAT (mode, ops[0],
27287 ops[1])));
27288 break;
27289
27290 case 4:
27291 switch (mode)
27292 {
27293 case V4DImode:
27294 cmode = V2DImode;
27295 break;
27296 case V4DFmode:
27297 cmode = V2DFmode;
27298 break;
27299 case V4SImode:
27300 cmode = V2SImode;
27301 break;
27302 case V4SFmode:
27303 cmode = V2SFmode;
27304 break;
27305 default:
27306 gcc_unreachable ();
27307 }
27308 goto half;
27309
27310 case 8:
27311 switch (mode)
27312 {
27313 case V8SImode:
27314 cmode = V2SImode;
27315 hmode = V4SImode;
27316 break;
27317 case V8SFmode:
27318 cmode = V2SFmode;
27319 hmode = V4SFmode;
27320 break;
27321 default:
27322 gcc_unreachable ();
27323 }
27324 goto half;
27325
27326 half:
27327 /* FIXME: We process inputs backward to help RA. PR 36222. */
27328 i = n - 1;
27329 j = (n >> 1) - 1;
27330 for (; i > 0; i -= 2, j--)
27331 {
27332 first[j] = gen_reg_rtx (cmode);
27333 v = gen_rtvec (2, ops[i - 1], ops[i]);
27334 ix86_expand_vector_init (false, first[j],
27335 gen_rtx_PARALLEL (cmode, v));
27336 }
27337
27338 n >>= 1;
27339 if (n > 2)
27340 {
27341 gcc_assert (hmode != VOIDmode);
27342 for (i = j = 0; i < n; i += 2, j++)
27343 {
27344 second[j] = gen_reg_rtx (hmode);
27345 ix86_expand_vector_init_concat (hmode, second [j],
27346 &first [i], 2);
27347 }
27348 n >>= 1;
27349 ix86_expand_vector_init_concat (mode, target, second, n);
27350 }
27351 else
27352 ix86_expand_vector_init_concat (mode, target, first, n);
27353 break;
27354
27355 default:
27356 gcc_unreachable ();
27357 }
27358 }
27359
27360 /* A subroutine of ix86_expand_vector_init_general. Use vector
27361 interleave to handle the most general case: all values variable,
27362 and none identical. */
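/* In outline: each adjacent pair of scalar inputs is first packed into one
   SSE register (the even-indexed element via a plain move into the low
   element, the odd-indexed one via a vec_set at position 1), and successive
   interleave-low operations on progressively wider element modes then merge
   these partial vectors into the final full vector.  */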
27363
27364 static void
27365 ix86_expand_vector_init_interleave (enum machine_mode mode,
27366 rtx target, rtx *ops, int n)
27367 {
27368 enum machine_mode first_imode, second_imode, third_imode, inner_mode;
27369 int i, j;
27370 rtx op0, op1;
27371 rtx (*gen_load_even) (rtx, rtx, rtx);
27372 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
27373 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
27374
27375 switch (mode)
27376 {
27377 case V8HImode:
27378 gen_load_even = gen_vec_setv8hi;
27379 gen_interleave_first_low = gen_vec_interleave_lowv4si;
27380 gen_interleave_second_low = gen_vec_interleave_lowv2di;
27381 inner_mode = HImode;
27382 first_imode = V4SImode;
27383 second_imode = V2DImode;
27384 third_imode = VOIDmode;
27385 break;
27386 case V16QImode:
27387 gen_load_even = gen_vec_setv16qi;
27388 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
27389 gen_interleave_second_low = gen_vec_interleave_lowv4si;
27390 inner_mode = QImode;
27391 first_imode = V8HImode;
27392 second_imode = V4SImode;
27393 third_imode = V2DImode;
27394 break;
27395 default:
27396 gcc_unreachable ();
27397 }
27398
27399 for (i = 0; i < n; i++)
27400 {
27401 /* Extend the odd element to SImode using a paradoxical SUBREG. */
27402 op0 = gen_reg_rtx (SImode);
27403 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
27404
27405 /* Insert the SImode value as low element of V4SImode vector. */
27406 op1 = gen_reg_rtx (V4SImode);
27407 op0 = gen_rtx_VEC_MERGE (V4SImode,
27408 gen_rtx_VEC_DUPLICATE (V4SImode,
27409 op0),
27410 CONST0_RTX (V4SImode),
27411 const1_rtx);
27412 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
27413
27414 /* Cast the V4SImode vector back to a vector in the original mode. */
27415 op0 = gen_reg_rtx (mode);
27416 emit_move_insn (op0, gen_lowpart (mode, op1));
27417
27418 /* Load even elements into the second position. */
27419 emit_insn ((*gen_load_even) (op0,
27420 force_reg (inner_mode,
27421 ops [i + i + 1]),
27422 const1_rtx));
27423
27424 /* Cast vector to FIRST_IMODE vector. */
27425 ops[i] = gen_reg_rtx (first_imode);
27426 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
27427 }
27428
27429 /* Interleave low FIRST_IMODE vectors. */
27430 for (i = j = 0; i < n; i += 2, j++)
27431 {
27432 op0 = gen_reg_rtx (first_imode);
27433 emit_insn ((*gen_interleave_first_low) (op0, ops[i], ops[i + 1]));
27434
27435 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
27436 ops[j] = gen_reg_rtx (second_imode);
27437 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
27438 }
27439
27440 /* Interleave low SECOND_IMODE vectors. */
27441 switch (second_imode)
27442 {
27443 case V4SImode:
27444 for (i = j = 0; i < n / 2; i += 2, j++)
27445 {
27446 op0 = gen_reg_rtx (second_imode);
27447 emit_insn ((*gen_interleave_second_low) (op0, ops[i],
27448 ops[i + 1]));
27449
27450 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
27451 vector. */
27452 ops[j] = gen_reg_rtx (third_imode);
27453 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
27454 }
27455 second_imode = V2DImode;
27456 gen_interleave_second_low = gen_vec_interleave_lowv2di;
27457 /* FALLTHRU */
27458
27459 case V2DImode:
27460 op0 = gen_reg_rtx (second_imode);
27461 emit_insn ((*gen_interleave_second_low) (op0, ops[0],
27462 ops[1]));
27463
27464 /* Cast the SECOND_IMODE vector back to a vector in the original
27465 mode. */
27466 emit_insn (gen_rtx_SET (VOIDmode, target,
27467 gen_lowpart (mode, op0)));
27468 break;
27469
27470 default:
27471 gcc_unreachable ();
27472 }
27473 }
27474
27475 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
27476 all values variable, and none identical. */
27477
27478 static void
27479 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
27480 rtx target, rtx vals)
27481 {
27482 rtx ops[32], op0, op1;
27483 enum machine_mode half_mode = VOIDmode;
27484 int n, i;
27485
27486 switch (mode)
27487 {
27488 case V2SFmode:
27489 case V2SImode:
27490 if (!mmx_ok && !TARGET_SSE)
27491 break;
27492 /* FALLTHRU */
27493
27494 case V8SFmode:
27495 case V8SImode:
27496 case V4DFmode:
27497 case V4DImode:
27498 case V4SFmode:
27499 case V4SImode:
27500 case V2DFmode:
27501 case V2DImode:
27502 n = GET_MODE_NUNITS (mode);
27503 for (i = 0; i < n; i++)
27504 ops[i] = XVECEXP (vals, 0, i);
27505 ix86_expand_vector_init_concat (mode, target, ops, n);
27506 return;
27507
27508 case V32QImode:
27509 half_mode = V16QImode;
27510 goto half;
27511
27512 case V16HImode:
27513 half_mode = V8HImode;
27514 goto half;
27515
27516 half:
27517 n = GET_MODE_NUNITS (mode);
27518 for (i = 0; i < n; i++)
27519 ops[i] = XVECEXP (vals, 0, i);
27520 op0 = gen_reg_rtx (half_mode);
27521 op1 = gen_reg_rtx (half_mode);
27522 ix86_expand_vector_init_interleave (half_mode, op0, ops,
27523 n >> 2);
27524 ix86_expand_vector_init_interleave (half_mode, op1,
27525 &ops [n >> 1], n >> 2);
27526 emit_insn (gen_rtx_SET (VOIDmode, target,
27527 gen_rtx_VEC_CONCAT (mode, op0, op1)));
27528 return;
27529
27530 case V16QImode:
27531 if (!TARGET_SSE4_1)
27532 break;
27533 /* FALLTHRU */
27534
27535 case V8HImode:
27536 if (!TARGET_SSE2)
27537 break;
27538
27539 /* Don't use ix86_expand_vector_init_interleave if we can't
27540 move from GPR to SSE register directly. */
27541 if (!TARGET_INTER_UNIT_MOVES)
27542 break;
27543
27544 n = GET_MODE_NUNITS (mode);
27545 for (i = 0; i < n; i++)
27546 ops[i] = XVECEXP (vals, 0, i);
27547 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
27548 return;
27549
27550 case V4HImode:
27551 case V8QImode:
27552 break;
27553
27554 default:
27555 gcc_unreachable ();
27556 }
27557
27558 {
27559 int i, j, n_elts, n_words, n_elt_per_word;
27560 enum machine_mode inner_mode;
27561 rtx words[4], shift;
27562
27563 inner_mode = GET_MODE_INNER (mode);
27564 n_elts = GET_MODE_NUNITS (mode);
27565 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
27566 n_elt_per_word = n_elts / n_words;
27567 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
27568
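/* Pack the elements of each word-sized chunk into a word_mode value,
   starting from the highest-indexed element and repeatedly shifting left
   and OR-ing in the next lower one, so the lowest-indexed element ends up
   in the least significant bits.  */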
27569 for (i = 0; i < n_words; ++i)
27570 {
27571 rtx word = NULL_RTX;
27572
27573 for (j = 0; j < n_elt_per_word; ++j)
27574 {
27575 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
27576 elt = convert_modes (word_mode, inner_mode, elt, true);
27577
27578 if (j == 0)
27579 word = elt;
27580 else
27581 {
27582 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
27583 word, 1, OPTAB_LIB_WIDEN);
27584 word = expand_simple_binop (word_mode, IOR, word, elt,
27585 word, 1, OPTAB_LIB_WIDEN);
27586 }
27587 }
27588
27589 words[i] = word;
27590 }
27591
27592 if (n_words == 1)
27593 emit_move_insn (target, gen_lowpart (mode, words[0]));
27594 else if (n_words == 2)
27595 {
27596 rtx tmp = gen_reg_rtx (mode);
27597 emit_clobber (tmp);
27598 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
27599 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
27600 emit_move_insn (target, tmp);
27601 }
27602 else if (n_words == 4)
27603 {
27604 rtx tmp = gen_reg_rtx (V4SImode);
27605 gcc_assert (word_mode == SImode);
27606 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
27607 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
27608 emit_move_insn (target, gen_lowpart (mode, tmp));
27609 }
27610 else
27611 gcc_unreachable ();
27612 }
27613 }
27614
27615 /* Initialize vector TARGET via VALS. Suppress the use of MMX
27616 instructions unless MMX_OK is true. */
27617
27618 void
27619 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
27620 {
27621 enum machine_mode mode = GET_MODE (target);
27622 enum machine_mode inner_mode = GET_MODE_INNER (mode);
27623 int n_elts = GET_MODE_NUNITS (mode);
27624 int n_var = 0, one_var = -1;
27625 bool all_same = true, all_const_zero = true;
27626 int i;
27627 rtx x;
27628
27629 for (i = 0; i < n_elts; ++i)
27630 {
27631 x = XVECEXP (vals, 0, i);
27632 if (!(CONST_INT_P (x)
27633 || GET_CODE (x) == CONST_DOUBLE
27634 || GET_CODE (x) == CONST_FIXED))
27635 n_var++, one_var = i;
27636 else if (x != CONST0_RTX (inner_mode))
27637 all_const_zero = false;
27638 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
27639 all_same = false;
27640 }
27641
27642 /* Constants are best loaded from the constant pool. */
27643 if (n_var == 0)
27644 {
27645 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
27646 return;
27647 }
27648
27649 /* If all values are identical, broadcast the value. */
27650 if (all_same
27651 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
27652 XVECEXP (vals, 0, 0)))
27653 return;
27654
27655 /* Values where only one field is non-constant are best loaded from
27656 the pool and overwritten via move later. */
27657 if (n_var == 1)
27658 {
27659 if (all_const_zero
27660 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
27661 XVECEXP (vals, 0, one_var),
27662 one_var))
27663 return;
27664
27665 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
27666 return;
27667 }
27668
27669 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
27670 }
27671
27672 void
27673 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
27674 {
27675 enum machine_mode mode = GET_MODE (target);
27676 enum machine_mode inner_mode = GET_MODE_INNER (mode);
27677 enum machine_mode half_mode;
27678 bool use_vec_merge = false;
27679 rtx tmp;
27680 static rtx (*gen_extract[6][2]) (rtx, rtx)
27681 = {
27682 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
27683 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
27684 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
27685 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
27686 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
27687 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
27688 };
27689 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
27690 = {
27691 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
27692 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
27693 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
27694 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
27695 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
27696 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
27697 };
27698 int i, j, n;
27699
27700 switch (mode)
27701 {
27702 case V2SFmode:
27703 case V2SImode:
27704 if (mmx_ok)
27705 {
27706 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
27707 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
27708 if (elt == 0)
27709 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
27710 else
27711 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
27712 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
27713 return;
27714 }
27715 break;
27716
27717 case V2DImode:
27718 use_vec_merge = TARGET_SSE4_1;
27719 if (use_vec_merge)
27720 break;
27721
27722 case V2DFmode:
27723 {
27724 rtx op0, op1;
27725
27726 /* For the two element vectors, we implement a VEC_CONCAT with
27727 the extraction of the other element. */
27728
27729 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
27730 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
27731
27732 if (elt == 0)
27733 op0 = val, op1 = tmp;
27734 else
27735 op0 = tmp, op1 = val;
27736
27737 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
27738 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
27739 }
27740 return;
27741
27742 case V4SFmode:
27743 use_vec_merge = TARGET_SSE4_1;
27744 if (use_vec_merge)
27745 break;
27746
27747 switch (elt)
27748 {
27749 case 0:
27750 use_vec_merge = true;
27751 break;
27752
27753 case 1:
27754 /* tmp = target = A B C D */
27755 tmp = copy_to_reg (target);
27756 /* target = A A B B */
27757 emit_insn (gen_sse_unpcklps (target, target, target));
27758 /* target = X A B B */
27759 ix86_expand_vector_set (false, target, val, 0);
27760 /* target = A X C D */
27761 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
27762 GEN_INT (1), GEN_INT (0),
27763 GEN_INT (2+4), GEN_INT (3+4)));
27764 return;
27765
27766 case 2:
27767 /* tmp = target = A B C D */
27768 tmp = copy_to_reg (target);
27769 /* tmp = X B C D */
27770 ix86_expand_vector_set (false, tmp, val, 0);
27771 /* target = A B X D */
27772 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
27773 GEN_INT (0), GEN_INT (1),
27774 GEN_INT (0+4), GEN_INT (3+4)));
27775 return;
27776
27777 case 3:
27778 /* tmp = target = A B C D */
27779 tmp = copy_to_reg (target);
27780 /* tmp = X B C D */
27781 ix86_expand_vector_set (false, tmp, val, 0);
27782 /* target = A B C X */
27783 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
27784 GEN_INT (0), GEN_INT (1),
27785 GEN_INT (2+4), GEN_INT (0+4)));
27786 return;
27787
27788 default:
27789 gcc_unreachable ();
27790 }
27791 break;
27792
27793 case V4SImode:
27794 use_vec_merge = TARGET_SSE4_1;
27795 if (use_vec_merge)
27796 break;
27797
27798 /* Element 0 handled by vec_merge below. */
27799 if (elt == 0)
27800 {
27801 use_vec_merge = true;
27802 break;
27803 }
27804
27805 if (TARGET_SSE2)
27806 {
27807 /* With SSE2, use integer shuffles to swap element 0 and ELT,
27808 store into element 0, then shuffle them back. */
27809
27810 rtx order[4];
27811
27812 order[0] = GEN_INT (elt);
27813 order[1] = const1_rtx;
27814 order[2] = const2_rtx;
27815 order[3] = GEN_INT (3);
27816 order[elt] = const0_rtx;
27817
27818 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
27819 order[1], order[2], order[3]));
27820
27821 ix86_expand_vector_set (false, target, val, 0);
27822
27823 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
27824 order[1], order[2], order[3]));
27825 }
27826 else
27827 {
27828 /* For SSE1, we have to reuse the V4SF code. */
27829 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
27830 gen_lowpart (SFmode, val), elt);
27831 }
27832 return;
27833
27834 case V8HImode:
27835 use_vec_merge = TARGET_SSE2;
27836 break;
27837 case V4HImode:
27838 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
27839 break;
27840
27841 case V16QImode:
27842 use_vec_merge = TARGET_SSE4_1;
27843 break;
27844
27845 case V8QImode:
27846 break;
27847
27848 case V32QImode:
27849 half_mode = V16QImode;
27850 j = 0;
27851 n = 16;
27852 goto half;
27853
27854 case V16HImode:
27855 half_mode = V8HImode;
27856 j = 1;
27857 n = 8;
27858 goto half;
27859
27860 case V8SImode:
27861 half_mode = V4SImode;
27862 j = 2;
27863 n = 4;
27864 goto half;
27865
27866 case V4DImode:
27867 half_mode = V2DImode;
27868 j = 3;
27869 n = 2;
27870 goto half;
27871
27872 case V8SFmode:
27873 half_mode = V4SFmode;
27874 j = 4;
27875 n = 4;
27876 goto half;
27877
27878 case V4DFmode:
27879 half_mode = V2DFmode;
27880 j = 5;
27881 n = 2;
27882 goto half;
27883
27884 half:
27885 /* Compute offset. */
27886 i = elt / n;
27887 elt %= n;
27888
27889 gcc_assert (i <= 1);
27890
27891 /* Extract the half. */
27892 tmp = gen_reg_rtx (half_mode);
27893 emit_insn ((*gen_extract[j][i]) (tmp, target));
27894
27895 /* Put val in tmp at elt. */
27896 ix86_expand_vector_set (false, tmp, val, elt);
27897
27898 /* Put it back. */
27899 emit_insn ((*gen_insert[j][i]) (target, target, tmp));
27900 return;
27901
27902 default:
27903 break;
27904 }
27905
27906 if (use_vec_merge)
27907 {
27908 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
27909 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
27910 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
27911 }
27912 else
27913 {
27914 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
27915
27916 emit_move_insn (mem, target);
27917
27918 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
27919 emit_move_insn (tmp, val);
27920
27921 emit_move_insn (target, mem);
27922 }
27923 }
27924
27925 void
27926 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
27927 {
27928 enum machine_mode mode = GET_MODE (vec);
27929 enum machine_mode inner_mode = GET_MODE_INNER (mode);
27930 bool use_vec_extr = false;
27931 rtx tmp;
27932
27933 switch (mode)
27934 {
27935 case V2SImode:
27936 case V2SFmode:
27937 if (!mmx_ok)
27938 break;
27939 /* FALLTHRU */
27940
27941 case V2DFmode:
27942 case V2DImode:
27943 use_vec_extr = true;
27944 break;
27945
27946 case V4SFmode:
27947 use_vec_extr = TARGET_SSE4_1;
27948 if (use_vec_extr)
27949 break;
27950
27951 switch (elt)
27952 {
27953 case 0:
27954 tmp = vec;
27955 break;
27956
27957 case 1:
27958 case 3:
27959 tmp = gen_reg_rtx (mode);
27960 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
27961 GEN_INT (elt), GEN_INT (elt),
27962 GEN_INT (elt+4), GEN_INT (elt+4)));
27963 break;
27964
27965 case 2:
27966 tmp = gen_reg_rtx (mode);
27967 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
27968 break;
27969
27970 default:
27971 gcc_unreachable ();
27972 }
27973 vec = tmp;
27974 use_vec_extr = true;
27975 elt = 0;
27976 break;
27977
27978 case V4SImode:
27979 use_vec_extr = TARGET_SSE4_1;
27980 if (use_vec_extr)
27981 break;
27982
27983 if (TARGET_SSE2)
27984 {
27985 switch (elt)
27986 {
27987 case 0:
27988 tmp = vec;
27989 break;
27990
27991 case 1:
27992 case 3:
27993 tmp = gen_reg_rtx (mode);
27994 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
27995 GEN_INT (elt), GEN_INT (elt),
27996 GEN_INT (elt), GEN_INT (elt)));
27997 break;
27998
27999 case 2:
28000 tmp = gen_reg_rtx (mode);
28001 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
28002 break;
28003
28004 default:
28005 gcc_unreachable ();
28006 }
28007 vec = tmp;
28008 use_vec_extr = true;
28009 elt = 0;
28010 }
28011 else
28012 {
28013 /* For SSE1, we have to reuse the V4SF code. */
28014 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
28015 gen_lowpart (V4SFmode, vec), elt);
28016 return;
28017 }
28018 break;
28019
28020 case V8HImode:
28021 use_vec_extr = TARGET_SSE2;
28022 break;
28023 case V4HImode:
28024 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
28025 break;
28026
28027 case V16QImode:
28028 use_vec_extr = TARGET_SSE4_1;
28029 break;
28030
28031 case V8QImode:
28032 /* ??? Could extract the appropriate HImode element and shift. */
28033 default:
28034 break;
28035 }
28036
28037 if (use_vec_extr)
28038 {
28039 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
28040 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
28041
28042 /* Let the rtl optimizers know about the zero extension performed. */
28043 if (inner_mode == QImode || inner_mode == HImode)
28044 {
28045 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
28046 target = gen_lowpart (SImode, target);
28047 }
28048
28049 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
28050 }
28051 else
28052 {
28053 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
28054
28055 emit_move_insn (mem, vec);
28056
28057 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
28058 emit_move_insn (target, tmp);
28059 }
28060 }
28061
28062 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
28063 pattern to reduce; DEST is the destination; IN is the input vector. */
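/* In outline, the sequence below first folds the high 64-bit half of IN
   onto the low half with FN, then folds element 1 onto element 0, so the
   reduced scalar ends up in element 0 of DEST; the remaining elements hold
   unspecified partial results.  */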
28064
28065 void
28066 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
28067 {
28068 rtx tmp1, tmp2, tmp3;
28069
28070 tmp1 = gen_reg_rtx (V4SFmode);
28071 tmp2 = gen_reg_rtx (V4SFmode);
28072 tmp3 = gen_reg_rtx (V4SFmode);
28073
28074 emit_insn (gen_sse_movhlps (tmp1, in, in));
28075 emit_insn (fn (tmp2, tmp1, in));
28076
28077 emit_insn (gen_sse_shufps_v4sf (tmp3, tmp2, tmp2,
28078 GEN_INT (1), GEN_INT (1),
28079 GEN_INT (1+4), GEN_INT (1+4)));
28080 emit_insn (fn (dest, tmp2, tmp3));
28081 }
28082 \f
28083 /* Target hook for scalar_mode_supported_p. */
28084 static bool
28085 ix86_scalar_mode_supported_p (enum machine_mode mode)
28086 {
28087 if (DECIMAL_FLOAT_MODE_P (mode))
28088 return true;
28089 else if (mode == TFmode)
28090 return true;
28091 else
28092 return default_scalar_mode_supported_p (mode);
28093 }
28094
28095 /* Implements target hook vector_mode_supported_p. */
28096 static bool
28097 ix86_vector_mode_supported_p (enum machine_mode mode)
28098 {
28099 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
28100 return true;
28101 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
28102 return true;
28103 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
28104 return true;
28105 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
28106 return true;
28107 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
28108 return true;
28109 return false;
28110 }
28111
28112 /* Target hook for c_mode_for_suffix. */
28113 static enum machine_mode
28114 ix86_c_mode_for_suffix (char suffix)
28115 {
28116 if (suffix == 'q')
28117 return TFmode;
28118 if (suffix == 'w')
28119 return XFmode;
28120
28121 return VOIDmode;
28122 }
28123
28124 /* Worker function for TARGET_MD_ASM_CLOBBERS.
28125
28126 We do this in the new i386 backend to maintain source compatibility
28127 with the old cc0-based compiler. */
28128
28129 static tree
28130 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
28131 tree inputs ATTRIBUTE_UNUSED,
28132 tree clobbers)
28133 {
28134 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
28135 clobbers);
28136 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
28137 clobbers);
28138 return clobbers;
28139 }
28140
28141 /* Implements the target hook targetm.asm.encode_section_info. This
28142 is not used by NetWare. */
28143
28144 static void ATTRIBUTE_UNUSED
28145 ix86_encode_section_info (tree decl, rtx rtl, int first)
28146 {
28147 default_encode_section_info (decl, rtl, first);
28148
28149 if (TREE_CODE (decl) == VAR_DECL
28150 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
28151 && ix86_in_large_data_p (decl))
28152 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
28153 }
28154
28155 /* Worker function for REVERSE_CONDITION. */
28156
28157 enum rtx_code
28158 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
28159 {
28160 return (mode != CCFPmode && mode != CCFPUmode
28161 ? reverse_condition (code)
28162 : reverse_condition_maybe_unordered (code));
28163 }
28164
28165 /* Output code to perform an x87 FP register move, from OPERANDS[1]
28166 to OPERANDS[0]. */
28167
28168 const char *
28169 output_387_reg_move (rtx insn, rtx *operands)
28170 {
28171 if (REG_P (operands[0]))
28172 {
28173 if (REG_P (operands[1])
28174 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
28175 {
28176 if (REGNO (operands[0]) == FIRST_STACK_REG)
28177 return output_387_ffreep (operands, 0);
28178 return "fstp\t%y0";
28179 }
28180 if (STACK_TOP_P (operands[0]))
28181 return "fld%z1\t%y1";
28182 return "fst\t%y0";
28183 }
28184 else if (MEM_P (operands[0]))
28185 {
28186 gcc_assert (REG_P (operands[1]));
28187 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
28188 return "fstp%z0\t%y0";
28189 else
28190 {
28191 /* There is no non-popping store to memory for XFmode.
28192 So if we need one, follow the store with a load. */
28193 if (GET_MODE (operands[0]) == XFmode)
28194 return "fstp%z0\t%y0\n\tfld%z0\t%y0";
28195 else
28196 return "fst%z0\t%y0";
28197 }
28198 }
28199 else
28200 gcc_unreachable();
28201 }
28202
28203 /* Output code to perform a conditional jump to LABEL, if C2 flag in
28204 FP status register is set. */
28205
28206 void
28207 ix86_emit_fp_unordered_jump (rtx label)
28208 {
28209 rtx reg = gen_reg_rtx (HImode);
28210 rtx temp;
28211
28212 emit_insn (gen_x86_fnstsw_1 (reg));
28213
28214 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
28215 {
28216 emit_insn (gen_x86_sahf_1 (reg));
28217
28218 temp = gen_rtx_REG (CCmode, FLAGS_REG);
28219 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
28220 }
28221 else
28222 {
28223 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
28224
28225 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
28226 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
28227 }
28228
28229 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
28230 gen_rtx_LABEL_REF (VOIDmode, label),
28231 pc_rtx);
28232 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
28233
28234 emit_jump_insn (temp);
28235 predict_jump (REG_BR_PROB_BASE * 10 / 100);
28236 }
28237
28238 /* Output code to perform a log1p XFmode calculation. */
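/* In outline: when |OP1| is below 1 - sqrt(2)/2 (about 0.2929), the
   fyl2xp1 instruction (y * log2 (x + 1)) is used with y = ln (2), which
   preserves precision near zero; otherwise 1 + OP1 is formed explicitly
   and fyl2x is used instead.  */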
28239
28240 void ix86_emit_i387_log1p (rtx op0, rtx op1)
28241 {
28242 rtx label1 = gen_label_rtx ();
28243 rtx label2 = gen_label_rtx ();
28244
28245 rtx tmp = gen_reg_rtx (XFmode);
28246 rtx tmp2 = gen_reg_rtx (XFmode);
28247
28248 emit_insn (gen_absxf2 (tmp, op1));
28249 emit_insn (gen_cmpxf (tmp,
28250 CONST_DOUBLE_FROM_REAL_VALUE (
28251 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
28252 XFmode)));
28253 emit_jump_insn (gen_bge (label1));
28254
28255 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
28256 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
28257 emit_jump (label2);
28258
28259 emit_label (label1);
28260 emit_move_insn (tmp, CONST1_RTX (XFmode));
28261 emit_insn (gen_addxf3 (tmp, op1, tmp));
28262 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
28263 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
28264
28265 emit_label (label2);
28266 }
28267
28268 /* Output code to perform a Newton-Raphson approximation of a single-precision
28269 floating-point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
28270
28271 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
28272 {
28273 rtx x0, x1, e0, e1, two;
28274
28275 x0 = gen_reg_rtx (mode);
28276 e0 = gen_reg_rtx (mode);
28277 e1 = gen_reg_rtx (mode);
28278 x1 = gen_reg_rtx (mode);
28279
28280 two = CONST_DOUBLE_FROM_REAL_VALUE (dconst2, SFmode);
28281
28282 if (VECTOR_MODE_P (mode))
28283 two = ix86_build_const_vector (SFmode, true, two);
28284
28285 two = force_reg (mode, two);
28286
28287 /* a / b = a * rcp(b) * (2.0 - b * rcp(b)) */
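/* This is a single Newton-Raphson step for f (x) = 1/x - b, whose update is
   x1 = x0 * (2 - b * x0); one step roughly doubles the number of correct
   bits in the hardware reciprocal estimate.  */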
28288
28289 /* x0 = rcp(b) estimate */
28290 emit_insn (gen_rtx_SET (VOIDmode, x0,
28291 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
28292 UNSPEC_RCP)));
28293 /* e0 = x0 * b */
28294 emit_insn (gen_rtx_SET (VOIDmode, e0,
28295 gen_rtx_MULT (mode, x0, b)));
28296 /* e1 = 2. - e0 */
28297 emit_insn (gen_rtx_SET (VOIDmode, e1,
28298 gen_rtx_MINUS (mode, two, e0)));
28299 /* x1 = x0 * e1 */
28300 emit_insn (gen_rtx_SET (VOIDmode, x1,
28301 gen_rtx_MULT (mode, x0, e1)));
28302 /* res = a * x1 */
28303 emit_insn (gen_rtx_SET (VOIDmode, res,
28304 gen_rtx_MULT (mode, a, x1)));
28305 }
28306
28307 /* Output code to perform a Newton-Raphson approximation of a
28308 single-precision floating-point [reciprocal] square root. */
28309
28310 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
28311 bool recip)
28312 {
28313 rtx x0, e0, e1, e2, e3, mthree, mhalf;
28314 REAL_VALUE_TYPE r;
28315
28316 x0 = gen_reg_rtx (mode);
28317 e0 = gen_reg_rtx (mode);
28318 e1 = gen_reg_rtx (mode);
28319 e2 = gen_reg_rtx (mode);
28320 e3 = gen_reg_rtx (mode);
28321
28322 real_from_integer (&r, VOIDmode, -3, -1, 0);
28323 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
28324
28325 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
28326 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
28327
28328 if (VECTOR_MODE_P (mode))
28329 {
28330 mthree = ix86_build_const_vector (SFmode, true, mthree);
28331 mhalf = ix86_build_const_vector (SFmode, true, mhalf);
28332 }
28333
28334 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
28335 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
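/* Both forms are the Newton-Raphson step x1 = 0.5 * x0 * (3 - a * x0 * x0)
   for f (x) = 1/(x*x) - a, written with negated constants so that the final
   multiply by -0.5 carries the sign; sqrt (a) is then obtained as
   a * rsqrt (a).  */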
28336
28337 /* x0 = rsqrt(a) estimate */
28338 emit_insn (gen_rtx_SET (VOIDmode, x0,
28339 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
28340 UNSPEC_RSQRT)));
28341
28342 /* If a == 0.0, filter out the infinite rsqrt estimate to prevent a NaN in sqrt (0.0). */
28343 if (!recip)
28344 {
28345 rtx zero, mask;
28346
28347 zero = gen_reg_rtx (mode);
28348 mask = gen_reg_rtx (mode);
28349
28350 zero = force_reg (mode, CONST0_RTX(mode));
28351 emit_insn (gen_rtx_SET (VOIDmode, mask,
28352 gen_rtx_NE (mode, zero, a)));
28353
28354 emit_insn (gen_rtx_SET (VOIDmode, x0,
28355 gen_rtx_AND (mode, x0, mask)));
28356 }
28357
28358 /* e0 = x0 * a */
28359 emit_insn (gen_rtx_SET (VOIDmode, e0,
28360 gen_rtx_MULT (mode, x0, a)));
28361 /* e1 = e0 * x0 */
28362 emit_insn (gen_rtx_SET (VOIDmode, e1,
28363 gen_rtx_MULT (mode, e0, x0)));
28364
28365 /* e2 = e1 - 3. */
28366 mthree = force_reg (mode, mthree);
28367 emit_insn (gen_rtx_SET (VOIDmode, e2,
28368 gen_rtx_PLUS (mode, e1, mthree)));
28369
28370 mhalf = force_reg (mode, mhalf);
28371 if (recip)
28372 /* e3 = -.5 * x0 */
28373 emit_insn (gen_rtx_SET (VOIDmode, e3,
28374 gen_rtx_MULT (mode, x0, mhalf)));
28375 else
28376 /* e3 = -.5 * e0 */
28377 emit_insn (gen_rtx_SET (VOIDmode, e3,
28378 gen_rtx_MULT (mode, e0, mhalf)));
28379 /* ret = e2 * e3 */
28380 emit_insn (gen_rtx_SET (VOIDmode, res,
28381 gen_rtx_MULT (mode, e2, e3)));
28382 }
28383
28384 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
28385
28386 static void ATTRIBUTE_UNUSED
28387 i386_solaris_elf_named_section (const char *name, unsigned int flags,
28388 tree decl)
28389 {
28390 /* With Binutils 2.15, the "@unwind" marker must be specified on
28391 every occurrence of the ".eh_frame" section, not just the first
28392 one. */
28393 if (TARGET_64BIT
28394 && strcmp (name, ".eh_frame") == 0)
28395 {
28396 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
28397 flags & SECTION_WRITE ? "aw" : "a");
28398 return;
28399 }
28400 default_elf_asm_named_section (name, flags, decl);
28401 }
28402
28403 /* Return the mangling of TYPE if it is an extended fundamental type. */
28404
28405 static const char *
28406 ix86_mangle_type (const_tree type)
28407 {
28408 type = TYPE_MAIN_VARIANT (type);
28409
28410 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
28411 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
28412 return NULL;
28413
28414 switch (TYPE_MODE (type))
28415 {
28416 case TFmode:
28417 /* __float128 is "g". */
28418 return "g";
28419 case XFmode:
28420 /* "long double" or __float80 is "e". */
28421 return "e";
28422 default:
28423 return NULL;
28424 }
28425 }
28426
28427 /* For 32-bit code we can save PIC register setup by using
28428 __stack_chk_fail_local hidden function instead of calling
28429 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
28430 register, so it is better to call __stack_chk_fail directly. */
28431
28432 static tree
28433 ix86_stack_protect_fail (void)
28434 {
28435 return TARGET_64BIT
28436 ? default_external_stack_protect_fail ()
28437 : default_hidden_stack_protect_fail ();
28438 }
28439
28440 /* Select a format to encode pointers in exception handling data. CODE
28441 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
28442 true if the symbol may be affected by dynamic relocations.
28443
28444 ??? All x86 object file formats are capable of representing this.
28445 After all, the relocation needed is the same as for the call insn.
28446 Whether or not a particular assembler allows us to enter such, I
28447 guess we'll have to see. */
28448 int
28449 asm_preferred_eh_data_format (int code, int global)
28450 {
28451 if (flag_pic)
28452 {
28453 int type = DW_EH_PE_sdata8;
28454 if (!TARGET_64BIT
28455 || ix86_cmodel == CM_SMALL_PIC
28456 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
28457 type = DW_EH_PE_sdata4;
28458 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
28459 }
28460 if (ix86_cmodel == CM_SMALL
28461 || (ix86_cmodel == CM_MEDIUM && code))
28462 return DW_EH_PE_udata4;
28463 return DW_EH_PE_absptr;
28464 }
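
/* Worked example (editorial): for 32-bit PIC code the function above picks
   the 4-byte signed encoding, so a global data reference is encoded as
   DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4 and a local one as
   DW_EH_PE_pcrel | DW_EH_PE_sdata4; only 64-bit PIC with a medium or large
   code model may need the 8-byte DW_EH_PE_sdata8 form.  */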
28465 \f
28466 /* Expand copysign from SIGN to the positive value ABS_VALUE
28467 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
28468 the sign-bit. */
28469 static void
28470 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
28471 {
28472 enum machine_mode mode = GET_MODE (sign);
28473 rtx sgn = gen_reg_rtx (mode);
28474 if (mask == NULL_RTX)
28475 {
28476 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
28477 if (!VECTOR_MODE_P (mode))
28478 {
28479 /* We need to generate a scalar mode mask in this case. */
28480 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
28481 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
28482 mask = gen_reg_rtx (mode);
28483 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
28484 }
28485 }
28486 else
28487 mask = gen_rtx_NOT (mode, mask);
28488 emit_insn (gen_rtx_SET (VOIDmode, sgn,
28489 gen_rtx_AND (mode, mask, sign)));
28490 emit_insn (gen_rtx_SET (VOIDmode, result,
28491 gen_rtx_IOR (mode, abs_value, sgn)));
28492 }
28493
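/* Editorial sketch (illustration only) of the scalar SFmode case handled
   above, written with integer bit operations on the float representation
   (assuming <stdint.h> and <string.h>):

     static float
     copysign_to_positive (float abs_value, float sign)
     {
       uint32_t a, s;
       memcpy (&a, &abs_value, sizeof a);
       memcpy (&s, &sign, sizeof s);
       a |= s & UINT32_C (0x80000000);   /* IOR in the sign bit of SIGN */
       memcpy (&abs_value, &a, sizeof a);
       return abs_value;
     }

   ABS_VALUE is assumed nonnegative, so the AND/IOR pair behaves like
   copysign (abs_value, sign).  */
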
28494 /* Expand fabs (OP0) and return a new rtx that holds the result. The
28495 mask for masking out the sign-bit is stored in *SMASK, if that is
28496 non-null. */
28497 static rtx
28498 ix86_expand_sse_fabs (rtx op0, rtx *smask)
28499 {
28500 enum machine_mode mode = GET_MODE (op0);
28501 rtx xa, mask;
28502
28503 xa = gen_reg_rtx (mode);
28504 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
28505 if (!VECTOR_MODE_P (mode))
28506 {
28507 /* We need to generate a scalar mode mask in this case. */
28508 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
28509 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
28510 mask = gen_reg_rtx (mode);
28511 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
28512 }
28513 emit_insn (gen_rtx_SET (VOIDmode, xa,
28514 gen_rtx_AND (mode, op0, mask)));
28515
28516 if (smask)
28517 *smask = mask;
28518
28519 return xa;
28520 }
28521
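/* Editorial sketch (illustration only) of the scalar SFmode case: the AND
   emitted above clears the sign bit through the inverted sign-bit mask, and
   the mask is handed back so callers can reuse it for copysign (assuming
   <stdint.h> and <string.h>):

     static float
     sse_fabs (float x, uint32_t *smask)
     {
       uint32_t bits, mask = UINT32_C (0x7fffffff);  /* ~sign-bit */
       memcpy (&bits, &x, sizeof bits);
       bits &= mask;
       memcpy (&x, &bits, sizeof bits);
       if (smask)
         *smask = mask;
       return x;
     }
*/
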
28522 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
28523 swapping the operands if SWAP_OPERANDS is true. The expanded
28524 code is a forward jump to a newly created label in case the
28525 comparison is true. The generated label rtx is returned. */
28526 static rtx
28527 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
28528 bool swap_operands)
28529 {
28530 rtx label, tmp;
28531
28532 if (swap_operands)
28533 {
28534 tmp = op0;
28535 op0 = op1;
28536 op1 = tmp;
28537 }
28538
28539 label = gen_label_rtx ();
28540 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
28541 emit_insn (gen_rtx_SET (VOIDmode, tmp,
28542 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
28543 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
28544 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
28545 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
28546 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
28547 JUMP_LABEL (tmp) = label;
28548
28549 return label;
28550 }
28551
28552 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
28553 using comparison code CODE. Operands are swapped for the comparison if
28554 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
28555 static rtx
28556 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
28557 bool swap_operands)
28558 {
28559 enum machine_mode mode = GET_MODE (op0);
28560 rtx mask = gen_reg_rtx (mode);
28561
28562 if (swap_operands)
28563 {
28564 rtx tmp = op0;
28565 op0 = op1;
28566 op1 = tmp;
28567 }
28568
28569 if (mode == DFmode)
28570 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
28571 gen_rtx_fmt_ee (code, mode, op0, op1)));
28572 else
28573 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
28574 gen_rtx_fmt_ee (code, mode, op0, op1)));
28575
28576 return mask;
28577 }
28578
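/* Editorial note: the CMPSS/CMPSD mask produced above is all-ones when the
   predicate holds and all-zeros otherwise, so the rounding expanders below
   can evaluate, e.g.,

     x2 -= (x2 > x) ? 1.0 : 0.0;

   branch-free as  x2 = x2 - (one & mask).  */
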
28579 /* Generate and return a rtx of mode MODE for 2**n where n is the number
28580 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
28581 static rtx
28582 ix86_gen_TWO52 (enum machine_mode mode)
28583 {
28584 REAL_VALUE_TYPE TWO52r;
28585 rtx TWO52;
28586
28587 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
28588 TWO52 = const_double_from_real_value (TWO52r, mode);
28589 TWO52 = force_reg (mode, TWO52);
28590
28591 return TWO52;
28592 }
28593
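/* Editorial note on the trick used by the expanders below: for a
   nonnegative double x < 2**52, the value x + 2**52 lies in [2**52, 2**53),
   where consecutive doubles are exactly 1.0 apart, so the addition rounds x
   to an integer in the current rounding mode and subtracting 2**52 again
   leaves that integer.  E.g. with round-to-nearest, 3.7 + 2**52 rounds to
   2**52 + 4.0, and the subtraction yields 4.0.  For SFmode the same holds
   with 2**23.  */
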
28594 /* Expand SSE sequence for computing lround from OP1 storing
28595 into OP0. */
28596 void
28597 ix86_expand_lround (rtx op0, rtx op1)
28598 {
28599 /* C code for the stuff we're doing below:
28600 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
28601 return (long)tmp;
28602 */
28603 enum machine_mode mode = GET_MODE (op1);
28604 const struct real_format *fmt;
28605 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
28606 rtx adj;
28607
28608 /* load nextafter (0.5, 0.0) */
28609 fmt = REAL_MODE_FORMAT (mode);
28610 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
28611 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
28612
28613 /* adj = copysign (0.5, op1) */
28614 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
28615 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
28616
28617 /* adj = op1 + adj */
28618 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
28619
28620 /* op0 = (imode)adj */
28621 expand_fix (op0, adj, 0);
28622 }
28623
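/* Editorial note (rationale, not from the original source): the constant is
   nextafter (0.5, 0.0) rather than 0.5 itself because for an input just
   below 0.5 the sum  op1 + 0.5  can round up to exactly 1.0, making lround
   return 1 instead of 0; the slightly smaller constant keeps such inputs on
   the correct side of the subsequent truncation.  */
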
28624 /* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1 storing
28625 into OPERAND0. */
28626 void
28627 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
28628 {
28629 /* C code for the stuff we're doing below (for do_floor):
28630 xi = (long)op1;
28631 xi -= (double)xi > op1 ? 1 : 0;
28632 return xi;
28633 */
28634 enum machine_mode fmode = GET_MODE (op1);
28635 enum machine_mode imode = GET_MODE (op0);
28636 rtx ireg, freg, label, tmp;
28637
28638 /* reg = (long)op1 */
28639 ireg = gen_reg_rtx (imode);
28640 expand_fix (ireg, op1, 0);
28641
28642 /* freg = (double)reg */
28643 freg = gen_reg_rtx (fmode);
28644 expand_float (freg, ireg, 0);
28645
28646 /* ireg = (freg > op1) ? ireg - 1 : ireg */
28647 label = ix86_expand_sse_compare_and_jump (UNLE,
28648 freg, op1, !do_floor);
28649 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
28650 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
28651 emit_move_insn (ireg, tmp);
28652
28653 emit_label (label);
28654 LABEL_NUSES (label) = 1;
28655
28656 emit_move_insn (op0, ireg);
28657 }
28658
28659 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
28660 result in OPERAND0. */
28661 void
28662 ix86_expand_rint (rtx operand0, rtx operand1)
28663 {
28664 /* C code for the stuff we're doing below:
28665 xa = fabs (operand1);
28666 if (!isless (xa, 2**52))
28667 return operand1;
28668 xa = xa + 2**52 - 2**52;
28669 return copysign (xa, operand1);
28670 */
28671 enum machine_mode mode = GET_MODE (operand0);
28672 rtx res, xa, label, TWO52, mask;
28673
28674 res = gen_reg_rtx (mode);
28675 emit_move_insn (res, operand1);
28676
28677 /* xa = abs (operand1) */
28678 xa = ix86_expand_sse_fabs (res, &mask);
28679
28680 /* if (!isless (xa, TWO52)) goto label; */
28681 TWO52 = ix86_gen_TWO52 (mode);
28682 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
28683
28684 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
28685 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
28686
28687 ix86_sse_copysign_to_positive (res, xa, res, mask);
28688
28689 emit_label (label);
28690 LABEL_NUSES (label) = 1;
28691
28692 emit_move_insn (operand0, res);
28693 }
28694
28695 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing into OPERAND0.
28696 Sequence that works without relying on DImode truncation via cvttsd2siq that is only available on 64bit targets. */
28697 void
28698 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
28699 {
28700 /* C code for the stuff we expand below.
28701 double xa = fabs (x), x2;
28702 if (!isless (xa, TWO52))
28703 return x;
28704 xa = xa + TWO52 - TWO52;
28705 x2 = copysign (xa, x);
28706 Compensate. Floor:
28707 if (x2 > x)
28708 x2 -= 1;
28709 Compensate. Ceil:
28710 if (x2 < x)
28711 x2 -= -1;
28712 return x2;
28713 */
28714 enum machine_mode mode = GET_MODE (operand0);
28715 rtx xa, TWO52, tmp, label, one, res, mask;
28716
28717 TWO52 = ix86_gen_TWO52 (mode);
28718
28719 /* Temporary for holding the result, initialized to the input
28720 operand to ease control flow. */
28721 res = gen_reg_rtx (mode);
28722 emit_move_insn (res, operand1);
28723
28724 /* xa = abs (operand1) */
28725 xa = ix86_expand_sse_fabs (res, &mask);
28726
28727 /* if (!isless (xa, TWO52)) goto label; */
28728 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
28729
28730 /* xa = xa + TWO52 - TWO52; */
28731 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
28732 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
28733
28734 /* xa = copysign (xa, operand1) */
28735 ix86_sse_copysign_to_positive (xa, xa, res, mask);
28736
28737 /* generate 1.0 or -1.0 */
28738 one = force_reg (mode,
28739 const_double_from_real_value (do_floor
28740 ? dconst1 : dconstm1, mode));
28741
28742 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
28743 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
28744 emit_insn (gen_rtx_SET (VOIDmode, tmp,
28745 gen_rtx_AND (mode, one, tmp)));
28746 /* We always need to subtract here to preserve signed zero. */
28747 tmp = expand_simple_binop (mode, MINUS,
28748 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
28749 emit_move_insn (res, tmp);
28750
28751 emit_label (label);
28752 LABEL_NUSES (label) = 1;
28753
28754 emit_move_insn (operand0, res);
28755 }
28756
28757 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
28758 into OPERAND0. */
28759 void
28760 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
28761 {
28762 /* C code for the stuff we expand below.
28763 double xa = fabs (x), x2;
28764 if (!isless (xa, TWO52))
28765 return x;
28766 x2 = (double)(long)x;
28767 Compensate. Floor:
28768 if (x2 > x)
28769 x2 -= 1;
28770 Compensate. Ceil:
28771 if (x2 < x)
28772 x2 += 1;
28773 if (HONOR_SIGNED_ZEROS (mode))
28774 return copysign (x2, x);
28775 return x2;
28776 */
28777 enum machine_mode mode = GET_MODE (operand0);
28778 rtx xa, xi, TWO52, tmp, label, one, res, mask;
28779
28780 TWO52 = ix86_gen_TWO52 (mode);
28781
28782 /* Temporary for holding the result, initialized to the input
28783 operand to ease control flow. */
28784 res = gen_reg_rtx (mode);
28785 emit_move_insn (res, operand1);
28786
28787 /* xa = abs (operand1) */
28788 xa = ix86_expand_sse_fabs (res, &mask);
28789
28790 /* if (!isless (xa, TWO52)) goto label; */
28791 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
28792
28793 /* xa = (double)(long)x */
28794 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
28795 expand_fix (xi, res, 0);
28796 expand_float (xa, xi, 0);
28797
28798 /* generate 1.0 */
28799 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
28800
28801 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
28802 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
28803 emit_insn (gen_rtx_SET (VOIDmode, tmp,
28804 gen_rtx_AND (mode, one, tmp)));
28805 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
28806 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
28807 emit_move_insn (res, tmp);
28808
28809 if (HONOR_SIGNED_ZEROS (mode))
28810 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
28811
28812 emit_label (label);
28813 LABEL_NUSES (label) = 1;
28814
28815 emit_move_insn (operand0, res);
28816 }
28817
28818 /* Expand SSE sequence for computing round from OPERAND1 storing
28819 into OPERAND0. Sequence that works without relying on DImode truncation
28820 via cvttsd2siq that is only available on 64bit targets. */
28821 void
28822 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
28823 {
28824 /* C code for the stuff we expand below.
28825 double xa = fabs (x), xa2, x2;
28826 if (!isless (xa, TWO52))
28827 return x;
28828 Using the absolute value and copying back sign makes
28829 -0.0 -> -0.0 correct.
28830 xa2 = xa + TWO52 - TWO52;
28831 Compensate.
28832 dxa = xa2 - xa;
28833 if (dxa <= -0.5)
28834 xa2 += 1;
28835 else if (dxa > 0.5)
28836 xa2 -= 1;
28837 x2 = copysign (xa2, x);
28838 return x2;
28839 */
28840 enum machine_mode mode = GET_MODE (operand0);
28841 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
28842
28843 TWO52 = ix86_gen_TWO52 (mode);
28844
28845 /* Temporary for holding the result, initialized to the input
28846 operand to ease control flow. */
28847 res = gen_reg_rtx (mode);
28848 emit_move_insn (res, operand1);
28849
28850 /* xa = abs (operand1) */
28851 xa = ix86_expand_sse_fabs (res, &mask);
28852
28853 /* if (!isless (xa, TWO52)) goto label; */
28854 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
28855
28856 /* xa2 = xa + TWO52 - TWO52; */
28857 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
28858 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
28859
28860 /* dxa = xa2 - xa; */
28861 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
28862
28863 /* generate 0.5, 1.0 and -0.5 */
28864 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
28865 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
28866 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
28867 0, OPTAB_DIRECT);
28868
28869 /* Compensate. */
28870 tmp = gen_reg_rtx (mode);
28871 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
28872 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
28873 emit_insn (gen_rtx_SET (VOIDmode, tmp,
28874 gen_rtx_AND (mode, one, tmp)));
28875 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
28876 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
28877 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
28878 emit_insn (gen_rtx_SET (VOIDmode, tmp,
28879 gen_rtx_AND (mode, one, tmp)));
28880 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
28881
28882 /* res = copysign (xa2, operand1) */
28883 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
28884
28885 emit_label (label);
28886 LABEL_NUSES (label) = 1;
28887
28888 emit_move_insn (operand0, res);
28889 }
28890
28891 /* Expand SSE sequence for computing trunc from OPERAND1 storing
28892 into OPERAND0. */
28893 void
28894 ix86_expand_trunc (rtx operand0, rtx operand1)
28895 {
28896 /* C code for SSE variant we expand below.
28897 double xa = fabs (x), x2;
28898 if (!isless (xa, TWO52))
28899 return x;
28900 x2 = (double)(long)x;
28901 if (HONOR_SIGNED_ZEROS (mode))
28902 return copysign (x2, x);
28903 return x2;
28904 */
28905 enum machine_mode mode = GET_MODE (operand0);
28906 rtx xa, xi, TWO52, label, res, mask;
28907
28908 TWO52 = ix86_gen_TWO52 (mode);
28909
28910 /* Temporary for holding the result, initialized to the input
28911 operand to ease control flow. */
28912 res = gen_reg_rtx (mode);
28913 emit_move_insn (res, operand1);
28914
28915 /* xa = abs (operand1) */
28916 xa = ix86_expand_sse_fabs (res, &mask);
28917
28918 /* if (!isless (xa, TWO52)) goto label; */
28919 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
28920
28921 /* x = (double)(long)x */
28922 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
28923 expand_fix (xi, res, 0);
28924 expand_float (res, xi, 0);
28925
28926 if (HONOR_SIGNED_ZEROS (mode))
28927 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
28928
28929 emit_label (label);
28930 LABEL_NUSES (label) = 1;
28931
28932 emit_move_insn (operand0, res);
28933 }
28934
28935 /* Expand SSE sequence for computing trunc from OPERAND1 storing into OPERAND0.
28936 Sequence that works without relying on DImode truncation via cvttsd2siq that is only available on 64bit targets. */
28937 void
28938 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
28939 {
28940 enum machine_mode mode = GET_MODE (operand0);
28941 rtx xa, mask, TWO52, label, one, res, smask, tmp;
28942
28943 /* C code for SSE variant we expand below.
28944 double xa = fabs (x), x2;
28945 if (!isless (xa, TWO52))
28946 return x;
28947 xa2 = xa + TWO52 - TWO52;
28948 Compensate:
28949 if (xa2 > xa)
28950 xa2 -= 1.0;
28951 x2 = copysign (xa2, x);
28952 return x2;
28953 */
28954
28955 TWO52 = ix86_gen_TWO52 (mode);
28956
28957 /* Temporary for holding the result, initialized to the input
28958 operand to ease control flow. */
28959 res = gen_reg_rtx (mode);
28960 emit_move_insn (res, operand1);
28961
28962 /* xa = abs (operand1) */
28963 xa = ix86_expand_sse_fabs (res, &smask);
28964
28965 /* if (!isless (xa, TWO52)) goto label; */
28966 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
28967
28968 /* res = xa + TWO52 - TWO52; */
28969 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
28970 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
28971 emit_move_insn (res, tmp);
28972
28973 /* generate 1.0 */
28974 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
28975
28976 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
28977 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
28978 emit_insn (gen_rtx_SET (VOIDmode, mask,
28979 gen_rtx_AND (mode, mask, one)));
28980 tmp = expand_simple_binop (mode, MINUS,
28981 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
28982 emit_move_insn (res, tmp);
28983
28984 /* res = copysign (res, operand1) */
28985 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
28986
28987 emit_label (label);
28988 LABEL_NUSES (label) = 1;
28989
28990 emit_move_insn (operand0, res);
28991 }
28992
28993 /* Expand SSE sequence for computing round from OPERAND1 storing
28994 into OPERAND0. */
28995 void
28996 ix86_expand_round (rtx operand0, rtx operand1)
28997 {
28998 /* C code for the stuff we're doing below:
28999 double xa = fabs (x);
29000 if (!isless (xa, TWO52))
29001 return x;
29002 xa = (double)(long)(xa + nextafter (0.5, 0.0));
29003 return copysign (xa, x);
29004 */
29005 enum machine_mode mode = GET_MODE (operand0);
29006 rtx res, TWO52, xa, label, xi, half, mask;
29007 const struct real_format *fmt;
29008 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
29009
29010 /* Temporary for holding the result, initialized to the input
29011 operand to ease control flow. */
29012 res = gen_reg_rtx (mode);
29013 emit_move_insn (res, operand1);
29014
29015 TWO52 = ix86_gen_TWO52 (mode);
29016 xa = ix86_expand_sse_fabs (res, &mask);
29017 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29018
29019 /* load nextafter (0.5, 0.0) */
29020 fmt = REAL_MODE_FORMAT (mode);
29021 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
29022 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
29023
29024 /* xa = xa + 0.5 */
29025 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
29026 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
29027
29028 /* xa = (double)(int64_t)xa */
29029 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
29030 expand_fix (xi, xa, 0);
29031 expand_float (xa, xi, 0);
29032
29033 /* res = copysign (xa, operand1) */
29034 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
29035
29036 emit_label (label);
29037 LABEL_NUSES (label) = 1;
29038
29039 emit_move_insn (operand0, res);
29040 }
29041
29042 \f
29043 /* Check whether an SSE5 instruction's operand combination is valid.
29044 OPERANDS is the array of operands.
29045 NUM is the number of operands.
29046 USES_OC0 is true if the instruction uses OC0 and provides 4 variants.
29047 NUM_MEMORY is the maximum number of memory operands to accept.
29048 When COMMUTATIVE is set, operands 1 and 2 can be swapped. */
29049
29050 bool
29051 ix86_sse5_valid_op_p (rtx operands[], rtx insn ATTRIBUTE_UNUSED, int num,
29052 bool uses_oc0, int num_memory, bool commutative)
29053 {
29054 int mem_mask;
29055 int mem_count;
29056 int i;
29057
29058 /* Count the number of memory arguments */
29059 mem_mask = 0;
29060 mem_count = 0;
29061 for (i = 0; i < num; i++)
29062 {
29063 enum machine_mode mode = GET_MODE (operands[i]);
29064 if (register_operand (operands[i], mode))
29065 ;
29066
29067 else if (memory_operand (operands[i], mode))
29068 {
29069 mem_mask |= (1 << i);
29070 mem_count++;
29071 }
29072
29073 else
29074 {
29075 rtx pattern = PATTERN (insn);
29076
29077 /* allow 0 for pcmov */
29078 if (GET_CODE (pattern) != SET
29079 || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE
29080 || i < 2
29081 || operands[i] != CONST0_RTX (mode))
29082 return false;
29083 }
29084 }
29085
29086 /* Special case pmacsdq{l,h} where we allow the 3rd argument to be
29087 a memory operation. */
29088 if (num_memory < 0)
29089 {
29090 num_memory = -num_memory;
29091 if ((mem_mask & (1 << (num-1))) != 0)
29092 {
29093 mem_mask &= ~(1 << (num-1));
29094 mem_count--;
29095 }
29096 }
29097
29098 /* If there were no memory operations, allow the insn */
29099 if (mem_mask == 0)
29100 return true;
29101
29102 /* Do not allow the destination register to be a memory operand. */
29103 else if (mem_mask & (1 << 0))
29104 return false;
29105
29106 /* If there are too many memory operations, disallow the instruction. While
29107 the hardware only allows 1 memory reference, before register allocation
29108 we sometimes allow two memory operations for some insns so that
29109 code like the following can be optimized:
29110
29111 float fmadd (float *a, float *b, float *c) { return (*a * *b) + *c; }
29112
29113 or similar cases that are vectorized into using the fmaddss
29114 instruction. */
29115 else if (mem_count > num_memory)
29116 return false;
29117
29118 /* Don't allow more than one memory operation if not optimizing. */
29119 else if (mem_count > 1 && !optimize)
29120 return false;
29121
29122 else if (num == 4 && mem_count == 1)
29123 {
29124 /* formats (destination is the first argument), example fmaddss:
29125 xmm1, xmm1, xmm2, xmm3/mem
29126 xmm1, xmm1, xmm2/mem, xmm3
29127 xmm1, xmm2, xmm3/mem, xmm1
29128 xmm1, xmm2/mem, xmm3, xmm1 */
29129 if (uses_oc0)
29130 return ((mem_mask == (1 << 1))
29131 || (mem_mask == (1 << 2))
29132 || (mem_mask == (1 << 3)));
29133
29134 /* format, example pmacsdd:
29135 xmm1, xmm2, xmm3/mem, xmm1 */
29136 if (commutative)
29137 return (mem_mask == (1 << 2) || mem_mask == (1 << 1));
29138 else
29139 return (mem_mask == (1 << 2));
29140 }
29141
29142 else if (num == 4 && num_memory == 2)
29143 {
29144 /* If there are two memory operations, we can load one of the memory ops
29145 into the destination register. This is for optimizing the
29146 multiply/add ops, for which the combiner has optimized both the multiply
29147 and the add insns to have a memory operand. We have to be careful
29148 that the destination doesn't overlap with the inputs. */
29149 rtx op0 = operands[0];
29150
29151 if (reg_mentioned_p (op0, operands[1])
29152 || reg_mentioned_p (op0, operands[2])
29153 || reg_mentioned_p (op0, operands[3]))
29154 return false;
29155
29156 /* formats (destination is the first argument), example fmaddss:
29157 xmm1, xmm1, xmm2, xmm3/mem
29158 xmm1, xmm1, xmm2/mem, xmm3
29159 xmm1, xmm2, xmm3/mem, xmm1
29160 xmm1, xmm2/mem, xmm3, xmm1
29161
29162 For the oc0 case, we will load either operands[1] or operands[3] into
29163 operands[0], so any combination of 2 memory operands is ok. */
29164 if (uses_oc0)
29165 return true;
29166
29167 /* format, example pmacsdd:
29168 xmm1, xmm2, xmm3/mem, xmm1
29169
29170 For the integer multiply/add instructions, be more restrictive and
29171 require operands[2] and operands[3] to be the memory operands. */
29172 if (commutative)
29173 return (mem_mask == ((1 << 1) | (1 << 3)) || mem_mask == ((1 << 2) | (1 << 3)));
29174 else
29175 return (mem_mask == ((1 << 2) | (1 << 3)));
29176 }
29177
29178 else if (num == 3 && num_memory == 1)
29179 {
29180 /* formats, example protb:
29181 xmm1, xmm2, xmm3/mem
29182 xmm1, xmm2/mem, xmm3 */
29183 if (uses_oc0)
29184 return ((mem_mask == (1 << 1)) || (mem_mask == (1 << 2)));
29185
29186 /* format, example comeq:
29187 xmm1, xmm2, xmm3/mem */
29188 else
29189 return (mem_mask == (1 << 2));
29190 }
29191
29192 else
29193 gcc_unreachable ();
29194
29195 return false;
29196 }
29197
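/* Editorial example: for a four-operand fmaddss whose operands are
   xmm1, xmm1, xmm2, [mem], the loop above computes mem_mask = (1 << 3) and
   mem_count = 1, which the uses_oc0 case accepts.  Two memory operands are
   only tolerated before register allocation and are later rewritten by
   ix86_expand_sse5_multiple_memory below.  */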
29198 \f
29199 /* Fixup an SSE5 instruction that has 2 memory input references into a form the
29200 hardware will allow by using the destination register to load one of the
29201 memory operations. Presently this is used by the multiply/add routines to
29202 allow 2 memory references. */
29203
29204 void
29205 ix86_expand_sse5_multiple_memory (rtx operands[],
29206 int num,
29207 enum machine_mode mode)
29208 {
29209 rtx op0 = operands[0];
29210 if (num != 4
29211 || memory_operand (op0, mode)
29212 || reg_mentioned_p (op0, operands[1])
29213 || reg_mentioned_p (op0, operands[2])
29214 || reg_mentioned_p (op0, operands[3]))
29215 gcc_unreachable ();
29216
29217 /* For 2 memory operands, pick either operands[1] or operands[3] to move into
29218 the destination register. */
29219 if (memory_operand (operands[1], mode))
29220 {
29221 emit_move_insn (op0, operands[1]);
29222 operands[1] = op0;
29223 }
29224 else if (memory_operand (operands[3], mode))
29225 {
29226 emit_move_insn (op0, operands[3]);
29227 operands[3] = op0;
29228 }
29229 else
29230 gcc_unreachable ();
29231
29232 return;
29233 }
29234
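/* Editorial example: given  fmaddss xmm1, [a], xmm2, [c],  the routine
   above emits a move of [a] into xmm1 and rewrites operands[1] to xmm1,
   leaving a single memory reference ([c]) for the hardware to accept.  */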
29235 \f
29236 /* Table of valid machine attributes. */
29237 static const struct attribute_spec ix86_attribute_table[] =
29238 {
29239 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
29240 /* Stdcall attribute says callee is responsible for popping arguments
29241 if they are not variable. */
29242 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
29243 /* Fastcall attribute says callee is responsible for popping arguments
29244 if they are not variable. */
29245 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
29246 /* Cdecl attribute says the callee is a normal C declaration */
29247 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
29248 /* Regparm attribute specifies how many integer arguments are to be
29249 passed in registers. */
29250 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
29251 /* Sseregparm attribute says we are using x86_64 calling conventions
29252 for FP arguments. */
29253 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
29254 /* force_align_arg_pointer says this function realigns the stack at entry. */
29255 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
29256 false, true, true, ix86_handle_cconv_attribute },
29257 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
29258 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
29259 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
29260 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
29261 #endif
29262 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
29263 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
29264 #ifdef SUBTARGET_ATTRIBUTE_TABLE
29265 SUBTARGET_ATTRIBUTE_TABLE,
29266 #endif
29267 /* ms_abi and sysv_abi calling convention function attributes. */
29268 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
29269 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
29270 /* End element. */
29271 { NULL, 0, 0, false, false, false, NULL }
29272 };
29273
29274 /* Implement targetm.vectorize.builtin_vectorization_cost. */
29275 static int
29276 x86_builtin_vectorization_cost (bool runtime_test)
29277 {
29278 /* If the branch of the runtime test is taken - i.e. - the vectorized
29279 version is skipped - this incurs a misprediction cost (because the
29280 vectorized version is expected to be the fall-through). So we subtract
29281 the latency of a mispredicted branch from the costs that are incurred
29282 when the vectorized version is executed.
29283
29284 TODO: The values in individual target tables have to be tuned or new
29285 fields may be needed. E.g. on K8, the default branch path is the
29286 not-taken path. If the taken path is predicted correctly, the minimum
29287 penalty of going down the taken-path is 1 cycle. If the taken-path is
29288 not predicted correctly, then the minimum penalty is 10 cycles. */
29289
29290 if (runtime_test)
29291 {
29292 return (-(ix86_cost->cond_taken_branch_cost));
29293 }
29294 else
29295 return 0;
29296 }
29297
29298 /* This function returns the calling-ABI-specific va_list type node
29299 for FNDECL (ms_abi or sysv_abi on 64-bit targets). */
29300
29301 tree
29302 ix86_fn_abi_va_list (tree fndecl)
29303 {
29304 int abi;
29305
29306 if (!TARGET_64BIT)
29307 return va_list_type_node;
29308 gcc_assert (fndecl != NULL_TREE);
29309 abi = ix86_function_abi ((const_tree) fndecl);
29310
29311 if (abi == MS_ABI)
29312 return ms_va_list_type_node;
29313 else
29314 return sysv_va_list_type_node;
29315 }
29316
29317 /* Returns the canonical va_list type specified by TYPE. If there
29318 is no valid TYPE provided, it returns NULL_TREE. */
29319
29320 tree
29321 ix86_canonical_va_list_type (tree type)
29322 {
29323 tree wtype, htype;
29324
29325 /* Resolve references and pointers to va_list type. */
29326 if (INDIRECT_REF_P (type))
29327 type = TREE_TYPE (type);
29328 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
29329 type = TREE_TYPE (type);
29330
29331 if (TARGET_64BIT)
29332 {
29333 wtype = va_list_type_node;
29334 gcc_assert (wtype != NULL_TREE);
29335 htype = type;
29336 if (TREE_CODE (wtype) == ARRAY_TYPE)
29337 {
29338 /* If va_list is an array type, the argument may have decayed
29339 to a pointer type, e.g. by being passed to another function.
29340 In that case, unwrap both types so that we can compare the
29341 underlying records. */
29342 if (TREE_CODE (htype) == ARRAY_TYPE
29343 || POINTER_TYPE_P (htype))
29344 {
29345 wtype = TREE_TYPE (wtype);
29346 htype = TREE_TYPE (htype);
29347 }
29348 }
29349 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
29350 return va_list_type_node;
29351 wtype = sysv_va_list_type_node;
29352 gcc_assert (wtype != NULL_TREE);
29353 htype = type;
29354 if (TREE_CODE (wtype) == ARRAY_TYPE)
29355 {
29356 /* If va_list is an array type, the argument may have decayed
29357 to a pointer type, e.g. by being passed to another function.
29358 In that case, unwrap both types so that we can compare the
29359 underlying records. */
29360 if (TREE_CODE (htype) == ARRAY_TYPE
29361 || POINTER_TYPE_P (htype))
29362 {
29363 wtype = TREE_TYPE (wtype);
29364 htype = TREE_TYPE (htype);
29365 }
29366 }
29367 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
29368 return sysv_va_list_type_node;
29369 wtype = ms_va_list_type_node;
29370 gcc_assert (wtype != NULL_TREE);
29371 htype = type;
29372 if (TREE_CODE (wtype) == ARRAY_TYPE)
29373 {
29374 /* If va_list is an array type, the argument may have decayed
29375 to a pointer type, e.g. by being passed to another function.
29376 In that case, unwrap both types so that we can compare the
29377 underlying records. */
29378 if (TREE_CODE (htype) == ARRAY_TYPE
29379 || POINTER_TYPE_P (htype))
29380 {
29381 wtype = TREE_TYPE (wtype);
29382 htype = TREE_TYPE (htype);
29383 }
29384 }
29385 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
29386 return ms_va_list_type_node;
29387 return NULL_TREE;
29388 }
29389 return std_canonical_va_list_type (type);
29390 }
29391
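/* Editorial example (assumptions noted): on x86-64 the SYSV va_list is an
   array of one structure, so a  va_list ap  argument decays to a pointer to
   that structure when passed to another function; the unwrapping above lets
   both forms match sysv_va_list_type_node, while the MS-ABI va_list (a
   plain pointer type) is matched directly.  */
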
29392 /* Iterate through the target-specific builtin types for va_list.
29393 IDX denotes the iterator, *PTREE is set to the result type of
29394 the va_list builtin, and *PNAME to its internal name.
29395 Returns zero if there is no element for this index, otherwise
29396 IDX should be increased upon the next call.
29397 Note, do not iterate a base builtin's name like __builtin_va_list.
29398 Used from c_common_nodes_and_builtins. */
29399
29400 int
29401 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
29402 {
29403 if (!TARGET_64BIT)
29404 return 0;
29405 switch (idx) {
29406 case 0:
29407 *ptree = ms_va_list_type_node;
29408 *pname = "__builtin_ms_va_list";
29409 break;
29410 case 1:
29411 *ptree = sysv_va_list_type_node;
29412 *pname = "__builtin_sysv_va_list";
29413 break;
29414 default:
29415 return 0;
29416 }
29417 return 1;
29418 }
29419
29420 /* Initialize the GCC target structure. */
29421 #undef TARGET_RETURN_IN_MEMORY
29422 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
29423
29424 #undef TARGET_ATTRIBUTE_TABLE
29425 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
29426 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
29427 # undef TARGET_MERGE_DECL_ATTRIBUTES
29428 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
29429 #endif
29430
29431 #undef TARGET_COMP_TYPE_ATTRIBUTES
29432 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
29433
29434 #undef TARGET_INIT_BUILTINS
29435 #define TARGET_INIT_BUILTINS ix86_init_builtins
29436 #undef TARGET_EXPAND_BUILTIN
29437 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
29438
29439 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
29440 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
29441 ix86_builtin_vectorized_function
29442
29443 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
29444 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion
29445
29446 #undef TARGET_BUILTIN_RECIPROCAL
29447 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
29448
29449 #undef TARGET_ASM_FUNCTION_EPILOGUE
29450 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
29451
29452 #undef TARGET_ENCODE_SECTION_INFO
29453 #ifndef SUBTARGET_ENCODE_SECTION_INFO
29454 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
29455 #else
29456 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
29457 #endif
29458
29459 #undef TARGET_ASM_OPEN_PAREN
29460 #define TARGET_ASM_OPEN_PAREN ""
29461 #undef TARGET_ASM_CLOSE_PAREN
29462 #define TARGET_ASM_CLOSE_PAREN ""
29463
29464 #undef TARGET_ASM_ALIGNED_HI_OP
29465 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
29466 #undef TARGET_ASM_ALIGNED_SI_OP
29467 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
29468 #ifdef ASM_QUAD
29469 #undef TARGET_ASM_ALIGNED_DI_OP
29470 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
29471 #endif
29472
29473 #undef TARGET_ASM_UNALIGNED_HI_OP
29474 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
29475 #undef TARGET_ASM_UNALIGNED_SI_OP
29476 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
29477 #undef TARGET_ASM_UNALIGNED_DI_OP
29478 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
29479
29480 #undef TARGET_SCHED_ADJUST_COST
29481 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
29482 #undef TARGET_SCHED_ISSUE_RATE
29483 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
29484 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
29485 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
29486 ia32_multipass_dfa_lookahead
29487
29488 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
29489 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
29490
29491 #ifdef HAVE_AS_TLS
29492 #undef TARGET_HAVE_TLS
29493 #define TARGET_HAVE_TLS true
29494 #endif
29495 #undef TARGET_CANNOT_FORCE_CONST_MEM
29496 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
29497 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
29498 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
29499
29500 #undef TARGET_DELEGITIMIZE_ADDRESS
29501 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
29502
29503 #undef TARGET_MS_BITFIELD_LAYOUT_P
29504 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
29505
29506 #if TARGET_MACHO
29507 #undef TARGET_BINDS_LOCAL_P
29508 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
29509 #endif
29510 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
29511 #undef TARGET_BINDS_LOCAL_P
29512 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
29513 #endif
29514
29515 #undef TARGET_ASM_OUTPUT_MI_THUNK
29516 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
29517 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
29518 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
29519
29520 #undef TARGET_ASM_FILE_START
29521 #define TARGET_ASM_FILE_START x86_file_start
29522
29523 #undef TARGET_DEFAULT_TARGET_FLAGS
29524 #define TARGET_DEFAULT_TARGET_FLAGS \
29525 (TARGET_DEFAULT \
29526 | TARGET_SUBTARGET_DEFAULT \
29527 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
29528
29529 #undef TARGET_HANDLE_OPTION
29530 #define TARGET_HANDLE_OPTION ix86_handle_option
29531
29532 #undef TARGET_RTX_COSTS
29533 #define TARGET_RTX_COSTS ix86_rtx_costs
29534 #undef TARGET_ADDRESS_COST
29535 #define TARGET_ADDRESS_COST ix86_address_cost
29536
29537 #undef TARGET_FIXED_CONDITION_CODE_REGS
29538 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
29539 #undef TARGET_CC_MODES_COMPATIBLE
29540 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
29541
29542 #undef TARGET_MACHINE_DEPENDENT_REORG
29543 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
29544
29545 #undef TARGET_BUILD_BUILTIN_VA_LIST
29546 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
29547
29548 #undef TARGET_FN_ABI_VA_LIST
29549 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
29550
29551 #undef TARGET_CANONICAL_VA_LIST_TYPE
29552 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
29553
29554 #undef TARGET_EXPAND_BUILTIN_VA_START
29555 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
29556
29557 #undef TARGET_MD_ASM_CLOBBERS
29558 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
29559
29560 #undef TARGET_PROMOTE_PROTOTYPES
29561 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
29562 #undef TARGET_STRUCT_VALUE_RTX
29563 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
29564 #undef TARGET_SETUP_INCOMING_VARARGS
29565 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
29566 #undef TARGET_MUST_PASS_IN_STACK
29567 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
29568 #undef TARGET_PASS_BY_REFERENCE
29569 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
29570 #undef TARGET_INTERNAL_ARG_POINTER
29571 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
29572 #undef TARGET_UPDATE_STACK_BOUNDARY
29573 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
29574 #undef TARGET_GET_DRAP_RTX
29575 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
29576 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
29577 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
29578 #undef TARGET_STRICT_ARGUMENT_NAMING
29579 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
29580
29581 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
29582 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
29583
29584 #undef TARGET_SCALAR_MODE_SUPPORTED_P
29585 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
29586
29587 #undef TARGET_VECTOR_MODE_SUPPORTED_P
29588 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
29589
29590 #undef TARGET_C_MODE_FOR_SUFFIX
29591 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
29592
29593 #ifdef HAVE_AS_TLS
29594 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
29595 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
29596 #endif
29597
29598 #ifdef SUBTARGET_INSERT_ATTRIBUTES
29599 #undef TARGET_INSERT_ATTRIBUTES
29600 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
29601 #endif
29602
29603 #undef TARGET_MANGLE_TYPE
29604 #define TARGET_MANGLE_TYPE ix86_mangle_type
29605
29606 #undef TARGET_STACK_PROTECT_FAIL
29607 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
29608
29609 #undef TARGET_FUNCTION_VALUE
29610 #define TARGET_FUNCTION_VALUE ix86_function_value
29611
29612 #undef TARGET_SECONDARY_RELOAD
29613 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
29614
29615 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
29616 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST x86_builtin_vectorization_cost
29617
29618 #undef TARGET_SET_CURRENT_FUNCTION
29619 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
29620
29621 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
29622 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
29623
29624 #undef TARGET_OPTION_SAVE
29625 #define TARGET_OPTION_SAVE ix86_function_specific_save
29626
29627 #undef TARGET_OPTION_RESTORE
29628 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
29629
29630 #undef TARGET_OPTION_PRINT
29631 #define TARGET_OPTION_PRINT ix86_function_specific_print
29632
29633 #undef TARGET_OPTION_CAN_INLINE_P
29634 #define TARGET_OPTION_CAN_INLINE_P ix86_can_inline_p
29635
29636 #undef TARGET_EXPAND_TO_RTL_HOOK
29637 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
29638
29639 struct gcc_target targetm = TARGET_INITIALIZER;
29640 \f
29641 #include "gt-i386.h"