1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988-2014 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
10
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "tm.h"
24 #include "rtl.h"
25 #include "tree.h"
26 #include "stringpool.h"
27 #include "attribs.h"
28 #include "calls.h"
29 #include "stor-layout.h"
30 #include "varasm.h"
31 #include "tm_p.h"
32 #include "regs.h"
33 #include "hard-reg-set.h"
34 #include "insn-config.h"
35 #include "conditions.h"
36 #include "output.h"
37 #include "insn-codes.h"
38 #include "insn-attr.h"
39 #include "flags.h"
40 #include "except.h"
41 #include "function.h"
42 #include "recog.h"
43 #include "expr.h"
44 #include "optabs.h"
45 #include "diagnostic-core.h"
46 #include "toplev.h"
47 #include "basic-block.h"
48 #include "ggc.h"
49 #include "target.h"
50 #include "target-def.h"
51 #include "common/common-target.h"
52 #include "langhooks.h"
53 #include "reload.h"
54 #include "cgraph.h"
55 #include "pointer-set.h"
56 #include "hash-table.h"
57 #include "vec.h"
58 #include "basic-block.h"
59 #include "tree-ssa-alias.h"
60 #include "internal-fn.h"
61 #include "gimple-fold.h"
62 #include "tree-eh.h"
63 #include "gimple-expr.h"
64 #include "is-a.h"
65 #include "gimple.h"
66 #include "gimplify.h"
67 #include "cfgloop.h"
68 #include "dwarf2.h"
69 #include "df.h"
70 #include "tm-constrs.h"
71 #include "params.h"
72 #include "cselib.h"
73 #include "debug.h"
74 #include "sched-int.h"
75 #include "sbitmap.h"
76 #include "fibheap.h"
77 #include "opts.h"
78 #include "diagnostic.h"
79 #include "dumpfile.h"
80 #include "tree-pass.h"
81 #include "context.h"
82 #include "pass_manager.h"
83 #include "target-globals.h"
84 #include "tree-vectorizer.h"
85
86 static rtx legitimize_dllimport_symbol (rtx, bool);
87 static rtx legitimize_pe_coff_extern_decl (rtx, bool);
88 static rtx legitimize_pe_coff_symbol (rtx, bool);
89
90 #ifndef CHECK_STACK_LIMIT
91 #define CHECK_STACK_LIMIT (-1)
92 #endif
93
94 /* Return index of given mode in mult and division cost tables. */
95 #define MODE_INDEX(mode) \
96 ((mode) == QImode ? 0 \
97 : (mode) == HImode ? 1 \
98 : (mode) == SImode ? 2 \
99 : (mode) == DImode ? 3 \
100 : 4)
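/* Illustrative sketch (editorial addition, not part of the original file):
   MODE_INDEX is used to pick the per-mode entry out of the five-element cost
   arrays below.  The field names mult_init and mult_bit are assumed to match
   the processor_costs declaration in i386.h; this is a simplified stand-in
   for the macro's real users such as ix86_rtx_costs.  */
static inline int
example_mult_cost (const struct processor_costs *cost,
                   enum machine_mode mode, int bits_set)
{
  /* Start-up cost of a multiply in MODE plus a per-set-bit cost.  */
  return cost->mult_init[MODE_INDEX (mode)] + bits_set * cost->mult_bit;
}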
101
102 /* Processor costs (relative to an add) */
103 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
104 #define COSTS_N_BYTES(N) ((N) * 2)
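/* Worked example (editorial note, not part of the original file): with
   COSTS_N_INSNS (N) == (N) * 4, one add costs COSTS_N_INSNS (1) == 4 in the
   cycle-based tables; since an add is assumed to be 2 bytes, the size table
   below uses COSTS_N_BYTES (2) == 4 for an add, so the two scales stay
   commensurable ("one add" is the common unit).  */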
105
106 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall, false}}}
107
108 static stringop_algs ix86_size_memcpy[2] = {
109 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
110 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
111 static stringop_algs ix86_size_memset[2] = {
112 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
113 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
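/* Illustrative sketch (editorial addition, not part of the original file):
   each stringop_algs table above and below has one entry for 32-bit code
   ([0]) and one for 64-bit code ([1]).  An entry records a default algorithm
   for sizes unknown at compile time plus (max, algorithm) buckets terminated
   by max == -1.  The helper below is a simplified stand-in for the real
   selection logic (decide_alg) and assumes the stringop_algs layout declared
   in i386.h.  */
static inline enum stringop_alg
example_pick_alg (const struct stringop_algs *algs, HOST_WIDE_INT nbytes)
{
  unsigned int i;

  /* Scan the buckets in order; the first bucket whose MAX is big enough
     (or whose MAX is -1, the catch-all) names the algorithm to use.  */
  for (i = 0; i < MAX_STRINGOP_ALGS; i++)
    if (algs->size[i].max == -1 || nbytes <= algs->size[i].max)
      return algs->size[i].alg;

  /* Every table here ends with a -1 bucket, so this point is normally not
     reached; unknown_size is what the real code consults when the length is
     not a compile-time constant.  */
  return algs->unknown_size;
}
/* For ix86_size_memcpy above, every size falls into the -1 bucket, so
   rep_prefix_1_byte is chosen whenever we optimize for size.  */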
114
115 const
116 struct processor_costs ix86_size_cost = {/* costs for tuning for size */
117 COSTS_N_BYTES (2), /* cost of an add instruction */
118 COSTS_N_BYTES (3), /* cost of a lea instruction */
119 COSTS_N_BYTES (2), /* variable shift costs */
120 COSTS_N_BYTES (3), /* constant shift costs */
121 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
122 COSTS_N_BYTES (3), /* HI */
123 COSTS_N_BYTES (3), /* SI */
124 COSTS_N_BYTES (3), /* DI */
125 COSTS_N_BYTES (5)}, /* other */
126 0, /* cost of multiply per each bit set */
127 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
128 COSTS_N_BYTES (3), /* HI */
129 COSTS_N_BYTES (3), /* SI */
130 COSTS_N_BYTES (3), /* DI */
131 COSTS_N_BYTES (5)}, /* other */
132 COSTS_N_BYTES (3), /* cost of movsx */
133 COSTS_N_BYTES (3), /* cost of movzx */
134 0, /* "large" insn */
135 2, /* MOVE_RATIO */
136 2, /* cost for loading QImode using movzbl */
137 {2, 2, 2}, /* cost of loading integer registers
138 in QImode, HImode and SImode.
139 Relative to reg-reg move (2). */
140 {2, 2, 2}, /* cost of storing integer registers */
141 2, /* cost of reg,reg fld/fst */
142 {2, 2, 2}, /* cost of loading fp registers
143 in SFmode, DFmode and XFmode */
144 {2, 2, 2}, /* cost of storing fp registers
145 in SFmode, DFmode and XFmode */
146 3, /* cost of moving MMX register */
147 {3, 3}, /* cost of loading MMX registers
148 in SImode and DImode */
149 {3, 3}, /* cost of storing MMX registers
150 in SImode and DImode */
151 3, /* cost of moving SSE register */
152 {3, 3, 3}, /* cost of loading SSE registers
153 in SImode, DImode and TImode */
154 {3, 3, 3}, /* cost of storing SSE registers
155 in SImode, DImode and TImode */
156 3, /* MMX or SSE register to integer */
157 0, /* size of l1 cache */
158 0, /* size of l2 cache */
159 0, /* size of prefetch block */
160 0, /* number of parallel prefetches */
161 2, /* Branch cost */
162 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
163 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
164 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
165 COSTS_N_BYTES (2), /* cost of FABS instruction. */
166 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
167 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
168 ix86_size_memcpy,
169 ix86_size_memset,
170 1, /* scalar_stmt_cost. */
171 1, /* scalar load_cost. */
172 1, /* scalar_store_cost. */
173 1, /* vec_stmt_cost. */
174 1, /* vec_to_scalar_cost. */
175 1, /* scalar_to_vec_cost. */
176 1, /* vec_align_load_cost. */
177 1, /* vec_unalign_load_cost. */
178 1, /* vec_store_cost. */
179 1, /* cond_taken_branch_cost. */
180 1, /* cond_not_taken_branch_cost. */
181 };
182
183 /* Processor costs (relative to an add) */
184 static stringop_algs i386_memcpy[2] = {
185 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
186 DUMMY_STRINGOP_ALGS};
187 static stringop_algs i386_memset[2] = {
188 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
189 DUMMY_STRINGOP_ALGS};
190
191 static const
192 struct processor_costs i386_cost = { /* 386 specific costs */
193 COSTS_N_INSNS (1), /* cost of an add instruction */
194 COSTS_N_INSNS (1), /* cost of a lea instruction */
195 COSTS_N_INSNS (3), /* variable shift costs */
196 COSTS_N_INSNS (2), /* constant shift costs */
197 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
198 COSTS_N_INSNS (6), /* HI */
199 COSTS_N_INSNS (6), /* SI */
200 COSTS_N_INSNS (6), /* DI */
201 COSTS_N_INSNS (6)}, /* other */
202 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
203 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
204 COSTS_N_INSNS (23), /* HI */
205 COSTS_N_INSNS (23), /* SI */
206 COSTS_N_INSNS (23), /* DI */
207 COSTS_N_INSNS (23)}, /* other */
208 COSTS_N_INSNS (3), /* cost of movsx */
209 COSTS_N_INSNS (2), /* cost of movzx */
210 15, /* "large" insn */
211 3, /* MOVE_RATIO */
212 4, /* cost for loading QImode using movzbl */
213 {2, 4, 2}, /* cost of loading integer registers
214 in QImode, HImode and SImode.
215 Relative to reg-reg move (2). */
216 {2, 4, 2}, /* cost of storing integer registers */
217 2, /* cost of reg,reg fld/fst */
218 {8, 8, 8}, /* cost of loading fp registers
219 in SFmode, DFmode and XFmode */
220 {8, 8, 8}, /* cost of storing fp registers
221 in SFmode, DFmode and XFmode */
222 2, /* cost of moving MMX register */
223 {4, 8}, /* cost of loading MMX registers
224 in SImode and DImode */
225 {4, 8}, /* cost of storing MMX registers
226 in SImode and DImode */
227 2, /* cost of moving SSE register */
228 {4, 8, 16}, /* cost of loading SSE registers
229 in SImode, DImode and TImode */
230 {4, 8, 16}, /* cost of storing SSE registers
231 in SImode, DImode and TImode */
232 3, /* MMX or SSE register to integer */
233 0, /* size of l1 cache */
234 0, /* size of l2 cache */
235 0, /* size of prefetch block */
236 0, /* number of parallel prefetches */
237 1, /* Branch cost */
238 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
239 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
240 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
241 COSTS_N_INSNS (22), /* cost of FABS instruction. */
242 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
243 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
244 i386_memcpy,
245 i386_memset,
246 1, /* scalar_stmt_cost. */
247 1, /* scalar load_cost. */
248 1, /* scalar_store_cost. */
249 1, /* vec_stmt_cost. */
250 1, /* vec_to_scalar_cost. */
251 1, /* scalar_to_vec_cost. */
252 1, /* vec_align_load_cost. */
253 2, /* vec_unalign_load_cost. */
254 1, /* vec_store_cost. */
255 3, /* cond_taken_branch_cost. */
256 1, /* cond_not_taken_branch_cost. */
257 };
258
259 static stringop_algs i486_memcpy[2] = {
260 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
261 DUMMY_STRINGOP_ALGS};
262 static stringop_algs i486_memset[2] = {
263 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
264 DUMMY_STRINGOP_ALGS};
265
266 static const
267 struct processor_costs i486_cost = { /* 486 specific costs */
268 COSTS_N_INSNS (1), /* cost of an add instruction */
269 COSTS_N_INSNS (1), /* cost of a lea instruction */
270 COSTS_N_INSNS (3), /* variable shift costs */
271 COSTS_N_INSNS (2), /* constant shift costs */
272 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
273 COSTS_N_INSNS (12), /* HI */
274 COSTS_N_INSNS (12), /* SI */
275 COSTS_N_INSNS (12), /* DI */
276 COSTS_N_INSNS (12)}, /* other */
277 1, /* cost of multiply per each bit set */
278 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
279 COSTS_N_INSNS (40), /* HI */
280 COSTS_N_INSNS (40), /* SI */
281 COSTS_N_INSNS (40), /* DI */
282 COSTS_N_INSNS (40)}, /* other */
283 COSTS_N_INSNS (3), /* cost of movsx */
284 COSTS_N_INSNS (2), /* cost of movzx */
285 15, /* "large" insn */
286 3, /* MOVE_RATIO */
287 4, /* cost for loading QImode using movzbl */
288 {2, 4, 2}, /* cost of loading integer registers
289 in QImode, HImode and SImode.
290 Relative to reg-reg move (2). */
291 {2, 4, 2}, /* cost of storing integer registers */
292 2, /* cost of reg,reg fld/fst */
293 {8, 8, 8}, /* cost of loading fp registers
294 in SFmode, DFmode and XFmode */
295 {8, 8, 8}, /* cost of storing fp registers
296 in SFmode, DFmode and XFmode */
297 2, /* cost of moving MMX register */
298 {4, 8}, /* cost of loading MMX registers
299 in SImode and DImode */
300 {4, 8}, /* cost of storing MMX registers
301 in SImode and DImode */
302 2, /* cost of moving SSE register */
303 {4, 8, 16}, /* cost of loading SSE registers
304 in SImode, DImode and TImode */
305 {4, 8, 16}, /* cost of storing SSE registers
306 in SImode, DImode and TImode */
307 3, /* MMX or SSE register to integer */
308 4, /* size of l1 cache. 486 has 8kB cache
309 shared for code and data, so 4kB is
310 not really precise. */
311 4, /* size of l2 cache */
312 0, /* size of prefetch block */
313 0, /* number of parallel prefetches */
314 1, /* Branch cost */
315 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
316 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
317 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
318 COSTS_N_INSNS (3), /* cost of FABS instruction. */
319 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
320 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
321 i486_memcpy,
322 i486_memset,
323 1, /* scalar_stmt_cost. */
324 1, /* scalar load_cost. */
325 1, /* scalar_store_cost. */
326 1, /* vec_stmt_cost. */
327 1, /* vec_to_scalar_cost. */
328 1, /* scalar_to_vec_cost. */
329 1, /* vec_align_load_cost. */
330 2, /* vec_unalign_load_cost. */
331 1, /* vec_store_cost. */
332 3, /* cond_taken_branch_cost. */
333 1, /* cond_not_taken_branch_cost. */
334 };
335
336 static stringop_algs pentium_memcpy[2] = {
337 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
338 DUMMY_STRINGOP_ALGS};
339 static stringop_algs pentium_memset[2] = {
340 {libcall, {{-1, rep_prefix_4_byte, false}}},
341 DUMMY_STRINGOP_ALGS};
342
343 static const
344 struct processor_costs pentium_cost = {
345 COSTS_N_INSNS (1), /* cost of an add instruction */
346 COSTS_N_INSNS (1), /* cost of a lea instruction */
347 COSTS_N_INSNS (4), /* variable shift costs */
348 COSTS_N_INSNS (1), /* constant shift costs */
349 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
350 COSTS_N_INSNS (11), /* HI */
351 COSTS_N_INSNS (11), /* SI */
352 COSTS_N_INSNS (11), /* DI */
353 COSTS_N_INSNS (11)}, /* other */
354 0, /* cost of multiply per each bit set */
355 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
356 COSTS_N_INSNS (25), /* HI */
357 COSTS_N_INSNS (25), /* SI */
358 COSTS_N_INSNS (25), /* DI */
359 COSTS_N_INSNS (25)}, /* other */
360 COSTS_N_INSNS (3), /* cost of movsx */
361 COSTS_N_INSNS (2), /* cost of movzx */
362 8, /* "large" insn */
363 6, /* MOVE_RATIO */
364 6, /* cost for loading QImode using movzbl */
365 {2, 4, 2}, /* cost of loading integer registers
366 in QImode, HImode and SImode.
367 Relative to reg-reg move (2). */
368 {2, 4, 2}, /* cost of storing integer registers */
369 2, /* cost of reg,reg fld/fst */
370 {2, 2, 6}, /* cost of loading fp registers
371 in SFmode, DFmode and XFmode */
372 {4, 4, 6}, /* cost of storing fp registers
373 in SFmode, DFmode and XFmode */
374 8, /* cost of moving MMX register */
375 {8, 8}, /* cost of loading MMX registers
376 in SImode and DImode */
377 {8, 8}, /* cost of storing MMX registers
378 in SImode and DImode */
379 2, /* cost of moving SSE register */
380 {4, 8, 16}, /* cost of loading SSE registers
381 in SImode, DImode and TImode */
382 {4, 8, 16}, /* cost of storing SSE registers
383 in SImode, DImode and TImode */
384 3, /* MMX or SSE register to integer */
385 8, /* size of l1 cache. */
386 8, /* size of l2 cache */
387 0, /* size of prefetch block */
388 0, /* number of parallel prefetches */
389 2, /* Branch cost */
390 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
391 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
392 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
393 COSTS_N_INSNS (1), /* cost of FABS instruction. */
394 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
395 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
396 pentium_memcpy,
397 pentium_memset,
398 1, /* scalar_stmt_cost. */
399 1, /* scalar load_cost. */
400 1, /* scalar_store_cost. */
401 1, /* vec_stmt_cost. */
402 1, /* vec_to_scalar_cost. */
403 1, /* scalar_to_vec_cost. */
404 1, /* vec_align_load_cost. */
405 2, /* vec_unalign_load_cost. */
406 1, /* vec_store_cost. */
407 3, /* cond_taken_branch_cost. */
408 1, /* cond_not_taken_branch_cost. */
409 };
410
 411 /* PentiumPro has optimized rep instructions for blocks aligned to 8 bytes
 412    (we ensure the alignment).  For small blocks an inline loop is still a
 413    noticeable win; for bigger blocks either rep movsl or rep movsb is the
 414    way to go.  Rep movsb apparently has a more expensive startup time in the
 415    CPU, but after 4K the difference is in the noise (worked example below).  */
416 static stringop_algs pentiumpro_memcpy[2] = {
417 {rep_prefix_4_byte, {{128, loop, false}, {1024, unrolled_loop, false},
418 {8192, rep_prefix_4_byte, false},
419 {-1, rep_prefix_1_byte, false}}},
420 DUMMY_STRINGOP_ALGS};
421 static stringop_algs pentiumpro_memset[2] = {
422 {rep_prefix_4_byte, {{1024, unrolled_loop, false},
423 {8192, rep_prefix_4_byte, false},
424 {-1, libcall, false}}},
425 DUMMY_STRINGOP_ALGS};
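/* Worked example (editorial note, not part of the original file), using the
   hypothetical example_pick_alg helper sketched earlier on the 32-bit entry:
     example_pick_alg (&pentiumpro_memcpy[0], 100)   == loop
     example_pick_alg (&pentiumpro_memcpy[0], 512)   == unrolled_loop
     example_pick_alg (&pentiumpro_memcpy[0], 4096)  == rep_prefix_4_byte
     example_pick_alg (&pentiumpro_memcpy[0], 65536) == rep_prefix_1_byte
   Sizes not known at compile time default to rep_prefix_4_byte.  */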
426 static const
427 struct processor_costs pentiumpro_cost = {
428 COSTS_N_INSNS (1), /* cost of an add instruction */
429 COSTS_N_INSNS (1), /* cost of a lea instruction */
430 COSTS_N_INSNS (1), /* variable shift costs */
431 COSTS_N_INSNS (1), /* constant shift costs */
432 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
433 COSTS_N_INSNS (4), /* HI */
434 COSTS_N_INSNS (4), /* SI */
435 COSTS_N_INSNS (4), /* DI */
436 COSTS_N_INSNS (4)}, /* other */
437 0, /* cost of multiply per each bit set */
438 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
439 COSTS_N_INSNS (17), /* HI */
440 COSTS_N_INSNS (17), /* SI */
441 COSTS_N_INSNS (17), /* DI */
442 COSTS_N_INSNS (17)}, /* other */
443 COSTS_N_INSNS (1), /* cost of movsx */
444 COSTS_N_INSNS (1), /* cost of movzx */
445 8, /* "large" insn */
446 6, /* MOVE_RATIO */
447 2, /* cost for loading QImode using movzbl */
448 {4, 4, 4}, /* cost of loading integer registers
449 in QImode, HImode and SImode.
450 Relative to reg-reg move (2). */
451 {2, 2, 2}, /* cost of storing integer registers */
452 2, /* cost of reg,reg fld/fst */
453 {2, 2, 6}, /* cost of loading fp registers
454 in SFmode, DFmode and XFmode */
455 {4, 4, 6}, /* cost of storing fp registers
456 in SFmode, DFmode and XFmode */
457 2, /* cost of moving MMX register */
458 {2, 2}, /* cost of loading MMX registers
459 in SImode and DImode */
460 {2, 2}, /* cost of storing MMX registers
461 in SImode and DImode */
462 2, /* cost of moving SSE register */
463 {2, 2, 8}, /* cost of loading SSE registers
464 in SImode, DImode and TImode */
465 {2, 2, 8}, /* cost of storing SSE registers
466 in SImode, DImode and TImode */
467 3, /* MMX or SSE register to integer */
468 8, /* size of l1 cache. */
469 256, /* size of l2 cache */
470 32, /* size of prefetch block */
471 6, /* number of parallel prefetches */
472 2, /* Branch cost */
473 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
474 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
475 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
476 COSTS_N_INSNS (2), /* cost of FABS instruction. */
477 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
478 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
479 pentiumpro_memcpy,
480 pentiumpro_memset,
481 1, /* scalar_stmt_cost. */
482 1, /* scalar load_cost. */
483 1, /* scalar_store_cost. */
484 1, /* vec_stmt_cost. */
485 1, /* vec_to_scalar_cost. */
486 1, /* scalar_to_vec_cost. */
487 1, /* vec_align_load_cost. */
488 2, /* vec_unalign_load_cost. */
489 1, /* vec_store_cost. */
490 3, /* cond_taken_branch_cost. */
491 1, /* cond_not_taken_branch_cost. */
492 };
493
494 static stringop_algs geode_memcpy[2] = {
495 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
496 DUMMY_STRINGOP_ALGS};
497 static stringop_algs geode_memset[2] = {
498 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
499 DUMMY_STRINGOP_ALGS};
500 static const
501 struct processor_costs geode_cost = {
502 COSTS_N_INSNS (1), /* cost of an add instruction */
503 COSTS_N_INSNS (1), /* cost of a lea instruction */
504 COSTS_N_INSNS (2), /* variable shift costs */
505 COSTS_N_INSNS (1), /* constant shift costs */
506 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
507 COSTS_N_INSNS (4), /* HI */
508 COSTS_N_INSNS (7), /* SI */
509 COSTS_N_INSNS (7), /* DI */
510 COSTS_N_INSNS (7)}, /* other */
511 0, /* cost of multiply per each bit set */
512 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
513 COSTS_N_INSNS (23), /* HI */
514 COSTS_N_INSNS (39), /* SI */
515 COSTS_N_INSNS (39), /* DI */
516 COSTS_N_INSNS (39)}, /* other */
517 COSTS_N_INSNS (1), /* cost of movsx */
518 COSTS_N_INSNS (1), /* cost of movzx */
519 8, /* "large" insn */
520 4, /* MOVE_RATIO */
521 1, /* cost for loading QImode using movzbl */
522 {1, 1, 1}, /* cost of loading integer registers
523 in QImode, HImode and SImode.
524 Relative to reg-reg move (2). */
525 {1, 1, 1}, /* cost of storing integer registers */
526 1, /* cost of reg,reg fld/fst */
527 {1, 1, 1}, /* cost of loading fp registers
528 in SFmode, DFmode and XFmode */
529 {4, 6, 6}, /* cost of storing fp registers
530 in SFmode, DFmode and XFmode */
531
532 1, /* cost of moving MMX register */
533 {1, 1}, /* cost of loading MMX registers
534 in SImode and DImode */
535 {1, 1}, /* cost of storing MMX registers
536 in SImode and DImode */
537 1, /* cost of moving SSE register */
538 {1, 1, 1}, /* cost of loading SSE registers
539 in SImode, DImode and TImode */
540 {1, 1, 1}, /* cost of storing SSE registers
541 in SImode, DImode and TImode */
542 1, /* MMX or SSE register to integer */
543 64, /* size of l1 cache. */
544 128, /* size of l2 cache. */
545 32, /* size of prefetch block */
546 1, /* number of parallel prefetches */
547 1, /* Branch cost */
548 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
549 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
550 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
551 COSTS_N_INSNS (1), /* cost of FABS instruction. */
552 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
553 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
554 geode_memcpy,
555 geode_memset,
556 1, /* scalar_stmt_cost. */
557 1, /* scalar load_cost. */
558 1, /* scalar_store_cost. */
559 1, /* vec_stmt_cost. */
560 1, /* vec_to_scalar_cost. */
561 1, /* scalar_to_vec_cost. */
562 1, /* vec_align_load_cost. */
563 2, /* vec_unalign_load_cost. */
564 1, /* vec_store_cost. */
565 3, /* cond_taken_branch_cost. */
566 1, /* cond_not_taken_branch_cost. */
567 };
568
569 static stringop_algs k6_memcpy[2] = {
570 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
571 DUMMY_STRINGOP_ALGS};
572 static stringop_algs k6_memset[2] = {
573 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
574 DUMMY_STRINGOP_ALGS};
575 static const
576 struct processor_costs k6_cost = {
577 COSTS_N_INSNS (1), /* cost of an add instruction */
578 COSTS_N_INSNS (2), /* cost of a lea instruction */
579 COSTS_N_INSNS (1), /* variable shift costs */
580 COSTS_N_INSNS (1), /* constant shift costs */
581 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
582 COSTS_N_INSNS (3), /* HI */
583 COSTS_N_INSNS (3), /* SI */
584 COSTS_N_INSNS (3), /* DI */
585 COSTS_N_INSNS (3)}, /* other */
586 0, /* cost of multiply per each bit set */
587 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
588 COSTS_N_INSNS (18), /* HI */
589 COSTS_N_INSNS (18), /* SI */
590 COSTS_N_INSNS (18), /* DI */
591 COSTS_N_INSNS (18)}, /* other */
592 COSTS_N_INSNS (2), /* cost of movsx */
593 COSTS_N_INSNS (2), /* cost of movzx */
594 8, /* "large" insn */
595 4, /* MOVE_RATIO */
596 3, /* cost for loading QImode using movzbl */
597 {4, 5, 4}, /* cost of loading integer registers
598 in QImode, HImode and SImode.
599 Relative to reg-reg move (2). */
600 {2, 3, 2}, /* cost of storing integer registers */
601 4, /* cost of reg,reg fld/fst */
602 {6, 6, 6}, /* cost of loading fp registers
603 in SFmode, DFmode and XFmode */
604 {4, 4, 4}, /* cost of storing fp registers
605 in SFmode, DFmode and XFmode */
606 2, /* cost of moving MMX register */
607 {2, 2}, /* cost of loading MMX registers
608 in SImode and DImode */
609 {2, 2}, /* cost of storing MMX registers
610 in SImode and DImode */
611 2, /* cost of moving SSE register */
612 {2, 2, 8}, /* cost of loading SSE registers
613 in SImode, DImode and TImode */
614 {2, 2, 8}, /* cost of storing SSE registers
615 in SImode, DImode and TImode */
616 6, /* MMX or SSE register to integer */
617 32, /* size of l1 cache. */
618 32, /* size of l2 cache. Some models
619 have integrated l2 cache, but
620 optimizing for k6 is not important
621 enough to worry about that. */
622 32, /* size of prefetch block */
623 1, /* number of parallel prefetches */
624 1, /* Branch cost */
625 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
626 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
627 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
628 COSTS_N_INSNS (2), /* cost of FABS instruction. */
629 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
630 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
631 k6_memcpy,
632 k6_memset,
633 1, /* scalar_stmt_cost. */
634 1, /* scalar load_cost. */
635 1, /* scalar_store_cost. */
636 1, /* vec_stmt_cost. */
637 1, /* vec_to_scalar_cost. */
638 1, /* scalar_to_vec_cost. */
639 1, /* vec_align_load_cost. */
640 2, /* vec_unalign_load_cost. */
641 1, /* vec_store_cost. */
642 3, /* cond_taken_branch_cost. */
643 1, /* cond_not_taken_branch_cost. */
644 };
645
 646 /* For some reason, Athlon deals better with the REP prefix (relative to
 647    loops) than K8 does.  Alignment becomes important after 8 bytes for
 648    memcpy and 128 bytes for memset.  */
649 static stringop_algs athlon_memcpy[2] = {
650 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
651 DUMMY_STRINGOP_ALGS};
652 static stringop_algs athlon_memset[2] = {
653 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
654 DUMMY_STRINGOP_ALGS};
655 static const
656 struct processor_costs athlon_cost = {
657 COSTS_N_INSNS (1), /* cost of an add instruction */
658 COSTS_N_INSNS (2), /* cost of a lea instruction */
659 COSTS_N_INSNS (1), /* variable shift costs */
660 COSTS_N_INSNS (1), /* constant shift costs */
661 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
662 COSTS_N_INSNS (5), /* HI */
663 COSTS_N_INSNS (5), /* SI */
664 COSTS_N_INSNS (5), /* DI */
665 COSTS_N_INSNS (5)}, /* other */
666 0, /* cost of multiply per each bit set */
667 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
668 COSTS_N_INSNS (26), /* HI */
669 COSTS_N_INSNS (42), /* SI */
670 COSTS_N_INSNS (74), /* DI */
671 COSTS_N_INSNS (74)}, /* other */
672 COSTS_N_INSNS (1), /* cost of movsx */
673 COSTS_N_INSNS (1), /* cost of movzx */
674 8, /* "large" insn */
675 9, /* MOVE_RATIO */
676 4, /* cost for loading QImode using movzbl */
677 {3, 4, 3}, /* cost of loading integer registers
678 in QImode, HImode and SImode.
679 Relative to reg-reg move (2). */
680 {3, 4, 3}, /* cost of storing integer registers */
681 4, /* cost of reg,reg fld/fst */
682 {4, 4, 12}, /* cost of loading fp registers
683 in SFmode, DFmode and XFmode */
684 {6, 6, 8}, /* cost of storing fp registers
685 in SFmode, DFmode and XFmode */
686 2, /* cost of moving MMX register */
687 {4, 4}, /* cost of loading MMX registers
688 in SImode and DImode */
689 {4, 4}, /* cost of storing MMX registers
690 in SImode and DImode */
691 2, /* cost of moving SSE register */
692 {4, 4, 6}, /* cost of loading SSE registers
693 in SImode, DImode and TImode */
694 {4, 4, 5}, /* cost of storing SSE registers
695 in SImode, DImode and TImode */
696 5, /* MMX or SSE register to integer */
697 64, /* size of l1 cache. */
698 256, /* size of l2 cache. */
699 64, /* size of prefetch block */
700 6, /* number of parallel prefetches */
701 5, /* Branch cost */
702 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
703 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
704 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
705 COSTS_N_INSNS (2), /* cost of FABS instruction. */
706 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
707 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
708 athlon_memcpy,
709 athlon_memset,
710 1, /* scalar_stmt_cost. */
711 1, /* scalar load_cost. */
712 1, /* scalar_store_cost. */
713 1, /* vec_stmt_cost. */
714 1, /* vec_to_scalar_cost. */
715 1, /* scalar_to_vec_cost. */
716 1, /* vec_align_load_cost. */
717 2, /* vec_unalign_load_cost. */
718 1, /* vec_store_cost. */
719 3, /* cond_taken_branch_cost. */
720 1, /* cond_not_taken_branch_cost. */
721 };
722
 723 /* K8 has an optimized REP instruction for medium-sized blocks, but for very
 724    small blocks it is better to use a loop.  For large blocks, a libcall can
 725    do nontemporal accesses and beat inline code considerably.  */
726 static stringop_algs k8_memcpy[2] = {
727 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
728 {-1, rep_prefix_4_byte, false}}},
729 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
730 {-1, libcall, false}}}};
731 static stringop_algs k8_memset[2] = {
732 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
733 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
734 {libcall, {{48, unrolled_loop, false},
735 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
736 static const
737 struct processor_costs k8_cost = {
738 COSTS_N_INSNS (1), /* cost of an add instruction */
739 COSTS_N_INSNS (2), /* cost of a lea instruction */
740 COSTS_N_INSNS (1), /* variable shift costs */
741 COSTS_N_INSNS (1), /* constant shift costs */
742 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
743 COSTS_N_INSNS (4), /* HI */
744 COSTS_N_INSNS (3), /* SI */
745 COSTS_N_INSNS (4), /* DI */
746 COSTS_N_INSNS (5)}, /* other */
747 0, /* cost of multiply per each bit set */
748 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
749 COSTS_N_INSNS (26), /* HI */
750 COSTS_N_INSNS (42), /* SI */
751 COSTS_N_INSNS (74), /* DI */
752 COSTS_N_INSNS (74)}, /* other */
753 COSTS_N_INSNS (1), /* cost of movsx */
754 COSTS_N_INSNS (1), /* cost of movzx */
755 8, /* "large" insn */
756 9, /* MOVE_RATIO */
757 4, /* cost for loading QImode using movzbl */
758 {3, 4, 3}, /* cost of loading integer registers
759 in QImode, HImode and SImode.
760 Relative to reg-reg move (2). */
761 {3, 4, 3}, /* cost of storing integer registers */
762 4, /* cost of reg,reg fld/fst */
763 {4, 4, 12}, /* cost of loading fp registers
764 in SFmode, DFmode and XFmode */
765 {6, 6, 8}, /* cost of storing fp registers
766 in SFmode, DFmode and XFmode */
767 2, /* cost of moving MMX register */
768 {3, 3}, /* cost of loading MMX registers
769 in SImode and DImode */
770 {4, 4}, /* cost of storing MMX registers
771 in SImode and DImode */
772 2, /* cost of moving SSE register */
773 {4, 3, 6}, /* cost of loading SSE registers
774 in SImode, DImode and TImode */
775 {4, 4, 5}, /* cost of storing SSE registers
776 in SImode, DImode and TImode */
777 5, /* MMX or SSE register to integer */
778 64, /* size of l1 cache. */
779 512, /* size of l2 cache. */
780 64, /* size of prefetch block */
 781 /* New AMD processors never drop prefetches; if they cannot be performed
 782    immediately, they are queued.  We set the number of simultaneous prefetches
 783    to a large constant to reflect this (leaving the number of prefetches
 784    completely unlimited is probably not a good idea either, as their
 785    execution also takes some time).  */
786 100, /* number of parallel prefetches */
787 3, /* Branch cost */
788 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
789 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
790 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
791 COSTS_N_INSNS (2), /* cost of FABS instruction. */
792 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
793 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
794
795 k8_memcpy,
796 k8_memset,
797 4, /* scalar_stmt_cost. */
798 2, /* scalar load_cost. */
799 2, /* scalar_store_cost. */
800 5, /* vec_stmt_cost. */
801 0, /* vec_to_scalar_cost. */
802 2, /* scalar_to_vec_cost. */
803 2, /* vec_align_load_cost. */
804 3, /* vec_unalign_load_cost. */
805 3, /* vec_store_cost. */
806 3, /* cond_taken_branch_cost. */
807 2, /* cond_not_taken_branch_cost. */
808 };
809
 810 /* AMDFAM10 has an optimized REP instruction for medium-sized blocks, but for
 811    very small blocks it is better to use a loop.  For large blocks, a libcall
 812    can do nontemporal accesses and beat inline code considerably.  */
813 static stringop_algs amdfam10_memcpy[2] = {
814 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
815 {-1, rep_prefix_4_byte, false}}},
816 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
817 {-1, libcall, false}}}};
818 static stringop_algs amdfam10_memset[2] = {
819 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
820 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
821 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
822 {-1, libcall, false}}}};
823 struct processor_costs amdfam10_cost = {
824 COSTS_N_INSNS (1), /* cost of an add instruction */
825 COSTS_N_INSNS (2), /* cost of a lea instruction */
826 COSTS_N_INSNS (1), /* variable shift costs */
827 COSTS_N_INSNS (1), /* constant shift costs */
828 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
829 COSTS_N_INSNS (4), /* HI */
830 COSTS_N_INSNS (3), /* SI */
831 COSTS_N_INSNS (4), /* DI */
832 COSTS_N_INSNS (5)}, /* other */
833 0, /* cost of multiply per each bit set */
834 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
835 COSTS_N_INSNS (35), /* HI */
836 COSTS_N_INSNS (51), /* SI */
837 COSTS_N_INSNS (83), /* DI */
838 COSTS_N_INSNS (83)}, /* other */
839 COSTS_N_INSNS (1), /* cost of movsx */
840 COSTS_N_INSNS (1), /* cost of movzx */
841 8, /* "large" insn */
842 9, /* MOVE_RATIO */
843 4, /* cost for loading QImode using movzbl */
844 {3, 4, 3}, /* cost of loading integer registers
845 in QImode, HImode and SImode.
846 Relative to reg-reg move (2). */
847 {3, 4, 3}, /* cost of storing integer registers */
848 4, /* cost of reg,reg fld/fst */
849 {4, 4, 12}, /* cost of loading fp registers
850 in SFmode, DFmode and XFmode */
851 {6, 6, 8}, /* cost of storing fp registers
852 in SFmode, DFmode and XFmode */
853 2, /* cost of moving MMX register */
854 {3, 3}, /* cost of loading MMX registers
855 in SImode and DImode */
856 {4, 4}, /* cost of storing MMX registers
857 in SImode and DImode */
858 2, /* cost of moving SSE register */
859 {4, 4, 3}, /* cost of loading SSE registers
860 in SImode, DImode and TImode */
861 {4, 4, 5}, /* cost of storing SSE registers
862 in SImode, DImode and TImode */
863 3, /* MMX or SSE register to integer */
864 /* On K8:
865 MOVD reg64, xmmreg Double FSTORE 4
866 MOVD reg32, xmmreg Double FSTORE 4
867 On AMDFAM10:
868 MOVD reg64, xmmreg Double FADD 3
869 1/1 1/1
870 MOVD reg32, xmmreg Double FADD 3
871 1/1 1/1 */
872 64, /* size of l1 cache. */
873 512, /* size of l2 cache. */
874 64, /* size of prefetch block */
 875 /* New AMD processors never drop prefetches; if they cannot be performed
 876    immediately, they are queued.  We set the number of simultaneous prefetches
 877    to a large constant to reflect this (leaving the number of prefetches
 878    completely unlimited is probably not a good idea either, as their
 879    execution also takes some time).  */
880 100, /* number of parallel prefetches */
881 2, /* Branch cost */
882 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
883 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
884 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
885 COSTS_N_INSNS (2), /* cost of FABS instruction. */
886 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
887 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
888
889 amdfam10_memcpy,
890 amdfam10_memset,
891 4, /* scalar_stmt_cost. */
892 2, /* scalar load_cost. */
893 2, /* scalar_store_cost. */
894 6, /* vec_stmt_cost. */
895 0, /* vec_to_scalar_cost. */
896 2, /* scalar_to_vec_cost. */
897 2, /* vec_align_load_cost. */
898 2, /* vec_unalign_load_cost. */
899 2, /* vec_store_cost. */
900 2, /* cond_taken_branch_cost. */
901 1, /* cond_not_taken_branch_cost. */
902 };
903
 904 /* BDVER1 has an optimized REP instruction for medium-sized blocks, but for
 905    very small blocks it is better to use a loop.  For large blocks, a libcall
 906    can do nontemporal accesses and beat inline code considerably.  */
907 static stringop_algs bdver1_memcpy[2] = {
908 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
909 {-1, rep_prefix_4_byte, false}}},
910 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
911 {-1, libcall, false}}}};
912 static stringop_algs bdver1_memset[2] = {
913 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
914 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
915 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
916 {-1, libcall, false}}}};
917
918 const struct processor_costs bdver1_cost = {
919 COSTS_N_INSNS (1), /* cost of an add instruction */
920 COSTS_N_INSNS (1), /* cost of a lea instruction */
921 COSTS_N_INSNS (1), /* variable shift costs */
922 COSTS_N_INSNS (1), /* constant shift costs */
923 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
924 COSTS_N_INSNS (4), /* HI */
925 COSTS_N_INSNS (4), /* SI */
926 COSTS_N_INSNS (6), /* DI */
927 COSTS_N_INSNS (6)}, /* other */
928 0, /* cost of multiply per each bit set */
929 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
930 COSTS_N_INSNS (35), /* HI */
931 COSTS_N_INSNS (51), /* SI */
932 COSTS_N_INSNS (83), /* DI */
933 COSTS_N_INSNS (83)}, /* other */
934 COSTS_N_INSNS (1), /* cost of movsx */
935 COSTS_N_INSNS (1), /* cost of movzx */
936 8, /* "large" insn */
937 9, /* MOVE_RATIO */
938 4, /* cost for loading QImode using movzbl */
939 {5, 5, 4}, /* cost of loading integer registers
940 in QImode, HImode and SImode.
941 Relative to reg-reg move (2). */
942 {4, 4, 4}, /* cost of storing integer registers */
943 2, /* cost of reg,reg fld/fst */
944 {5, 5, 12}, /* cost of loading fp registers
945 in SFmode, DFmode and XFmode */
946 {4, 4, 8}, /* cost of storing fp registers
947 in SFmode, DFmode and XFmode */
948 2, /* cost of moving MMX register */
949 {4, 4}, /* cost of loading MMX registers
950 in SImode and DImode */
951 {4, 4}, /* cost of storing MMX registers
952 in SImode and DImode */
953 2, /* cost of moving SSE register */
954 {4, 4, 4}, /* cost of loading SSE registers
955 in SImode, DImode and TImode */
956 {4, 4, 4}, /* cost of storing SSE registers
957 in SImode, DImode and TImode */
958 2, /* MMX or SSE register to integer */
959 /* On K8:
960 MOVD reg64, xmmreg Double FSTORE 4
961 MOVD reg32, xmmreg Double FSTORE 4
962 On AMDFAM10:
963 MOVD reg64, xmmreg Double FADD 3
964 1/1 1/1
965 MOVD reg32, xmmreg Double FADD 3
966 1/1 1/1 */
967 16, /* size of l1 cache. */
968 2048, /* size of l2 cache. */
969 64, /* size of prefetch block */
 970 /* New AMD processors never drop prefetches; if they cannot be performed
 971    immediately, they are queued.  We set the number of simultaneous prefetches
 972    to a large constant to reflect this (leaving the number of prefetches
 973    completely unlimited is probably not a good idea either, as their
 974    execution also takes some time).  */
975 100, /* number of parallel prefetches */
976 2, /* Branch cost */
977 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
978 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
979 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
980 COSTS_N_INSNS (2), /* cost of FABS instruction. */
981 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
982 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
983
984 bdver1_memcpy,
985 bdver1_memset,
986 6, /* scalar_stmt_cost. */
987 4, /* scalar load_cost. */
988 4, /* scalar_store_cost. */
989 6, /* vec_stmt_cost. */
990 0, /* vec_to_scalar_cost. */
991 2, /* scalar_to_vec_cost. */
992 4, /* vec_align_load_cost. */
993 4, /* vec_unalign_load_cost. */
994 4, /* vec_store_cost. */
995 2, /* cond_taken_branch_cost. */
996 1, /* cond_not_taken_branch_cost. */
997 };
998
 999 /* BDVER2 has an optimized REP instruction for medium-sized blocks, but for
1000    very small blocks it is better to use a loop.  For large blocks, a libcall
1001    can do nontemporal accesses and beat inline code considerably.  */
1002
1003 static stringop_algs bdver2_memcpy[2] = {
1004 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1005 {-1, rep_prefix_4_byte, false}}},
1006 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1007 {-1, libcall, false}}}};
1008 static stringop_algs bdver2_memset[2] = {
1009 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1010 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1011 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1012 {-1, libcall, false}}}};
1013
1014 const struct processor_costs bdver2_cost = {
1015 COSTS_N_INSNS (1), /* cost of an add instruction */
1016 COSTS_N_INSNS (1), /* cost of a lea instruction */
1017 COSTS_N_INSNS (1), /* variable shift costs */
1018 COSTS_N_INSNS (1), /* constant shift costs */
1019 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1020 COSTS_N_INSNS (4), /* HI */
1021 COSTS_N_INSNS (4), /* SI */
1022 COSTS_N_INSNS (6), /* DI */
1023 COSTS_N_INSNS (6)}, /* other */
1024 0, /* cost of multiply per each bit set */
1025 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1026 COSTS_N_INSNS (35), /* HI */
1027 COSTS_N_INSNS (51), /* SI */
1028 COSTS_N_INSNS (83), /* DI */
1029 COSTS_N_INSNS (83)}, /* other */
1030 COSTS_N_INSNS (1), /* cost of movsx */
1031 COSTS_N_INSNS (1), /* cost of movzx */
1032 8, /* "large" insn */
1033 9, /* MOVE_RATIO */
1034 4, /* cost for loading QImode using movzbl */
1035 {5, 5, 4}, /* cost of loading integer registers
1036 in QImode, HImode and SImode.
1037 Relative to reg-reg move (2). */
1038 {4, 4, 4}, /* cost of storing integer registers */
1039 2, /* cost of reg,reg fld/fst */
1040 {5, 5, 12}, /* cost of loading fp registers
1041 in SFmode, DFmode and XFmode */
1042 {4, 4, 8}, /* cost of storing fp registers
1043 in SFmode, DFmode and XFmode */
1044 2, /* cost of moving MMX register */
1045 {4, 4}, /* cost of loading MMX registers
1046 in SImode and DImode */
1047 {4, 4}, /* cost of storing MMX registers
1048 in SImode and DImode */
1049 2, /* cost of moving SSE register */
1050 {4, 4, 4}, /* cost of loading SSE registers
1051 in SImode, DImode and TImode */
1052 {4, 4, 4}, /* cost of storing SSE registers
1053 in SImode, DImode and TImode */
1054 2, /* MMX or SSE register to integer */
1055 /* On K8:
1056 MOVD reg64, xmmreg Double FSTORE 4
1057 MOVD reg32, xmmreg Double FSTORE 4
1058 On AMDFAM10:
1059 MOVD reg64, xmmreg Double FADD 3
1060 1/1 1/1
1061 MOVD reg32, xmmreg Double FADD 3
1062 1/1 1/1 */
1063 16, /* size of l1 cache. */
1064 2048, /* size of l2 cache. */
1065 64, /* size of prefetch block */
1066 /* New AMD processors never drop prefetches; if they cannot be performed
1067    immediately, they are queued.  We set the number of simultaneous prefetches
1068    to a large constant to reflect this (leaving the number of prefetches
1069    completely unlimited is probably not a good idea either, as their
1070    execution also takes some time).  */
1071 100, /* number of parallel prefetches */
1072 2, /* Branch cost */
1073 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1074 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1075 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1076 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1077 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1078 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1079
1080 bdver2_memcpy,
1081 bdver2_memset,
1082 6, /* scalar_stmt_cost. */
1083 4, /* scalar load_cost. */
1084 4, /* scalar_store_cost. */
1085 6, /* vec_stmt_cost. */
1086 0, /* vec_to_scalar_cost. */
1087 2, /* scalar_to_vec_cost. */
1088 4, /* vec_align_load_cost. */
1089 4, /* vec_unalign_load_cost. */
1090 4, /* vec_store_cost. */
1091 2, /* cond_taken_branch_cost. */
1092 1, /* cond_not_taken_branch_cost. */
1093 };
1094
1095
1096 /* BDVER3 has an optimized REP instruction for medium-sized blocks, but for
1097    very small blocks it is better to use a loop.  For large blocks, a libcall
1098    can do nontemporal accesses and beat inline code considerably.  */
1099 static stringop_algs bdver3_memcpy[2] = {
1100 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1101 {-1, rep_prefix_4_byte, false}}},
1102 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1103 {-1, libcall, false}}}};
1104 static stringop_algs bdver3_memset[2] = {
1105 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1106 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1107 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1108 {-1, libcall, false}}}};
1109 struct processor_costs bdver3_cost = {
1110 COSTS_N_INSNS (1), /* cost of an add instruction */
1111 COSTS_N_INSNS (1), /* cost of a lea instruction */
1112 COSTS_N_INSNS (1), /* variable shift costs */
1113 COSTS_N_INSNS (1), /* constant shift costs */
1114 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1115 COSTS_N_INSNS (4), /* HI */
1116 COSTS_N_INSNS (4), /* SI */
1117 COSTS_N_INSNS (6), /* DI */
1118 COSTS_N_INSNS (6)}, /* other */
1119 0, /* cost of multiply per each bit set */
1120 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1121 COSTS_N_INSNS (35), /* HI */
1122 COSTS_N_INSNS (51), /* SI */
1123 COSTS_N_INSNS (83), /* DI */
1124 COSTS_N_INSNS (83)}, /* other */
1125 COSTS_N_INSNS (1), /* cost of movsx */
1126 COSTS_N_INSNS (1), /* cost of movzx */
1127 8, /* "large" insn */
1128 9, /* MOVE_RATIO */
1129 4, /* cost for loading QImode using movzbl */
1130 {5, 5, 4}, /* cost of loading integer registers
1131 in QImode, HImode and SImode.
1132 Relative to reg-reg move (2). */
1133 {4, 4, 4}, /* cost of storing integer registers */
1134 2, /* cost of reg,reg fld/fst */
1135 {5, 5, 12}, /* cost of loading fp registers
1136 in SFmode, DFmode and XFmode */
1137 {4, 4, 8}, /* cost of storing fp registers
1138 in SFmode, DFmode and XFmode */
1139 2, /* cost of moving MMX register */
1140 {4, 4}, /* cost of loading MMX registers
1141 in SImode and DImode */
1142 {4, 4}, /* cost of storing MMX registers
1143 in SImode and DImode */
1144 2, /* cost of moving SSE register */
1145 {4, 4, 4}, /* cost of loading SSE registers
1146 in SImode, DImode and TImode */
1147 {4, 4, 4}, /* cost of storing SSE registers
1148 in SImode, DImode and TImode */
1149 2, /* MMX or SSE register to integer */
1150 16, /* size of l1 cache. */
1151 2048, /* size of l2 cache. */
1152 64, /* size of prefetch block */
1153 /* New AMD processors never drop prefetches; if they cannot be performed
1154    immediately, they are queued.  We set the number of simultaneous prefetches
1155    to a large constant to reflect this (leaving the number of prefetches
1156    completely unlimited is probably not a good idea either, as their
1157    execution also takes some time).  */
1158 100, /* number of parallel prefetches */
1159 2, /* Branch cost */
1160 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1161 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1162 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1163 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1164 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1165 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1166
1167 bdver3_memcpy,
1168 bdver3_memset,
1169 6, /* scalar_stmt_cost. */
1170 4, /* scalar load_cost. */
1171 4, /* scalar_store_cost. */
1172 6, /* vec_stmt_cost. */
1173 0, /* vec_to_scalar_cost. */
1174 2, /* scalar_to_vec_cost. */
1175 4, /* vec_align_load_cost. */
1176 4, /* vec_unalign_load_cost. */
1177 4, /* vec_store_cost. */
1178 2, /* cond_taken_branch_cost. */
1179 1, /* cond_not_taken_branch_cost. */
1180 };
1181
1182 /* BDVER4 has an optimized REP instruction for medium-sized blocks, but for
1183    very small blocks it is better to use a loop.  For large blocks, a libcall
1184    can do nontemporal accesses and beat inline code considerably.  */
1185 static stringop_algs bdver4_memcpy[2] = {
1186 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1187 {-1, rep_prefix_4_byte, false}}},
1188 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1189 {-1, libcall, false}}}};
1190 static stringop_algs bdver4_memset[2] = {
1191 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1192 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1193 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1194 {-1, libcall, false}}}};
1195 struct processor_costs bdver4_cost = {
1196 COSTS_N_INSNS (1), /* cost of an add instruction */
1197 COSTS_N_INSNS (1), /* cost of a lea instruction */
1198 COSTS_N_INSNS (1), /* variable shift costs */
1199 COSTS_N_INSNS (1), /* constant shift costs */
1200 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1201 COSTS_N_INSNS (4), /* HI */
1202 COSTS_N_INSNS (4), /* SI */
1203 COSTS_N_INSNS (6), /* DI */
1204 COSTS_N_INSNS (6)}, /* other */
1205 0, /* cost of multiply per each bit set */
1206 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1207 COSTS_N_INSNS (35), /* HI */
1208 COSTS_N_INSNS (51), /* SI */
1209 COSTS_N_INSNS (83), /* DI */
1210 COSTS_N_INSNS (83)}, /* other */
1211 COSTS_N_INSNS (1), /* cost of movsx */
1212 COSTS_N_INSNS (1), /* cost of movzx */
1213 8, /* "large" insn */
1214 9, /* MOVE_RATIO */
1215 4, /* cost for loading QImode using movzbl */
1216 {5, 5, 4}, /* cost of loading integer registers
1217 in QImode, HImode and SImode.
1218 Relative to reg-reg move (2). */
1219 {4, 4, 4}, /* cost of storing integer registers */
1220 2, /* cost of reg,reg fld/fst */
1221 {5, 5, 12}, /* cost of loading fp registers
1222 in SFmode, DFmode and XFmode */
1223 {4, 4, 8}, /* cost of storing fp registers
1224 in SFmode, DFmode and XFmode */
1225 2, /* cost of moving MMX register */
1226 {4, 4}, /* cost of loading MMX registers
1227 in SImode and DImode */
1228 {4, 4}, /* cost of storing MMX registers
1229 in SImode and DImode */
1230 2, /* cost of moving SSE register */
1231 {4, 4, 4}, /* cost of loading SSE registers
1232 in SImode, DImode and TImode */
1233 {4, 4, 4}, /* cost of storing SSE registers
1234 in SImode, DImode and TImode */
1235 2, /* MMX or SSE register to integer */
1236 16, /* size of l1 cache. */
1237 2048, /* size of l2 cache. */
1238 64, /* size of prefetch block */
1239 /* New AMD processors never drop prefetches; if they cannot be performed
1240    immediately, they are queued.  We set the number of simultaneous prefetches
1241    to a large constant to reflect this (leaving the number of prefetches
1242    completely unlimited is probably not a good idea either, as their
1243    execution also takes some time).  */
1244 100, /* number of parallel prefetches */
1245 2, /* Branch cost */
1246 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1247 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1248 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1249 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1250 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1251 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1252
1253 bdver4_memcpy,
1254 bdver4_memset,
1255 6, /* scalar_stmt_cost. */
1256 4, /* scalar load_cost. */
1257 4, /* scalar_store_cost. */
1258 6, /* vec_stmt_cost. */
1259 0, /* vec_to_scalar_cost. */
1260 2, /* scalar_to_vec_cost. */
1261 4, /* vec_align_load_cost. */
1262 4, /* vec_unalign_load_cost. */
1263 4, /* vec_store_cost. */
1264 2, /* cond_taken_branch_cost. */
1265 1, /* cond_not_taken_branch_cost. */
1266 };
1267
1268 /* BTVER1 has an optimized REP instruction for medium-sized blocks, but for
1269    very small blocks it is better to use a loop.  For large blocks, a libcall
1270    can do nontemporal accesses and beat inline code considerably.  */
1271 static stringop_algs btver1_memcpy[2] = {
1272 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1273 {-1, rep_prefix_4_byte, false}}},
1274 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1275 {-1, libcall, false}}}};
1276 static stringop_algs btver1_memset[2] = {
1277 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1278 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1279 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1280 {-1, libcall, false}}}};
1281 const struct processor_costs btver1_cost = {
1282 COSTS_N_INSNS (1), /* cost of an add instruction */
1283 COSTS_N_INSNS (2), /* cost of a lea instruction */
1284 COSTS_N_INSNS (1), /* variable shift costs */
1285 COSTS_N_INSNS (1), /* constant shift costs */
1286 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1287 COSTS_N_INSNS (4), /* HI */
1288 COSTS_N_INSNS (3), /* SI */
1289 COSTS_N_INSNS (4), /* DI */
1290 COSTS_N_INSNS (5)}, /* other */
1291 0, /* cost of multiply per each bit set */
1292 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1293 COSTS_N_INSNS (35), /* HI */
1294 COSTS_N_INSNS (51), /* SI */
1295 COSTS_N_INSNS (83), /* DI */
1296 COSTS_N_INSNS (83)}, /* other */
1297 COSTS_N_INSNS (1), /* cost of movsx */
1298 COSTS_N_INSNS (1), /* cost of movzx */
1299 8, /* "large" insn */
1300 9, /* MOVE_RATIO */
1301 4, /* cost for loading QImode using movzbl */
1302 {3, 4, 3}, /* cost of loading integer registers
1303 in QImode, HImode and SImode.
1304 Relative to reg-reg move (2). */
1305 {3, 4, 3}, /* cost of storing integer registers */
1306 4, /* cost of reg,reg fld/fst */
1307 {4, 4, 12}, /* cost of loading fp registers
1308 in SFmode, DFmode and XFmode */
1309 {6, 6, 8}, /* cost of storing fp registers
1310 in SFmode, DFmode and XFmode */
1311 2, /* cost of moving MMX register */
1312 {3, 3}, /* cost of loading MMX registers
1313 in SImode and DImode */
1314 {4, 4}, /* cost of storing MMX registers
1315 in SImode and DImode */
1316 2, /* cost of moving SSE register */
1317 {4, 4, 3}, /* cost of loading SSE registers
1318 in SImode, DImode and TImode */
1319 {4, 4, 5}, /* cost of storing SSE registers
1320 in SImode, DImode and TImode */
1321 3, /* MMX or SSE register to integer */
1322 /* On K8:
1323 MOVD reg64, xmmreg Double FSTORE 4
1324 MOVD reg32, xmmreg Double FSTORE 4
1325 On AMDFAM10:
1326 MOVD reg64, xmmreg Double FADD 3
1327 1/1 1/1
1328 MOVD reg32, xmmreg Double FADD 3
1329 1/1 1/1 */
1330 32, /* size of l1 cache. */
1331 512, /* size of l2 cache. */
1332 64, /* size of prefetch block */
1333 100, /* number of parallel prefetches */
1334 2, /* Branch cost */
1335 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1336 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1337 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1338 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1339 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1340 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1341
1342 btver1_memcpy,
1343 btver1_memset,
1344 4, /* scalar_stmt_cost. */
1345 2, /* scalar load_cost. */
1346 2, /* scalar_store_cost. */
1347 6, /* vec_stmt_cost. */
1348 0, /* vec_to_scalar_cost. */
1349 2, /* scalar_to_vec_cost. */
1350 2, /* vec_align_load_cost. */
1351 2, /* vec_unalign_load_cost. */
1352 2, /* vec_store_cost. */
1353 2, /* cond_taken_branch_cost. */
1354 1, /* cond_not_taken_branch_cost. */
1355 };
1356
1357 static stringop_algs btver2_memcpy[2] = {
1358 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1359 {-1, rep_prefix_4_byte, false}}},
1360 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1361 {-1, libcall, false}}}};
1362 static stringop_algs btver2_memset[2] = {
1363 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1364 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1365 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1366 {-1, libcall, false}}}};
1367 const struct processor_costs btver2_cost = {
1368 COSTS_N_INSNS (1), /* cost of an add instruction */
1369 COSTS_N_INSNS (2), /* cost of a lea instruction */
1370 COSTS_N_INSNS (1), /* variable shift costs */
1371 COSTS_N_INSNS (1), /* constant shift costs */
1372 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1373 COSTS_N_INSNS (4), /* HI */
1374 COSTS_N_INSNS (3), /* SI */
1375 COSTS_N_INSNS (4), /* DI */
1376 COSTS_N_INSNS (5)}, /* other */
1377 0, /* cost of multiply per each bit set */
1378 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1379 COSTS_N_INSNS (35), /* HI */
1380 COSTS_N_INSNS (51), /* SI */
1381 COSTS_N_INSNS (83), /* DI */
1382 COSTS_N_INSNS (83)}, /* other */
1383 COSTS_N_INSNS (1), /* cost of movsx */
1384 COSTS_N_INSNS (1), /* cost of movzx */
1385 8, /* "large" insn */
1386 9, /* MOVE_RATIO */
1387 4, /* cost for loading QImode using movzbl */
1388 {3, 4, 3}, /* cost of loading integer registers
1389 in QImode, HImode and SImode.
1390 Relative to reg-reg move (2). */
1391 {3, 4, 3}, /* cost of storing integer registers */
1392 4, /* cost of reg,reg fld/fst */
1393 {4, 4, 12}, /* cost of loading fp registers
1394 in SFmode, DFmode and XFmode */
1395 {6, 6, 8}, /* cost of storing fp registers
1396 in SFmode, DFmode and XFmode */
1397 2, /* cost of moving MMX register */
1398 {3, 3}, /* cost of loading MMX registers
1399 in SImode and DImode */
1400 {4, 4}, /* cost of storing MMX registers
1401 in SImode and DImode */
1402 2, /* cost of moving SSE register */
1403 {4, 4, 3}, /* cost of loading SSE registers
1404 in SImode, DImode and TImode */
1405 {4, 4, 5}, /* cost of storing SSE registers
1406 in SImode, DImode and TImode */
1407 3, /* MMX or SSE register to integer */
1408 /* On K8:
1409 MOVD reg64, xmmreg Double FSTORE 4
1410 MOVD reg32, xmmreg Double FSTORE 4
1411 On AMDFAM10:
1412 MOVD reg64, xmmreg Double FADD 3
1413 1/1 1/1
1414 MOVD reg32, xmmreg Double FADD 3
1415 1/1 1/1 */
1416 32, /* size of l1 cache. */
1417 2048, /* size of l2 cache. */
1418 64, /* size of prefetch block */
1419 100, /* number of parallel prefetches */
1420 2, /* Branch cost */
1421 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1422 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1423 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1424 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1425 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1426 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1427 btver2_memcpy,
1428 btver2_memset,
1429 4, /* scalar_stmt_cost. */
1430 2, /* scalar_load_cost. */
1431 2, /* scalar_store_cost. */
1432 6, /* vec_stmt_cost. */
1433 0, /* vec_to_scalar_cost. */
1434 2, /* scalar_to_vec_cost. */
1435 2, /* vec_align_load_cost. */
1436 2, /* vec_unalign_load_cost. */
1437 2, /* vec_store_cost. */
1438 2, /* cond_taken_branch_cost. */
1439 1, /* cond_not_taken_branch_cost. */
1440 };
1441
1442 static stringop_algs pentium4_memcpy[2] = {
1443 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1444 DUMMY_STRINGOP_ALGS};
1445 static stringop_algs pentium4_memset[2] = {
1446 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1447 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1448 DUMMY_STRINGOP_ALGS};
1449
1450 static const
1451 struct processor_costs pentium4_cost = {
1452 COSTS_N_INSNS (1), /* cost of an add instruction */
1453 COSTS_N_INSNS (3), /* cost of a lea instruction */
1454 COSTS_N_INSNS (4), /* variable shift costs */
1455 COSTS_N_INSNS (4), /* constant shift costs */
1456 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
1457 COSTS_N_INSNS (15), /* HI */
1458 COSTS_N_INSNS (15), /* SI */
1459 COSTS_N_INSNS (15), /* DI */
1460 COSTS_N_INSNS (15)}, /* other */
1461 0, /* cost of multiply per each bit set */
1462 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
1463 COSTS_N_INSNS (56), /* HI */
1464 COSTS_N_INSNS (56), /* SI */
1465 COSTS_N_INSNS (56), /* DI */
1466 COSTS_N_INSNS (56)}, /* other */
1467 COSTS_N_INSNS (1), /* cost of movsx */
1468 COSTS_N_INSNS (1), /* cost of movzx */
1469 16, /* "large" insn */
1470 6, /* MOVE_RATIO */
1471 2, /* cost for loading QImode using movzbl */
1472 {4, 5, 4}, /* cost of loading integer registers
1473 in QImode, HImode and SImode.
1474 Relative to reg-reg move (2). */
1475 {2, 3, 2}, /* cost of storing integer registers */
1476 2, /* cost of reg,reg fld/fst */
1477 {2, 2, 6}, /* cost of loading fp registers
1478 in SFmode, DFmode and XFmode */
1479 {4, 4, 6}, /* cost of storing fp registers
1480 in SFmode, DFmode and XFmode */
1481 2, /* cost of moving MMX register */
1482 {2, 2}, /* cost of loading MMX registers
1483 in SImode and DImode */
1484 {2, 2}, /* cost of storing MMX registers
1485 in SImode and DImode */
1486 12, /* cost of moving SSE register */
1487 {12, 12, 12}, /* cost of loading SSE registers
1488 in SImode, DImode and TImode */
1489 {2, 2, 8}, /* cost of storing SSE registers
1490 in SImode, DImode and TImode */
1491 10, /* MMX or SSE register to integer */
1492 8, /* size of l1 cache. */
1493 256, /* size of l2 cache. */
1494 64, /* size of prefetch block */
1495 6, /* number of parallel prefetches */
1496 2, /* Branch cost */
1497 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
1498 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
1499 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
1500 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1501 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1502 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
1503 pentium4_memcpy,
1504 pentium4_memset,
1505 1, /* scalar_stmt_cost. */
1506 1, /* scalar_load_cost. */
1507 1, /* scalar_store_cost. */
1508 1, /* vec_stmt_cost. */
1509 1, /* vec_to_scalar_cost. */
1510 1, /* scalar_to_vec_cost. */
1511 1, /* vec_align_load_cost. */
1512 2, /* vec_unalign_load_cost. */
1513 1, /* vec_store_cost. */
1514 3, /* cond_taken_branch_cost. */
1515 1, /* cond_not_taken_branch_cost. */
1516 };
1517
1518 static stringop_algs nocona_memcpy[2] = {
1519 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1520 {libcall, {{32, loop, false}, {20000, rep_prefix_8_byte, false},
1521 {100000, unrolled_loop, false}, {-1, libcall, false}}}};
1522
1523 static stringop_algs nocona_memset[2] = {
1524 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1525 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1526 {libcall, {{24, loop, false}, {64, unrolled_loop, false},
1527 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1528
1529 static const
1530 struct processor_costs nocona_cost = {
1531 COSTS_N_INSNS (1), /* cost of an add instruction */
1532 COSTS_N_INSNS (1), /* cost of a lea instruction */
1533 COSTS_N_INSNS (1), /* variable shift costs */
1534 COSTS_N_INSNS (1), /* constant shift costs */
1535 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
1536 COSTS_N_INSNS (10), /* HI */
1537 COSTS_N_INSNS (10), /* SI */
1538 COSTS_N_INSNS (10), /* DI */
1539 COSTS_N_INSNS (10)}, /* other */
1540 0, /* cost of multiply per each bit set */
1541 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
1542 COSTS_N_INSNS (66), /* HI */
1543 COSTS_N_INSNS (66), /* SI */
1544 COSTS_N_INSNS (66), /* DI */
1545 COSTS_N_INSNS (66)}, /* other */
1546 COSTS_N_INSNS (1), /* cost of movsx */
1547 COSTS_N_INSNS (1), /* cost of movzx */
1548 16, /* "large" insn */
1549 17, /* MOVE_RATIO */
1550 4, /* cost for loading QImode using movzbl */
1551 {4, 4, 4}, /* cost of loading integer registers
1552 in QImode, HImode and SImode.
1553 Relative to reg-reg move (2). */
1554 {4, 4, 4}, /* cost of storing integer registers */
1555 3, /* cost of reg,reg fld/fst */
1556 {12, 12, 12}, /* cost of loading fp registers
1557 in SFmode, DFmode and XFmode */
1558 {4, 4, 4}, /* cost of storing fp registers
1559 in SFmode, DFmode and XFmode */
1560 6, /* cost of moving MMX register */
1561 {12, 12}, /* cost of loading MMX registers
1562 in SImode and DImode */
1563 {12, 12}, /* cost of storing MMX registers
1564 in SImode and DImode */
1565 6, /* cost of moving SSE register */
1566 {12, 12, 12}, /* cost of loading SSE registers
1567 in SImode, DImode and TImode */
1568 {12, 12, 12}, /* cost of storing SSE registers
1569 in SImode, DImode and TImode */
1570 8, /* MMX or SSE register to integer */
1571 8, /* size of l1 cache. */
1572 1024, /* size of l2 cache. */
1573 64, /* size of prefetch block */
1574 8, /* number of parallel prefetches */
1575 1, /* Branch cost */
1576 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1577 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1578 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
1579 COSTS_N_INSNS (3), /* cost of FABS instruction. */
1580 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
1581 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
1582 nocona_memcpy,
1583 nocona_memset,
1584 1, /* scalar_stmt_cost. */
1585 1, /* scalar_load_cost. */
1586 1, /* scalar_store_cost. */
1587 1, /* vec_stmt_cost. */
1588 1, /* vec_to_scalar_cost. */
1589 1, /* scalar_to_vec_cost. */
1590 1, /* vec_align_load_cost. */
1591 2, /* vec_unalign_load_cost. */
1592 1, /* vec_store_cost. */
1593 3, /* cond_taken_branch_cost. */
1594 1, /* cond_not_taken_branch_cost. */
1595 };
1596
1597 static stringop_algs atom_memcpy[2] = {
1598 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1599 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1600 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1601 static stringop_algs atom_memset[2] = {
1602 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1603 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1604 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1605 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1606 static const
1607 struct processor_costs atom_cost = {
1608 COSTS_N_INSNS (1), /* cost of an add instruction */
1609 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1610 COSTS_N_INSNS (1), /* variable shift costs */
1611 COSTS_N_INSNS (1), /* constant shift costs */
1612 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1613 COSTS_N_INSNS (4), /* HI */
1614 COSTS_N_INSNS (3), /* SI */
1615 COSTS_N_INSNS (4), /* DI */
1616 COSTS_N_INSNS (2)}, /* other */
1617 0, /* cost of multiply per each bit set */
1618 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1619 COSTS_N_INSNS (26), /* HI */
1620 COSTS_N_INSNS (42), /* SI */
1621 COSTS_N_INSNS (74), /* DI */
1622 COSTS_N_INSNS (74)}, /* other */
1623 COSTS_N_INSNS (1), /* cost of movsx */
1624 COSTS_N_INSNS (1), /* cost of movzx */
1625 8, /* "large" insn */
1626 17, /* MOVE_RATIO */
1627 4, /* cost for loading QImode using movzbl */
1628 {4, 4, 4}, /* cost of loading integer registers
1629 in QImode, HImode and SImode.
1630 Relative to reg-reg move (2). */
1631 {4, 4, 4}, /* cost of storing integer registers */
1632 4, /* cost of reg,reg fld/fst */
1633 {12, 12, 12}, /* cost of loading fp registers
1634 in SFmode, DFmode and XFmode */
1635 {6, 6, 8}, /* cost of storing fp registers
1636 in SFmode, DFmode and XFmode */
1637 2, /* cost of moving MMX register */
1638 {8, 8}, /* cost of loading MMX registers
1639 in SImode and DImode */
1640 {8, 8}, /* cost of storing MMX registers
1641 in SImode and DImode */
1642 2, /* cost of moving SSE register */
1643 {8, 8, 8}, /* cost of loading SSE registers
1644 in SImode, DImode and TImode */
1645 {8, 8, 8}, /* cost of storing SSE registers
1646 in SImode, DImode and TImode */
1647 5, /* MMX or SSE register to integer */
1648 32, /* size of l1 cache. */
1649 256, /* size of l2 cache. */
1650 64, /* size of prefetch block */
1651 6, /* number of parallel prefetches */
1652 3, /* Branch cost */
1653 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1654 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1655 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1656 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1657 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1658 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1659 atom_memcpy,
1660 atom_memset,
1661 1, /* scalar_stmt_cost. */
1662 1, /* scalar_load_cost. */
1663 1, /* scalar_store_cost. */
1664 1, /* vec_stmt_cost. */
1665 1, /* vec_to_scalar_cost. */
1666 1, /* scalar_to_vec_cost. */
1667 1, /* vec_align_load_cost. */
1668 2, /* vec_unalign_load_cost. */
1669 1, /* vec_store_cost. */
1670 3, /* cond_taken_branch_cost. */
1671 1, /* cond_not_taken_branch_cost. */
1672 };
1673
1674 static stringop_algs slm_memcpy[2] = {
1675 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1676 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1677 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1678 static stringop_algs slm_memset[2] = {
1679 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1680 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1681 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1682 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1683 static const
1684 struct processor_costs slm_cost = {
1685 COSTS_N_INSNS (1), /* cost of an add instruction */
1686 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1687 COSTS_N_INSNS (1), /* variable shift costs */
1688 COSTS_N_INSNS (1), /* constant shift costs */
1689 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1690 COSTS_N_INSNS (3), /* HI */
1691 COSTS_N_INSNS (3), /* SI */
1692 COSTS_N_INSNS (4), /* DI */
1693 COSTS_N_INSNS (2)}, /* other */
1694 0, /* cost of multiply per each bit set */
1695 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1696 COSTS_N_INSNS (26), /* HI */
1697 COSTS_N_INSNS (42), /* SI */
1698 COSTS_N_INSNS (74), /* DI */
1699 COSTS_N_INSNS (74)}, /* other */
1700 COSTS_N_INSNS (1), /* cost of movsx */
1701 COSTS_N_INSNS (1), /* cost of movzx */
1702 8, /* "large" insn */
1703 17, /* MOVE_RATIO */
1704 4, /* cost for loading QImode using movzbl */
1705 {4, 4, 4}, /* cost of loading integer registers
1706 in QImode, HImode and SImode.
1707 Relative to reg-reg move (2). */
1708 {4, 4, 4}, /* cost of storing integer registers */
1709 4, /* cost of reg,reg fld/fst */
1710 {12, 12, 12}, /* cost of loading fp registers
1711 in SFmode, DFmode and XFmode */
1712 {6, 6, 8}, /* cost of storing fp registers
1713 in SFmode, DFmode and XFmode */
1714 2, /* cost of moving MMX register */
1715 {8, 8}, /* cost of loading MMX registers
1716 in SImode and DImode */
1717 {8, 8}, /* cost of storing MMX registers
1718 in SImode and DImode */
1719 2, /* cost of moving SSE register */
1720 {8, 8, 8}, /* cost of loading SSE registers
1721 in SImode, DImode and TImode */
1722 {8, 8, 8}, /* cost of storing SSE registers
1723 in SImode, DImode and TImode */
1724 5, /* MMX or SSE register to integer */
1725 32, /* size of l1 cache. */
1726 256, /* size of l2 cache. */
1727 64, /* size of prefetch block */
1728 6, /* number of parallel prefetches */
1729 3, /* Branch cost */
1730 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1731 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1732 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1733 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1734 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1735 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1736 slm_memcpy,
1737 slm_memset,
1738 1, /* scalar_stmt_cost. */
1739 1, /* scalar_load_cost. */
1740 1, /* scalar_store_cost. */
1741 1, /* vec_stmt_cost. */
1742 4, /* vec_to_scalar_cost. */
1743 1, /* scalar_to_vec_cost. */
1744 1, /* vec_align_load_cost. */
1745 2, /* vec_unalign_load_cost. */
1746 1, /* vec_store_cost. */
1747 3, /* cond_taken_branch_cost. */
1748 1, /* cond_not_taken_branch_cost. */
1749 };
1750
1751 static stringop_algs intel_memcpy[2] = {
1752 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1753 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1754 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1755 static stringop_algs intel_memset[2] = {
1756 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1757 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1758 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1759 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1760 static const
1761 struct processor_costs intel_cost = {
1762 COSTS_N_INSNS (1), /* cost of an add instruction */
1763 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1764 COSTS_N_INSNS (1), /* variable shift costs */
1765 COSTS_N_INSNS (1), /* constant shift costs */
1766 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1767 COSTS_N_INSNS (3), /* HI */
1768 COSTS_N_INSNS (3), /* SI */
1769 COSTS_N_INSNS (4), /* DI */
1770 COSTS_N_INSNS (2)}, /* other */
1771 0, /* cost of multiply per each bit set */
1772 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1773 COSTS_N_INSNS (26), /* HI */
1774 COSTS_N_INSNS (42), /* SI */
1775 COSTS_N_INSNS (74), /* DI */
1776 COSTS_N_INSNS (74)}, /* other */
1777 COSTS_N_INSNS (1), /* cost of movsx */
1778 COSTS_N_INSNS (1), /* cost of movzx */
1779 8, /* "large" insn */
1780 17, /* MOVE_RATIO */
1781 4, /* cost for loading QImode using movzbl */
1782 {4, 4, 4}, /* cost of loading integer registers
1783 in QImode, HImode and SImode.
1784 Relative to reg-reg move (2). */
1785 {4, 4, 4}, /* cost of storing integer registers */
1786 4, /* cost of reg,reg fld/fst */
1787 {12, 12, 12}, /* cost of loading fp registers
1788 in SFmode, DFmode and XFmode */
1789 {6, 6, 8}, /* cost of storing fp registers
1790 in SFmode, DFmode and XFmode */
1791 2, /* cost of moving MMX register */
1792 {8, 8}, /* cost of loading MMX registers
1793 in SImode and DImode */
1794 {8, 8}, /* cost of storing MMX registers
1795 in SImode and DImode */
1796 2, /* cost of moving SSE register */
1797 {8, 8, 8}, /* cost of loading SSE registers
1798 in SImode, DImode and TImode */
1799 {8, 8, 8}, /* cost of storing SSE registers
1800 in SImode, DImode and TImode */
1801 5, /* MMX or SSE register to integer */
1802 32, /* size of l1 cache. */
1803 256, /* size of l2 cache. */
1804 64, /* size of prefetch block */
1805 6, /* number of parallel prefetches */
1806 3, /* Branch cost */
1807 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1808 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1809 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1810 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1811 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1812 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1813 intel_memcpy,
1814 intel_memset,
1815 1, /* scalar_stmt_cost. */
1816 1, /* scalar_load_cost. */
1817 1, /* scalar_store_cost. */
1818 1, /* vec_stmt_cost. */
1819 4, /* vec_to_scalar_cost. */
1820 1, /* scalar_to_vec_cost. */
1821 1, /* vec_align_load_cost. */
1822 2, /* vec_unalign_load_cost. */
1823 1, /* vec_store_cost. */
1824 3, /* cond_taken_branch_cost. */
1825 1, /* cond_not_taken_branch_cost. */
1826 };
1827
1828 /* Generic should produce code tuned for Core i7 (and newer chips)
1829 and btver1 (and newer chips). */
1830
1831 static stringop_algs generic_memcpy[2] = {
1832 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1833 {-1, libcall, false}}},
1834 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1835 {-1, libcall, false}}}};
1836 static stringop_algs generic_memset[2] = {
1837 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1838 {-1, libcall, false}}},
1839 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1840 {-1, libcall, false}}}};
1841 static const
1842 struct processor_costs generic_cost = {
1843 COSTS_N_INSNS (1), /* cost of an add instruction */
1844 /* On all chips taken into consideration, lea is 2 cycles or more. With
1845 this cost, however, our current implementation of synth_mult results in
1846 the use of unnecessary temporary registers, causing regressions on several
1847 SPECfp benchmarks. */
1848 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1849 COSTS_N_INSNS (1), /* variable shift costs */
1850 COSTS_N_INSNS (1), /* constant shift costs */
1851 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1852 COSTS_N_INSNS (4), /* HI */
1853 COSTS_N_INSNS (3), /* SI */
1854 COSTS_N_INSNS (4), /* DI */
1855 COSTS_N_INSNS (2)}, /* other */
1856 0, /* cost of multiply per each bit set */
1857 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1858 COSTS_N_INSNS (26), /* HI */
1859 COSTS_N_INSNS (42), /* SI */
1860 COSTS_N_INSNS (74), /* DI */
1861 COSTS_N_INSNS (74)}, /* other */
1862 COSTS_N_INSNS (1), /* cost of movsx */
1863 COSTS_N_INSNS (1), /* cost of movzx */
1864 8, /* "large" insn */
1865 17, /* MOVE_RATIO */
1866 4, /* cost for loading QImode using movzbl */
1867 {4, 4, 4}, /* cost of loading integer registers
1868 in QImode, HImode and SImode.
1869 Relative to reg-reg move (2). */
1870 {4, 4, 4}, /* cost of storing integer registers */
1871 4, /* cost of reg,reg fld/fst */
1872 {12, 12, 12}, /* cost of loading fp registers
1873 in SFmode, DFmode and XFmode */
1874 {6, 6, 8}, /* cost of storing fp registers
1875 in SFmode, DFmode and XFmode */
1876 2, /* cost of moving MMX register */
1877 {8, 8}, /* cost of loading MMX registers
1878 in SImode and DImode */
1879 {8, 8}, /* cost of storing MMX registers
1880 in SImode and DImode */
1881 2, /* cost of moving SSE register */
1882 {8, 8, 8}, /* cost of loading SSE registers
1883 in SImode, DImode and TImode */
1884 {8, 8, 8}, /* cost of storing SSE registers
1885 in SImode, DImode and TImode */
1886 5, /* MMX or SSE register to integer */
1887 32, /* size of l1 cache. */
1888 512, /* size of l2 cache. */
1889 64, /* size of prefetch block */
1890 6, /* number of parallel prefetches */
1891 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
1892 value is increased to the perhaps more appropriate value of 5. */
1893 3, /* Branch cost */
1894 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1895 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1896 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1897 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1898 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1899 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1900 generic_memcpy,
1901 generic_memset,
1902 1, /* scalar_stmt_cost. */
1903 1, /* scalar_load_cost. */
1904 1, /* scalar_store_cost. */
1905 1, /* vec_stmt_cost. */
1906 1, /* vec_to_scalar_cost. */
1907 1, /* scalar_to_vec_cost. */
1908 1, /* vec_align_load_cost. */
1909 2, /* vec_unalign_load_cost. */
1910 1, /* vec_store_cost. */
1911 3, /* cond_taken_branch_cost. */
1912 1, /* cond_not_taken_branch_cost. */
1913 };
1914
1915 /* core_cost should produce code tuned for the Core family of CPUs. */
1916 static stringop_algs core_memcpy[2] = {
1917 {libcall, {{1024, rep_prefix_4_byte, true}, {-1, libcall, false}}},
1918 {libcall, {{24, loop, true}, {128, rep_prefix_8_byte, true},
1919 {-1, libcall, false}}}};
1920 static stringop_algs core_memset[2] = {
1921 {libcall, {{6, loop_1_byte, true},
1922 {24, loop, true},
1923 {8192, rep_prefix_4_byte, true},
1924 {-1, libcall, false}}},
1925 {libcall, {{24, loop, true}, {512, rep_prefix_8_byte, true},
1926 {-1, libcall, false}}}};
1927
1928 static const
1929 struct processor_costs core_cost = {
1930 COSTS_N_INSNS (1), /* cost of an add instruction */
1931 /* On all chips taken into consideration, lea is 2 cycles or more. With
1932 this cost, however, our current implementation of synth_mult results in
1933 the use of unnecessary temporary registers, causing regressions on several
1934 SPECfp benchmarks. */
1935 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1936 COSTS_N_INSNS (1), /* variable shift costs */
1937 COSTS_N_INSNS (1), /* constant shift costs */
1938 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1939 COSTS_N_INSNS (4), /* HI */
1940 COSTS_N_INSNS (3), /* SI */
1941 COSTS_N_INSNS (4), /* DI */
1942 COSTS_N_INSNS (2)}, /* other */
1943 0, /* cost of multiply per each bit set */
1944 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1945 COSTS_N_INSNS (26), /* HI */
1946 COSTS_N_INSNS (42), /* SI */
1947 COSTS_N_INSNS (74), /* DI */
1948 COSTS_N_INSNS (74)}, /* other */
1949 COSTS_N_INSNS (1), /* cost of movsx */
1950 COSTS_N_INSNS (1), /* cost of movzx */
1951 8, /* "large" insn */
1952 17, /* MOVE_RATIO */
1953 4, /* cost for loading QImode using movzbl */
1954 {4, 4, 4}, /* cost of loading integer registers
1955 in QImode, HImode and SImode.
1956 Relative to reg-reg move (2). */
1957 {4, 4, 4}, /* cost of storing integer registers */
1958 4, /* cost of reg,reg fld/fst */
1959 {12, 12, 12}, /* cost of loading fp registers
1960 in SFmode, DFmode and XFmode */
1961 {6, 6, 8}, /* cost of storing fp registers
1962 in SFmode, DFmode and XFmode */
1963 2, /* cost of moving MMX register */
1964 {8, 8}, /* cost of loading MMX registers
1965 in SImode and DImode */
1966 {8, 8}, /* cost of storing MMX registers
1967 in SImode and DImode */
1968 2, /* cost of moving SSE register */
1969 {8, 8, 8}, /* cost of loading SSE registers
1970 in SImode, DImode and TImode */
1971 {8, 8, 8}, /* cost of storing SSE registers
1972 in SImode, DImode and TImode */
1973 5, /* MMX or SSE register to integer */
1974 64, /* size of l1 cache. */
1975 512, /* size of l2 cache. */
1976 64, /* size of prefetch block */
1977 6, /* number of parallel prefetches */
1978 /* FIXME perhaps more appropriate value is 5. */
1979 3, /* Branch cost */
1980 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1981 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1982 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1983 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1984 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1985 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1986 core_memcpy,
1987 core_memset,
1988 1, /* scalar_stmt_cost. */
1989 1, /* scalar_load_cost. */
1990 1, /* scalar_store_cost. */
1991 1, /* vec_stmt_cost. */
1992 1, /* vec_to_scalar_cost. */
1993 1, /* scalar_to_vec_cost. */
1994 1, /* vec_align_load_cost. */
1995 2, /* vec_unalign_load_cost. */
1996 1, /* vec_store_cost. */
1997 3, /* cond_taken_branch_cost. */
1998 1, /* cond_not_taken_branch_cost. */
1999 };
2000
2001
2002 /* Set by -mtune. */
2003 const struct processor_costs *ix86_tune_cost = &pentium_cost;
2004
2005 /* Set by -mtune or -Os. */
2006 const struct processor_costs *ix86_cost = &pentium_cost;
2007
2008 /* Processor feature/optimization bitmasks. */
2009 #define m_386 (1<<PROCESSOR_I386)
2010 #define m_486 (1<<PROCESSOR_I486)
2011 #define m_PENT (1<<PROCESSOR_PENTIUM)
2012 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
2013 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
2014 #define m_NOCONA (1<<PROCESSOR_NOCONA)
2015 #define m_P4_NOCONA (m_PENT4 | m_NOCONA)
2016 #define m_CORE2 (1<<PROCESSOR_CORE2)
2017 #define m_NEHALEM (1<<PROCESSOR_NEHALEM)
2018 #define m_SANDYBRIDGE (1<<PROCESSOR_SANDYBRIDGE)
2019 #define m_HASWELL (1<<PROCESSOR_HASWELL)
2020 #define m_CORE_ALL (m_CORE2 | m_NEHALEM | m_SANDYBRIDGE | m_HASWELL)
2021 #define m_BONNELL (1<<PROCESSOR_BONNELL)
2022 #define m_SILVERMONT (1<<PROCESSOR_SILVERMONT)
2023 #define m_INTEL (1<<PROCESSOR_INTEL)
2024
2025 #define m_GEODE (1<<PROCESSOR_GEODE)
2026 #define m_K6 (1<<PROCESSOR_K6)
2027 #define m_K6_GEODE (m_K6 | m_GEODE)
2028 #define m_K8 (1<<PROCESSOR_K8)
2029 #define m_ATHLON (1<<PROCESSOR_ATHLON)
2030 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
2031 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
2032 #define m_BDVER1 (1<<PROCESSOR_BDVER1)
2033 #define m_BDVER2 (1<<PROCESSOR_BDVER2)
2034 #define m_BDVER3 (1<<PROCESSOR_BDVER3)
2035 #define m_BDVER4 (1<<PROCESSOR_BDVER4)
2036 #define m_BTVER1 (1<<PROCESSOR_BTVER1)
2037 #define m_BTVER2 (1<<PROCESSOR_BTVER2)
2038 #define m_BDVER (m_BDVER1 | m_BDVER2 | m_BDVER3 | m_BDVER4)
2039 #define m_BTVER (m_BTVER1 | m_BTVER2)
2040 #define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER)
2041
2042 #define m_GENERIC (1<<PROCESSOR_GENERIC)
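/* An illustrative sketch (X86_TUNE_EXAMPLE and "example_feature" are
   made-up names, not real entries from x86-tune.def): a tuning entry
   combines the masks above into a selector, along the lines of

     DEF_TUNE (X86_TUNE_EXAMPLE, "example_feature",
               m_CORE_ALL | m_AMD_MULTIPLE | m_GENERIC)

   and set_ix86_tune_features below then tests that selector against
   (1u << ix86_tune) to decide whether the feature is enabled for the
   processor selected by -mtune.  */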
2043
2044 const char* ix86_tune_feature_names[X86_TUNE_LAST] = {
2045 #undef DEF_TUNE
2046 #define DEF_TUNE(tune, name, selector) name,
2047 #include "x86-tune.def"
2048 #undef DEF_TUNE
2049 };
2050
2051 /* Feature tests against the various tunings. */
2052 unsigned char ix86_tune_features[X86_TUNE_LAST];
2053
2054 /* Feature tests against the various tunings used to create ix86_tune_features
2055 based on the processor mask. */
2056 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
2057 #undef DEF_TUNE
2058 #define DEF_TUNE(tune, name, selector) selector,
2059 #include "x86-tune.def"
2060 #undef DEF_TUNE
2061 };
2062
2063 /* Feature tests against the various architecture variations. */
2064 unsigned char ix86_arch_features[X86_ARCH_LAST];
2065
2066 /* Feature tests against the various architecture variations, used to create
2067 ix86_arch_features based on the processor mask. */
2068 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
2069 /* X86_ARCH_CMOV: Conditional move was added for pentiumpro. */
2070 ~(m_386 | m_486 | m_PENT | m_K6),
2071
2072 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
2073 ~m_386,
2074
2075 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
2076 ~(m_386 | m_486),
2077
2078 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
2079 ~m_386,
2080
2081 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
2082 ~m_386,
2083 };
2084
2085 /* If the average insn count for a single function invocation is
2086 lower than this constant, emit fast (but longer) prologue and
2087 epilogue code. */
2088 #define FAST_PROLOGUE_INSN_COUNT 20
2089
2090 /* Names for the 8-bit (low), 8-bit (high) and 16-bit registers, respectively. */
2091 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
2092 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
2093 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
2094
2095 /* Array of the smallest class containing reg number REGNO, indexed by
2096 REGNO. Used by REGNO_REG_CLASS in i386.h. */
2097
2098 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
2099 {
2100 /* ax, dx, cx, bx */
2101 AREG, DREG, CREG, BREG,
2102 /* si, di, bp, sp */
2103 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
2104 /* FP registers */
2105 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
2106 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
2107 /* arg pointer */
2108 NON_Q_REGS,
2109 /* flags, fpsr, fpcr, frame */
2110 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
2111 /* SSE registers */
2112 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2113 SSE_REGS, SSE_REGS,
2114 /* MMX registers */
2115 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
2116 MMX_REGS, MMX_REGS,
2117 /* REX registers */
2118 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2119 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2120 /* SSE REX registers */
2121 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2122 SSE_REGS, SSE_REGS,
2123 /* AVX-512 SSE registers */
2124 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2125 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2126 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2127 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2128 /* Mask registers. */
2129 MASK_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2130 MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2131 };
2132
2133 /* The "default" register map used in 32bit mode. */
2134
2135 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
2136 {
2137 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2138 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2139 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2140 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2141 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2142 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2143 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2144 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2145 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2146 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2147 };
2148
2149 /* The "default" register map used in 64bit mode. */
2150
2151 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
2152 {
2153 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2154 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2155 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2156 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2157 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2158 8,9,10,11,12,13,14,15, /* extended integer registers */
2159 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
2160 67, 68, 69, 70, 71, 72, 73, 74, /* AVX-512 registers 16-23 */
2161 75, 76, 77, 78, 79, 80, 81, 82, /* AVX-512 registers 24-31 */
2162 118, 119, 120, 121, 122, 123, 124, 125, /* Mask registers */
2163 };
2164
2165 /* Define the register numbers to be used in Dwarf debugging information.
2166 The SVR4 reference port C compiler uses the following register numbers
2167 in its Dwarf output code:
2168 0 for %eax (gcc regno = 0)
2169 1 for %ecx (gcc regno = 2)
2170 2 for %edx (gcc regno = 1)
2171 3 for %ebx (gcc regno = 3)
2172 4 for %esp (gcc regno = 7)
2173 5 for %ebp (gcc regno = 6)
2174 6 for %esi (gcc regno = 4)
2175 7 for %edi (gcc regno = 5)
2176 The following three DWARF register numbers are never generated by
2177 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2178 believes these numbers have these meanings.
2179 8 for %eip (no gcc equivalent)
2180 9 for %eflags (gcc regno = 17)
2181 10 for %trapno (no gcc equivalent)
2182 It is not at all clear how we should number the FP stack registers
2183 for the x86 architecture. If the version of SDB on x86/svr4 were
2184 a bit less brain dead with respect to floating-point then we would
2185 have a precedent to follow with respect to DWARF register numbers
2186 for x86 FP registers, but the SDB on x86/svr4 is so completely
2187 broken with respect to FP registers that it is hardly worth thinking
2188 of it as something to strive for compatibility with.
2189 The version of x86/svr4 SDB I have at the moment does (partially)
2190 seem to believe that DWARF register number 11 is associated with
2191 the x86 register %st(0), but that's about all. Higher DWARF
2192 register numbers don't seem to be associated with anything in
2193 particular, and even for DWARF regno 11, SDB only seems to under-
2194 stand that it should say that a variable lives in %st(0) (when
2195 asked via an `=' command) if we said it was in DWARF regno 11,
2196 but SDB still prints garbage when asked for the value of the
2197 variable in question (via a `/' command).
2198 (Also note that the labels SDB prints for various FP stack regs
2199 when doing an `x' command are all wrong.)
2200 Note that these problems generally don't affect the native SVR4
2201 C compiler because it doesn't allow the use of -O with -g and
2202 because when it is *not* optimizing, it allocates a memory
2203 location for each floating-point variable, and the memory
2204 location is what gets described in the DWARF AT_location
2205 attribute for the variable in question.
2206 Regardless of the severe mental illness of the x86/svr4 SDB, we
2207 do something sensible here and we use the following DWARF
2208 register numbers. Note that these are all stack-top-relative
2209 numbers.
2210 11 for %st(0) (gcc regno = 8)
2211 12 for %st(1) (gcc regno = 9)
2212 13 for %st(2) (gcc regno = 10)
2213 14 for %st(3) (gcc regno = 11)
2214 15 for %st(4) (gcc regno = 12)
2215 16 for %st(5) (gcc regno = 13)
2216 17 for %st(6) (gcc regno = 14)
2217 18 for %st(7) (gcc regno = 15)
2218 */
2219 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
2220 {
2221 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2222 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2223 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2224 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2225 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2226 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2227 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2228 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2229 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2230 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2231 };
2232
2233 /* Define parameter passing and return registers. */
2234
2235 static int const x86_64_int_parameter_registers[6] =
2236 {
2237 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
2238 };
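/* I.e. under the SysV AMD64 calling convention the first six integer
   arguments are passed in rdi, rsi, rdx, rcx, r8 and r9, in that order.  */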
2239
2240 static int const x86_64_ms_abi_int_parameter_registers[4] =
2241 {
2242 CX_REG, DX_REG, R8_REG, R9_REG
2243 };
2244
2245 static int const x86_64_int_return_registers[4] =
2246 {
2247 AX_REG, DX_REG, DI_REG, SI_REG
2248 };
2249
2250 /* Additional registers clobbered by SysV calls (call-saved in the MS ABI but call-clobbered in the SysV ABI). */
2251
2252 int const x86_64_ms_sysv_extra_clobbered_registers[12] =
2253 {
2254 SI_REG, DI_REG,
2255 XMM6_REG, XMM7_REG,
2256 XMM8_REG, XMM9_REG, XMM10_REG, XMM11_REG,
2257 XMM12_REG, XMM13_REG, XMM14_REG, XMM15_REG
2258 };
2259
2260 /* Define the structure for the machine field in struct function. */
2261
2262 struct GTY(()) stack_local_entry {
2263 unsigned short mode;
2264 unsigned short n;
2265 rtx rtl;
2266 struct stack_local_entry *next;
2267 };
2268
2269 /* Structure describing stack frame layout.
2270 Stack grows downward:
2271
2272 [arguments]
2273 <- ARG_POINTER
2274 saved pc
2275
2276 saved static chain if ix86_static_chain_on_stack
2277
2278 saved frame pointer if frame_pointer_needed
2279 <- HARD_FRAME_POINTER
2280 [saved regs]
2281 <- regs_save_offset
2282 [padding0]
2283
2284 [saved SSE regs]
2285 <- sse_regs_save_offset
2286 [padding1] |
2287 | <- FRAME_POINTER
2288 [va_arg registers] |
2289 |
2290 [frame] |
2291 |
2292 [padding2] | = to_allocate
2293 <- STACK_POINTER
2294 */
2295 struct ix86_frame
2296 {
2297 int nsseregs;
2298 int nregs;
2299 int va_arg_size;
2300 int red_zone_size;
2301 int outgoing_arguments_size;
2302
2303 /* The offsets relative to ARG_POINTER. */
2304 HOST_WIDE_INT frame_pointer_offset;
2305 HOST_WIDE_INT hard_frame_pointer_offset;
2306 HOST_WIDE_INT stack_pointer_offset;
2307 HOST_WIDE_INT hfp_save_offset;
2308 HOST_WIDE_INT reg_save_offset;
2309 HOST_WIDE_INT sse_reg_save_offset;
2310
2311 /* When save_regs_using_mov is set, emit prologue using
2312 move instead of push instructions. */
2313 bool save_regs_using_mov;
2314 };
2315
2316 /* Which cpu are we scheduling for. */
2317 enum attr_cpu ix86_schedule;
2318
2319 /* Which cpu are we optimizing for. */
2320 enum processor_type ix86_tune;
2321
2322 /* Which instruction set architecture to use. */
2323 enum processor_type ix86_arch;
2324
2325 /* True if processor has SSE prefetch instruction. */
2326 unsigned char x86_prefetch_sse;
2327
2328 /* -mstackrealign option */
2329 static const char ix86_force_align_arg_pointer_string[]
2330 = "force_align_arg_pointer";
2331
2332 static rtx (*ix86_gen_leave) (void);
2333 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
2334 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
2335 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
2336 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
2337 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
2338 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
2339 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
2340 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
2341 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
2342 static rtx (*ix86_gen_tls_global_dynamic_64) (rtx, rtx, rtx);
2343 static rtx (*ix86_gen_tls_local_dynamic_base_64) (rtx, rtx);
2344
2345 /* Preferred alignment for stack boundary in bits. */
2346 unsigned int ix86_preferred_stack_boundary;
2347
2348 /* Alignment for incoming stack boundary in bits specified at
2349 command line. */
2350 static unsigned int ix86_user_incoming_stack_boundary;
2351
2352 /* Default alignment for incoming stack boundary in bits. */
2353 static unsigned int ix86_default_incoming_stack_boundary;
2354
2355 /* Alignment for incoming stack boundary in bits. */
2356 unsigned int ix86_incoming_stack_boundary;
2357
2358 /* Calling-ABI-specific va_list type nodes. */
2359 static GTY(()) tree sysv_va_list_type_node;
2360 static GTY(()) tree ms_va_list_type_node;
2361
2362 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
2363 char internal_label_prefix[16];
2364 int internal_label_prefix_len;
2365
2366 /* Fence to use after loop using movnt. */
2367 tree x86_mfence;
2368
2369 /* Register class used for passing a given 64-bit part of an argument.
2370 These represent the classes documented by the psABI, with the exception
2371 of the SSESF and SSEDF classes, which are basically the SSE class; gcc just
2372 uses SFmode or DFmode moves instead of DImode to avoid reformatting penalties.
2373
2374 Similarly, we play games with INTEGERSI_CLASS to use cheaper SImode moves
2375 whenever possible (the upper half then contains only padding). */
2376 enum x86_64_reg_class
2377 {
2378 X86_64_NO_CLASS,
2379 X86_64_INTEGER_CLASS,
2380 X86_64_INTEGERSI_CLASS,
2381 X86_64_SSE_CLASS,
2382 X86_64_SSESF_CLASS,
2383 X86_64_SSEDF_CLASS,
2384 X86_64_SSEUP_CLASS,
2385 X86_64_X87_CLASS,
2386 X86_64_X87UP_CLASS,
2387 X86_64_COMPLEX_X87_CLASS,
2388 X86_64_MEMORY_CLASS
2389 };
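/* For illustration (an example derived from the psABI classification
   rules, not text taken from this file): an argument of type

     struct { double d; int i; }

   occupies two eightbytes and is classified roughly as
   { X86_64_SSEDF_CLASS, X86_64_INTEGERSI_CLASS }, so it is passed in one
   SSE register and one general-purpose register.  */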
2390
2391 #define MAX_CLASSES 8
2392
2393 /* Table of constants used by fldpi, fldln2, etc.... */
2394 static REAL_VALUE_TYPE ext_80387_constants_table [5];
2395 static bool ext_80387_constants_init = 0;
2396
2397 \f
2398 static struct machine_function * ix86_init_machine_status (void);
2399 static rtx ix86_function_value (const_tree, const_tree, bool);
2400 static bool ix86_function_value_regno_p (const unsigned int);
2401 static unsigned int ix86_function_arg_boundary (enum machine_mode,
2402 const_tree);
2403 static rtx ix86_static_chain (const_tree, bool);
2404 static int ix86_function_regparm (const_tree, const_tree);
2405 static void ix86_compute_frame_layout (struct ix86_frame *);
2406 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
2407 rtx, rtx, int);
2408 static void ix86_add_new_builtins (HOST_WIDE_INT);
2409 static tree ix86_canonical_va_list_type (tree);
2410 static void predict_jump (int);
2411 static unsigned int split_stack_prologue_scratch_regno (void);
2412 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
2413
2414 enum ix86_function_specific_strings
2415 {
2416 IX86_FUNCTION_SPECIFIC_ARCH,
2417 IX86_FUNCTION_SPECIFIC_TUNE,
2418 IX86_FUNCTION_SPECIFIC_MAX
2419 };
2420
2421 static char *ix86_target_string (HOST_WIDE_INT, int, const char *,
2422 const char *, enum fpmath_unit, bool);
2423 static void ix86_function_specific_save (struct cl_target_option *,
2424 struct gcc_options *opts);
2425 static void ix86_function_specific_restore (struct gcc_options *opts,
2426 struct cl_target_option *);
2427 static void ix86_function_specific_print (FILE *, int,
2428 struct cl_target_option *);
2429 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2430 static bool ix86_valid_target_attribute_inner_p (tree, char *[],
2431 struct gcc_options *,
2432 struct gcc_options *,
2433 struct gcc_options *);
2434 static bool ix86_can_inline_p (tree, tree);
2435 static void ix86_set_current_function (tree);
2436 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2437
2438 static enum calling_abi ix86_function_abi (const_tree);
2439
2440 \f
2441 #ifndef SUBTARGET32_DEFAULT_CPU
2442 #define SUBTARGET32_DEFAULT_CPU "i386"
2443 #endif
2444
2445 /* Whether -mtune= or -march= were specified */
2446 static int ix86_tune_defaulted;
2447 static int ix86_arch_specified;
2448
2449 /* Vectorization library interface and handlers. */
2450 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2451
2452 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2453 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2454
2455 /* Processor target table, indexed by processor number */
2456 struct ptt
2457 {
2458 const char *const name; /* processor name */
2459 const struct processor_costs *cost; /* Processor costs */
2460 const int align_loop; /* Default alignments. */
2461 const int align_loop_max_skip;
2462 const int align_jump;
2463 const int align_jump_max_skip;
2464 const int align_func;
2465 };
2466
2467 /* This table must be in sync with enum processor_type in i386.h. */
2468 static const struct ptt processor_target_table[PROCESSOR_max] =
2469 {
2470 {"generic", &generic_cost, 16, 10, 16, 10, 16},
2471 {"i386", &i386_cost, 4, 3, 4, 3, 4},
2472 {"i486", &i486_cost, 16, 15, 16, 15, 16},
2473 {"pentium", &pentium_cost, 16, 7, 16, 7, 16},
2474 {"pentiumpro", &pentiumpro_cost, 16, 15, 16, 10, 16},
2475 {"pentium4", &pentium4_cost, 0, 0, 0, 0, 0},
2476 {"nocona", &nocona_cost, 0, 0, 0, 0, 0},
2477 {"core2", &core_cost, 16, 10, 16, 10, 16},
2478 {"nehalem", &core_cost, 16, 10, 16, 10, 16},
2479 {"sandybridge", &core_cost, 16, 10, 16, 10, 16},
2480 {"haswell", &core_cost, 16, 10, 16, 10, 16},
2481 {"bonnell", &atom_cost, 16, 15, 16, 7, 16},
2482 {"silvermont", &slm_cost, 16, 15, 16, 7, 16},
2483 {"intel", &intel_cost, 16, 15, 16, 7, 16},
2484 {"geode", &geode_cost, 0, 0, 0, 0, 0},
2485 {"k6", &k6_cost, 32, 7, 32, 7, 32},
2486 {"athlon", &athlon_cost, 16, 7, 16, 7, 16},
2487 {"k8", &k8_cost, 16, 7, 16, 7, 16},
2488 {"amdfam10", &amdfam10_cost, 32, 24, 32, 7, 32},
2489 {"bdver1", &bdver1_cost, 16, 10, 16, 7, 11},
2490 {"bdver2", &bdver2_cost, 16, 10, 16, 7, 11},
2491 {"bdver3", &bdver3_cost, 16, 10, 16, 7, 11},
2492 {"bdver4", &bdver4_cost, 16, 10, 16, 7, 11},
2493 {"btver1", &btver1_cost, 16, 10, 16, 7, 11},
2494 {"btver2", &btver2_cost, 16, 10, 16, 7, 11}
2495 };
2496 \f
2497 static unsigned int
2498 rest_of_handle_insert_vzeroupper (void)
2499 {
2500 int i;
2501
2502 /* vzeroupper instructions are inserted immediately after reload to
2503 account for possible spills from 256bit registers. The pass
2504 reuses the mode switching infrastructure by re-running the mode
2505 insertion pass, so disable entities that have already been processed. */
2506 for (i = 0; i < MAX_386_ENTITIES; i++)
2507 ix86_optimize_mode_switching[i] = 0;
2508
2509 ix86_optimize_mode_switching[AVX_U128] = 1;
2510
2511 /* Call optimize_mode_switching. */
2512 g->get_passes ()->execute_pass_mode_switching ();
2513 return 0;
2514 }
2515
2516 namespace {
2517
2518 const pass_data pass_data_insert_vzeroupper =
2519 {
2520 RTL_PASS, /* type */
2521 "vzeroupper", /* name */
2522 OPTGROUP_NONE, /* optinfo_flags */
2523 true, /* has_execute */
2524 TV_NONE, /* tv_id */
2525 0, /* properties_required */
2526 0, /* properties_provided */
2527 0, /* properties_destroyed */
2528 0, /* todo_flags_start */
2529 ( TODO_df_finish | TODO_verify_rtl_sharing | 0 ), /* todo_flags_finish */
2530 };
2531
2532 class pass_insert_vzeroupper : public rtl_opt_pass
2533 {
2534 public:
2535 pass_insert_vzeroupper(gcc::context *ctxt)
2536 : rtl_opt_pass(pass_data_insert_vzeroupper, ctxt)
2537 {}
2538
2539 /* opt_pass methods: */
2540 virtual bool gate (function *)
2541 {
2542 return TARGET_AVX && !TARGET_AVX512F && TARGET_VZEROUPPER;
2543 }
2544
2545 unsigned int execute () { return rest_of_handle_insert_vzeroupper (); }
2546
2547 }; // class pass_insert_vzeroupper
2548
2549 } // anon namespace
2550
2551 rtl_opt_pass *
2552 make_pass_insert_vzeroupper (gcc::context *ctxt)
2553 {
2554 return new pass_insert_vzeroupper (ctxt);
2555 }
2556
2557 /* Return true if a red-zone is in use. */
2558
2559 static inline bool
2560 ix86_using_red_zone (void)
2561 {
2562 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
2563 }
2564 \f
2565 /* Return a string that documents the current -m options. The caller is
2566 responsible for freeing the string. */
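/* An illustrative (not verbatim) result, for a hypothetical set of
   options, might look like

     "-march=foo -mtune=bar -m64 -msse2 -msse -mmmx -mfpmath=sse"

   with -march=/-mtune= first, then the ABI switch, then the enabled ISA
   and flag options, and -mfpmath= last, as built below.  */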
2567
2568 static char *
2569 ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
2570 const char *tune, enum fpmath_unit fpmath,
2571 bool add_nl_p)
2572 {
2573 struct ix86_target_opts
2574 {
2575 const char *option; /* option string */
2576 HOST_WIDE_INT mask; /* isa mask options */
2577 };
2578
2579 /* This table is ordered so that options like -msse4.2, which imply
2580 other ISA options, are matched first. */
2581 static struct ix86_target_opts isa_opts[] =
2582 {
2583 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2584 { "-mfma", OPTION_MASK_ISA_FMA },
2585 { "-mxop", OPTION_MASK_ISA_XOP },
2586 { "-mlwp", OPTION_MASK_ISA_LWP },
2587 { "-mavx512f", OPTION_MASK_ISA_AVX512F },
2588 { "-mavx512er", OPTION_MASK_ISA_AVX512ER },
2589 { "-mavx512cd", OPTION_MASK_ISA_AVX512CD },
2590 { "-mavx512pf", OPTION_MASK_ISA_AVX512PF },
2591 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2592 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2593 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2594 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2595 { "-msse3", OPTION_MASK_ISA_SSE3 },
2596 { "-msse2", OPTION_MASK_ISA_SSE2 },
2597 { "-msse", OPTION_MASK_ISA_SSE },
2598 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2599 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2600 { "-mmmx", OPTION_MASK_ISA_MMX },
2601 { "-mabm", OPTION_MASK_ISA_ABM },
2602 { "-mbmi", OPTION_MASK_ISA_BMI },
2603 { "-mbmi2", OPTION_MASK_ISA_BMI2 },
2604 { "-mlzcnt", OPTION_MASK_ISA_LZCNT },
2605 { "-mhle", OPTION_MASK_ISA_HLE },
2606 { "-mfxsr", OPTION_MASK_ISA_FXSR },
2607 { "-mrdseed", OPTION_MASK_ISA_RDSEED },
2608 { "-mprfchw", OPTION_MASK_ISA_PRFCHW },
2609 { "-madx", OPTION_MASK_ISA_ADX },
2610 { "-mtbm", OPTION_MASK_ISA_TBM },
2611 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2612 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2613 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2614 { "-maes", OPTION_MASK_ISA_AES },
2615 { "-msha", OPTION_MASK_ISA_SHA },
2616 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2617 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
2618 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
2619 { "-mf16c", OPTION_MASK_ISA_F16C },
2620 { "-mrtm", OPTION_MASK_ISA_RTM },
2621 { "-mxsave", OPTION_MASK_ISA_XSAVE },
2622 { "-mxsaveopt", OPTION_MASK_ISA_XSAVEOPT },
2623 { "-mprefetchwt1", OPTION_MASK_ISA_PREFETCHWT1 },
2624 };
2625
2626 /* Flag options. */
2627 static struct ix86_target_opts flag_opts[] =
2628 {
2629 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2630 { "-mlong-double-128", MASK_LONG_DOUBLE_128 },
2631 { "-mlong-double-64", MASK_LONG_DOUBLE_64 },
2632 { "-m80387", MASK_80387 },
2633 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2634 { "-malign-double", MASK_ALIGN_DOUBLE },
2635 { "-mcld", MASK_CLD },
2636 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2637 { "-mieee-fp", MASK_IEEE_FP },
2638 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2639 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2640 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2641 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2642 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2643 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2644 { "-mno-red-zone", MASK_NO_RED_ZONE },
2645 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2646 { "-mrecip", MASK_RECIP },
2647 { "-mrtd", MASK_RTD },
2648 { "-msseregparm", MASK_SSEREGPARM },
2649 { "-mstack-arg-probe", MASK_STACK_PROBE },
2650 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2651 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
2652 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
2653 { "-mvzeroupper", MASK_VZEROUPPER },
2654 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD},
2655 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE},
2656 { "-mprefer-avx128", MASK_PREFER_AVX128},
2657 };
2658
2659 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2660
2661 char isa_other[40];
2662 char target_other[40];
2663 unsigned num = 0;
2664 unsigned i, j;
2665 char *ret;
2666 char *ptr;
2667 size_t len;
2668 size_t line_len;
2669 size_t sep_len;
2670 const char *abi;
2671
2672 memset (opts, '\0', sizeof (opts));
2673
2674 /* Add -march= option. */
2675 if (arch)
2676 {
2677 opts[num][0] = "-march=";
2678 opts[num++][1] = arch;
2679 }
2680
2681 /* Add -mtune= option. */
2682 if (tune)
2683 {
2684 opts[num][0] = "-mtune=";
2685 opts[num++][1] = tune;
2686 }
2687
2688 /* Add -m32/-m64/-mx32. */
2689 if ((isa & OPTION_MASK_ISA_64BIT) != 0)
2690 {
2691 if ((isa & OPTION_MASK_ABI_64) != 0)
2692 abi = "-m64";
2693 else
2694 abi = "-mx32";
2695 isa &= ~ (OPTION_MASK_ISA_64BIT
2696 | OPTION_MASK_ABI_64
2697 | OPTION_MASK_ABI_X32);
2698 }
2699 else
2700 abi = "-m32";
2701 opts[num++][0] = abi;
2702
2703 /* Pick out the options in isa options. */
2704 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2705 {
2706 if ((isa & isa_opts[i].mask) != 0)
2707 {
2708 opts[num++][0] = isa_opts[i].option;
2709 isa &= ~ isa_opts[i].mask;
2710 }
2711 }
2712
2713 if (isa && add_nl_p)
2714 {
2715 opts[num++][0] = isa_other;
2716 sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)",
2717 isa);
2718 }
2719
2720 /* Add flag options. */
2721 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2722 {
2723 if ((flags & flag_opts[i].mask) != 0)
2724 {
2725 opts[num++][0] = flag_opts[i].option;
2726 flags &= ~ flag_opts[i].mask;
2727 }
2728 }
2729
2730 if (flags && add_nl_p)
2731 {
2732 opts[num++][0] = target_other;
2733 sprintf (target_other, "(other flags: %#x)", flags);
2734 }
2735
2736 /* Add -fpmath= option. */
2737 if (fpmath)
2738 {
2739 opts[num][0] = "-mfpmath=";
2740 switch ((int) fpmath)
2741 {
2742 case FPMATH_387:
2743 opts[num++][1] = "387";
2744 break;
2745
2746 case FPMATH_SSE:
2747 opts[num++][1] = "sse";
2748 break;
2749
2750 case FPMATH_387 | FPMATH_SSE:
2751 opts[num++][1] = "sse+387";
2752 break;
2753
2754 default:
2755 gcc_unreachable ();
2756 }
2757 }
2758
2759 /* Any options? */
2760 if (num == 0)
2761 return NULL;
2762
2763 gcc_assert (num < ARRAY_SIZE (opts));
2764
2765 /* Size the string. */
2766 len = 0;
2767 sep_len = (add_nl_p) ? 3 : 1;
2768 for (i = 0; i < num; i++)
2769 {
2770 len += sep_len;
2771 for (j = 0; j < 2; j++)
2772 if (opts[i][j])
2773 len += strlen (opts[i][j]);
2774 }
2775
2776 /* Build the string. */
2777 ret = ptr = (char *) xmalloc (len);
2778 line_len = 0;
2779
2780 for (i = 0; i < num; i++)
2781 {
2782 size_t len2[2];
2783
2784 for (j = 0; j < 2; j++)
2785 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2786
2787 if (i != 0)
2788 {
2789 *ptr++ = ' ';
2790 line_len++;
2791
2792 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2793 {
2794 *ptr++ = '\\';
2795 *ptr++ = '\n';
2796 line_len = 0;
2797 }
2798 }
2799
2800 for (j = 0; j < 2; j++)
2801 if (opts[i][j])
2802 {
2803 memcpy (ptr, opts[i][j], len2[j]);
2804 ptr += len2[j];
2805 line_len += len2[j];
2806 }
2807 }
2808
2809 *ptr = '\0';
2810 gcc_assert (ret + len >= ptr);
2811
2812 return ret;
2813 }
2814
2815 /* Return true if profiling code should be emitted before the
2816 prologue, and false otherwise.
2817 Note: for x86 the "hotfix" case is reported with sorry (). */
2818 static bool
2819 ix86_profile_before_prologue (void)
2820 {
2821 return flag_fentry != 0;
2822 }
2823
2824 /* Function that is callable from the debugger to print the current
2825 options. */
2826 void ATTRIBUTE_UNUSED
2827 ix86_debug_options (void)
2828 {
2829 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2830 ix86_arch_string, ix86_tune_string,
2831 ix86_fpmath, true);
2832
2833 if (opts)
2834 {
2835 fprintf (stderr, "%s\n\n", opts);
2836 free (opts);
2837 }
2838 else
2839 fputs ("<no options>\n\n", stderr);
2840
2841 return;
2842 }
2843
2844 static const char *stringop_alg_names[] = {
2845 #define DEF_ENUM
2846 #define DEF_ALG(alg, name) #name,
2847 #include "stringop.def"
2848 #undef DEF_ENUM
2849 #undef DEF_ALG
2850 };
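/* For example (an illustrative reading of stringop.def, whose entries
   have the form DEF_ALG (alg, name)): an entry such as
   DEF_ALG (rep_prefix_8_byte, rep_8byte) would contribute the string
   "rep_8byte" at index rep_prefix_8_byte; these strings are the
   user-visible names accepted by the -mmemcpy-strategy= and
   -mmemset-strategy= parser below.  */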
2851
2852 /* Parse parameter string passed to -mmemcpy-strategy= or -mmemset-strategy=.
2853 The string is of the following form (or a comma-separated list of such entries):
2854
2855 strategy_alg:max_size:[align|noalign]
2856
2857 where the full size range for the strategy is either [0, max_size] or
2858 [min_size, max_size], in which min_size is the max_size + 1 of the
2859 preceding range. The last size range must have max_size == -1.
2860
2861 Examples:
2862
2863 1.
2864 -mmemcpy-strategy=libcall:-1:noalign
2865
2866 this is equivalent, for known-size memcpy, to -mstringop-strategy=libcall
2867
2868
2869 2.
2870 -mmemset-strategy=rep_8byte:16:noalign,vector_loop:2048:align,libcall:-1:noalign
2871
2872 This tells the compiler to use the following strategy for memset:
2873 1) when the expected size is between [1, 16], use rep_8byte strategy;
2874 2) when the size is between [17, 2048], use vector_loop;
2875 3) when the size is > 2048, use libcall. */
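/* As a rough sketch (not actual compiler data), example 2 above ends up
   overriding the default size table with entries of the form
   {max, alg, noalign}:

     {16,   rep_prefix_8_byte, true},
     {2048, vector_loop,       false},
     {-1,   libcall,           true}

   as performed by ix86_parse_stringop_strategy_string below.  */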
2876
2877 struct stringop_size_range
2878 {
2879 int max;
2880 stringop_alg alg;
2881 bool noalign;
2882 };
2883
2884 static void
2885 ix86_parse_stringop_strategy_string (char *strategy_str, bool is_memset)
2886 {
2887 const struct stringop_algs *default_algs;
2888 stringop_size_range input_ranges[MAX_STRINGOP_ALGS];
2889 char *curr_range_str, *next_range_str;
2890 int i = 0, n = 0;
2891
2892 if (is_memset)
2893 default_algs = &ix86_cost->memset[TARGET_64BIT != 0];
2894 else
2895 default_algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
2896
2897 curr_range_str = strategy_str;
2898
2899 do
2900 {
2901 int maxs;
2902 char alg_name[128];
2903 char align[16];
2904 next_range_str = strchr (curr_range_str, ',');
2905 if (next_range_str)
2906 *next_range_str++ = '\0';
2907
2908 if (3 != sscanf (curr_range_str, "%20[^:]:%d:%10s",
2909 alg_name, &maxs, align))
2910 {
2911 error ("wrong arg %s to option %s", curr_range_str,
2912 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2913 return;
2914 }
2915
2916 if (n > 0 && (maxs < (input_ranges[n - 1].max + 1) && maxs != -1))
2917 {
2918 error ("size ranges of option %s should be increasing",
2919 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2920 return;
2921 }
2922
2923 for (i = 0; i < last_alg; i++)
2924 if (!strcmp (alg_name, stringop_alg_names[i]))
2925 break;
2926
2927 if (i == last_alg)
2928 {
2929 error ("wrong stringop strategy name %s specified for option %s",
2930 alg_name,
2931 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2932 return;
2933 }
2934
2935 input_ranges[n].max = maxs;
2936 input_ranges[n].alg = (stringop_alg) i;
2937 if (!strcmp (align, "align"))
2938 input_ranges[n].noalign = false;
2939 else if (!strcmp (align, "noalign"))
2940 input_ranges[n].noalign = true;
2941 else
2942 {
2943 error ("unknown alignment %s specified for option %s",
2944 align, is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2945 return;
2946 }
2947 n++;
2948 curr_range_str = next_range_str;
2949 }
2950 while (curr_range_str);
2951
2952 if (input_ranges[n - 1].max != -1)
2953 {
2954 error ("the max value for the last size range should be -1"
2955 " for option %s",
2956 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2957 return;
2958 }
2959
2960 if (n > MAX_STRINGOP_ALGS)
2961 {
2962 error ("too many size ranges specified in option %s",
2963 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2964 return;
2965 }
2966
2967 /* Now override the default algs array. */
2968 for (i = 0; i < n; i++)
2969 {
2970 *const_cast<int *>(&default_algs->size[i].max) = input_ranges[i].max;
2971 *const_cast<stringop_alg *>(&default_algs->size[i].alg)
2972 = input_ranges[i].alg;
2973 *const_cast<int *>(&default_algs->size[i].noalign)
2974 = input_ranges[i].noalign;
2975 }
2976 }
2977
2978 \f
2979 /* Parse the -mtune-ctrl= option. When DUMP is true,
2980 print the features that are explicitly set. */
2981
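/* Accepted syntax, as implemented below: a comma-separated list of tuning
   feature names, each optionally prefixed with '^' to clear that feature
   instead of setting it -- e.g. -mtune-ctrl=feature_a,^feature_b, where the
   feature names here are purely illustrative; the real names come from
   ix86_tune_feature_names[].  */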
2982 static void
2983 parse_mtune_ctrl_str (bool dump)
2984 {
2985 if (!ix86_tune_ctrl_string)
2986 return;
2987
2988 char *next_feature_string = NULL;
2989 char *curr_feature_string = xstrdup (ix86_tune_ctrl_string);
2990 char *orig = curr_feature_string;
2991 int i;
2992 do
2993 {
2994 bool clear = false;
2995
2996 next_feature_string = strchr (curr_feature_string, ',');
2997 if (next_feature_string)
2998 *next_feature_string++ = '\0';
2999 if (*curr_feature_string == '^')
3000 {
3001 curr_feature_string++;
3002 clear = true;
3003 }
3004 for (i = 0; i < X86_TUNE_LAST; i++)
3005 {
3006 if (!strcmp (curr_feature_string, ix86_tune_feature_names[i]))
3007 {
3008 ix86_tune_features[i] = !clear;
3009 if (dump)
3010 fprintf (stderr, "Explicitly %s feature %s\n",
3011 clear ? "clear" : "set", ix86_tune_feature_names[i]);
3012 break;
3013 }
3014 }
3015 if (i == X86_TUNE_LAST)
3016 error ("Unknown parameter to option -mtune-ctrl: %s",
3017 clear ? curr_feature_string - 1 : curr_feature_string);
3018 curr_feature_string = next_feature_string;
3019 }
3020 while (curr_feature_string);
3021 free (orig);
3022 }
3023
3024 /* Helper function to set ix86_tune_features. IX86_TUNE is the
3025 processor type. */
3026
3027 static void
3028 set_ix86_tune_features (enum processor_type ix86_tune, bool dump)
3029 {
3030 unsigned int ix86_tune_mask = 1u << ix86_tune;
3031 int i;
3032
3033 for (i = 0; i < X86_TUNE_LAST; ++i)
3034 {
3035 if (ix86_tune_no_default)
3036 ix86_tune_features[i] = 0;
3037 else
3038 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3039 }
3040
3041 if (dump)
3042 {
3043 fprintf (stderr, "List of x86 specific tuning parameter names:\n");
3044 for (i = 0; i < X86_TUNE_LAST; i++)
3045 fprintf (stderr, "%s : %s\n", ix86_tune_feature_names[i],
3046 ix86_tune_features[i] ? "on" : "off");
3047 }
3048
3049 parse_mtune_ctrl_str (dump);
3050 }
3051
3052
3053 /* Override various settings based on options. If MAIN_ARGS_P, the
3054 options are from the command line, otherwise they are from
3055 attributes. */
3056
3057 static void
3058 ix86_option_override_internal (bool main_args_p,
3059 struct gcc_options *opts,
3060 struct gcc_options *opts_set)
3061 {
3062 int i;
3063 unsigned int ix86_arch_mask;
3064 const bool ix86_tune_specified = (opts->x_ix86_tune_string != NULL);
3065 const char *prefix;
3066 const char *suffix;
3067 const char *sw;
3068
3069 #define PTA_3DNOW (HOST_WIDE_INT_1 << 0)
3070 #define PTA_3DNOW_A (HOST_WIDE_INT_1 << 1)
3071 #define PTA_64BIT (HOST_WIDE_INT_1 << 2)
3072 #define PTA_ABM (HOST_WIDE_INT_1 << 3)
3073 #define PTA_AES (HOST_WIDE_INT_1 << 4)
3074 #define PTA_AVX (HOST_WIDE_INT_1 << 5)
3075 #define PTA_BMI (HOST_WIDE_INT_1 << 6)
3076 #define PTA_CX16 (HOST_WIDE_INT_1 << 7)
3077 #define PTA_F16C (HOST_WIDE_INT_1 << 8)
3078 #define PTA_FMA (HOST_WIDE_INT_1 << 9)
3079 #define PTA_FMA4 (HOST_WIDE_INT_1 << 10)
3080 #define PTA_FSGSBASE (HOST_WIDE_INT_1 << 11)
3081 #define PTA_LWP (HOST_WIDE_INT_1 << 12)
3082 #define PTA_LZCNT (HOST_WIDE_INT_1 << 13)
3083 #define PTA_MMX (HOST_WIDE_INT_1 << 14)
3084 #define PTA_MOVBE (HOST_WIDE_INT_1 << 15)
3085 #define PTA_NO_SAHF (HOST_WIDE_INT_1 << 16)
3086 #define PTA_PCLMUL (HOST_WIDE_INT_1 << 17)
3087 #define PTA_POPCNT (HOST_WIDE_INT_1 << 18)
3088 #define PTA_PREFETCH_SSE (HOST_WIDE_INT_1 << 19)
3089 #define PTA_RDRND (HOST_WIDE_INT_1 << 20)
3090 #define PTA_SSE (HOST_WIDE_INT_1 << 21)
3091 #define PTA_SSE2 (HOST_WIDE_INT_1 << 22)
3092 #define PTA_SSE3 (HOST_WIDE_INT_1 << 23)
3093 #define PTA_SSE4_1 (HOST_WIDE_INT_1 << 24)
3094 #define PTA_SSE4_2 (HOST_WIDE_INT_1 << 25)
3095 #define PTA_SSE4A (HOST_WIDE_INT_1 << 26)
3096 #define PTA_SSSE3 (HOST_WIDE_INT_1 << 27)
3097 #define PTA_TBM (HOST_WIDE_INT_1 << 28)
3098 #define PTA_XOP (HOST_WIDE_INT_1 << 29)
3099 #define PTA_AVX2 (HOST_WIDE_INT_1 << 30)
3100 #define PTA_BMI2 (HOST_WIDE_INT_1 << 31)
3101 #define PTA_RTM (HOST_WIDE_INT_1 << 32)
3102 #define PTA_HLE (HOST_WIDE_INT_1 << 33)
3103 #define PTA_PRFCHW (HOST_WIDE_INT_1 << 34)
3104 #define PTA_RDSEED (HOST_WIDE_INT_1 << 35)
3105 #define PTA_ADX (HOST_WIDE_INT_1 << 36)
3106 #define PTA_FXSR (HOST_WIDE_INT_1 << 37)
3107 #define PTA_XSAVE (HOST_WIDE_INT_1 << 38)
3108 #define PTA_XSAVEOPT (HOST_WIDE_INT_1 << 39)
3109 #define PTA_AVX512F (HOST_WIDE_INT_1 << 40)
3110 #define PTA_AVX512ER (HOST_WIDE_INT_1 << 41)
3111 #define PTA_AVX512PF (HOST_WIDE_INT_1 << 42)
3112 #define PTA_AVX512CD (HOST_WIDE_INT_1 << 43)
3113 #define PTA_SHA (HOST_WIDE_INT_1 << 45)
3114 #define PTA_PREFETCHWT1 (HOST_WIDE_INT_1 << 46)
3115
3116 #define PTA_CORE2 \
3117 (PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 \
3118 | PTA_CX16 | PTA_FXSR)
3119 #define PTA_NEHALEM \
3120 (PTA_CORE2 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_POPCNT)
3121 #define PTA_WESTMERE \
3122 (PTA_NEHALEM | PTA_AES | PTA_PCLMUL)
3123 #define PTA_SANDYBRIDGE \
3124 (PTA_WESTMERE | PTA_AVX | PTA_XSAVE | PTA_XSAVEOPT)
3125 #define PTA_IVYBRIDGE \
3126 (PTA_SANDYBRIDGE | PTA_FSGSBASE | PTA_RDRND | PTA_F16C)
3127 #define PTA_HASWELL \
3128 (PTA_IVYBRIDGE | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_LZCNT \
3129 | PTA_FMA | PTA_MOVBE | PTA_RTM | PTA_HLE)
3130 #define PTA_BROADWELL \
3131 (PTA_HASWELL | PTA_ADX | PTA_PRFCHW | PTA_RDSEED)
3132 #define PTA_BONNELL \
3133 (PTA_CORE2 | PTA_MOVBE)
3134 #define PTA_SILVERMONT \
3135 (PTA_WESTMERE | PTA_MOVBE)
3136
3137 /* If this reaches 64, the flags field in struct pta below needs to be widened. */
3138
3139 static struct pta
3140 {
3141 const char *const name; /* processor name or nickname. */
3142 const enum processor_type processor;
3143 const enum attr_cpu schedule;
3144 const unsigned HOST_WIDE_INT flags;
3145 }
3146 const processor_alias_table[] =
3147 {
3148 {"i386", PROCESSOR_I386, CPU_NONE, 0},
3149 {"i486", PROCESSOR_I486, CPU_NONE, 0},
3150 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3151 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3152 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
3153 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
3154 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3155 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3156 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3157 PTA_MMX | PTA_SSE | PTA_FXSR},
3158 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3159 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3160 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_FXSR},
3161 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3162 PTA_MMX | PTA_SSE | PTA_FXSR},
3163 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3164 PTA_MMX | PTA_SSE | PTA_FXSR},
3165 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3166 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3167 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
3168 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3169 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
3170 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3171 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
3172 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR},
3173 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
3174 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3175 | PTA_CX16 | PTA_NO_SAHF | PTA_FXSR},
3176 {"core2", PROCESSOR_CORE2, CPU_CORE2, PTA_CORE2},
3177 {"nehalem", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3178 {"corei7", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3179 {"westmere", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_WESTMERE},
3180 {"sandybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3181 PTA_SANDYBRIDGE},
3182 {"corei7-avx", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3183 PTA_SANDYBRIDGE},
3184 {"ivybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3185 PTA_IVYBRIDGE},
3186 {"core-avx-i", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3187 PTA_IVYBRIDGE},
3188 {"haswell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3189 {"core-avx2", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3190 {"broadwell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_BROADWELL},
3191 {"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3192 {"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3193 {"silvermont", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3194 {"slm", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3195 {"intel", PROCESSOR_INTEL, CPU_SLM, PTA_NEHALEM},
3196 {"geode", PROCESSOR_GEODE, CPU_GEODE,
3197 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3198 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
3199 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3200 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3201 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
3202 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3203 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
3204 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3205 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
3206 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3207 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
3208 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3209 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
3210 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3211 {"x86-64", PROCESSOR_K8, CPU_K8,
3212 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
3213 {"k8", PROCESSOR_K8, CPU_K8,
3214 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3215 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3216 {"k8-sse3", PROCESSOR_K8, CPU_K8,
3217 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3218 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3219 {"opteron", PROCESSOR_K8, CPU_K8,
3220 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3221 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3222 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
3223 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3224 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3225 {"athlon64", PROCESSOR_K8, CPU_K8,
3226 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3227 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3228 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
3229 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3230 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3231 {"athlon-fx", PROCESSOR_K8, CPU_K8,
3232 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3233 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3234 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3235 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3236 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3237 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3238 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3239 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3240 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
3241 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3242 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3243 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3244 | PTA_XOP | PTA_LWP | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3245 {"bdver2", PROCESSOR_BDVER2, CPU_BDVER2,
3246 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3247 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3248 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3249 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3250 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3251 {"bdver3", PROCESSOR_BDVER3, CPU_BDVER3,
3252 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3253 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3254 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3255 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3256 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE
3257 | PTA_XSAVEOPT | PTA_FSGSBASE},
3258 {"bdver4", PROCESSOR_BDVER4, CPU_BDVER4,
3259 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3260 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3261 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2
3262 | PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_BMI2
3263 | PTA_TBM | PTA_F16C | PTA_FMA | PTA_PRFCHW | PTA_FXSR
3264 | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE},
3265 {"btver1", PROCESSOR_BTVER1, CPU_GENERIC,
3266 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3267 | PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16 | PTA_PRFCHW
3268 | PTA_FXSR | PTA_XSAVE},
3269 {"btver2", PROCESSOR_BTVER2, CPU_BTVER2,
3270 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3271 | PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16 | PTA_SSE4_1
3272 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
3273 | PTA_BMI | PTA_F16C | PTA_MOVBE | PTA_PRFCHW
3274 | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT},
3275
3276 {"generic", PROCESSOR_GENERIC, CPU_GENERIC,
3277 PTA_64BIT
3278 | PTA_HLE /* flags are only used for -march switch. */ },
3279 };
3280
3281 /* -mrecip options. */
3282 static struct
3283 {
3284 const char *string; /* option name */
3285 unsigned int mask; /* mask bits to set */
3286 }
3287 const recip_options[] =
3288 {
3289 { "all", RECIP_MASK_ALL },
3290 { "none", RECIP_MASK_NONE },
3291 { "div", RECIP_MASK_DIV },
3292 { "sqrt", RECIP_MASK_SQRT },
3293 { "vec-div", RECIP_MASK_VEC_DIV },
3294 { "vec-sqrt", RECIP_MASK_VEC_SQRT },
3295 };
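/* A usage sketch: -mrecip= takes a comma-separated list of the names above
   (or "default"), each optionally prefixed with '!' to clear the
   corresponding mask instead of setting it, e.g. -mrecip=all,!sqrt; see the
   parsing loop over recip_options further below.  */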
3296
3297 int const pta_size = ARRAY_SIZE (processor_alias_table);
3298
3299 /* Set up prefix/suffix so the error messages refer to either the command
3300 line argument, or the attribute(target). */
3301 if (main_args_p)
3302 {
3303 prefix = "-m";
3304 suffix = "";
3305 sw = "switch";
3306 }
3307 else
3308 {
3309 prefix = "option(\"";
3310 suffix = "\")";
3311 sw = "attribute";
3312 }
3313
3314 /* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if
3315 TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false. */
3316 if (TARGET_64BIT_DEFAULT && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3317 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ABI_64 | OPTION_MASK_ABI_X32);
3318 #ifdef TARGET_BI_ARCH
3319 else
3320 {
3321 #if TARGET_BI_ARCH == 1
3322 /* When TARGET_BI_ARCH == 1, by default, OPTION_MASK_ABI_64
3323 is on and OPTION_MASK_ABI_X32 is off. We turn off
3324 OPTION_MASK_ABI_64 if OPTION_MASK_ABI_X32 is turned on by
3325 -mx32. */
3326 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3327 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3328 #else
3329 /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is
3330 on and OPTION_MASK_ABI_64 is off. We turn off
3331 OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by
3332 -m64. */
3333 if (TARGET_LP64_P (opts->x_ix86_isa_flags))
3334 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3335 #endif
3336 }
3337 #endif
3338
3339 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3340 {
3341 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3342 OPTION_MASK_ABI_64 for TARGET_X32. */
3343 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3344 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3345 }
3346 else if (TARGET_16BIT_P (opts->x_ix86_isa_flags))
3347 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ISA_64BIT
3348 | OPTION_MASK_ABI_X32
3349 | OPTION_MASK_ABI_64);
3350 else if (TARGET_LP64_P (opts->x_ix86_isa_flags))
3351 {
3352 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3353 OPTION_MASK_ABI_X32 for TARGET_LP64. */
3354 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3355 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3356 }
3357
3358 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3359 SUBTARGET_OVERRIDE_OPTIONS;
3360 #endif
3361
3362 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3363 SUBSUBTARGET_OVERRIDE_OPTIONS;
3364 #endif
3365
3366 /* On Darwin (Mach-O), -fPIC is the default for x86_64. */
3367 if (TARGET_MACHO && TARGET_64BIT_P (opts->x_ix86_isa_flags))
3368 opts->x_flag_pic = 2;
3369
3370 /* Need to check -mtune=generic first. */
3371 if (opts->x_ix86_tune_string)
3372 {
3373 /* As special support for cross compilers we read -mtune=native
3374 as -mtune=generic. With native compilers we won't see the
3375 -mtune=native, as it was changed by the driver. */
3376 if (!strcmp (opts->x_ix86_tune_string, "native"))
3377 {
3378 opts->x_ix86_tune_string = "generic";
3379 }
3380 else if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3381 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
3382 "%stune=k8%s or %stune=generic%s instead as appropriate",
3383 prefix, suffix, prefix, suffix, prefix, suffix);
3384 }
3385 else
3386 {
3387 if (opts->x_ix86_arch_string)
3388 opts->x_ix86_tune_string = opts->x_ix86_arch_string;
3389 if (!opts->x_ix86_tune_string)
3390 {
3391 opts->x_ix86_tune_string
3392 = processor_target_table[TARGET_CPU_DEFAULT].name;
3393 ix86_tune_defaulted = 1;
3394 }
3395
3396 /* opts->x_ix86_tune_string is set to opts->x_ix86_arch_string
3397 or defaulted. We need to use a sensible tune option. */
3398 if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3399 {
3400 opts->x_ix86_tune_string = "generic";
3401 }
3402 }
3403
3404 if (opts->x_ix86_stringop_alg == rep_prefix_8_byte
3405 && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3406 {
3407 /* rep; movq isn't available in 32-bit code. */
3408 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
3409 opts->x_ix86_stringop_alg = no_stringop;
3410 }
3411
3412 if (!opts->x_ix86_arch_string)
3413 opts->x_ix86_arch_string
3414 = TARGET_64BIT_P (opts->x_ix86_isa_flags)
3415 ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3416 else
3417 ix86_arch_specified = 1;
3418
3419 if (opts_set->x_ix86_pmode)
3420 {
3421 if ((TARGET_LP64_P (opts->x_ix86_isa_flags)
3422 && opts->x_ix86_pmode == PMODE_SI)
3423 || (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
3424 && opts->x_ix86_pmode == PMODE_DI))
3425 error ("address mode %qs not supported in the %s bit mode",
3426 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "short" : "long",
3427 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "64" : "32");
3428 }
3429 else
3430 opts->x_ix86_pmode = TARGET_LP64_P (opts->x_ix86_isa_flags)
3431 ? PMODE_DI : PMODE_SI;
3432
3433 if (!opts_set->x_ix86_abi)
3434 opts->x_ix86_abi = DEFAULT_ABI;
3435
3436 /* For targets using the MS ABI, enable ms-extensions unless it has
3437 been explicitly turned off. For non-MS ABIs we turn off this
3438 option. */
3439 if (!opts_set->x_flag_ms_extensions)
3440 opts->x_flag_ms_extensions = (MS_ABI == DEFAULT_ABI);
3441
3442 if (opts_set->x_ix86_cmodel)
3443 {
3444 switch (opts->x_ix86_cmodel)
3445 {
3446 case CM_SMALL:
3447 case CM_SMALL_PIC:
3448 if (opts->x_flag_pic)
3449 opts->x_ix86_cmodel = CM_SMALL_PIC;
3450 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3451 error ("code model %qs not supported in the %s bit mode",
3452 "small", "32");
3453 break;
3454
3455 case CM_MEDIUM:
3456 case CM_MEDIUM_PIC:
3457 if (opts->x_flag_pic)
3458 opts->x_ix86_cmodel = CM_MEDIUM_PIC;
3459 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3460 error ("code model %qs not supported in the %s bit mode",
3461 "medium", "32");
3462 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3463 error ("code model %qs not supported in x32 mode",
3464 "medium");
3465 break;
3466
3467 case CM_LARGE:
3468 case CM_LARGE_PIC:
3469 if (opts->x_flag_pic)
3470 opts->x_ix86_cmodel = CM_LARGE_PIC;
3471 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3472 error ("code model %qs not supported in the %s bit mode",
3473 "large", "32");
3474 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3475 error ("code model %qs not supported in x32 mode",
3476 "large");
3477 break;
3478
3479 case CM_32:
3480 if (opts->x_flag_pic)
3481 error ("code model %s does not support PIC mode", "32");
3482 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3483 error ("code model %qs not supported in the %s bit mode",
3484 "32", "64");
3485 break;
3486
3487 case CM_KERNEL:
3488 if (opts->x_flag_pic)
3489 {
3490 error ("code model %s does not support PIC mode", "kernel");
3491 opts->x_ix86_cmodel = CM_32;
3492 }
3493 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3494 error ("code model %qs not supported in the %s bit mode",
3495 "kernel", "32");
3496 break;
3497
3498 default:
3499 gcc_unreachable ();
3500 }
3501 }
3502 else
3503 {
3504 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3505 use of rip-relative addressing. This eliminates fixups that
3506 would otherwise be needed if this object is to be placed in a
3507 DLL, and is essentially just as efficient as direct addressing. */
3508 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3509 && (TARGET_RDOS || TARGET_PECOFF))
3510 opts->x_ix86_cmodel = CM_MEDIUM_PIC, opts->x_flag_pic = 1;
3511 else if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3512 opts->x_ix86_cmodel = opts->x_flag_pic ? CM_SMALL_PIC : CM_SMALL;
3513 else
3514 opts->x_ix86_cmodel = CM_32;
3515 }
3516 if (TARGET_MACHO && opts->x_ix86_asm_dialect == ASM_INTEL)
3517 {
3518 error ("-masm=intel not supported in this configuration");
3519 opts->x_ix86_asm_dialect = ASM_ATT;
3520 }
3521 if ((TARGET_64BIT_P (opts->x_ix86_isa_flags) != 0)
3522 != ((opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3523 sorry ("%i-bit mode not compiled in",
3524 (opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3525
3526 for (i = 0; i < pta_size; i++)
3527 if (! strcmp (opts->x_ix86_arch_string, processor_alias_table[i].name))
3528 {
3529 ix86_schedule = processor_alias_table[i].schedule;
3530 ix86_arch = processor_alias_table[i].processor;
3531 /* Default cpu tuning to the architecture. */
3532 ix86_tune = ix86_arch;
3533
3534 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3535 && !(processor_alias_table[i].flags & PTA_64BIT))
3536 error ("CPU you selected does not support x86-64 "
3537 "instruction set");
3538
3539 if (processor_alias_table[i].flags & PTA_MMX
3540 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3541 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3542 if (processor_alias_table[i].flags & PTA_3DNOW
3543 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3544 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3545 if (processor_alias_table[i].flags & PTA_3DNOW_A
3546 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3547 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3548 if (processor_alias_table[i].flags & PTA_SSE
3549 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3550 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3551 if (processor_alias_table[i].flags & PTA_SSE2
3552 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3553 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3554 if (processor_alias_table[i].flags & PTA_SSE3
3555 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3556 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3557 if (processor_alias_table[i].flags & PTA_SSSE3
3558 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3559 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3560 if (processor_alias_table[i].flags & PTA_SSE4_1
3561 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3562 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3563 if (processor_alias_table[i].flags & PTA_SSE4_2
3564 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3565 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3566 if (processor_alias_table[i].flags & PTA_AVX
3567 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3568 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3569 if (processor_alias_table[i].flags & PTA_AVX2
3570 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2))
3571 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX2;
3572 if (processor_alias_table[i].flags & PTA_FMA
3573 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3574 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3575 if (processor_alias_table[i].flags & PTA_SSE4A
3576 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3577 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3578 if (processor_alias_table[i].flags & PTA_FMA4
3579 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3580 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3581 if (processor_alias_table[i].flags & PTA_XOP
3582 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3583 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3584 if (processor_alias_table[i].flags & PTA_LWP
3585 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3586 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3587 if (processor_alias_table[i].flags & PTA_ABM
3588 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3589 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3590 if (processor_alias_table[i].flags & PTA_BMI
3591 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
3592 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI;
3593 if (processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)
3594 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT))
3595 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LZCNT;
3596 if (processor_alias_table[i].flags & PTA_TBM
3597 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
3598 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_TBM;
3599 if (processor_alias_table[i].flags & PTA_BMI2
3600 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2))
3601 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI2;
3602 if (processor_alias_table[i].flags & PTA_CX16
3603 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3604 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3605 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3606 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3607 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3608 if (!(TARGET_64BIT_P (opts->x_ix86_isa_flags)
3609 && (processor_alias_table[i].flags & PTA_NO_SAHF))
3610 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3611 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3612 if (processor_alias_table[i].flags & PTA_MOVBE
3613 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3614 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3615 if (processor_alias_table[i].flags & PTA_AES
3616 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3617 ix86_isa_flags |= OPTION_MASK_ISA_AES;
3618 if (processor_alias_table[i].flags & PTA_SHA
3619 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SHA))
3620 ix86_isa_flags |= OPTION_MASK_ISA_SHA;
3621 if (processor_alias_table[i].flags & PTA_PCLMUL
3622 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3623 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3624 if (processor_alias_table[i].flags & PTA_FSGSBASE
3625 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3626 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3627 if (processor_alias_table[i].flags & PTA_RDRND
3628 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3629 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3630 if (processor_alias_table[i].flags & PTA_F16C
3631 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3632 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3633 if (processor_alias_table[i].flags & PTA_RTM
3634 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RTM))
3635 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RTM;
3636 if (processor_alias_table[i].flags & PTA_HLE
3637 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_HLE))
3638 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_HLE;
3639 if (processor_alias_table[i].flags & PTA_PRFCHW
3640 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PRFCHW))
3641 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PRFCHW;
3642 if (processor_alias_table[i].flags & PTA_RDSEED
3643 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDSEED))
3644 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDSEED;
3645 if (processor_alias_table[i].flags & PTA_ADX
3646 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ADX))
3647 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ADX;
3648 if (processor_alias_table[i].flags & PTA_FXSR
3649 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FXSR))
3650 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FXSR;
3651 if (processor_alias_table[i].flags & PTA_XSAVE
3652 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVE))
3653 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVE;
3654 if (processor_alias_table[i].flags & PTA_XSAVEOPT
3655 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEOPT))
3656 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEOPT;
3657 if (processor_alias_table[i].flags & PTA_AVX512F
3658 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512F))
3659 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512F;
3660 if (processor_alias_table[i].flags & PTA_AVX512ER
3661 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512ER))
3662 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512ER;
3663 if (processor_alias_table[i].flags & PTA_AVX512PF
3664 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512PF))
3665 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512PF;
3666 if (processor_alias_table[i].flags & PTA_AVX512CD
3667 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512CD))
3668 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512CD;
3669 if (processor_alias_table[i].flags & PTA_PREFETCHWT1
3670 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PREFETCHWT1))
3671 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PREFETCHWT1;
3672 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3673 x86_prefetch_sse = true;
3674
3675 break;
3676 }
3677
3678 if (!strcmp (opts->x_ix86_arch_string, "generic"))
3679 error ("generic CPU can be used only for %stune=%s %s",
3680 prefix, suffix, sw);
3681 else if (!strcmp (opts->x_ix86_arch_string, "intel"))
3682 error ("intel CPU can be used only for %stune=%s %s",
3683 prefix, suffix, sw);
3684 else if (i == pta_size)
3685 error ("bad value (%s) for %sarch=%s %s",
3686 opts->x_ix86_arch_string, prefix, suffix, sw);
3687
3688 ix86_arch_mask = 1u << ix86_arch;
3689 for (i = 0; i < X86_ARCH_LAST; ++i)
3690 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3691
3692 for (i = 0; i < pta_size; i++)
3693 if (! strcmp (opts->x_ix86_tune_string, processor_alias_table[i].name))
3694 {
3695 ix86_schedule = processor_alias_table[i].schedule;
3696 ix86_tune = processor_alias_table[i].processor;
3697 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3698 {
3699 if (!(processor_alias_table[i].flags & PTA_64BIT))
3700 {
3701 if (ix86_tune_defaulted)
3702 {
3703 opts->x_ix86_tune_string = "x86-64";
3704 for (i = 0; i < pta_size; i++)
3705 if (! strcmp (opts->x_ix86_tune_string,
3706 processor_alias_table[i].name))
3707 break;
3708 ix86_schedule = processor_alias_table[i].schedule;
3709 ix86_tune = processor_alias_table[i].processor;
3710 }
3711 else
3712 error ("CPU you selected does not support x86-64 "
3713 "instruction set");
3714 }
3715 }
3716 /* Intel CPUs have always interpreted SSE prefetch instructions as
3717 NOPs; so, we can enable SSE prefetch instructions even when
3718 -mtune (rather than -march) points us to a processor that has them.
3719 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3720 higher processors. */
3721 if (TARGET_CMOV
3722 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3723 x86_prefetch_sse = true;
3724 break;
3725 }
3726
3727 if (ix86_tune_specified && i == pta_size)
3728 error ("bad value (%s) for %stune=%s %s",
3729 opts->x_ix86_tune_string, prefix, suffix, sw);
3730
3731 set_ix86_tune_features (ix86_tune, opts->x_ix86_dump_tunes);
3732
3733 #ifndef USE_IX86_FRAME_POINTER
3734 #define USE_IX86_FRAME_POINTER 0
3735 #endif
3736
3737 #ifndef USE_X86_64_FRAME_POINTER
3738 #define USE_X86_64_FRAME_POINTER 0
3739 #endif
3740
3741 /* Set the default values for switches whose default depends on TARGET_64BIT
3742 in case they weren't overwritten by command line options. */
3743 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3744 {
3745 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3746 opts->x_flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
3747 if (opts->x_flag_asynchronous_unwind_tables
3748 && !opts_set->x_flag_unwind_tables
3749 && TARGET_64BIT_MS_ABI)
3750 opts->x_flag_unwind_tables = 1;
3751 if (opts->x_flag_asynchronous_unwind_tables == 2)
3752 opts->x_flag_unwind_tables
3753 = opts->x_flag_asynchronous_unwind_tables = 1;
3754 if (opts->x_flag_pcc_struct_return == 2)
3755 opts->x_flag_pcc_struct_return = 0;
3756 }
3757 else
3758 {
3759 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3760 opts->x_flag_omit_frame_pointer
3761 = !(USE_IX86_FRAME_POINTER || opts->x_optimize_size);
3762 if (opts->x_flag_asynchronous_unwind_tables == 2)
3763 opts->x_flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
3764 if (opts->x_flag_pcc_struct_return == 2)
3765 opts->x_flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
3766 }
3767
3768 ix86_tune_cost = processor_target_table[ix86_tune].cost;
3769 if (opts->x_optimize_size)
3770 ix86_cost = &ix86_size_cost;
3771 else
3772 ix86_cost = ix86_tune_cost;
3773
3774 /* Arrange to set up i386_stack_locals for all functions. */
3775 init_machine_status = ix86_init_machine_status;
3776
3777 /* Validate -mregparm= value. */
3778 if (opts_set->x_ix86_regparm)
3779 {
3780 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3781 warning (0, "-mregparm is ignored in 64-bit mode");
3782 if (opts->x_ix86_regparm > REGPARM_MAX)
3783 {
3784 error ("-mregparm=%d is not between 0 and %d",
3785 opts->x_ix86_regparm, REGPARM_MAX);
3786 opts->x_ix86_regparm = 0;
3787 }
3788 }
3789 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3790 opts->x_ix86_regparm = REGPARM_MAX;
3791
3792 /* Default align_* from the processor table. */
3793 if (opts->x_align_loops == 0)
3794 {
3795 opts->x_align_loops = processor_target_table[ix86_tune].align_loop;
3796 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3797 }
3798 if (opts->x_align_jumps == 0)
3799 {
3800 opts->x_align_jumps = processor_target_table[ix86_tune].align_jump;
3801 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3802 }
3803 if (opts->x_align_functions == 0)
3804 {
3805 opts->x_align_functions = processor_target_table[ix86_tune].align_func;
3806 }
3807
3808 /* Provide default for -mbranch-cost= value. */
3809 if (!opts_set->x_ix86_branch_cost)
3810 opts->x_ix86_branch_cost = ix86_cost->branch_cost;
3811
3812 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3813 {
3814 opts->x_target_flags
3815 |= TARGET_SUBTARGET64_DEFAULT & ~opts_set->x_target_flags;
3816
3817 /* Enable by default the SSE and MMX builtins. Do allow the user to
3818 explicitly disable any of these. In particular, disabling SSE and
3819 MMX for kernel code is extremely useful. */
3820 if (!ix86_arch_specified)
3821 opts->x_ix86_isa_flags
3822 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3823 | TARGET_SUBTARGET64_ISA_DEFAULT)
3824 & ~opts->x_ix86_isa_flags_explicit);
3825
3826 if (TARGET_RTD_P (opts->x_target_flags))
3827 warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
3828 }
3829 else
3830 {
3831 opts->x_target_flags
3832 |= TARGET_SUBTARGET32_DEFAULT & ~opts_set->x_target_flags;
3833
3834 if (!ix86_arch_specified)
3835 opts->x_ix86_isa_flags
3836 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~opts->x_ix86_isa_flags_explicit;
3837
3838 /* The i386 ABI does not specify a red zone. It still makes sense to use
3839 one when the programmer takes care to keep the stack from being destroyed. */
3840 if (!(opts_set->x_target_flags & MASK_NO_RED_ZONE))
3841 opts->x_target_flags |= MASK_NO_RED_ZONE;
3842 }
3843
3844 /* Keep nonleaf frame pointers. */
3845 if (opts->x_flag_omit_frame_pointer)
3846 opts->x_target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3847 else if (TARGET_OMIT_LEAF_FRAME_POINTER_P (opts->x_target_flags))
3848 opts->x_flag_omit_frame_pointer = 1;
3849
3850 /* If we're doing fast math, we don't care about comparison order
3851 wrt NaNs. This lets us use a shorter comparison sequence. */
3852 if (opts->x_flag_finite_math_only)
3853 opts->x_target_flags &= ~MASK_IEEE_FP;
3854
3855 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3856 since the insns won't need emulation. */
3857 if (ix86_tune_features [X86_TUNE_ALWAYS_FANCY_MATH_387])
3858 opts->x_target_flags &= ~MASK_NO_FANCY_MATH_387;
3859
3860 /* Likewise, if the target doesn't have a 387, or we've specified
3861 software floating point, don't use 387 inline intrinsics. */
3862 if (!TARGET_80387_P (opts->x_target_flags))
3863 opts->x_target_flags |= MASK_NO_FANCY_MATH_387;
3864
3865 /* Turn on MMX builtins for -msse. */
3866 if (TARGET_SSE_P (opts->x_ix86_isa_flags))
3867 opts->x_ix86_isa_flags
3868 |= OPTION_MASK_ISA_MMX & ~opts->x_ix86_isa_flags_explicit;
3869
3870 /* Enable SSE prefetch. */
3871 if (TARGET_SSE_P (opts->x_ix86_isa_flags)
3872 || (TARGET_PRFCHW && !TARGET_3DNOW_P (opts->x_ix86_isa_flags)))
3873 x86_prefetch_sse = true;
3874
3875 /* Enable prefetch{,w} instructions for -m3dnow and -mprefetchwt1. */
3876 if (TARGET_3DNOW_P (opts->x_ix86_isa_flags)
3877 || TARGET_PREFETCHWT1_P (opts->x_ix86_isa_flags))
3878 opts->x_ix86_isa_flags
3879 |= OPTION_MASK_ISA_PRFCHW & ~opts->x_ix86_isa_flags_explicit;
3880
3881 /* Enable popcnt instruction for -msse4.2 or -mabm. */
3882 if (TARGET_SSE4_2_P (opts->x_ix86_isa_flags)
3883 || TARGET_ABM_P (opts->x_ix86_isa_flags))
3884 opts->x_ix86_isa_flags
3885 |= OPTION_MASK_ISA_POPCNT & ~opts->x_ix86_isa_flags_explicit;
3886
3887 /* Enable lzcnt instruction for -mabm. */
3888 if (TARGET_ABM_P (opts->x_ix86_isa_flags))
3889 opts->x_ix86_isa_flags
3890 |= OPTION_MASK_ISA_LZCNT & ~opts->x_ix86_isa_flags_explicit;
3891
3892 /* Validate -mpreferred-stack-boundary= value or default it to
3893 PREFERRED_STACK_BOUNDARY_DEFAULT. */
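/* The argument is interpreted as the base-2 logarithm of the desired
   boundary in bytes: e.g. -mpreferred-stack-boundary=4 requests a
   2**4 = 16-byte boundary, which the code below converts to
   (1 << 4) * BITS_PER_UNIT = 128 bits (BITS_PER_UNIT being 8 here).  */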
3894 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3895 if (opts_set->x_ix86_preferred_stack_boundary_arg)
3896 {
3897 int min = (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3898 ? (TARGET_SSE_P (opts->x_ix86_isa_flags) ? 4 : 3) : 2);
3899 int max = (TARGET_SEH ? 4 : 12);
3900
3901 if (opts->x_ix86_preferred_stack_boundary_arg < min
3902 || opts->x_ix86_preferred_stack_boundary_arg > max)
3903 {
3904 if (min == max)
3905 error ("-mpreferred-stack-boundary is not supported "
3906 "for this target");
3907 else
3908 error ("-mpreferred-stack-boundary=%d is not between %d and %d",
3909 opts->x_ix86_preferred_stack_boundary_arg, min, max);
3910 }
3911 else
3912 ix86_preferred_stack_boundary
3913 = (1 << opts->x_ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
3914 }
3915
3916 /* Set the default value for -mstackrealign. */
3917 if (opts->x_ix86_force_align_arg_pointer == -1)
3918 opts->x_ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3919
3920 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3921
3922 /* Validate -mincoming-stack-boundary= value or default it to
3923 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3924 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3925 if (opts_set->x_ix86_incoming_stack_boundary_arg)
3926 {
3927 if (opts->x_ix86_incoming_stack_boundary_arg
3928 < (TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2)
3929 || opts->x_ix86_incoming_stack_boundary_arg > 12)
3930 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3931 opts->x_ix86_incoming_stack_boundary_arg,
3932 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2);
3933 else
3934 {
3935 ix86_user_incoming_stack_boundary
3936 = (1 << opts->x_ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
3937 ix86_incoming_stack_boundary
3938 = ix86_user_incoming_stack_boundary;
3939 }
3940 }
3941
3942 /* Accept -msseregparm only if at least SSE support is enabled. */
3943 if (TARGET_SSEREGPARM_P (opts->x_target_flags)
3944 && ! TARGET_SSE_P (opts->x_ix86_isa_flags))
3945 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
3946
3947 if (opts_set->x_ix86_fpmath)
3948 {
3949 if (opts->x_ix86_fpmath & FPMATH_SSE)
3950 {
3951 if (!TARGET_SSE_P (opts->x_ix86_isa_flags))
3952 {
3953 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3954 opts->x_ix86_fpmath = FPMATH_387;
3955 }
3956 else if ((opts->x_ix86_fpmath & FPMATH_387)
3957 && !TARGET_80387_P (opts->x_target_flags))
3958 {
3959 warning (0, "387 instruction set disabled, using SSE arithmetics");
3960 opts->x_ix86_fpmath = FPMATH_SSE;
3961 }
3962 }
3963 }
3964 /* For all chips supporting SSE2, -mfpmath=sse performs better than
3965 -mfpmath=387. The latter is nevertheless the default on many targets,
3966 since the extra 80-bit precision of temporaries is considered part of the ABI.
3967 Override that default at least for -ffast-math.
3968 TODO: -mfpmath=both seems to produce equally fast code with slightly
3969 smaller binaries. It is however not clear whether register allocation
3970 is ready for this setting.
3971 Also, -mfpmath=387 is overall notably more compact (about 4-5%) than SSE
3972 codegen. We may switch to 387 with -ffast-math for size-optimized
3973 functions. */
3974 else if (fast_math_flags_set_p (&global_options)
3975 && TARGET_SSE2_P (opts->x_ix86_isa_flags))
3976 opts->x_ix86_fpmath = FPMATH_SSE;
3977 else
3978 opts->x_ix86_fpmath = TARGET_FPMATH_DEFAULT_P (opts->x_ix86_isa_flags);
3979
3980 /* If the i387 is disabled, then do not return values in it. */
3981 if (!TARGET_80387_P (opts->x_target_flags))
3982 opts->x_target_flags &= ~MASK_FLOAT_RETURNS;
3983
3984 /* Use an external vectorized math library when vectorizing intrinsics. */
3985 if (opts_set->x_ix86_veclibabi_type)
3986 switch (opts->x_ix86_veclibabi_type)
3987 {
3988 case ix86_veclibabi_type_svml:
3989 ix86_veclib_handler = ix86_veclibabi_svml;
3990 break;
3991
3992 case ix86_veclibabi_type_acml:
3993 ix86_veclib_handler = ix86_veclibabi_acml;
3994 break;
3995
3996 default:
3997 gcc_unreachable ();
3998 }
3999
4000 if (ix86_tune_features [X86_TUNE_ACCUMULATE_OUTGOING_ARGS]
4001 && !(opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
4002 && !opts->x_optimize_size)
4003 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4004
4005 /* If stack probes are required, the space used for large function
4006 arguments on the stack must also be probed, so enable
4007 -maccumulate-outgoing-args so this happens in the prologue. */
4008 if (TARGET_STACK_PROBE_P (opts->x_target_flags)
4009 && !(opts->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4010 {
4011 if (opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
4012 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
4013 "for correctness", prefix, suffix);
4014 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4015 }
4016
4017 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
4018 {
4019 char *p;
4020 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
4021 p = strchr (internal_label_prefix, 'X');
4022 internal_label_prefix_len = p - internal_label_prefix;
4023 *p = '\0';
4024 }
4025
4026 /* When no scheduling description is available, disable the scheduler pass
4027 so it won't slow down compilation or make x87 code slower. */
4028 if (!TARGET_SCHEDULE)
4029 opts->x_flag_schedule_insns_after_reload = opts->x_flag_schedule_insns = 0;
4030
4031 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
4032 ix86_tune_cost->simultaneous_prefetches,
4033 opts->x_param_values,
4034 opts_set->x_param_values);
4035 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
4036 ix86_tune_cost->prefetch_block,
4037 opts->x_param_values,
4038 opts_set->x_param_values);
4039 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
4040 ix86_tune_cost->l1_cache_size,
4041 opts->x_param_values,
4042 opts_set->x_param_values);
4043 maybe_set_param_value (PARAM_L2_CACHE_SIZE,
4044 ix86_tune_cost->l2_cache_size,
4045 opts->x_param_values,
4046 opts_set->x_param_values);
4047
4048 /* Enable software prefetching at -O3 for CPUs where prefetching is helpful. */
4049 if (opts->x_flag_prefetch_loop_arrays < 0
4050 && HAVE_prefetch
4051 && (opts->x_optimize >= 3 || opts->x_flag_profile_use)
4052 && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
4053 opts->x_flag_prefetch_loop_arrays = 1;
4054
4055 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
4056 can be optimized to ap = __builtin_next_arg (0). */
4057 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && !opts->x_flag_split_stack)
4058 targetm.expand_builtin_va_start = NULL;
4059
4060 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4061 {
4062 ix86_gen_leave = gen_leave_rex64;
4063 if (Pmode == DImode)
4064 {
4065 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_di;
4066 ix86_gen_tls_local_dynamic_base_64
4067 = gen_tls_local_dynamic_base_64_di;
4068 }
4069 else
4070 {
4071 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_si;
4072 ix86_gen_tls_local_dynamic_base_64
4073 = gen_tls_local_dynamic_base_64_si;
4074 }
4075 }
4076 else
4077 ix86_gen_leave = gen_leave;
4078
4079 if (Pmode == DImode)
4080 {
4081 ix86_gen_add3 = gen_adddi3;
4082 ix86_gen_sub3 = gen_subdi3;
4083 ix86_gen_sub3_carry = gen_subdi3_carry;
4084 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
4085 ix86_gen_andsp = gen_anddi3;
4086 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
4087 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
4088 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
4089 ix86_gen_monitor = gen_sse3_monitor_di;
4090 }
4091 else
4092 {
4093 ix86_gen_add3 = gen_addsi3;
4094 ix86_gen_sub3 = gen_subsi3;
4095 ix86_gen_sub3_carry = gen_subsi3_carry;
4096 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
4097 ix86_gen_andsp = gen_andsi3;
4098 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
4099 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
4100 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
4101 ix86_gen_monitor = gen_sse3_monitor_si;
4102 }
4103
4104 #ifdef USE_IX86_CLD
4105 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
4106 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
4107 opts->x_target_flags |= MASK_CLD & ~opts_set->x_target_flags;
4108 #endif
4109
4110 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && opts->x_flag_pic)
4111 {
4112 if (opts->x_flag_fentry > 0)
4113 sorry ("-mfentry isn%'t supported for 32-bit in combination "
4114 "with -fpic");
4115 opts->x_flag_fentry = 0;
4116 }
4117 else if (TARGET_SEH)
4118 {
4119 if (opts->x_flag_fentry == 0)
4120 sorry ("-mno-fentry isn%'t compatible with SEH");
4121 opts->x_flag_fentry = 1;
4122 }
4123 else if (opts->x_flag_fentry < 0)
4124 {
4125 #if defined(PROFILE_BEFORE_PROLOGUE)
4126 opts->x_flag_fentry = 1;
4127 #else
4128 opts->x_flag_fentry = 0;
4129 #endif
4130 }
4131
4132 /* When not optimizing for size, enable the vzeroupper optimization for
4133 TARGET_AVX with -fexpensive-optimizations and split 32-byte
4134 AVX unaligned loads/stores. */
4135 if (!opts->x_optimize_size)
4136 {
4137 if (flag_expensive_optimizations
4138 && !(opts_set->x_target_flags & MASK_VZEROUPPER))
4139 opts->x_target_flags |= MASK_VZEROUPPER;
4140 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL]
4141 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
4142 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
4143 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL]
4144 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_STORE))
4145 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
4146 /* Enable 128-bit AVX instruction generation
4147 for the auto-vectorizer. */
4148 if (TARGET_AVX128_OPTIMAL
4149 && !(opts_set->x_target_flags & MASK_PREFER_AVX128))
4150 opts->x_target_flags |= MASK_PREFER_AVX128;
4151 }
4152
4153 if (opts->x_ix86_recip_name)
4154 {
4155 char *p = ASTRDUP (opts->x_ix86_recip_name);
4156 char *q;
4157 unsigned int mask, i;
4158 bool invert;
4159
4160 while ((q = strtok (p, ",")) != NULL)
4161 {
4162 p = NULL;
4163 if (*q == '!')
4164 {
4165 invert = true;
4166 q++;
4167 }
4168 else
4169 invert = false;
4170
4171 if (!strcmp (q, "default"))
4172 mask = RECIP_MASK_ALL;
4173 else
4174 {
4175 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4176 if (!strcmp (q, recip_options[i].string))
4177 {
4178 mask = recip_options[i].mask;
4179 break;
4180 }
4181
4182 if (i == ARRAY_SIZE (recip_options))
4183 {
4184 error ("unknown option for -mrecip=%s", q);
4185 invert = false;
4186 mask = RECIP_MASK_NONE;
4187 }
4188 }
4189
4190 opts->x_recip_mask_explicit |= mask;
4191 if (invert)
4192 opts->x_recip_mask &= ~mask;
4193 else
4194 opts->x_recip_mask |= mask;
4195 }
4196 }
4197
4198 if (TARGET_RECIP_P (opts->x_target_flags))
4199 opts->x_recip_mask |= RECIP_MASK_ALL & ~opts->x_recip_mask_explicit;
4200 else if (opts_set->x_target_flags & MASK_RECIP)
4201 opts->x_recip_mask &= ~(RECIP_MASK_ALL & ~opts->x_recip_mask_explicit);
4202
4203 /* Default long double to 64-bit for 32-bit Bionic and to __float128
4204 for 64-bit Bionic. */
4205 if (TARGET_HAS_BIONIC
4206 && !(opts_set->x_target_flags
4207 & (MASK_LONG_DOUBLE_64 | MASK_LONG_DOUBLE_128)))
4208 opts->x_target_flags |= (TARGET_64BIT
4209 ? MASK_LONG_DOUBLE_128
4210 : MASK_LONG_DOUBLE_64);
4211
4212 /* Only one of them can be active. */
4213 gcc_assert ((opts->x_target_flags & MASK_LONG_DOUBLE_64) == 0
4214 || (opts->x_target_flags & MASK_LONG_DOUBLE_128) == 0);
4215
4216 /* Save the initial options in case the user later uses function-specific
4217 options. */
4218 if (main_args_p)
4219 target_option_default_node = target_option_current_node
4220 = build_target_option_node (opts);
4221
4222 /* Handle stack protector */
4223 if (!opts_set->x_ix86_stack_protector_guard)
4224 opts->x_ix86_stack_protector_guard
4225 = TARGET_HAS_BIONIC ? SSP_GLOBAL : SSP_TLS;
4226
4227 /* Handle -mmemcpy-strategy= and -mmemset-strategy= */
4228 if (opts->x_ix86_tune_memcpy_strategy)
4229 {
4230 char *str = xstrdup (opts->x_ix86_tune_memcpy_strategy);
4231 ix86_parse_stringop_strategy_string (str, false);
4232 free (str);
4233 }
4234
4235 if (opts->x_ix86_tune_memset_strategy)
4236 {
4237 char *str = xstrdup (opts->x_ix86_tune_memset_strategy);
4238 ix86_parse_stringop_strategy_string (str, true);
4239 free (str);
4240 }
4241 }
4242
4243 /* Implement the TARGET_OPTION_OVERRIDE hook. */
4244
4245 static void
4246 ix86_option_override (void)
4247 {
4248 opt_pass *pass_insert_vzeroupper = make_pass_insert_vzeroupper (g);
4249 static struct register_pass_info insert_vzeroupper_info
4250 = { pass_insert_vzeroupper, "reload",
4251 1, PASS_POS_INSERT_AFTER
4252 };
4253
4254 ix86_option_override_internal (true, &global_options, &global_options_set);
4255
4256
4257 /* This needs to be done at start up. It's convenient to do it here. */
4258 register_pass (&insert_vzeroupper_info);
4259 }
4260
4261 /* Update register usage after having seen the compiler flags. */
4262
4263 static void
4264 ix86_conditional_register_usage (void)
4265 {
4266 int i, c_mask;
4267 unsigned int j;
4268
4269 /* The PIC register, if it exists, is fixed. */
4270 j = PIC_OFFSET_TABLE_REGNUM;
4271 if (j != INVALID_REGNUM)
4272 fixed_regs[j] = call_used_regs[j] = 1;
4273
4274 /* For 32-bit targets, squash the REX registers. */
4275 if (! TARGET_64BIT)
4276 {
4277 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
4278 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4279 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
4280 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4281 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4282 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4283 }
4284
4285 /* See the definition of CALL_USED_REGISTERS in i386.h. */
4286 c_mask = (TARGET_64BIT_MS_ABI ? (1 << 3)
4287 : TARGET_64BIT ? (1 << 2)
4288 : (1 << 1));
4289
4290 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
4291
4292 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4293 {
4294 /* Set/reset conditionally defined registers from
4295 CALL_USED_REGISTERS initializer. */
4296 if (call_used_regs[i] > 1)
4297 call_used_regs[i] = !!(call_used_regs[i] & c_mask);
4298
4299 /* Calculate registers of CLOBBERED_REGS register set
4300 as call used registers from GENERAL_REGS register set. */
4301 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
4302 && call_used_regs[i])
4303 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
4304 }
4305
4306 /* If MMX is disabled, squash the registers. */
4307 if (! TARGET_MMX)
4308 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4309 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
4310 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4311
4312 /* If SSE is disabled, squash the registers. */
4313 if (! TARGET_SSE)
4314 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4315 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
4316 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4317
4318 /* If the FPU is disabled, squash the registers. */
4319 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
4320 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4321 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
4322 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4323
4324 /* If AVX512F is disabled, squash the registers. */
4325 if (! TARGET_AVX512F)
4326 {
4327 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4328 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4329
4330 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
4331 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4332 }
4333 }
4334
4335 \f
4336 /* Save the current options */
4337
4338 static void
4339 ix86_function_specific_save (struct cl_target_option *ptr,
4340 struct gcc_options *opts)
4341 {
4342 ptr->arch = ix86_arch;
4343 ptr->schedule = ix86_schedule;
4344 ptr->tune = ix86_tune;
4345 ptr->branch_cost = ix86_branch_cost;
4346 ptr->tune_defaulted = ix86_tune_defaulted;
4347 ptr->arch_specified = ix86_arch_specified;
4348 ptr->x_ix86_isa_flags_explicit = opts->x_ix86_isa_flags_explicit;
4349 ptr->x_ix86_target_flags_explicit = opts->x_ix86_target_flags_explicit;
4350 ptr->x_recip_mask_explicit = opts->x_recip_mask_explicit;
4351 ptr->x_ix86_arch_string = opts->x_ix86_arch_string;
4352 ptr->x_ix86_tune_string = opts->x_ix86_tune_string;
4353 ptr->x_ix86_cmodel = opts->x_ix86_cmodel;
4354 ptr->x_ix86_abi = opts->x_ix86_abi;
4355 ptr->x_ix86_asm_dialect = opts->x_ix86_asm_dialect;
4356 ptr->x_ix86_branch_cost = opts->x_ix86_branch_cost;
4357 ptr->x_ix86_dump_tunes = opts->x_ix86_dump_tunes;
4358 ptr->x_ix86_force_align_arg_pointer = opts->x_ix86_force_align_arg_pointer;
4359 ptr->x_ix86_force_drap = opts->x_ix86_force_drap;
4360 ptr->x_ix86_incoming_stack_boundary_arg = opts->x_ix86_incoming_stack_boundary_arg;
4361 ptr->x_ix86_pmode = opts->x_ix86_pmode;
4362 ptr->x_ix86_preferred_stack_boundary_arg = opts->x_ix86_preferred_stack_boundary_arg;
4363 ptr->x_ix86_recip_name = opts->x_ix86_recip_name;
4364 ptr->x_ix86_regparm = opts->x_ix86_regparm;
4365 ptr->x_ix86_section_threshold = opts->x_ix86_section_threshold;
4366 ptr->x_ix86_sse2avx = opts->x_ix86_sse2avx;
4367 ptr->x_ix86_stack_protector_guard = opts->x_ix86_stack_protector_guard;
4368 ptr->x_ix86_stringop_alg = opts->x_ix86_stringop_alg;
4369 ptr->x_ix86_tls_dialect = opts->x_ix86_tls_dialect;
4370 ptr->x_ix86_tune_ctrl_string = opts->x_ix86_tune_ctrl_string;
4371 ptr->x_ix86_tune_memcpy_strategy = opts->x_ix86_tune_memcpy_strategy;
4372 ptr->x_ix86_tune_memset_strategy = opts->x_ix86_tune_memset_strategy;
4373 ptr->x_ix86_tune_no_default = opts->x_ix86_tune_no_default;
4374 ptr->x_ix86_veclibabi_type = opts->x_ix86_veclibabi_type;
4375
4376 /* The fields are char but the variables are not; make sure the
4377 values fit in the fields. */
4378 gcc_assert (ptr->arch == ix86_arch);
4379 gcc_assert (ptr->schedule == ix86_schedule);
4380 gcc_assert (ptr->tune == ix86_tune);
4381 gcc_assert (ptr->branch_cost == ix86_branch_cost);
4382 }
4383
4384 /* Restore the current options */
4385
4386 static void
4387 ix86_function_specific_restore (struct gcc_options *opts,
4388 struct cl_target_option *ptr)
4389 {
4390 enum processor_type old_tune = ix86_tune;
4391 enum processor_type old_arch = ix86_arch;
4392 unsigned int ix86_arch_mask;
4393 int i;
4394
4395 /* We don't change -fPIC. */
4396 opts->x_flag_pic = flag_pic;
4397
4398 ix86_arch = (enum processor_type) ptr->arch;
4399 ix86_schedule = (enum attr_cpu) ptr->schedule;
4400 ix86_tune = (enum processor_type) ptr->tune;
4401 opts->x_ix86_branch_cost = ptr->branch_cost;
4402 ix86_tune_defaulted = ptr->tune_defaulted;
4403 ix86_arch_specified = ptr->arch_specified;
4404 opts->x_ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
4405 opts->x_ix86_target_flags_explicit = ptr->x_ix86_target_flags_explicit;
4406 opts->x_recip_mask_explicit = ptr->x_recip_mask_explicit;
4407 opts->x_ix86_arch_string = ptr->x_ix86_arch_string;
4408 opts->x_ix86_tune_string = ptr->x_ix86_tune_string;
4409 opts->x_ix86_cmodel = ptr->x_ix86_cmodel;
4410 opts->x_ix86_abi = ptr->x_ix86_abi;
4411 opts->x_ix86_asm_dialect = ptr->x_ix86_asm_dialect;
4412 opts->x_ix86_branch_cost = ptr->x_ix86_branch_cost;
4413 opts->x_ix86_dump_tunes = ptr->x_ix86_dump_tunes;
4414 opts->x_ix86_force_align_arg_pointer = ptr->x_ix86_force_align_arg_pointer;
4415 opts->x_ix86_force_drap = ptr->x_ix86_force_drap;
4416 opts->x_ix86_incoming_stack_boundary_arg = ptr->x_ix86_incoming_stack_boundary_arg;
4417 opts->x_ix86_pmode = ptr->x_ix86_pmode;
4418 opts->x_ix86_preferred_stack_boundary_arg = ptr->x_ix86_preferred_stack_boundary_arg;
4419 opts->x_ix86_recip_name = ptr->x_ix86_recip_name;
4420 opts->x_ix86_regparm = ptr->x_ix86_regparm;
4421 opts->x_ix86_section_threshold = ptr->x_ix86_section_threshold;
4422 opts->x_ix86_sse2avx = ptr->x_ix86_sse2avx;
4423 opts->x_ix86_stack_protector_guard = ptr->x_ix86_stack_protector_guard;
4424 opts->x_ix86_stringop_alg = ptr->x_ix86_stringop_alg;
4425 opts->x_ix86_tls_dialect = ptr->x_ix86_tls_dialect;
4426 opts->x_ix86_tune_ctrl_string = ptr->x_ix86_tune_ctrl_string;
4427 opts->x_ix86_tune_memcpy_strategy = ptr->x_ix86_tune_memcpy_strategy;
4428 opts->x_ix86_tune_memset_strategy = ptr->x_ix86_tune_memset_strategy;
4429 opts->x_ix86_tune_no_default = ptr->x_ix86_tune_no_default;
4430 opts->x_ix86_veclibabi_type = ptr->x_ix86_veclibabi_type;
4431
4432 /* Recreate the arch feature tests if the arch changed */
4433 if (old_arch != ix86_arch)
4434 {
4435 ix86_arch_mask = 1u << ix86_arch;
4436 for (i = 0; i < X86_ARCH_LAST; ++i)
4437 ix86_arch_features[i]
4438 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
4439 }
4440
4441 /* Recreate the tune optimization tests */
4442 if (old_tune != ix86_tune)
4443 set_ix86_tune_features (ix86_tune, false);
4444 }
4445
4446 /* Print the current options */
4447
4448 static void
4449 ix86_function_specific_print (FILE *file, int indent,
4450 struct cl_target_option *ptr)
4451 {
4452 char *target_string
4453 = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
4454 NULL, NULL, ptr->x_ix86_fpmath, false);
4455
4456 gcc_assert (ptr->arch < PROCESSOR_max);
4457 fprintf (file, "%*sarch = %d (%s)\n",
4458 indent, "",
4459 ptr->arch, processor_target_table[ptr->arch].name);
4460
4461 gcc_assert (ptr->tune < PROCESSOR_max);
4462 fprintf (file, "%*stune = %d (%s)\n",
4463 indent, "",
4464 ptr->tune, processor_target_table[ptr->tune].name);
4465
4466 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
4467
4468 if (target_string)
4469 {
4470 fprintf (file, "%*s%s\n", indent, "", target_string);
4471 free (target_string);
4472 }
4473 }
4474
4475 \f
4476 /* Inner function to process the attribute((target(...))), take an argument and
4477 set the current options from the argument. If we have a list, recursively go
4478 over the list. */
4479
4480 static bool
4481 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
4482 struct gcc_options *opts,
4483 struct gcc_options *opts_set,
4484 struct gcc_options *enum_opts_set)
4485 {
4486 char *next_optstr;
4487 bool ret = true;
4488
4489 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
4490 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
4491 #define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
4492 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
4493 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
4494
4495 enum ix86_opt_type
4496 {
4497 ix86_opt_unknown,
4498 ix86_opt_yes,
4499 ix86_opt_no,
4500 ix86_opt_str,
4501 ix86_opt_enum,
4502 ix86_opt_isa
4503 };
4504
4505 static const struct
4506 {
4507 const char *string;
4508 size_t len;
4509 enum ix86_opt_type type;
4510 int opt;
4511 int mask;
4512 } attrs[] = {
4513 /* isa options */
4514 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
4515 IX86_ATTR_ISA ("abm", OPT_mabm),
4516 IX86_ATTR_ISA ("bmi", OPT_mbmi),
4517 IX86_ATTR_ISA ("bmi2", OPT_mbmi2),
4518 IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt),
4519 IX86_ATTR_ISA ("tbm", OPT_mtbm),
4520 IX86_ATTR_ISA ("aes", OPT_maes),
4521 IX86_ATTR_ISA ("sha", OPT_msha),
4522 IX86_ATTR_ISA ("avx", OPT_mavx),
4523 IX86_ATTR_ISA ("avx2", OPT_mavx2),
4524 IX86_ATTR_ISA ("avx512f", OPT_mavx512f),
4525 IX86_ATTR_ISA ("avx512pf", OPT_mavx512pf),
4526 IX86_ATTR_ISA ("avx512er", OPT_mavx512er),
4527 IX86_ATTR_ISA ("avx512cd", OPT_mavx512cd),
4528 IX86_ATTR_ISA ("mmx", OPT_mmmx),
4529 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
4530 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
4531 IX86_ATTR_ISA ("sse", OPT_msse),
4532 IX86_ATTR_ISA ("sse2", OPT_msse2),
4533 IX86_ATTR_ISA ("sse3", OPT_msse3),
4534 IX86_ATTR_ISA ("sse4", OPT_msse4),
4535 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
4536 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
4537 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
4538 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
4539 IX86_ATTR_ISA ("fma4", OPT_mfma4),
4540 IX86_ATTR_ISA ("fma", OPT_mfma),
4541 IX86_ATTR_ISA ("xop", OPT_mxop),
4542 IX86_ATTR_ISA ("lwp", OPT_mlwp),
4543 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
4544 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
4545 IX86_ATTR_ISA ("f16c", OPT_mf16c),
4546 IX86_ATTR_ISA ("rtm", OPT_mrtm),
4547 IX86_ATTR_ISA ("hle", OPT_mhle),
4548 IX86_ATTR_ISA ("prfchw", OPT_mprfchw),
4549 IX86_ATTR_ISA ("rdseed", OPT_mrdseed),
4550 IX86_ATTR_ISA ("adx", OPT_madx),
4551 IX86_ATTR_ISA ("fxsr", OPT_mfxsr),
4552 IX86_ATTR_ISA ("xsave", OPT_mxsave),
4553 IX86_ATTR_ISA ("xsaveopt", OPT_mxsaveopt),
4554 IX86_ATTR_ISA ("prefetchwt1", OPT_mprefetchwt1),
4555
4556 /* enum options */
4557 IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
4558
4559 /* string options */
4560 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
4561 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
4562
4563 /* flag options */
4564 IX86_ATTR_YES ("cld",
4565 OPT_mcld,
4566 MASK_CLD),
4567
4568 IX86_ATTR_NO ("fancy-math-387",
4569 OPT_mfancy_math_387,
4570 MASK_NO_FANCY_MATH_387),
4571
4572 IX86_ATTR_YES ("ieee-fp",
4573 OPT_mieee_fp,
4574 MASK_IEEE_FP),
4575
4576 IX86_ATTR_YES ("inline-all-stringops",
4577 OPT_minline_all_stringops,
4578 MASK_INLINE_ALL_STRINGOPS),
4579
4580 IX86_ATTR_YES ("inline-stringops-dynamically",
4581 OPT_minline_stringops_dynamically,
4582 MASK_INLINE_STRINGOPS_DYNAMICALLY),
4583
4584 IX86_ATTR_NO ("align-stringops",
4585 OPT_mno_align_stringops,
4586 MASK_NO_ALIGN_STRINGOPS),
4587
4588 IX86_ATTR_YES ("recip",
4589 OPT_mrecip,
4590 MASK_RECIP),
4591
4592 };
4593
4594 /* If this is a list, recurse to get the options. */
4595 if (TREE_CODE (args) == TREE_LIST)
4596 {
4597 bool ret = true;
4598
4599 for (; args; args = TREE_CHAIN (args))
4600 if (TREE_VALUE (args)
4601 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
4602 p_strings, opts, opts_set,
4603 enum_opts_set))
4604 ret = false;
4605
4606 return ret;
4607 }
4608
4609 else if (TREE_CODE (args) != STRING_CST)
4610 {
4611 error ("attribute %<target%> argument not a string");
4612 return false;
4613 }
4614
4615 /* Handle multiple arguments separated by commas. */
4616 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
4617
4618 while (next_optstr && *next_optstr != '\0')
4619 {
4620 char *p = next_optstr;
4621 char *orig_p = p;
4622 char *comma = strchr (next_optstr, ',');
4623 const char *opt_string;
4624 size_t len, opt_len;
4625 int opt;
4626 bool opt_set_p;
4627 char ch;
4628 unsigned i;
4629 enum ix86_opt_type type = ix86_opt_unknown;
4630 int mask = 0;
4631
4632 if (comma)
4633 {
4634 *comma = '\0';
4635 len = comma - next_optstr;
4636 next_optstr = comma + 1;
4637 }
4638 else
4639 {
4640 len = strlen (p);
4641 next_optstr = NULL;
4642 }
4643
4644 /* Recognize no-xxx. */
4645 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
4646 {
4647 opt_set_p = false;
4648 p += 3;
4649 len -= 3;
4650 }
4651 else
4652 opt_set_p = true;
4653
4654 /* Find the option. */
4655 ch = *p;
4656 opt = N_OPTS;
4657 for (i = 0; i < ARRAY_SIZE (attrs); i++)
4658 {
4659 type = attrs[i].type;
4660 opt_len = attrs[i].len;
4661 if (ch == attrs[i].string[0]
4662 && ((type != ix86_opt_str && type != ix86_opt_enum)
4663 ? len == opt_len
4664 : len > opt_len)
4665 && memcmp (p, attrs[i].string, opt_len) == 0)
4666 {
4667 opt = attrs[i].opt;
4668 mask = attrs[i].mask;
4669 opt_string = attrs[i].string;
4670 break;
4671 }
4672 }
4673
4674 /* Process the option. */
4675 if (opt == N_OPTS)
4676 {
4677 error ("attribute(target(\"%s\")) is unknown", orig_p);
4678 ret = false;
4679 }
4680
4681 else if (type == ix86_opt_isa)
4682 {
4683 struct cl_decoded_option decoded;
4684
4685 generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
4686 ix86_handle_option (opts, opts_set,
4687 &decoded, input_location);
4688 }
4689
4690 else if (type == ix86_opt_yes || type == ix86_opt_no)
4691 {
4692 if (type == ix86_opt_no)
4693 opt_set_p = !opt_set_p;
4694
4695 if (opt_set_p)
4696 opts->x_target_flags |= mask;
4697 else
4698 opts->x_target_flags &= ~mask;
4699 }
4700
4701 else if (type == ix86_opt_str)
4702 {
4703 if (p_strings[opt])
4704 {
4705 error ("option(\"%s\") was already specified", opt_string);
4706 ret = false;
4707 }
4708 else
4709 p_strings[opt] = xstrdup (p + opt_len);
4710 }
4711
4712 else if (type == ix86_opt_enum)
4713 {
4714 bool arg_ok;
4715 int value;
4716
4717 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
4718 if (arg_ok)
4719 set_option (opts, enum_opts_set, opt, value,
4720 p + opt_len, DK_UNSPECIFIED, input_location,
4721 global_dc);
4722 else
4723 {
4724 error ("attribute(target(\"%s\")) is unknown", orig_p);
4725 ret = false;
4726 }
4727 }
4728
4729 else
4730 gcc_unreachable ();
4731 }
4732
4733 return ret;
4734 }
4735
4736 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
4737
4738 tree
4739 ix86_valid_target_attribute_tree (tree args,
4740 struct gcc_options *opts,
4741 struct gcc_options *opts_set)
4742 {
4743 const char *orig_arch_string = opts->x_ix86_arch_string;
4744 const char *orig_tune_string = opts->x_ix86_tune_string;
4745 enum fpmath_unit orig_fpmath_set = opts_set->x_ix86_fpmath;
4746 int orig_tune_defaulted = ix86_tune_defaulted;
4747 int orig_arch_specified = ix86_arch_specified;
4748 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
4749 tree t = NULL_TREE;
4750 int i;
4751 struct cl_target_option *def
4752 = TREE_TARGET_OPTION (target_option_default_node);
4753 struct gcc_options enum_opts_set;
4754
4755 memset (&enum_opts_set, 0, sizeof (enum_opts_set));
4756
4757 /* Process each of the options on the chain. */
4758 if (! ix86_valid_target_attribute_inner_p (args, option_strings, opts,
4759 opts_set, &enum_opts_set))
4760 return error_mark_node;
4761
4762 /* If the changed options are different from the default, rerun
4763 ix86_option_override_internal, and then save the options away.
4764 The string options are attribute options, and will be undone
4765 when we copy the save structure. */
4766 if (opts->x_ix86_isa_flags != def->x_ix86_isa_flags
4767 || opts->x_target_flags != def->x_target_flags
4768 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
4769 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
4770 || enum_opts_set.x_ix86_fpmath)
4771 {
4772 /* If we are using the default tune= or arch=, undo the string assigned,
4773 and use the default. */
4774 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
4775 opts->x_ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
4776 else if (!orig_arch_specified)
4777 opts->x_ix86_arch_string = NULL;
4778
4779 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
4780 opts->x_ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
4781 else if (orig_tune_defaulted)
4782 opts->x_ix86_tune_string = NULL;
4783
4784 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4785 if (enum_opts_set.x_ix86_fpmath)
4786 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
4787 else if (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
4788 && TARGET_SSE_P (opts->x_ix86_isa_flags))
4789 {
4790 opts->x_ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
4791 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
4792 }
4793
4794 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
4795 ix86_option_override_internal (false, opts, opts_set);
4796
4797 /* Add any builtin functions with the new isa if any. */
4798 ix86_add_new_builtins (opts->x_ix86_isa_flags);
4799
4800 /* Save the current options unless we are validating options for
4801 #pragma. */
4802 t = build_target_option_node (opts);
4803
4804 opts->x_ix86_arch_string = orig_arch_string;
4805 opts->x_ix86_tune_string = orig_tune_string;
4806 opts_set->x_ix86_fpmath = orig_fpmath_set;
4807
4808 /* Free up memory allocated to hold the strings */
4809 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
4810 free (option_strings[i]);
4811 }
4812
4813 return t;
4814 }
4815
4816 /* Hook to validate attribute((target("string"))). */
4817
4818 static bool
4819 ix86_valid_target_attribute_p (tree fndecl,
4820 tree ARG_UNUSED (name),
4821 tree args,
4822 int ARG_UNUSED (flags))
4823 {
4824 struct gcc_options func_options;
4825 tree new_target, new_optimize;
4826 bool ret = true;
4827
4828 /* attribute((target("default"))) does nothing, beyond
4829 affecting multi-versioning. */
4830 if (TREE_VALUE (args)
4831 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
4832 && TREE_CHAIN (args) == NULL_TREE
4833 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
4834 return true;
4835
4836 tree old_optimize = build_optimization_node (&global_options);
4837
4838 /* Get the optimization options of the current function. */
4839 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
4840
4841 if (!func_optimize)
4842 func_optimize = old_optimize;
4843
4844 /* Init func_options. */
4845 memset (&func_options, 0, sizeof (func_options));
4846 init_options_struct (&func_options, NULL);
4847 lang_hooks.init_options_struct (&func_options);
4848
4849 cl_optimization_restore (&func_options,
4850 TREE_OPTIMIZATION (func_optimize));
4851
4852 /* Initialize func_options to the default before its target options can
4853 be set. */
4854 cl_target_option_restore (&func_options,
4855 TREE_TARGET_OPTION (target_option_default_node));
4856
4857 new_target = ix86_valid_target_attribute_tree (args, &func_options,
4858 &global_options_set);
4859
4860 new_optimize = build_optimization_node (&func_options);
4861
4862 if (new_target == error_mark_node)
4863 ret = false;
4864
4865 else if (fndecl && new_target)
4866 {
4867 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
4868
4869 if (old_optimize != new_optimize)
4870 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
4871 }
4872
4873 return ret;
4874 }
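/* As an illustration of what the two hooks above validate (hypothetical
   user code, not part of this file), a translation unit might contain:

     __attribute__ ((target ("avx2,no-sse4a")))
     static int
     sum_ints (const int *a, int n)
     {
       int i, s = 0;
       for (i = 0; i < n; i++)
         s += a[i];
       return s;
     }

   The comma-separated attribute string is split and parsed by
   ix86_valid_target_attribute_inner_p just as if -mavx2 and -mno-sse4a
   had been given on the command line for this one function.  */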
4875
4876 \f
4877 /* Hook to determine if one function can safely inline another. */
4878
4879 static bool
4880 ix86_can_inline_p (tree caller, tree callee)
4881 {
4882 bool ret = false;
4883 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
4884 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
4885
4886 /* If callee has no option attributes, then it is ok to inline. */
4887 if (!callee_tree)
4888 ret = true;
4889
4890 /* If caller has no option attributes, but callee does then it is not ok to
4891 inline. */
4892 else if (!caller_tree)
4893 ret = false;
4894
4895 else
4896 {
4897 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
4898 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
4899
4900 /* Callee's isa options should be a subset of the caller's, i.e. an SSE4
4901 function can inline an SSE2 function but an SSE2 function can't inline
4902 an SSE4 function. */
4903 if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
4904 != callee_opts->x_ix86_isa_flags)
4905 ret = false;
4906
4907 /* See if we have the same non-isa options. */
4908 else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
4909 ret = false;
4910
4911 /* See if arch, tune, etc. are the same. */
4912 else if (caller_opts->arch != callee_opts->arch)
4913 ret = false;
4914
4915 else if (caller_opts->tune != callee_opts->tune)
4916 ret = false;
4917
4918 else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath)
4919 ret = false;
4920
4921 else if (caller_opts->branch_cost != callee_opts->branch_cost)
4922 ret = false;
4923
4924 else
4925 ret = true;
4926 }
4927
4928 return ret;
4929 }
4930
4931 \f
4932 /* Remember the last target of ix86_set_current_function. */
4933 static GTY(()) tree ix86_previous_fndecl;
4934
4935 /* Invalidate ix86_previous_fndecl cache. */
4936 void
4937 ix86_reset_previous_fndecl (void)
4938 {
4939 ix86_previous_fndecl = NULL_TREE;
4940 }
4941
4942 /* Establish appropriate back-end context for processing the function
4943 FNDECL. The argument might be NULL to indicate processing at top
4944 level, outside of any function scope. */
4945 static void
4946 ix86_set_current_function (tree fndecl)
4947 {
4948 /* Only change the context if the function changes. This hook is called
4949 several times in the course of compiling a function, and we don't want to
4950 slow things down too much or call target_reinit when it isn't safe. */
4951 if (fndecl && fndecl != ix86_previous_fndecl)
4952 {
4953 tree old_tree = (ix86_previous_fndecl
4954 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
4955 : NULL_TREE);
4956
4957 tree new_tree = (fndecl
4958 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
4959 : NULL_TREE);
4960
4961 ix86_previous_fndecl = fndecl;
4962 if (old_tree == new_tree)
4963 ;
4964
4965 else if (new_tree)
4966 {
4967 cl_target_option_restore (&global_options,
4968 TREE_TARGET_OPTION (new_tree));
4969 if (TREE_TARGET_GLOBALS (new_tree))
4970 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
4971 else
4972 TREE_TARGET_GLOBALS (new_tree)
4973 = save_target_globals_default_opts ();
4974 }
4975
4976 else if (old_tree)
4977 {
4978 new_tree = target_option_current_node;
4979 cl_target_option_restore (&global_options,
4980 TREE_TARGET_OPTION (new_tree));
4981 if (TREE_TARGET_GLOBALS (new_tree))
4982 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
4983 else if (new_tree == target_option_default_node)
4984 restore_target_globals (&default_target_globals);
4985 else
4986 TREE_TARGET_GLOBALS (new_tree)
4987 = save_target_globals_default_opts ();
4988 }
4989 }
4990 }
4991
4992 \f
4993 /* Return true if this goes in large data/bss. */
4994
4995 static bool
4996 ix86_in_large_data_p (tree exp)
4997 {
4998 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
4999 return false;
5000
5001 /* Functions are never large data. */
5002 if (TREE_CODE (exp) == FUNCTION_DECL)
5003 return false;
5004
5005 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
5006 {
5007 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
5008 if (strcmp (section, ".ldata") == 0
5009 || strcmp (section, ".lbss") == 0)
5010 return true;
5011 return false;
5012 }
5013 else
5014 {
5015 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
5016
5017 /* If this is an incomplete type with size 0, then we can't put it
5018 in data because it might be too big when completed. */
5019 if (!size || size > ix86_section_threshold)
5020 return true;
5021 }
5022
5023 return false;
5024 }
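/* For example (hypothetical user code), when compiling with
   -mcmodel=medium and the default -mlarge-data-threshold, an object
   larger than the threshold such as

     static char big_buffer[1 << 20];

   is treated as large data by the test above and ends up in the .lbss
   (or .ldata) sections chosen by the hooks below, while smaller objects
   stay in the ordinary .bss/.data sections.  */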
5025
5026 /* Switch to the appropriate section for output of DECL.
5027 DECL is either a `VAR_DECL' node or a constant of some sort.
5028 RELOC indicates whether forming the initial value of DECL requires
5029 link-time relocations. */
5030
5031 ATTRIBUTE_UNUSED static section *
5032 x86_64_elf_select_section (tree decl, int reloc,
5033 unsigned HOST_WIDE_INT align)
5034 {
5035 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5036 && ix86_in_large_data_p (decl))
5037 {
5038 const char *sname = NULL;
5039 unsigned int flags = SECTION_WRITE;
5040 switch (categorize_decl_for_section (decl, reloc))
5041 {
5042 case SECCAT_DATA:
5043 sname = ".ldata";
5044 break;
5045 case SECCAT_DATA_REL:
5046 sname = ".ldata.rel";
5047 break;
5048 case SECCAT_DATA_REL_LOCAL:
5049 sname = ".ldata.rel.local";
5050 break;
5051 case SECCAT_DATA_REL_RO:
5052 sname = ".ldata.rel.ro";
5053 break;
5054 case SECCAT_DATA_REL_RO_LOCAL:
5055 sname = ".ldata.rel.ro.local";
5056 break;
5057 case SECCAT_BSS:
5058 sname = ".lbss";
5059 flags |= SECTION_BSS;
5060 break;
5061 case SECCAT_RODATA:
5062 case SECCAT_RODATA_MERGE_STR:
5063 case SECCAT_RODATA_MERGE_STR_INIT:
5064 case SECCAT_RODATA_MERGE_CONST:
5065 sname = ".lrodata";
5066 flags = 0;
5067 break;
5068 case SECCAT_SRODATA:
5069 case SECCAT_SDATA:
5070 case SECCAT_SBSS:
5071 gcc_unreachable ();
5072 case SECCAT_TEXT:
5073 case SECCAT_TDATA:
5074 case SECCAT_TBSS:
5075 /* We don't split these for the medium model. Place them into
5076 default sections and hope for the best. */
5077 break;
5078 }
5079 if (sname)
5080 {
5081 /* We might get called with string constants, but get_named_section
5082 doesn't like them as they are not DECLs. Also, we need to set
5083 flags in that case. */
5084 if (!DECL_P (decl))
5085 return get_section (sname, flags, NULL);
5086 return get_named_section (decl, sname, reloc);
5087 }
5088 }
5089 return default_elf_select_section (decl, reloc, align);
5090 }
5091
5092 /* Select a set of attributes for section NAME based on the properties
5093 of DECL and whether or not RELOC indicates that DECL's initializer
5094 might contain runtime relocations. */
5095
5096 static unsigned int ATTRIBUTE_UNUSED
5097 x86_64_elf_section_type_flags (tree decl, const char *name, int reloc)
5098 {
5099 unsigned int flags = default_section_type_flags (decl, name, reloc);
5100
5101 if (decl == NULL_TREE
5102 && (strcmp (name, ".ldata.rel.ro") == 0
5103 || strcmp (name, ".ldata.rel.ro.local") == 0))
5104 flags |= SECTION_RELRO;
5105
5106 if (strcmp (name, ".lbss") == 0
5107 || strncmp (name, ".lbss.", 5) == 0
5108 || strncmp (name, ".gnu.linkonce.lb.", 16) == 0)
5109 flags |= SECTION_BSS;
5110
5111 return flags;
5112 }
5113
5114 /* Build up a unique section name, expressed as a
5115 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
5116 RELOC indicates whether the initial value of EXP requires
5117 link-time relocations. */
5118
5119 static void ATTRIBUTE_UNUSED
5120 x86_64_elf_unique_section (tree decl, int reloc)
5121 {
5122 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5123 && ix86_in_large_data_p (decl))
5124 {
5125 const char *prefix = NULL;
5126 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
5127 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
5128
5129 switch (categorize_decl_for_section (decl, reloc))
5130 {
5131 case SECCAT_DATA:
5132 case SECCAT_DATA_REL:
5133 case SECCAT_DATA_REL_LOCAL:
5134 case SECCAT_DATA_REL_RO:
5135 case SECCAT_DATA_REL_RO_LOCAL:
5136 prefix = one_only ? ".ld" : ".ldata";
5137 break;
5138 case SECCAT_BSS:
5139 prefix = one_only ? ".lb" : ".lbss";
5140 break;
5141 case SECCAT_RODATA:
5142 case SECCAT_RODATA_MERGE_STR:
5143 case SECCAT_RODATA_MERGE_STR_INIT:
5144 case SECCAT_RODATA_MERGE_CONST:
5145 prefix = one_only ? ".lr" : ".lrodata";
5146 break;
5147 case SECCAT_SRODATA:
5148 case SECCAT_SDATA:
5149 case SECCAT_SBSS:
5150 gcc_unreachable ();
5151 case SECCAT_TEXT:
5152 case SECCAT_TDATA:
5153 case SECCAT_TBSS:
5154 /* We don't split these for the medium model. Place them into
5155 default sections and hope for the best. */
5156 break;
5157 }
5158 if (prefix)
5159 {
5160 const char *name, *linkonce;
5161 char *string;
5162
5163 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
5164 name = targetm.strip_name_encoding (name);
5165
5166 /* If we're using one_only, then there needs to be a .gnu.linkonce
5167 prefix to the section name. */
5168 linkonce = one_only ? ".gnu.linkonce" : "";
5169
5170 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
5171
5172 DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
5173 return;
5174 }
5175 }
5176 default_unique_section (decl, reloc);
5177 }
5178
5179 #ifdef COMMON_ASM_OP
5180 /* This says how to output assembler code to declare an
5181 uninitialized external linkage data object.
5182
5183 For medium-model x86-64 we need to use the .largecomm directive for
5184 large objects. */
5185 void
5186 x86_elf_aligned_common (FILE *file,
5187 const char *name, unsigned HOST_WIDE_INT size,
5188 int align)
5189 {
5190 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5191 && size > (unsigned int)ix86_section_threshold)
5192 fputs (".largecomm\t", file);
5193 else
5194 fputs (COMMON_ASM_OP, file);
5195 assemble_name (file, name);
5196 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
5197 size, align / BITS_PER_UNIT);
5198 }
5199 #endif
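/* Illustrative sketch of the output: for a common symbol above the
   large-data threshold under -mcmodel=medium the function above emits
   something along the lines of

     .largecomm big_common,1048576,32

   while an ordinary common symbol uses the usual COMMON_ASM_OP
   directive.  */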
5200
5201 /* Utility function for targets to use in implementing
5202 ASM_OUTPUT_ALIGNED_BSS. */
5203
5204 void
5205 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
5206 const char *name, unsigned HOST_WIDE_INT size,
5207 int align)
5208 {
5209 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5210 && size > (unsigned int)ix86_section_threshold)
5211 switch_to_section (get_named_section (decl, ".lbss", 0));
5212 else
5213 switch_to_section (bss_section);
5214 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
5215 #ifdef ASM_DECLARE_OBJECT_NAME
5216 last_assemble_variable_decl = decl;
5217 ASM_DECLARE_OBJECT_NAME (file, name, decl);
5218 #else
5219 /* Standard thing is just output label for the object. */
5220 ASM_OUTPUT_LABEL (file, name);
5221 #endif /* ASM_DECLARE_OBJECT_NAME */
5222 ASM_OUTPUT_SKIP (file, size ? size : 1);
5223 }
5224 \f
5225 /* Decide whether we must probe the stack before any space allocation
5226 on this target. It's essentially TARGET_STACK_PROBE except when
5227 -fstack-check causes the stack to be already probed differently. */
5228
5229 bool
5230 ix86_target_stack_probe (void)
5231 {
5232 /* Do not probe the stack twice if static stack checking is enabled. */
5233 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5234 return false;
5235
5236 return TARGET_STACK_PROBE;
5237 }
5238 \f
5239 /* Decide whether we can make a sibling call to a function. DECL is the
5240 declaration of the function being targeted by the call and EXP is the
5241 CALL_EXPR representing the call. */
5242
5243 static bool
5244 ix86_function_ok_for_sibcall (tree decl, tree exp)
5245 {
5246 tree type, decl_or_type;
5247 rtx a, b;
5248
5249 /* If we are generating position-independent code, we cannot sibcall
5250 optimize any indirect call, or a direct call to a global function,
5251 as the PLT requires %ebx be live. (Darwin does not have a PLT.) */
5252 if (!TARGET_MACHO
5253 && !TARGET_64BIT
5254 && flag_pic
5255 && (!decl || !targetm.binds_local_p (decl)))
5256 return false;
5257
5258 /* If we need to align the outgoing stack, then sibcalling would
5259 unalign the stack, which may break the called function. */
5260 if (ix86_minimum_incoming_stack_boundary (true)
5261 < PREFERRED_STACK_BOUNDARY)
5262 return false;
5263
5264 if (decl)
5265 {
5266 decl_or_type = decl;
5267 type = TREE_TYPE (decl);
5268 }
5269 else
5270 {
5271 /* We're looking at the CALL_EXPR, we need the type of the function. */
5272 type = CALL_EXPR_FN (exp); /* pointer expression */
5273 type = TREE_TYPE (type); /* pointer type */
5274 type = TREE_TYPE (type); /* function type */
5275 decl_or_type = type;
5276 }
5277
5278 /* Check that the return value locations are the same. Like
5279 if we are returning floats on the 80387 register stack, we cannot
5280 make a sibcall from a function that doesn't return a float to a
5281 function that does or, conversely, from a function that does return
5282 a float to a function that doesn't; the necessary stack adjustment
5283 would not be executed. This is also the place we notice
5284 differences in the return value ABI. Note that it is ok for one
5285 of the functions to have void return type as long as the return
5286 value of the other is passed in a register. */
5287 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
5288 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
5289 cfun->decl, false);
5290 if (STACK_REG_P (a) || STACK_REG_P (b))
5291 {
5292 if (!rtx_equal_p (a, b))
5293 return false;
5294 }
5295 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
5296 ;
5297 else if (!rtx_equal_p (a, b))
5298 return false;
5299
5300 if (TARGET_64BIT)
5301 {
5302 /* The SYSV ABI has more call-clobbered registers;
5303 disallow sibcalls from MS to SYSV. */
5304 if (cfun->machine->call_abi == MS_ABI
5305 && ix86_function_type_abi (type) == SYSV_ABI)
5306 return false;
5307 }
5308 else
5309 {
5310 /* If this call is indirect, we'll need to be able to use a
5311 call-clobbered register for the address of the target function.
5312 Make sure that all such registers are not used for passing
5313 parameters. Note that DLLIMPORT functions are indirect. */
5314 if (!decl
5315 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
5316 {
5317 if (ix86_function_regparm (type, NULL) >= 3)
5318 {
5319 /* ??? Need to count the actual number of registers to be used,
5320 not the possible number of registers. Fix later. */
5321 return false;
5322 }
5323 }
5324 }
5325
5326 /* Otherwise okay. That also includes certain types of indirect calls. */
5327 return true;
5328 }
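/* A hypothetical illustration of the x87 return-value check above
   (assuming the other conditions in this function are met):

     double callee (double x);

     double
     may_sibcall (double x)
     {
       return callee (x);   // both values live in %st(0): check passes
     }

     void
     no_sibcall (double x)
     {
       callee (x);          // callee leaves a value on the x87 stack that
                            // the caller would have to pop, so this cannot
                            // be a sibling call on ia32
     }

   On x86-64 the result of CALLEE is returned in %xmm0 instead, so the
   second case does not arise there.  */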
5329
5330 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
5331 and "sseregparm" calling convention attributes;
5332 arguments as in struct attribute_spec.handler. */
5333
5334 static tree
5335 ix86_handle_cconv_attribute (tree *node, tree name,
5336 tree args,
5337 int flags ATTRIBUTE_UNUSED,
5338 bool *no_add_attrs)
5339 {
5340 if (TREE_CODE (*node) != FUNCTION_TYPE
5341 && TREE_CODE (*node) != METHOD_TYPE
5342 && TREE_CODE (*node) != FIELD_DECL
5343 && TREE_CODE (*node) != TYPE_DECL)
5344 {
5345 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5346 name);
5347 *no_add_attrs = true;
5348 return NULL_TREE;
5349 }
5350
5351 /* Can combine regparm with all attributes but fastcall and thiscall. */
5352 if (is_attribute_p ("regparm", name))
5353 {
5354 tree cst;
5355
5356 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5357 {
5358 error ("fastcall and regparm attributes are not compatible");
5359 }
5360
5361 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5362 {
5363 error ("regparam and thiscall attributes are not compatible");
5364 }
5365
5366 cst = TREE_VALUE (args);
5367 if (TREE_CODE (cst) != INTEGER_CST)
5368 {
5369 warning (OPT_Wattributes,
5370 "%qE attribute requires an integer constant argument",
5371 name);
5372 *no_add_attrs = true;
5373 }
5374 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
5375 {
5376 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
5377 name, REGPARM_MAX);
5378 *no_add_attrs = true;
5379 }
5380
5381 return NULL_TREE;
5382 }
5383
5384 if (TARGET_64BIT)
5385 {
5386 /* Do not warn when emulating the MS ABI. */
5387 if ((TREE_CODE (*node) != FUNCTION_TYPE
5388 && TREE_CODE (*node) != METHOD_TYPE)
5389 || ix86_function_type_abi (*node) != MS_ABI)
5390 warning (OPT_Wattributes, "%qE attribute ignored",
5391 name);
5392 *no_add_attrs = true;
5393 return NULL_TREE;
5394 }
5395
5396 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
5397 if (is_attribute_p ("fastcall", name))
5398 {
5399 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5400 {
5401 error ("fastcall and cdecl attributes are not compatible");
5402 }
5403 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5404 {
5405 error ("fastcall and stdcall attributes are not compatible");
5406 }
5407 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
5408 {
5409 error ("fastcall and regparm attributes are not compatible");
5410 }
5411 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5412 {
5413 error ("fastcall and thiscall attributes are not compatible");
5414 }
5415 }
5416
5417 /* Can combine stdcall with fastcall (redundant), regparm and
5418 sseregparm. */
5419 else if (is_attribute_p ("stdcall", name))
5420 {
5421 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5422 {
5423 error ("stdcall and cdecl attributes are not compatible");
5424 }
5425 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5426 {
5427 error ("stdcall and fastcall attributes are not compatible");
5428 }
5429 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5430 {
5431 error ("stdcall and thiscall attributes are not compatible");
5432 }
5433 }
5434
5435 /* Can combine cdecl with regparm and sseregparm. */
5436 else if (is_attribute_p ("cdecl", name))
5437 {
5438 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5439 {
5440 error ("stdcall and cdecl attributes are not compatible");
5441 }
5442 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5443 {
5444 error ("fastcall and cdecl attributes are not compatible");
5445 }
5446 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5447 {
5448 error ("cdecl and thiscall attributes are not compatible");
5449 }
5450 }
5451 else if (is_attribute_p ("thiscall", name))
5452 {
5453 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
5454 warning (OPT_Wattributes, "%qE attribute is used for none class-method",
5455 name);
5456 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5457 {
5458 error ("stdcall and thiscall attributes are not compatible");
5459 }
5460 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5461 {
5462 error ("fastcall and thiscall attributes are not compatible");
5463 }
5464 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5465 {
5466 error ("cdecl and thiscall attributes are not compatible");
5467 }
5468 }
5469
5470 /* Can combine sseregparm with all attributes. */
5471
5472 return NULL_TREE;
5473 }
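/* Illustrative (hypothetical) declarations using the calling-convention
   attributes handled above; regparm may be combined with stdcall or
   cdecl but, as diagnosed above, not with fastcall or thiscall:

     int __attribute__ ((regparm (3))) add3 (int a, int b, int c);
     int __attribute__ ((fastcall)) fadd (int a, int b);
     int __attribute__ ((stdcall, regparm (2))) sadd (int a, int b);

   These attributes only matter for 32-bit code; for 64-bit targets the
   handler above ignores them, warning unless the function uses the
   MS ABI.  */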
5474
5475 /* The transactional memory builtins are implicitly regparm or fastcall
5476 depending on the ABI. Override the generic do-nothing attribute that
5477 these builtins were declared with, and replace it with one of the two
5478 attributes that we expect elsewhere. */
5479
5480 static tree
5481 ix86_handle_tm_regparm_attribute (tree *node, tree name ATTRIBUTE_UNUSED,
5482 tree args ATTRIBUTE_UNUSED,
5483 int flags, bool *no_add_attrs)
5484 {
5485 tree alt;
5486
5487 /* In no case do we want to add the placeholder attribute. */
5488 *no_add_attrs = true;
5489
5490 /* The 64-bit ABI is unchanged for transactional memory. */
5491 if (TARGET_64BIT)
5492 return NULL_TREE;
5493
5494 /* ??? Is there a better way to validate 32-bit windows? We have
5495 cfun->machine->call_abi, but that seems to be set only for 64-bit. */
5496 if (CHECK_STACK_LIMIT > 0)
5497 alt = tree_cons (get_identifier ("fastcall"), NULL, NULL);
5498 else
5499 {
5500 alt = tree_cons (NULL, build_int_cst (NULL, 2), NULL);
5501 alt = tree_cons (get_identifier ("regparm"), alt, NULL);
5502 }
5503 decl_attributes (node, alt, flags);
5504
5505 return NULL_TREE;
5506 }
5507
5508 /* This function determines from TYPE the calling-convention. */
5509
5510 unsigned int
5511 ix86_get_callcvt (const_tree type)
5512 {
5513 unsigned int ret = 0;
5514 bool is_stdarg;
5515 tree attrs;
5516
5517 if (TARGET_64BIT)
5518 return IX86_CALLCVT_CDECL;
5519
5520 attrs = TYPE_ATTRIBUTES (type);
5521 if (attrs != NULL_TREE)
5522 {
5523 if (lookup_attribute ("cdecl", attrs))
5524 ret |= IX86_CALLCVT_CDECL;
5525 else if (lookup_attribute ("stdcall", attrs))
5526 ret |= IX86_CALLCVT_STDCALL;
5527 else if (lookup_attribute ("fastcall", attrs))
5528 ret |= IX86_CALLCVT_FASTCALL;
5529 else if (lookup_attribute ("thiscall", attrs))
5530 ret |= IX86_CALLCVT_THISCALL;
5531
5532 /* Regparm isn't allowed for thiscall and fastcall. */
5533 if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
5534 {
5535 if (lookup_attribute ("regparm", attrs))
5536 ret |= IX86_CALLCVT_REGPARM;
5537 if (lookup_attribute ("sseregparm", attrs))
5538 ret |= IX86_CALLCVT_SSEREGPARM;
5539 }
5540
5541 if (IX86_BASE_CALLCVT (ret) != 0)
5542 return ret;
5543 }
5544
5545 is_stdarg = stdarg_p (type);
5546 if (TARGET_RTD && !is_stdarg)
5547 return IX86_CALLCVT_STDCALL | ret;
5548
5549 if (ret != 0
5550 || is_stdarg
5551 || TREE_CODE (type) != METHOD_TYPE
5552 || ix86_function_type_abi (type) != MS_ABI)
5553 return IX86_CALLCVT_CDECL | ret;
5554
5555 return IX86_CALLCVT_THISCALL;
5556 }
5557
5558 /* Return 0 if the attributes for two types are incompatible, 1 if they
5559 are compatible, and 2 if they are nearly compatible (which causes a
5560 warning to be generated). */
5561
5562 static int
5563 ix86_comp_type_attributes (const_tree type1, const_tree type2)
5564 {
5565 unsigned int ccvt1, ccvt2;
5566
5567 if (TREE_CODE (type1) != FUNCTION_TYPE
5568 && TREE_CODE (type1) != METHOD_TYPE)
5569 return 1;
5570
5571 ccvt1 = ix86_get_callcvt (type1);
5572 ccvt2 = ix86_get_callcvt (type2);
5573 if (ccvt1 != ccvt2)
5574 return 0;
5575 if (ix86_function_regparm (type1, NULL)
5576 != ix86_function_regparm (type2, NULL))
5577 return 0;
5578
5579 return 1;
5580 }
5581 \f
5582 /* Return the regparm value for a function with the indicated TYPE and DECL.
5583 DECL may be NULL when calling function indirectly
5584 or considering a libcall. */
5585
5586 static int
5587 ix86_function_regparm (const_tree type, const_tree decl)
5588 {
5589 tree attr;
5590 int regparm;
5591 unsigned int ccvt;
5592
5593 if (TARGET_64BIT)
5594 return (ix86_function_type_abi (type) == SYSV_ABI
5595 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
5596 ccvt = ix86_get_callcvt (type);
5597 regparm = ix86_regparm;
5598
5599 if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
5600 {
5601 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
5602 if (attr)
5603 {
5604 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
5605 return regparm;
5606 }
5607 }
5608 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
5609 return 2;
5610 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
5611 return 1;
5612
5613 /* Use register calling convention for local functions when possible. */
5614 if (decl
5615 && TREE_CODE (decl) == FUNCTION_DECL
5616 /* Caller and callee must agree on the calling convention, so
5617 checking just 'optimize' here would mean that with
5618 __attribute__((optimize (...))) the caller could use the regparm
5619 convention and the callee not, or vice versa. Instead look at
5620 whether the callee is optimized or not. */
5621 && opt_for_fn (decl, optimize)
5622 && !(profile_flag && !flag_fentry))
5623 {
5624 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5625 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
5626 if (i && i->local && i->can_change_signature)
5627 {
5628 int local_regparm, globals = 0, regno;
5629
5630 /* Make sure no regparm register is taken by a
5631 fixed register variable. */
5632 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
5633 if (fixed_regs[local_regparm])
5634 break;
5635
5636 /* We don't want to use regparm(3) for nested functions as
5637 these use a static chain pointer in the third argument. */
5638 if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
5639 local_regparm = 2;
5640
5641 /* In 32-bit mode save a register for the split stack. */
5642 if (!TARGET_64BIT && local_regparm == 3 && flag_split_stack)
5643 local_regparm = 2;
5644
5645 /* Each fixed register usage increases register pressure,
5646 so fewer registers should be used for argument passing.
5647 This functionality can be overridden by an explicit
5648 regparm value. */
5649 for (regno = AX_REG; regno <= DI_REG; regno++)
5650 if (fixed_regs[regno])
5651 globals++;
5652
5653 local_regparm
5654 = globals < local_regparm ? local_regparm - globals : 0;
5655
5656 if (local_regparm > regparm)
5657 regparm = local_regparm;
5658 }
5659 }
5660
5661 return regparm;
5662 }
5663
5664 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
5665 DFmode (2) arguments in SSE registers for a function with the
5666 indicated TYPE and DECL. DECL may be NULL when calling function
5667 indirectly or considering a libcall. Otherwise return 0. */
5668
5669 static int
5670 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
5671 {
5672 gcc_assert (!TARGET_64BIT);
5673
5674 /* Use SSE registers to pass SFmode and DFmode arguments if requested
5675 by the sseregparm attribute. */
5676 if (TARGET_SSEREGPARM
5677 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
5678 {
5679 if (!TARGET_SSE)
5680 {
5681 if (warn)
5682 {
5683 if (decl)
5684 error ("calling %qD with attribute sseregparm without "
5685 "SSE/SSE2 enabled", decl);
5686 else
5687 error ("calling %qT with attribute sseregparm without "
5688 "SSE/SSE2 enabled", type);
5689 }
5690 return 0;
5691 }
5692
5693 return 2;
5694 }
5695
5696 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
5697 (and DFmode for SSE2) arguments in SSE registers. */
5698 if (decl && TARGET_SSE_MATH && optimize
5699 && !(profile_flag && !flag_fentry))
5700 {
5701 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5702 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
5703 if (i && i->local && i->can_change_signature)
5704 return TARGET_SSE2 ? 2 : 1;
5705 }
5706
5707 return 0;
5708 }
5709
5710 /* Return true if EAX is live at the start of the function. Used by
5711 ix86_expand_prologue to determine if we need special help before
5712 calling allocate_stack_worker. */
5713
5714 static bool
5715 ix86_eax_live_at_start_p (void)
5716 {
5717 /* Cheat. Don't bother working forward from ix86_function_regparm
5718 to the function type to whether an actual argument is located in
5719 eax. Instead just look at cfg info, which is still close enough
5720 to correct at this point. This gives false positives for broken
5721 functions that might use uninitialized data that happens to be
5722 allocated in eax, but who cares? */
5723 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0);
5724 }
5725
5726 static bool
5727 ix86_keep_aggregate_return_pointer (tree fntype)
5728 {
5729 tree attr;
5730
5731 if (!TARGET_64BIT)
5732 {
5733 attr = lookup_attribute ("callee_pop_aggregate_return",
5734 TYPE_ATTRIBUTES (fntype));
5735 if (attr)
5736 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
5737
5738 /* For 32-bit MS-ABI the default is to keep aggregate
5739 return pointer. */
5740 if (ix86_function_type_abi (fntype) == MS_ABI)
5741 return true;
5742 }
5743 return KEEP_AGGREGATE_RETURN_POINTER != 0;
5744 }
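/* Hypothetical example: on 32-bit targets the attribute checked above
   lets a declaration state explicitly who pops the hidden aggregate
   return pointer:

     struct big { int v[8]; };

     struct big __attribute__ ((callee_pop_aggregate_return (0)))
     make_big (void);   // value 0: the caller pops the hidden pointer

   With the argument 0 the pointer is kept for the caller to pop (also
   the default for the 32-bit MS ABI, as above); with 1 the callee pops
   it, as computed in ix86_return_pops_args below.  */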
5745
5746 /* Value is the number of bytes of arguments automatically
5747 popped when returning from a subroutine call.
5748 FUNDECL is the declaration node of the function (as a tree),
5749 FUNTYPE is the data type of the function (as a tree),
5750 or for a library call it is an identifier node for the subroutine name.
5751 SIZE is the number of bytes of arguments passed on the stack.
5752
5753 On the 80386, the RTD insn may be used to pop them if the number
5754 of args is fixed, but if the number is variable then the caller
5755 must pop them all. RTD can't be used for library calls now
5756 because the library is compiled with the Unix compiler.
5757 Use of RTD is a selectable option, since it is incompatible with
5758 standard Unix calling sequences. If the option is not selected,
5759 the caller must always pop the args.
5760
5761 The attribute stdcall is equivalent to RTD on a per module basis. */
5762
5763 static int
5764 ix86_return_pops_args (tree fundecl, tree funtype, int size)
5765 {
5766 unsigned int ccvt;
5767
5768 /* None of the 64-bit ABIs pop arguments. */
5769 if (TARGET_64BIT)
5770 return 0;
5771
5772 ccvt = ix86_get_callcvt (funtype);
5773
5774 if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
5775 | IX86_CALLCVT_THISCALL)) != 0
5776 && ! stdarg_p (funtype))
5777 return size;
5778
5779 /* Lose any fake structure return argument if it is passed on the stack. */
5780 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
5781 && !ix86_keep_aggregate_return_pointer (funtype))
5782 {
5783 int nregs = ix86_function_regparm (funtype, fundecl);
5784 if (nregs == 0)
5785 return GET_MODE_SIZE (Pmode);
5786 }
5787
5788 return 0;
5789 }
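/* Hypothetical illustration: given the 32-bit declaration

     int __attribute__ ((stdcall)) f (int a, int b);

   this hook returns 8 for F, so F's epilogue ends in "ret $8" and the
   caller does not adjust %esp after the call; a plain cdecl or variadic
   function yields 0 here and the caller pops its own arguments.  */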
5790
5791 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
5792
5793 static bool
5794 ix86_legitimate_combined_insn (rtx insn)
5795 {
5796 /* Check operand constraints in case hard registers were propagated
5797 into insn pattern. This check prevents combine pass from
5798 generating insn patterns with invalid hard register operands.
5799 These invalid insns can eventually confuse reload to error out
5800 with a spill failure. See also PRs 46829 and 46843. */
5801 if ((INSN_CODE (insn) = recog (PATTERN (insn), insn, 0)) >= 0)
5802 {
5803 int i;
5804
5805 extract_insn (insn);
5806 preprocess_constraints ();
5807
5808 for (i = 0; i < recog_data.n_operands; i++)
5809 {
5810 rtx op = recog_data.operand[i];
5811 enum machine_mode mode = GET_MODE (op);
5812 struct operand_alternative *op_alt;
5813 int offset = 0;
5814 bool win;
5815 int j;
5816
5817 /* For pre-AVX disallow unaligned loads/stores where the
5818 instructions don't support it. */
5819 if (!TARGET_AVX
5820 && VECTOR_MODE_P (GET_MODE (op))
5821 && misaligned_operand (op, GET_MODE (op)))
5822 {
5823 int min_align = get_attr_ssememalign (insn);
5824 if (min_align == 0)
5825 return false;
5826 }
5827
5828 /* A unary operator may be accepted by the predicate, but it
5829 is irrelevant for matching constraints. */
5830 if (UNARY_P (op))
5831 op = XEXP (op, 0);
5832
5833 if (GET_CODE (op) == SUBREG)
5834 {
5835 if (REG_P (SUBREG_REG (op))
5836 && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
5837 offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
5838 GET_MODE (SUBREG_REG (op)),
5839 SUBREG_BYTE (op),
5840 GET_MODE (op));
5841 op = SUBREG_REG (op);
5842 }
5843
5844 if (!(REG_P (op) && HARD_REGISTER_P (op)))
5845 continue;
5846
5847 op_alt = recog_op_alt[i];
5848
5849 /* Operand has no constraints, anything is OK. */
5850 win = !recog_data.n_alternatives;
5851
5852 for (j = 0; j < recog_data.n_alternatives; j++)
5853 {
5854 if (op_alt[j].anything_ok
5855 || (op_alt[j].matches != -1
5856 && operands_match_p
5857 (recog_data.operand[i],
5858 recog_data.operand[op_alt[j].matches]))
5859 || reg_fits_class_p (op, op_alt[j].cl, offset, mode))
5860 {
5861 win = true;
5862 break;
5863 }
5864 }
5865
5866 if (!win)
5867 return false;
5868 }
5869 }
5870
5871 return true;
5872 }
5873 \f
5874 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
5875
5876 static unsigned HOST_WIDE_INT
5877 ix86_asan_shadow_offset (void)
5878 {
5879 return TARGET_LP64 ? (TARGET_MACHO ? (HOST_WIDE_INT_1 << 44)
5880 : HOST_WIDE_INT_C (0x7fff8000))
5881 : (HOST_WIDE_INT_1 << 29);
5882 }
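/* With this offset, AddressSanitizer locates the shadow byte of an
   address roughly as in the following sketch (illustrative only; the
   actual checks are emitted by the sanitizer passes, not here):

     // Linux x86-64 LP64: shadow = (addr >> 3) + 0x7fff8000
     static unsigned char *
     asan_shadow_of (unsigned long addr)
     {
       return (unsigned char *) ((addr >> 3) + 0x7fff8000UL);
     }

   The 32-bit and Darwin offsets returned above plug into the same
   formula.  */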
5883 \f
5884 /* Argument support functions. */
5885
5886 /* Return true when register may be used to pass function parameters. */
5887 bool
5888 ix86_function_arg_regno_p (int regno)
5889 {
5890 int i;
5891 const int *parm_regs;
5892
5893 if (!TARGET_64BIT)
5894 {
5895 if (TARGET_MACHO)
5896 return (regno < REGPARM_MAX
5897 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
5898 else
5899 return (regno < REGPARM_MAX
5900 || (TARGET_MMX && MMX_REGNO_P (regno)
5901 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
5902 || (TARGET_SSE && SSE_REGNO_P (regno)
5903 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
5904 }
5905
5906 if (TARGET_SSE && SSE_REGNO_P (regno)
5907 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
5908 return true;
5909
5910 /* TODO: The function should depend on current function ABI but
5911 builtins.c would need updating then. Therefore we use the
5912 default ABI. */
5913
5914 /* RAX is used as hidden argument to va_arg functions. */
5915 if (ix86_abi == SYSV_ABI && regno == AX_REG)
5916 return true;
5917
5918 if (ix86_abi == MS_ABI)
5919 parm_regs = x86_64_ms_abi_int_parameter_registers;
5920 else
5921 parm_regs = x86_64_int_parameter_registers;
5922 for (i = 0; i < (ix86_abi == MS_ABI
5923 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
5924 if (regno == parm_regs[i])
5925 return true;
5926 return false;
5927 }
5928
5929 /* Return true if we do not know how to pass TYPE solely in registers. */
5930
5931 static bool
5932 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
5933 {
5934 if (must_pass_in_stack_var_size_or_pad (mode, type))
5935 return true;
5936
5937 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
5938 The layout_type routine is crafty and tries to trick us into passing
5939 currently unsupported vector types on the stack by using TImode. */
5940 return (!TARGET_64BIT && mode == TImode
5941 && type && TREE_CODE (type) != VECTOR_TYPE);
5942 }
5943
5944 /* Return the size, in bytes, of the area reserved for arguments passed
5945 in registers for the function represented by FNDECL, depending on the
5946 ABI format used. */
5947 int
5948 ix86_reg_parm_stack_space (const_tree fndecl)
5949 {
5950 enum calling_abi call_abi = SYSV_ABI;
5951 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
5952 call_abi = ix86_function_abi (fndecl);
5953 else
5954 call_abi = ix86_function_type_abi (fndecl);
5955 if (TARGET_64BIT && call_abi == MS_ABI)
5956 return 32;
5957 return 0;
5958 }
5959
5960 /* Return SYSV_ABI or MS_ABI, depending on FNTYPE, specifying the
5961 call ABI used. */
5962 enum calling_abi
5963 ix86_function_type_abi (const_tree fntype)
5964 {
5965 if (fntype != NULL_TREE && TYPE_ATTRIBUTES (fntype) != NULL_TREE)
5966 {
5967 enum calling_abi abi = ix86_abi;
5968 if (abi == SYSV_ABI)
5969 {
5970 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
5971 abi = MS_ABI;
5972 }
5973 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
5974 abi = SYSV_ABI;
5975 return abi;
5976 }
5977 return ix86_abi;
5978 }
5979
5980 /* We add this as a workaround in order to use libc_has_function
5981 hook in i386.md. */
5982 bool
5983 ix86_libc_has_function (enum function_class fn_class)
5984 {
5985 return targetm.libc_has_function (fn_class);
5986 }
5987
5988 static bool
5989 ix86_function_ms_hook_prologue (const_tree fn)
5990 {
5991 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
5992 {
5993 if (decl_function_context (fn) != NULL_TREE)
5994 error_at (DECL_SOURCE_LOCATION (fn),
5995 "ms_hook_prologue is not compatible with nested function");
5996 else
5997 return true;
5998 }
5999 return false;
6000 }
6001
6002 static enum calling_abi
6003 ix86_function_abi (const_tree fndecl)
6004 {
6005 if (! fndecl)
6006 return ix86_abi;
6007 return ix86_function_type_abi (TREE_TYPE (fndecl));
6008 }
6009
6010 /* Return SYSV_ABI or MS_ABI, depending on cfun, specifying the
6011 call ABI used. */
6012 enum calling_abi
6013 ix86_cfun_abi (void)
6014 {
6015 if (! cfun)
6016 return ix86_abi;
6017 return cfun->machine->call_abi;
6018 }
6019
6020 /* Write the extra assembler code needed to declare a function properly. */
6021
6022 void
6023 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
6024 tree decl)
6025 {
6026 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
6027
6028 if (is_ms_hook)
6029 {
6030 int i, filler_count = (TARGET_64BIT ? 32 : 16);
6031 unsigned int filler_cc = 0xcccccccc;
6032
6033 for (i = 0; i < filler_count; i += 4)
6034 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
6035 }
6036
6037 #ifdef SUBTARGET_ASM_UNWIND_INIT
6038 SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
6039 #endif
6040
6041 ASM_OUTPUT_LABEL (asm_out_file, fname);
6042
6043 /* Output magic byte marker, if hot-patch attribute is set. */
6044 if (is_ms_hook)
6045 {
6046 if (TARGET_64BIT)
6047 {
6048 /* leaq [%rsp + 0], %rsp */
6049 asm_fprintf (asm_out_file, ASM_BYTE
6050 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
6051 }
6052 else
6053 {
6054 /* movl.s %edi, %edi
6055 push %ebp
6056 movl.s %esp, %ebp */
6057 asm_fprintf (asm_out_file, ASM_BYTE
6058 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
6059 }
6060 }
6061 }
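/* A hypothetical use of the hot-patch support handled above:

     __attribute__ ((ms_hook_prologue))
     int hooked_entry (int x)
     {
       return x + 1;
     }

   The function label is preceded by the 0xCC filler area and followed
   by the short patchable instruction sequence emitted above, so a
   runtime patcher can safely overwrite the function entry with a
   jump.  */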
6062
6063 /* regclass.c */
6064 extern void init_regs (void);
6065
6066 /* Implementation of the call ABI switching target hook. The call
6067 register sets specific to FNDECL are selected. See also
6068 ix86_conditional_register_usage for more details. */
6069 void
6070 ix86_call_abi_override (const_tree fndecl)
6071 {
6072 if (fndecl == NULL_TREE)
6073 cfun->machine->call_abi = ix86_abi;
6074 else
6075 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
6076 }
6077
6078 /* The 64-bit MS and SYSV ABIs have different sets of call-used registers. Avoid
6079 expensive re-initialization of init_regs each time we switch function context
6080 since this is needed only during RTL expansion. */
6081 static void
6082 ix86_maybe_switch_abi (void)
6083 {
6084 if (TARGET_64BIT &&
6085 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
6086 reinit_regs ();
6087 }
6088
6089 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6090 for a call to a function whose data type is FNTYPE.
6091 For a library call, FNTYPE is 0. */
6092
6093 void
6094 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
6095 tree fntype, /* tree ptr for function decl */
6096 rtx libname, /* SYMBOL_REF of library name or 0 */
6097 tree fndecl,
6098 int caller)
6099 {
6100 struct cgraph_local_info *i;
6101
6102 memset (cum, 0, sizeof (*cum));
6103
6104 if (fndecl)
6105 {
6106 i = cgraph_local_info (fndecl);
6107 cum->call_abi = ix86_function_abi (fndecl);
6108 }
6109 else
6110 {
6111 i = NULL;
6112 cum->call_abi = ix86_function_type_abi (fntype);
6113 }
6114
6115 cum->caller = caller;
6116
6117 /* Set up the number of registers to use for passing arguments. */
6118 cum->nregs = ix86_regparm;
6119 if (TARGET_64BIT)
6120 {
6121 cum->nregs = (cum->call_abi == SYSV_ABI
6122 ? X86_64_REGPARM_MAX
6123 : X86_64_MS_REGPARM_MAX);
6124 }
6125 if (TARGET_SSE)
6126 {
6127 cum->sse_nregs = SSE_REGPARM_MAX;
6128 if (TARGET_64BIT)
6129 {
6130 cum->sse_nregs = (cum->call_abi == SYSV_ABI
6131 ? X86_64_SSE_REGPARM_MAX
6132 : X86_64_MS_SSE_REGPARM_MAX);
6133 }
6134 }
6135 if (TARGET_MMX)
6136 cum->mmx_nregs = MMX_REGPARM_MAX;
6137 cum->warn_avx512f = true;
6138 cum->warn_avx = true;
6139 cum->warn_sse = true;
6140 cum->warn_mmx = true;
6141
6142 /* Because the type might mismatch between caller and callee, we need to
6143 use the actual type of the function for local calls.
6144 FIXME: cgraph_analyze can be told to actually record whether a function
6145 uses va_start, so for local functions maybe_vaarg can be made more
6146 aggressive, helping K&R code.
6147 FIXME: once the type system is fixed, we won't need this code anymore. */
6148 if (i && i->local && i->can_change_signature)
6149 fntype = TREE_TYPE (fndecl);
6150 cum->maybe_vaarg = (fntype
6151 ? (!prototype_p (fntype) || stdarg_p (fntype))
6152 : !libname);
6153
6154 if (!TARGET_64BIT)
6155 {
6156 /* If there are variable arguments, then we won't pass anything
6157 in registers in 32-bit mode. */
6158 if (stdarg_p (fntype))
6159 {
6160 cum->nregs = 0;
6161 cum->sse_nregs = 0;
6162 cum->mmx_nregs = 0;
6163 cum->warn_avx512f = false;
6164 cum->warn_avx = false;
6165 cum->warn_sse = false;
6166 cum->warn_mmx = false;
6167 return;
6168 }
6169
6170 /* Use ecx and edx registers if function has fastcall attribute,
6171 else look for regparm information. */
6172 if (fntype)
6173 {
6174 unsigned int ccvt = ix86_get_callcvt (fntype);
6175 if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
6176 {
6177 cum->nregs = 1;
6178 cum->fastcall = 1; /* Same first register as in fastcall. */
6179 }
6180 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
6181 {
6182 cum->nregs = 2;
6183 cum->fastcall = 1;
6184 }
6185 else
6186 cum->nregs = ix86_function_regparm (fntype, fndecl);
6187 }
6188
6189 /* Set up the number of SSE registers used for passing SFmode
6190 and DFmode arguments. Warn for mismatching ABI. */
6191 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
6192 }
6193 }
6194
6195 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
6196 But in the case of vector types, it is some vector mode.
6197
6198 When we have only some of our vector isa extensions enabled, then there
6199 are some modes for which vector_mode_supported_p is false. For these
6200 modes, the generic vector support in gcc will choose some non-vector mode
6201 in order to implement the type. By computing the natural mode, we'll
6202 select the proper ABI location for the operand and not depend on whatever
6203 the middle-end decides to do with these vector types.
6204
6205 The middle-end can't deal with vector types larger than 16 bytes. In that
6206 case, we return the original mode and warn about the ABI change if CUM
6207 isn't NULL.
6208
6209 If IN_RETURN is true, warn about the ABI change if the vector mode isn't
6210 available for the function return value. */
6211
6212 static enum machine_mode
6213 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum,
6214 bool in_return)
6215 {
6216 enum machine_mode mode = TYPE_MODE (type);
6217
6218 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
6219 {
6220 HOST_WIDE_INT size = int_size_in_bytes (type);
6221 if ((size == 8 || size == 16 || size == 32 || size == 64)
6222 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
6223 && TYPE_VECTOR_SUBPARTS (type) > 1)
6224 {
6225 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
6226
6227 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6228 mode = MIN_MODE_VECTOR_FLOAT;
6229 else
6230 mode = MIN_MODE_VECTOR_INT;
6231
6232 /* Get the mode which has this inner mode and number of units. */
6233 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
6234 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
6235 && GET_MODE_INNER (mode) == innermode)
6236 {
6237 if (size == 64 && !TARGET_AVX512F)
6238 {
6239 static bool warnedavx512f;
6240 static bool warnedavx512f_ret;
6241
6242 if (cum && cum->warn_avx512f && !warnedavx512f)
6243 {
6244 if (warning (OPT_Wpsabi, "AVX512F vector argument "
6245 "without AVX512F enabled changes the ABI"))
6246 warnedavx512f = true;
6247 }
6248 else if (in_return && !warnedavx512f_ret)
6249 {
6250 if (warning (OPT_Wpsabi, "AVX512F vector return "
6251 "without AVX512F enabled changes the ABI"))
6252 warnedavx512f_ret = true;
6253 }
6254
6255 return TYPE_MODE (type);
6256 }
6257 else if (size == 32 && !TARGET_AVX)
6258 {
6259 static bool warnedavx;
6260 static bool warnedavx_ret;
6261
6262 if (cum && cum->warn_avx && !warnedavx)
6263 {
6264 if (warning (OPT_Wpsabi, "AVX vector argument "
6265 "without AVX enabled changes the ABI"))
6266 warnedavx = true;
6267 }
6268 else if (in_return && !warnedavx_ret)
6269 {
6270 if (warning (OPT_Wpsabi, "AVX vector return "
6271 "without AVX enabled changes the ABI"))
6272 warnedavx_ret = true;
6273 }
6274
6275 return TYPE_MODE (type);
6276 }
6277 else if (((size == 8 && TARGET_64BIT) || size == 16)
6278 && !TARGET_SSE)
6279 {
6280 static bool warnedsse;
6281 static bool warnedsse_ret;
6282
6283 if (cum && cum->warn_sse && !warnedsse)
6284 {
6285 if (warning (OPT_Wpsabi, "SSE vector argument "
6286 "without SSE enabled changes the ABI"))
6287 warnedsse = true;
6288 }
6289 else if (!TARGET_64BIT && in_return && !warnedsse_ret)
6290 {
6291 if (warning (OPT_Wpsabi, "SSE vector return "
6292 "without SSE enabled changes the ABI"))
6293 warnedsse_ret = true;
6294 }
6295 }
6296 else if ((size == 8 && !TARGET_64BIT) && !TARGET_MMX)
6297 {
6298 static bool warnedmmx;
6299 static bool warnedmmx_ret;
6300
6301 if (cum && cum->warn_mmx && !warnedmmx)
6302 {
6303 if (warning (OPT_Wpsabi, "MMX vector argument "
6304 "without MMX enabled changes the ABI"))
6305 warnedmmx = true;
6306 }
6307 else if (in_return && !warnedmmx_ret)
6308 {
6309 if (warning (OPT_Wpsabi, "MMX vector return "
6310 "without MMX enabled changes the ABI"))
6311 warnedmmx_ret = true;
6312 }
6313 }
6314 return mode;
6315 }
6316
6317 gcc_unreachable ();
6318 }
6319 }
6320
6321 return mode;
6322 }
6323
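/* Illustrative sketch (not part of the original sources) of the user code
   the warnings above are about.  Compiled for x86-64 without -mavx, the
   256-bit generic vector type below has no supported vector mode, so
   type_natural_mode falls back to the type's original mode and emits the
   -Wpsabi note that the ABI differs from an AVX-enabled compilation.

     typedef int v8si __attribute__ ((vector_size (32)));

     v8si add (v8si a, v8si b)
     {
       return a + b;
     }
*/
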
6324 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
6325 this may not agree with the mode that the type system has chosen for the
6326 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
6327 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
6328
6329 static rtx
6330 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
6331 unsigned int regno)
6332 {
6333 rtx tmp;
6334
6335 if (orig_mode != BLKmode)
6336 tmp = gen_rtx_REG (orig_mode, regno);
6337 else
6338 {
6339 tmp = gen_rtx_REG (mode, regno);
6340 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
6341 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
6342 }
6343
6344 return tmp;
6345 }
6346
6347 /* x86-64 register passing implementation. See the x86-64 ABI for details.
6348 The goal of this code is to classify each eightbyte of an incoming argument
6349 by register class and assign registers accordingly. */
6350
6351 /* Return the union class of CLASS1 and CLASS2.
6352 See the x86-64 PS ABI for details. */
6353
6354 static enum x86_64_reg_class
6355 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
6356 {
6357 /* Rule #1: If both classes are equal, this is the resulting class. */
6358 if (class1 == class2)
6359 return class1;
6360
6361 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
6362 the other class. */
6363 if (class1 == X86_64_NO_CLASS)
6364 return class2;
6365 if (class2 == X86_64_NO_CLASS)
6366 return class1;
6367
6368 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
6369 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
6370 return X86_64_MEMORY_CLASS;
6371
6372 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
6373 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
6374 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
6375 return X86_64_INTEGERSI_CLASS;
6376 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
6377 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
6378 return X86_64_INTEGER_CLASS;
6379
6380 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
6381 MEMORY is used. */
6382 if (class1 == X86_64_X87_CLASS
6383 || class1 == X86_64_X87UP_CLASS
6384 || class1 == X86_64_COMPLEX_X87_CLASS
6385 || class2 == X86_64_X87_CLASS
6386 || class2 == X86_64_X87UP_CLASS
6387 || class2 == X86_64_COMPLEX_X87_CLASS)
6388 return X86_64_MEMORY_CLASS;
6389
6390 /* Rule #6: Otherwise class SSE is used. */
6391 return X86_64_SSE_CLASS;
6392 }
6393
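/* Worked example (illustrative, not from the original sources): for

     struct s { int i; float f; };

   both fields land in the first eightbyte.  The int classifies as
   X86_64_INTEGERSI_CLASS and the float, at offset 4, as X86_64_SSE_CLASS;
   rule #4 above merges them to X86_64_INTEGER_CLASS, so the whole struct
   travels in a single general-purpose register.  */
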
6394 /* Classify the argument of type TYPE and mode MODE.
6395 CLASSES will be filled by the register class used to pass each word
6396 of the operand. The number of words is returned. In case the parameter
6397 should be passed in memory, 0 is returned. As a special case for zero
6398 sized containers, classes[0] will be NO_CLASS and 1 is returned.
6399
6400 BIT_OFFSET is used internally for handling records; it specifies the
6401 offset of the field in bits modulo 512 to avoid overflow cases.
6402
6403 See the x86-64 PS ABI for details.
6404 */
6405
6406 static int
6407 classify_argument (enum machine_mode mode, const_tree type,
6408 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
6409 {
6410 HOST_WIDE_INT bytes =
6411 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6412 int words
6413 = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6414
6415 /* Variable sized entities are always passed/returned in memory. */
6416 if (bytes < 0)
6417 return 0;
6418
6419 if (mode != VOIDmode
6420 && targetm.calls.must_pass_in_stack (mode, type))
6421 return 0;
6422
6423 if (type && AGGREGATE_TYPE_P (type))
6424 {
6425 int i;
6426 tree field;
6427 enum x86_64_reg_class subclasses[MAX_CLASSES];
6428
6429 /* On x86-64 we pass structures larger than 64 bytes on the stack. */
6430 if (bytes > 64)
6431 return 0;
6432
6433 for (i = 0; i < words; i++)
6434 classes[i] = X86_64_NO_CLASS;
6435
6436 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
6437 signal the memory class, so handle them as a special case. */
6438 if (!words)
6439 {
6440 classes[0] = X86_64_NO_CLASS;
6441 return 1;
6442 }
6443
6444 /* Classify each field of record and merge classes. */
6445 switch (TREE_CODE (type))
6446 {
6447 case RECORD_TYPE:
6448 /* And now merge the fields of structure. */
6449 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6450 {
6451 if (TREE_CODE (field) == FIELD_DECL)
6452 {
6453 int num;
6454
6455 if (TREE_TYPE (field) == error_mark_node)
6456 continue;
6457
6458 /* Bitfields are always classified as integer. Handle them
6459 early, since later code would consider them to be
6460 misaligned integers. */
6461 if (DECL_BIT_FIELD (field))
6462 {
6463 for (i = (int_bit_position (field)
6464 + (bit_offset % 64)) / 8 / 8;
6465 i < ((int_bit_position (field) + (bit_offset % 64))
6466 + tree_to_shwi (DECL_SIZE (field))
6467 + 63) / 8 / 8; i++)
6468 classes[i] =
6469 merge_classes (X86_64_INTEGER_CLASS,
6470 classes[i]);
6471 }
6472 else
6473 {
6474 int pos;
6475
6476 type = TREE_TYPE (field);
6477
6478 /* Flexible array member is ignored. */
6479 if (TYPE_MODE (type) == BLKmode
6480 && TREE_CODE (type) == ARRAY_TYPE
6481 && TYPE_SIZE (type) == NULL_TREE
6482 && TYPE_DOMAIN (type) != NULL_TREE
6483 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
6484 == NULL_TREE))
6485 {
6486 static bool warned;
6487
6488 if (!warned && warn_psabi)
6489 {
6490 warned = true;
6491 inform (input_location,
6492 "the ABI of passing struct with"
6493 " a flexible array member has"
6494 " changed in GCC 4.4");
6495 }
6496 continue;
6497 }
6498 num = classify_argument (TYPE_MODE (type), type,
6499 subclasses,
6500 (int_bit_position (field)
6501 + bit_offset) % 512);
6502 if (!num)
6503 return 0;
6504 pos = (int_bit_position (field)
6505 + (bit_offset % 64)) / 8 / 8;
6506 for (i = 0; i < num && (i + pos) < words; i++)
6507 classes[i + pos] =
6508 merge_classes (subclasses[i], classes[i + pos]);
6509 }
6510 }
6511 }
6512 break;
6513
6514 case ARRAY_TYPE:
6515 /* Arrays are handled as small records. */
6516 {
6517 int num;
6518 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
6519 TREE_TYPE (type), subclasses, bit_offset);
6520 if (!num)
6521 return 0;
6522
6523 /* The partial classes are now full classes. */
6524 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
6525 subclasses[0] = X86_64_SSE_CLASS;
6526 if (subclasses[0] == X86_64_INTEGERSI_CLASS
6527 && !((bit_offset % 64) == 0 && bytes == 4))
6528 subclasses[0] = X86_64_INTEGER_CLASS;
6529
6530 for (i = 0; i < words; i++)
6531 classes[i] = subclasses[i % num];
6532
6533 break;
6534 }
6535 case UNION_TYPE:
6536 case QUAL_UNION_TYPE:
6537 /* Unions are similar to RECORD_TYPE, but the offset is
6538 always zero. */
6539 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6540 {
6541 if (TREE_CODE (field) == FIELD_DECL)
6542 {
6543 int num;
6544
6545 if (TREE_TYPE (field) == error_mark_node)
6546 continue;
6547
6548 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
6549 TREE_TYPE (field), subclasses,
6550 bit_offset);
6551 if (!num)
6552 return 0;
6553 for (i = 0; i < num; i++)
6554 classes[i] = merge_classes (subclasses[i], classes[i]);
6555 }
6556 }
6557 break;
6558
6559 default:
6560 gcc_unreachable ();
6561 }
6562
6563 if (words > 2)
6564 {
6565 /* When the size is larger than 16 bytes, if the first class isn't
6566 X86_64_SSE_CLASS or any of the following ones isn't
6567 X86_64_SSEUP_CLASS, everything should be passed in
6568 memory. */
6569 if (classes[0] != X86_64_SSE_CLASS)
6570 return 0;
6571
6572 for (i = 1; i < words; i++)
6573 if (classes[i] != X86_64_SSEUP_CLASS)
6574 return 0;
6575 }
6576
6577 /* Final merger cleanup. */
6578 for (i = 0; i < words; i++)
6579 {
6580 /* If one class is MEMORY, everything should be passed in
6581 memory. */
6582 if (classes[i] == X86_64_MEMORY_CLASS)
6583 return 0;
6584
6585 /* The X86_64_SSEUP_CLASS should be always preceded by
6586 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
6587 if (classes[i] == X86_64_SSEUP_CLASS
6588 && classes[i - 1] != X86_64_SSE_CLASS
6589 && classes[i - 1] != X86_64_SSEUP_CLASS)
6590 {
6591 /* The first one should never be X86_64_SSEUP_CLASS. */
6592 gcc_assert (i != 0);
6593 classes[i] = X86_64_SSE_CLASS;
6594 }
6595
6596 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
6597 everything should be passed in memory. */
6598 if (classes[i] == X86_64_X87UP_CLASS
6599 && (classes[i - 1] != X86_64_X87_CLASS))
6600 {
6601 static bool warned;
6602
6603 /* The first one should never be X86_64_X87UP_CLASS. */
6604 gcc_assert (i != 0);
6605 if (!warned && warn_psabi)
6606 {
6607 warned = true;
6608 inform (input_location,
6609 "the ABI of passing union with long double"
6610 " has changed in GCC 4.4");
6611 }
6612 return 0;
6613 }
6614 }
6615 return words;
6616 }
6617
6618 /* Compute the alignment needed. We align all types to their natural
6619 boundaries, with the exception of XFmode, which is aligned to 64 bits. */
6620 if (mode != VOIDmode && mode != BLKmode)
6621 {
6622 int mode_alignment = GET_MODE_BITSIZE (mode);
6623
6624 if (mode == XFmode)
6625 mode_alignment = 128;
6626 else if (mode == XCmode)
6627 mode_alignment = 256;
6628 if (COMPLEX_MODE_P (mode))
6629 mode_alignment /= 2;
6630 /* Misaligned fields are always returned in memory. */
6631 if (bit_offset % mode_alignment)
6632 return 0;
6633 }
6634
6635 /* For V1xx modes, just use the base mode. */
6636 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
6637 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
6638 mode = GET_MODE_INNER (mode);
6639
6640 /* Classification of atomic types. */
6641 switch (mode)
6642 {
6643 case SDmode:
6644 case DDmode:
6645 classes[0] = X86_64_SSE_CLASS;
6646 return 1;
6647 case TDmode:
6648 classes[0] = X86_64_SSE_CLASS;
6649 classes[1] = X86_64_SSEUP_CLASS;
6650 return 2;
6651 case DImode:
6652 case SImode:
6653 case HImode:
6654 case QImode:
6655 case CSImode:
6656 case CHImode:
6657 case CQImode:
6658 {
6659 int size = bit_offset + (int) GET_MODE_BITSIZE (mode);
6660
6661 /* Analyze last 128 bits only. */
6662 size = (size - 1) & 0x7f;
6663
6664 if (size < 32)
6665 {
6666 classes[0] = X86_64_INTEGERSI_CLASS;
6667 return 1;
6668 }
6669 else if (size < 64)
6670 {
6671 classes[0] = X86_64_INTEGER_CLASS;
6672 return 1;
6673 }
6674 else if (size < 64+32)
6675 {
6676 classes[0] = X86_64_INTEGER_CLASS;
6677 classes[1] = X86_64_INTEGERSI_CLASS;
6678 return 2;
6679 }
6680 else if (size < 64+64)
6681 {
6682 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6683 return 2;
6684 }
6685 else
6686 gcc_unreachable ();
6687 }
6688 case CDImode:
6689 case TImode:
6690 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6691 return 2;
6692 case COImode:
6693 case OImode:
6694 /* OImode shouldn't be used directly. */
6695 gcc_unreachable ();
6696 case CTImode:
6697 return 0;
6698 case SFmode:
6699 if (!(bit_offset % 64))
6700 classes[0] = X86_64_SSESF_CLASS;
6701 else
6702 classes[0] = X86_64_SSE_CLASS;
6703 return 1;
6704 case DFmode:
6705 classes[0] = X86_64_SSEDF_CLASS;
6706 return 1;
6707 case XFmode:
6708 classes[0] = X86_64_X87_CLASS;
6709 classes[1] = X86_64_X87UP_CLASS;
6710 return 2;
6711 case TFmode:
6712 classes[0] = X86_64_SSE_CLASS;
6713 classes[1] = X86_64_SSEUP_CLASS;
6714 return 2;
6715 case SCmode:
6716 classes[0] = X86_64_SSE_CLASS;
6717 if (!(bit_offset % 64))
6718 return 1;
6719 else
6720 {
6721 static bool warned;
6722
6723 if (!warned && warn_psabi)
6724 {
6725 warned = true;
6726 inform (input_location,
6727 "the ABI of passing structure with complex float"
6728 " member has changed in GCC 4.4");
6729 }
6730 classes[1] = X86_64_SSESF_CLASS;
6731 return 2;
6732 }
6733 case DCmode:
6734 classes[0] = X86_64_SSEDF_CLASS;
6735 classes[1] = X86_64_SSEDF_CLASS;
6736 return 2;
6737 case XCmode:
6738 classes[0] = X86_64_COMPLEX_X87_CLASS;
6739 return 1;
6740 case TCmode:
6741 /* This mode is larger than 16 bytes. */
6742 return 0;
6743 case V8SFmode:
6744 case V8SImode:
6745 case V32QImode:
6746 case V16HImode:
6747 case V4DFmode:
6748 case V4DImode:
6749 classes[0] = X86_64_SSE_CLASS;
6750 classes[1] = X86_64_SSEUP_CLASS;
6751 classes[2] = X86_64_SSEUP_CLASS;
6752 classes[3] = X86_64_SSEUP_CLASS;
6753 return 4;
6754 case V8DFmode:
6755 case V16SFmode:
6756 case V8DImode:
6757 case V16SImode:
6758 case V32HImode:
6759 case V64QImode:
6760 classes[0] = X86_64_SSE_CLASS;
6761 classes[1] = X86_64_SSEUP_CLASS;
6762 classes[2] = X86_64_SSEUP_CLASS;
6763 classes[3] = X86_64_SSEUP_CLASS;
6764 classes[4] = X86_64_SSEUP_CLASS;
6765 classes[5] = X86_64_SSEUP_CLASS;
6766 classes[6] = X86_64_SSEUP_CLASS;
6767 classes[7] = X86_64_SSEUP_CLASS;
6768 return 8;
6769 case V4SFmode:
6770 case V4SImode:
6771 case V16QImode:
6772 case V8HImode:
6773 case V2DFmode:
6774 case V2DImode:
6775 classes[0] = X86_64_SSE_CLASS;
6776 classes[1] = X86_64_SSEUP_CLASS;
6777 return 2;
6778 case V1TImode:
6779 case V1DImode:
6780 case V2SFmode:
6781 case V2SImode:
6782 case V4HImode:
6783 case V8QImode:
6784 classes[0] = X86_64_SSE_CLASS;
6785 return 1;
6786 case BLKmode:
6787 case VOIDmode:
6788 return 0;
6789 default:
6790 gcc_assert (VECTOR_MODE_P (mode));
6791
6792 if (bytes > 16)
6793 return 0;
6794
6795 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
6796
6797 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
6798 classes[0] = X86_64_INTEGERSI_CLASS;
6799 else
6800 classes[0] = X86_64_INTEGER_CLASS;
6801 classes[1] = X86_64_INTEGER_CLASS;
6802 return 1 + (bytes > 8);
6803 }
6804 }
6805
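/* Worked examples (illustrative, not from the original sources); the
   classes follow from the code above and the SysV x86-64 psABI:

     struct p { double x, y; };    // [SSEDF, SSEDF]     -> two XMM registers
     struct q { long a; char b; }; // [INTEGER, INTEGER] -> two GPRs
     struct r { long double ld; }; // [X87, X87UP]       -> memory as an argument
                                   //                       (%st(0) as a return value)
     struct s { char buf[24]; };   // > 16 bytes, not SSE -> memory

   classify_argument returns the number of eightbytes on success and 0
   when the value has to live in memory.  */
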
6806 /* Examine the argument and set the number of registers required in each
6807 class. Return true iff the parameter should be passed in memory. */
6808
6809 static bool
6810 examine_argument (enum machine_mode mode, const_tree type, int in_return,
6811 int *int_nregs, int *sse_nregs)
6812 {
6813 enum x86_64_reg_class regclass[MAX_CLASSES];
6814 int n = classify_argument (mode, type, regclass, 0);
6815
6816 *int_nregs = 0;
6817 *sse_nregs = 0;
6818
6819 if (!n)
6820 return true;
6821 for (n--; n >= 0; n--)
6822 switch (regclass[n])
6823 {
6824 case X86_64_INTEGER_CLASS:
6825 case X86_64_INTEGERSI_CLASS:
6826 (*int_nregs)++;
6827 break;
6828 case X86_64_SSE_CLASS:
6829 case X86_64_SSESF_CLASS:
6830 case X86_64_SSEDF_CLASS:
6831 (*sse_nregs)++;
6832 break;
6833 case X86_64_NO_CLASS:
6834 case X86_64_SSEUP_CLASS:
6835 break;
6836 case X86_64_X87_CLASS:
6837 case X86_64_X87UP_CLASS:
6838 case X86_64_COMPLEX_X87_CLASS:
6839 if (!in_return)
6840 return true;
6841 break;
6842 case X86_64_MEMORY_CLASS:
6843 gcc_unreachable ();
6844 }
6845
6846 return false;
6847 }
6848
6849 /* Construct container for the argument used by GCC interface. See
6850 FUNCTION_ARG for the detailed description. */
6851
6852 static rtx
6853 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
6854 const_tree type, int in_return, int nintregs, int nsseregs,
6855 const int *intreg, int sse_regno)
6856 {
6857 /* The following variables hold the static issued_error state. */
6858 static bool issued_sse_arg_error;
6859 static bool issued_sse_ret_error;
6860 static bool issued_x87_ret_error;
6861
6862 enum machine_mode tmpmode;
6863 int bytes =
6864 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6865 enum x86_64_reg_class regclass[MAX_CLASSES];
6866 int n;
6867 int i;
6868 int nexps = 0;
6869 int needed_sseregs, needed_intregs;
6870 rtx exp[MAX_CLASSES];
6871 rtx ret;
6872
6873 n = classify_argument (mode, type, regclass, 0);
6874 if (!n)
6875 return NULL;
6876 if (examine_argument (mode, type, in_return, &needed_intregs,
6877 &needed_sseregs))
6878 return NULL;
6879 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
6880 return NULL;
6881
6882 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
6883 some less clueful developer tries to use floating-point anyway. */
6884 if (needed_sseregs && !TARGET_SSE)
6885 {
6886 if (in_return)
6887 {
6888 if (!issued_sse_ret_error)
6889 {
6890 error ("SSE register return with SSE disabled");
6891 issued_sse_ret_error = true;
6892 }
6893 }
6894 else if (!issued_sse_arg_error)
6895 {
6896 error ("SSE register argument with SSE disabled");
6897 issued_sse_arg_error = true;
6898 }
6899 return NULL;
6900 }
6901
6902 /* Likewise, error if the ABI requires us to return values in the
6903 x87 registers and the user specified -mno-80387. */
6904 if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return)
6905 for (i = 0; i < n; i++)
6906 if (regclass[i] == X86_64_X87_CLASS
6907 || regclass[i] == X86_64_X87UP_CLASS
6908 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
6909 {
6910 if (!issued_x87_ret_error)
6911 {
6912 error ("x87 register return with x87 disabled");
6913 issued_x87_ret_error = true;
6914 }
6915 return NULL;
6916 }
6917
6918 /* First construct simple cases. Avoid SCmode, since we want to use
6919 single register to pass this type. */
6920 if (n == 1 && mode != SCmode)
6921 switch (regclass[0])
6922 {
6923 case X86_64_INTEGER_CLASS:
6924 case X86_64_INTEGERSI_CLASS:
6925 return gen_rtx_REG (mode, intreg[0]);
6926 case X86_64_SSE_CLASS:
6927 case X86_64_SSESF_CLASS:
6928 case X86_64_SSEDF_CLASS:
6929 if (mode != BLKmode)
6930 return gen_reg_or_parallel (mode, orig_mode,
6931 SSE_REGNO (sse_regno));
6932 break;
6933 case X86_64_X87_CLASS:
6934 case X86_64_COMPLEX_X87_CLASS:
6935 return gen_rtx_REG (mode, FIRST_STACK_REG);
6936 case X86_64_NO_CLASS:
6937 /* Zero sized array, struct or class. */
6938 return NULL;
6939 default:
6940 gcc_unreachable ();
6941 }
6942 if (n == 2
6943 && regclass[0] == X86_64_SSE_CLASS
6944 && regclass[1] == X86_64_SSEUP_CLASS
6945 && mode != BLKmode)
6946 return gen_reg_or_parallel (mode, orig_mode,
6947 SSE_REGNO (sse_regno));
6948 if (n == 4
6949 && regclass[0] == X86_64_SSE_CLASS
6950 && regclass[1] == X86_64_SSEUP_CLASS
6951 && regclass[2] == X86_64_SSEUP_CLASS
6952 && regclass[3] == X86_64_SSEUP_CLASS
6953 && mode != BLKmode)
6954 return gen_reg_or_parallel (mode, orig_mode,
6955 SSE_REGNO (sse_regno));
6956 if (n == 8
6957 && regclass[0] == X86_64_SSE_CLASS
6958 && regclass[1] == X86_64_SSEUP_CLASS
6959 && regclass[2] == X86_64_SSEUP_CLASS
6960 && regclass[3] == X86_64_SSEUP_CLASS
6961 && regclass[4] == X86_64_SSEUP_CLASS
6962 && regclass[5] == X86_64_SSEUP_CLASS
6963 && regclass[6] == X86_64_SSEUP_CLASS
6964 && regclass[7] == X86_64_SSEUP_CLASS
6965 && mode != BLKmode)
6966 return gen_reg_or_parallel (mode, orig_mode,
6967 SSE_REGNO (sse_regno));
6968 if (n == 2
6969 && regclass[0] == X86_64_X87_CLASS
6970 && regclass[1] == X86_64_X87UP_CLASS)
6971 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
6972
6973 if (n == 2
6974 && regclass[0] == X86_64_INTEGER_CLASS
6975 && regclass[1] == X86_64_INTEGER_CLASS
6976 && (mode == CDImode || mode == TImode)
6977 && intreg[0] + 1 == intreg[1])
6978 return gen_rtx_REG (mode, intreg[0]);
6979
6980 /* Otherwise figure out the entries of the PARALLEL. */
6981 for (i = 0; i < n; i++)
6982 {
6983 int pos;
6984
6985 switch (regclass[i])
6986 {
6987 case X86_64_NO_CLASS:
6988 break;
6989 case X86_64_INTEGER_CLASS:
6990 case X86_64_INTEGERSI_CLASS:
6991 /* Merge TImodes on aligned occasions here too. */
6992 if (i * 8 + 8 > bytes)
6993 tmpmode
6994 = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
6995 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
6996 tmpmode = SImode;
6997 else
6998 tmpmode = DImode;
6999 /* We've requested 24 bytes, which we
7000 don't have a mode for. Use DImode. */
7001 if (tmpmode == BLKmode)
7002 tmpmode = DImode;
7003 exp [nexps++]
7004 = gen_rtx_EXPR_LIST (VOIDmode,
7005 gen_rtx_REG (tmpmode, *intreg),
7006 GEN_INT (i*8));
7007 intreg++;
7008 break;
7009 case X86_64_SSESF_CLASS:
7010 exp [nexps++]
7011 = gen_rtx_EXPR_LIST (VOIDmode,
7012 gen_rtx_REG (SFmode,
7013 SSE_REGNO (sse_regno)),
7014 GEN_INT (i*8));
7015 sse_regno++;
7016 break;
7017 case X86_64_SSEDF_CLASS:
7018 exp [nexps++]
7019 = gen_rtx_EXPR_LIST (VOIDmode,
7020 gen_rtx_REG (DFmode,
7021 SSE_REGNO (sse_regno)),
7022 GEN_INT (i*8));
7023 sse_regno++;
7024 break;
7025 case X86_64_SSE_CLASS:
7026 pos = i;
7027 switch (n)
7028 {
7029 case 1:
7030 tmpmode = DImode;
7031 break;
7032 case 2:
7033 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
7034 {
7035 tmpmode = TImode;
7036 i++;
7037 }
7038 else
7039 tmpmode = DImode;
7040 break;
7041 case 4:
7042 gcc_assert (i == 0
7043 && regclass[1] == X86_64_SSEUP_CLASS
7044 && regclass[2] == X86_64_SSEUP_CLASS
7045 && regclass[3] == X86_64_SSEUP_CLASS);
7046 tmpmode = OImode;
7047 i += 3;
7048 break;
7049 case 8:
7050 gcc_assert (i == 0
7051 && regclass[1] == X86_64_SSEUP_CLASS
7052 && regclass[2] == X86_64_SSEUP_CLASS
7053 && regclass[3] == X86_64_SSEUP_CLASS
7054 && regclass[4] == X86_64_SSEUP_CLASS
7055 && regclass[5] == X86_64_SSEUP_CLASS
7056 && regclass[6] == X86_64_SSEUP_CLASS
7057 && regclass[7] == X86_64_SSEUP_CLASS);
7058 tmpmode = XImode;
7059 i += 7;
7060 break;
7061 default:
7062 gcc_unreachable ();
7063 }
7064 exp [nexps++]
7065 = gen_rtx_EXPR_LIST (VOIDmode,
7066 gen_rtx_REG (tmpmode,
7067 SSE_REGNO (sse_regno)),
7068 GEN_INT (pos*8));
7069 sse_regno++;
7070 break;
7071 default:
7072 gcc_unreachable ();
7073 }
7074 }
7075
7076 /* Empty aligned struct, union or class. */
7077 if (nexps == 0)
7078 return NULL;
7079
7080 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
7081 for (i = 0; i < nexps; i++)
7082 XVECEXP (ret, 0, i) = exp [i];
7083 return ret;
7084 }
7085
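/* Illustrative sketch (not from the original sources): for an argument of
   type

     struct s { int i; double d; };

   the classes come out as [INTEGERSI, SSEDF], none of the simple cases
   above applies, and the routine builds a PARALLEL roughly of the shape

     (parallel:BLK [(expr_list (reg:SI di) (const_int 0))
                    (expr_list (reg:DF xmm0) (const_int 8))])

   i.e. the first eightbyte goes in the next free integer register and the
   second in the next free SSE register; the exact registers depend on how
   many argument registers are already in use.  */
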
7086 /* Update the data in CUM to advance over an argument of mode MODE
7087 and data type TYPE. (TYPE is null for libcalls where that information
7088 may not be available.) */
7089
7090 static void
7091 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7092 const_tree type, HOST_WIDE_INT bytes,
7093 HOST_WIDE_INT words)
7094 {
7095 switch (mode)
7096 {
7097 default:
7098 break;
7099
7100 case BLKmode:
7101 if (bytes < 0)
7102 break;
7103 /* FALLTHRU */
7104
7105 case DImode:
7106 case SImode:
7107 case HImode:
7108 case QImode:
7109 cum->words += words;
7110 cum->nregs -= words;
7111 cum->regno += words;
7112
7113 if (cum->nregs <= 0)
7114 {
7115 cum->nregs = 0;
7116 cum->regno = 0;
7117 }
7118 break;
7119
7120 case OImode:
7121 /* OImode shouldn't be used directly. */
7122 gcc_unreachable ();
7123
7124 case DFmode:
7125 if (cum->float_in_sse < 2)
7126 break;
7127 case SFmode:
7128 if (cum->float_in_sse < 1)
7129 break;
7130 /* FALLTHRU */
7131
7132 case V8SFmode:
7133 case V8SImode:
7134 case V64QImode:
7135 case V32HImode:
7136 case V16SImode:
7137 case V8DImode:
7138 case V16SFmode:
7139 case V8DFmode:
7140 case V32QImode:
7141 case V16HImode:
7142 case V4DFmode:
7143 case V4DImode:
7144 case TImode:
7145 case V16QImode:
7146 case V8HImode:
7147 case V4SImode:
7148 case V2DImode:
7149 case V4SFmode:
7150 case V2DFmode:
7151 if (!type || !AGGREGATE_TYPE_P (type))
7152 {
7153 cum->sse_words += words;
7154 cum->sse_nregs -= 1;
7155 cum->sse_regno += 1;
7156 if (cum->sse_nregs <= 0)
7157 {
7158 cum->sse_nregs = 0;
7159 cum->sse_regno = 0;
7160 }
7161 }
7162 break;
7163
7164 case V8QImode:
7165 case V4HImode:
7166 case V2SImode:
7167 case V2SFmode:
7168 case V1TImode:
7169 case V1DImode:
7170 if (!type || !AGGREGATE_TYPE_P (type))
7171 {
7172 cum->mmx_words += words;
7173 cum->mmx_nregs -= 1;
7174 cum->mmx_regno += 1;
7175 if (cum->mmx_nregs <= 0)
7176 {
7177 cum->mmx_nregs = 0;
7178 cum->mmx_regno = 0;
7179 }
7180 }
7181 break;
7182 }
7183 }
7184
7185 static void
7186 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7187 const_tree type, HOST_WIDE_INT words, bool named)
7188 {
7189 int int_nregs, sse_nregs;
7190
7191 /* Unnamed 512- and 256-bit vector mode parameters are passed on the stack. */
7192 if (!named && (VALID_AVX512F_REG_MODE (mode)
7193 || VALID_AVX256_REG_MODE (mode)))
7194 return;
7195
7196 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
7197 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
7198 {
7199 cum->nregs -= int_nregs;
7200 cum->sse_nregs -= sse_nregs;
7201 cum->regno += int_nregs;
7202 cum->sse_regno += sse_nregs;
7203 }
7204 else
7205 {
7206 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
7207 cum->words = (cum->words + align - 1) & ~(align - 1);
7208 cum->words += words;
7209 }
7210 }
7211
7212 static void
7213 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
7214 HOST_WIDE_INT words)
7215 {
7216 /* Anything else should have been passed indirectly (by reference). */
7217 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
7218
7219 cum->words += words;
7220 if (cum->nregs > 0)
7221 {
7222 cum->nregs -= 1;
7223 cum->regno += 1;
7224 }
7225 }
7226
7227 /* Update the data in CUM to advance over an argument of mode MODE and
7228 data type TYPE. (TYPE is null for libcalls where that information
7229 may not be available.) */
7230
7231 static void
7232 ix86_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
7233 const_tree type, bool named)
7234 {
7235 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7236 HOST_WIDE_INT bytes, words;
7237
7238 if (mode == BLKmode)
7239 bytes = int_size_in_bytes (type);
7240 else
7241 bytes = GET_MODE_SIZE (mode);
7242 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7243
7244 if (type)
7245 mode = type_natural_mode (type, NULL, false);
7246
7247 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7248 function_arg_advance_ms_64 (cum, bytes, words);
7249 else if (TARGET_64BIT)
7250 function_arg_advance_64 (cum, mode, type, words, named);
7251 else
7252 function_arg_advance_32 (cum, mode, type, bytes, words);
7253 }
7254
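/* Illustrative note (not from the original sources): for a SysV x86-64
   prototype such as

     void f (int a, double b, int c);

   advancing over a and b leaves CUM at regno 1 and sse_regno 1, so c picks
   up the second integer register; the assignment ends up as a -> %edi,
   b -> %xmm0, c -> %esi.  */
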
7255 /* Define where to put the arguments to a function.
7256 Value is zero to push the argument on the stack,
7257 or a hard register in which to store the argument.
7258
7259 MODE is the argument's machine mode.
7260 TYPE is the data type of the argument (as a tree).
7261 This is null for libcalls where that information may
7262 not be available.
7263 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7264 the preceding args and about the function being called.
7265 NAMED is nonzero if this argument is a named parameter
7266 (otherwise it is an extra parameter matching an ellipsis). */
7267
7268 static rtx
7269 function_arg_32 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7270 enum machine_mode orig_mode, const_tree type,
7271 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
7272 {
7273 /* Avoid the AL settings for the Unix64 ABI. */
7274 if (mode == VOIDmode)
7275 return constm1_rtx;
7276
7277 switch (mode)
7278 {
7279 default:
7280 break;
7281
7282 case BLKmode:
7283 if (bytes < 0)
7284 break;
7285 /* FALLTHRU */
7286 case DImode:
7287 case SImode:
7288 case HImode:
7289 case QImode:
7290 if (words <= cum->nregs)
7291 {
7292 int regno = cum->regno;
7293
7294 /* Fastcall allocates the first two DWORD (SImode) or
7295 smaller arguments to ECX and EDX, unless the argument
7296 is an aggregate type. */
7297 if (cum->fastcall)
7298 {
7299 if (mode == BLKmode
7300 || mode == DImode
7301 || (type && AGGREGATE_TYPE_P (type)))
7302 break;
7303
7304 /* ECX not EAX is the first allocated register. */
7305 if (regno == AX_REG)
7306 regno = CX_REG;
7307 }
7308 return gen_rtx_REG (mode, regno);
7309 }
7310 break;
7311
7312 case DFmode:
7313 if (cum->float_in_sse < 2)
7314 break;
7315 case SFmode:
7316 if (cum->float_in_sse < 1)
7317 break;
7318 /* FALLTHRU */
7319 case TImode:
7320 /* In 32-bit mode, we pass TImode in xmm registers. */
7321 case V16QImode:
7322 case V8HImode:
7323 case V4SImode:
7324 case V2DImode:
7325 case V4SFmode:
7326 case V2DFmode:
7327 if (!type || !AGGREGATE_TYPE_P (type))
7328 {
7329 if (cum->sse_nregs)
7330 return gen_reg_or_parallel (mode, orig_mode,
7331 cum->sse_regno + FIRST_SSE_REG);
7332 }
7333 break;
7334
7335 case OImode:
7336 case XImode:
7337 /* OImode and XImode shouldn't be used directly. */
7338 gcc_unreachable ();
7339
7340 case V64QImode:
7341 case V32HImode:
7342 case V16SImode:
7343 case V8DImode:
7344 case V16SFmode:
7345 case V8DFmode:
7346 case V8SFmode:
7347 case V8SImode:
7348 case V32QImode:
7349 case V16HImode:
7350 case V4DFmode:
7351 case V4DImode:
7352 if (!type || !AGGREGATE_TYPE_P (type))
7353 {
7354 if (cum->sse_nregs)
7355 return gen_reg_or_parallel (mode, orig_mode,
7356 cum->sse_regno + FIRST_SSE_REG);
7357 }
7358 break;
7359
7360 case V8QImode:
7361 case V4HImode:
7362 case V2SImode:
7363 case V2SFmode:
7364 case V1TImode:
7365 case V1DImode:
7366 if (!type || !AGGREGATE_TYPE_P (type))
7367 {
7368 if (cum->mmx_nregs)
7369 return gen_reg_or_parallel (mode, orig_mode,
7370 cum->mmx_regno + FIRST_MMX_REG);
7371 }
7372 break;
7373 }
7374
7375 return NULL_RTX;
7376 }
7377
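/* Illustrative note (not from the original sources): with the 32-bit
   fastcall handling above, e.g.

     void __attribute__ ((fastcall)) f (int a, int b, int c);

   a goes in %ecx, b in %edx and c on the stack, while aggregates and
   DImode arguments always go on the stack even if registers are left.  */
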
7378 static rtx
7379 function_arg_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7380 enum machine_mode orig_mode, const_tree type, bool named)
7381 {
7382 /* Handle a hidden AL argument containing the number of SSE registers
7383 used for varargs x86-64 functions. */
7384 if (mode == VOIDmode)
7385 return GEN_INT (cum->maybe_vaarg
7386 ? (cum->sse_nregs < 0
7387 ? X86_64_SSE_REGPARM_MAX
7388 : cum->sse_regno)
7389 : -1);
7390
7391 switch (mode)
7392 {
7393 default:
7394 break;
7395
7396 case V8SFmode:
7397 case V8SImode:
7398 case V32QImode:
7399 case V16HImode:
7400 case V4DFmode:
7401 case V4DImode:
7402 case V16SFmode:
7403 case V16SImode:
7404 case V64QImode:
7405 case V32HImode:
7406 case V8DFmode:
7407 case V8DImode:
7408 /* Unnamed 256- and 512-bit vector mode parameters are passed on the stack. */
7409 if (!named)
7410 return NULL;
7411 break;
7412 }
7413
7414 return construct_container (mode, orig_mode, type, 0, cum->nregs,
7415 cum->sse_nregs,
7416 &x86_64_int_parameter_registers [cum->regno],
7417 cum->sse_regno);
7418 }
7419
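/* Illustrative note (not from the original sources): the VOIDmode case
   above implements the SysV x86-64 rule that a call to a varargs or
   unprototyped function sets %al to an upper bound on the number of
   vector registers actually used, e.g.

     printf ("%f\n", 3.14);   // one double in %xmm0, hence "movl $1, %eax"

   so the callee's prologue knows whether it has to spill the SSE argument
   registers (see setup_incoming_varargs_64 further down).  */
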
7420 static rtx
7421 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7422 enum machine_mode orig_mode, bool named,
7423 HOST_WIDE_INT bytes)
7424 {
7425 unsigned int regno;
7426
7427 /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
7428 We use the value -2 to specify that the current function call is MSABI. */
7429 if (mode == VOIDmode)
7430 return GEN_INT (-2);
7431
7432 /* If we've run out of registers, it goes on the stack. */
7433 if (cum->nregs == 0)
7434 return NULL_RTX;
7435
7436 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
7437
7438 /* Only floating point modes are passed in anything but integer regs. */
7439 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
7440 {
7441 if (named)
7442 regno = cum->regno + FIRST_SSE_REG;
7443 else
7444 {
7445 rtx t1, t2;
7446
7447 /* Unnamed floating parameters are passed in both the
7448 SSE and integer registers. */
7449 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
7450 t2 = gen_rtx_REG (mode, regno);
7451 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
7452 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
7453 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
7454 }
7455 }
7456 /* Handle aggregate types passed in registers. */
7457 if (orig_mode == BLKmode)
7458 {
7459 if (bytes > 0 && bytes <= 8)
7460 mode = (bytes > 4 ? DImode : SImode);
7461 if (mode == BLKmode)
7462 mode = DImode;
7463 }
7464
7465 return gen_reg_or_parallel (mode, orig_mode, regno);
7466 }
7467
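/* Illustrative note (not from the original sources): the unnamed-argument
   case above mirrors the Microsoft x64 convention that a variadic
   floating-point argument is duplicated into the corresponding integer
   register, e.g. for the second argument of

     printf ("%f\n", 3.14);   // value placed in both %xmm1 and %rdx

   so a callee that only walks the integer registers still finds it.  */
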
7468 /* Return where to put the arguments to a function.
7469 Return zero to push the argument on the stack, or a hard register in which to store the argument.
7470
7471 MODE is the argument's machine mode. TYPE is the data type of the
7472 argument. It is null for libcalls where that information may not be
7473 available. CUM gives information about the preceding args and about
7474 the function being called. NAMED is nonzero if this argument is a
7475 named parameter (otherwise it is an extra parameter matching an
7476 ellipsis). */
7477
7478 static rtx
7479 ix86_function_arg (cumulative_args_t cum_v, enum machine_mode omode,
7480 const_tree type, bool named)
7481 {
7482 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7483 enum machine_mode mode = omode;
7484 HOST_WIDE_INT bytes, words;
7485 rtx arg;
7486
7487 if (mode == BLKmode)
7488 bytes = int_size_in_bytes (type);
7489 else
7490 bytes = GET_MODE_SIZE (mode);
7491 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7492
7493 /* To simplify the code below, represent vector types with a vector mode
7494 even if MMX/SSE are not active. */
7495 if (type && TREE_CODE (type) == VECTOR_TYPE)
7496 mode = type_natural_mode (type, cum, false);
7497
7498 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7499 arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
7500 else if (TARGET_64BIT)
7501 arg = function_arg_64 (cum, mode, omode, type, named);
7502 else
7503 arg = function_arg_32 (cum, mode, omode, type, bytes, words);
7504
7505 return arg;
7506 }
7507
7508 /* A C expression that indicates when an argument must be passed by
7509 reference. If nonzero for an argument, a copy of that argument is
7510 made in memory and a pointer to the argument is passed instead of
7511 the argument itself. The pointer is passed in whatever way is
7512 appropriate for passing a pointer to that type. */
7513
7514 static bool
7515 ix86_pass_by_reference (cumulative_args_t cum_v, enum machine_mode mode,
7516 const_tree type, bool named ATTRIBUTE_UNUSED)
7517 {
7518 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7519
7520 /* See Windows x64 Software Convention. */
7521 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7522 {
7523 int msize = (int) GET_MODE_SIZE (mode);
7524 if (type)
7525 {
7526 /* Arrays are passed by reference. */
7527 if (TREE_CODE (type) == ARRAY_TYPE)
7528 return true;
7529
7530 if (AGGREGATE_TYPE_P (type))
7531 {
7532 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
7533 are passed by reference. */
7534 msize = int_size_in_bytes (type);
7535 }
7536 }
7537
7538 /* __m128 is passed by reference. */
7539 switch (msize) {
7540 case 1: case 2: case 4: case 8:
7541 break;
7542 default:
7543 return true;
7544 }
7545 }
7546 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
7547 return 1;
7548
7549 return 0;
7550 }
7551
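/* Illustrative note (not from the original sources): under the Microsoft
   x64 rule above,

     struct small { int a, b; };     // 8 bytes  -> passed by value
     struct big   { int a, b, c; };  // 12 bytes -> passed by reference
     typedef float wide_vec __attribute__ ((vector_size (16)));
                                     // 16 bytes -> passed by reference

   (wide_vec is a made-up name standing in for __m128), whereas the SysV
   x86-64 path only falls back to a reference for variable-sized types,
   via the int_size_in_bytes == -1 check.  */
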
7552 /* Return true when TYPE should be 128-bit aligned for the 32-bit argument
7553 passing ABI. XXX: This function is obsolete and is only used for
7554 checking psABI compatibility with previous versions of GCC. */
7555
7556 static bool
7557 ix86_compat_aligned_value_p (const_tree type)
7558 {
7559 enum machine_mode mode = TYPE_MODE (type);
7560 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
7561 || mode == TDmode
7562 || mode == TFmode
7563 || mode == TCmode)
7564 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
7565 return true;
7566 if (TYPE_ALIGN (type) < 128)
7567 return false;
7568
7569 if (AGGREGATE_TYPE_P (type))
7570 {
7571 /* Walk the aggregates recursively. */
7572 switch (TREE_CODE (type))
7573 {
7574 case RECORD_TYPE:
7575 case UNION_TYPE:
7576 case QUAL_UNION_TYPE:
7577 {
7578 tree field;
7579
7580 /* Walk all the structure fields. */
7581 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7582 {
7583 if (TREE_CODE (field) == FIELD_DECL
7584 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
7585 return true;
7586 }
7587 break;
7588 }
7589
7590 case ARRAY_TYPE:
7591 /* Just in case some language passes arrays by value. */
7592 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
7593 return true;
7594 break;
7595
7596 default:
7597 gcc_unreachable ();
7598 }
7599 }
7600 return false;
7601 }
7602
7603 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
7604 XXX: This function is obsolete and is only used for checking psABI
7605 compatibility with previous versions of GCC. */
7606
7607 static unsigned int
7608 ix86_compat_function_arg_boundary (enum machine_mode mode,
7609 const_tree type, unsigned int align)
7610 {
7611 /* In 32-bit mode, only _Decimal128 and __float128 are aligned to their
7612 natural boundaries. */
7613 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
7614 {
7615 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
7616 make an exception for SSE modes since these require 128bit
7617 alignment.
7618
7619 The handling here differs from field_alignment. ICC aligns MMX
7620 arguments to 4 byte boundaries, while structure fields are aligned
7621 to 8 byte boundaries. */
7622 if (!type)
7623 {
7624 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
7625 align = PARM_BOUNDARY;
7626 }
7627 else
7628 {
7629 if (!ix86_compat_aligned_value_p (type))
7630 align = PARM_BOUNDARY;
7631 }
7632 }
7633 if (align > BIGGEST_ALIGNMENT)
7634 align = BIGGEST_ALIGNMENT;
7635 return align;
7636 }
7637
7638 /* Return true when TYPE should be 128-bit aligned for the 32-bit argument
7639 passing ABI. */
7640
7641 static bool
7642 ix86_contains_aligned_value_p (const_tree type)
7643 {
7644 enum machine_mode mode = TYPE_MODE (type);
7645
7646 if (mode == XFmode || mode == XCmode)
7647 return false;
7648
7649 if (TYPE_ALIGN (type) < 128)
7650 return false;
7651
7652 if (AGGREGATE_TYPE_P (type))
7653 {
7654 /* Walk the aggregates recursively. */
7655 switch (TREE_CODE (type))
7656 {
7657 case RECORD_TYPE:
7658 case UNION_TYPE:
7659 case QUAL_UNION_TYPE:
7660 {
7661 tree field;
7662
7663 /* Walk all the structure fields. */
7664 for (field = TYPE_FIELDS (type);
7665 field;
7666 field = DECL_CHAIN (field))
7667 {
7668 if (TREE_CODE (field) == FIELD_DECL
7669 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
7670 return true;
7671 }
7672 break;
7673 }
7674
7675 case ARRAY_TYPE:
7676 /* Just in case some language passes arrays by value. */
7677 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
7678 return true;
7679 break;
7680
7681 default:
7682 gcc_unreachable ();
7683 }
7684 }
7685 else
7686 return TYPE_ALIGN (type) >= 128;
7687
7688 return false;
7689 }
7690
7691 /* Gives the alignment boundary, in bits, of an argument with the
7692 specified mode and type. */
7693
7694 static unsigned int
7695 ix86_function_arg_boundary (enum machine_mode mode, const_tree type)
7696 {
7697 unsigned int align;
7698 if (type)
7699 {
7700 /* The main variant type is used for the call, so convert TYPE
7701 to its main variant. */
7702 type = TYPE_MAIN_VARIANT (type);
7703 align = TYPE_ALIGN (type);
7704 }
7705 else
7706 align = GET_MODE_ALIGNMENT (mode);
7707 if (align < PARM_BOUNDARY)
7708 align = PARM_BOUNDARY;
7709 else
7710 {
7711 static bool warned;
7712 unsigned int saved_align = align;
7713
7714 if (!TARGET_64BIT)
7715 {
7716 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
7717 if (!type)
7718 {
7719 if (mode == XFmode || mode == XCmode)
7720 align = PARM_BOUNDARY;
7721 }
7722 else if (!ix86_contains_aligned_value_p (type))
7723 align = PARM_BOUNDARY;
7724
7725 if (align < 128)
7726 align = PARM_BOUNDARY;
7727 }
7728
7729 if (warn_psabi
7730 && !warned
7731 && align != ix86_compat_function_arg_boundary (mode, type,
7732 saved_align))
7733 {
7734 warned = true;
7735 inform (input_location,
7736 "The ABI for passing parameters with %d-byte"
7737 " alignment has changed in GCC 4.6",
7738 align / BITS_PER_UNIT);
7739 }
7740 }
7741
7742 return align;
7743 }
7744
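/* Illustrative note (not from the original sources): on ia32 an ordinary
   int argument keeps the 32-bit PARM_BOUNDARY, while a 16-byte SSE type
   such as __m128 reports a 128-bit boundary here; the GCC 4.6 -Wpsabi
   note above fires when this computation and the obsolete compatibility
   one disagree.  */
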
7745 /* Return true if N is a possible register number of function value. */
7746
7747 static bool
7748 ix86_function_value_regno_p (const unsigned int regno)
7749 {
7750 switch (regno)
7751 {
7752 case AX_REG:
7753 case DX_REG:
7754 return true;
7755 case DI_REG:
7756 case SI_REG:
7757 return TARGET_64BIT && ix86_abi != MS_ABI;
7758
7759 /* Complex values are returned in %st(0)/%st(1) pair. */
7760 case ST0_REG:
7761 case ST1_REG:
7762 /* TODO: The function should depend on current function ABI but
7763 builtins.c would need updating then. Therefore we use the
7764 default ABI. */
7765 if (TARGET_64BIT && ix86_abi == MS_ABI)
7766 return false;
7767 return TARGET_FLOAT_RETURNS_IN_80387;
7768
7769 /* Complex values are returned in %xmm0/%xmm1 pair. */
7770 case XMM0_REG:
7771 case XMM1_REG:
7772 return TARGET_SSE;
7773
7774 case MM0_REG:
7775 if (TARGET_MACHO || TARGET_64BIT)
7776 return false;
7777 return TARGET_MMX;
7778 }
7779
7780 return false;
7781 }
7782
7783 /* Define how to find the value returned by a function.
7784 VALTYPE is the data type of the value (as a tree).
7785 If the precise function being called is known, FUNC is its FUNCTION_DECL;
7786 otherwise, FUNC is 0. */
7787
7788 static rtx
7789 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
7790 const_tree fntype, const_tree fn)
7791 {
7792 unsigned int regno;
7793
7794 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
7795 we normally prevent this case when mmx is not available. However
7796 some ABIs may require the result to be returned like DImode. */
7797 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
7798 regno = FIRST_MMX_REG;
7799
7800 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
7801 we prevent this case when sse is not available. However some ABIs
7802 may require the result to be returned like integer TImode. */
7803 else if (mode == TImode
7804 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
7805 regno = FIRST_SSE_REG;
7806
7807 /* 32-byte vector modes in %ymm0. */
7808 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
7809 regno = FIRST_SSE_REG;
7810
7811 /* 64-byte vector modes in %zmm0. */
7812 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
7813 regno = FIRST_SSE_REG;
7814
7815 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
7816 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
7817 regno = FIRST_FLOAT_REG;
7818 else
7819 /* Most things go in %eax. */
7820 regno = AX_REG;
7821
7822 /* Override FP return register with %xmm0 for local functions when
7823 SSE math is enabled or for functions with sseregparm attribute. */
7824 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
7825 {
7826 int sse_level = ix86_function_sseregparm (fntype, fn, false);
7827 if ((sse_level >= 1 && mode == SFmode)
7828 || (sse_level == 2 && mode == DFmode))
7829 regno = FIRST_SSE_REG;
7830 }
7831
7832 /* OImode shouldn't be used directly. */
7833 gcc_assert (mode != OImode);
7834
7835 return gen_rtx_REG (orig_mode, regno);
7836 }
7837
7838 static rtx
7839 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
7840 const_tree valtype)
7841 {
7842 rtx ret;
7843
7844 /* Handle libcalls, which don't provide a type node. */
7845 if (valtype == NULL)
7846 {
7847 unsigned int regno;
7848
7849 switch (mode)
7850 {
7851 case SFmode:
7852 case SCmode:
7853 case DFmode:
7854 case DCmode:
7855 case TFmode:
7856 case SDmode:
7857 case DDmode:
7858 case TDmode:
7859 regno = FIRST_SSE_REG;
7860 break;
7861 case XFmode:
7862 case XCmode:
7863 regno = FIRST_FLOAT_REG;
7864 break;
7865 case TCmode:
7866 return NULL;
7867 default:
7868 regno = AX_REG;
7869 }
7870
7871 return gen_rtx_REG (mode, regno);
7872 }
7873 else if (POINTER_TYPE_P (valtype))
7874 {
7875 /* Pointers are always returned in word_mode. */
7876 mode = word_mode;
7877 }
7878
7879 ret = construct_container (mode, orig_mode, valtype, 1,
7880 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
7881 x86_64_int_return_registers, 0);
7882
7883 /* For zero sized structures, construct_container returns NULL, but we
7884 need to keep the rest of the compiler happy by returning a meaningful value. */
7885 if (!ret)
7886 ret = gen_rtx_REG (orig_mode, AX_REG);
7887
7888 return ret;
7889 }
7890
7891 static rtx
7892 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode,
7893 const_tree valtype)
7894 {
7895 unsigned int regno = AX_REG;
7896
7897 if (TARGET_SSE)
7898 {
7899 switch (GET_MODE_SIZE (mode))
7900 {
7901 case 16:
7902 if (valtype != NULL_TREE
7903 && !VECTOR_INTEGER_TYPE_P (valtype)
7905 && !INTEGRAL_TYPE_P (valtype)
7906 && !VECTOR_FLOAT_TYPE_P (valtype))
7907 break;
7908 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
7909 && !COMPLEX_MODE_P (mode))
7910 regno = FIRST_SSE_REG;
7911 break;
7912 case 8:
7913 case 4:
7914 if (mode == SFmode || mode == DFmode)
7915 regno = FIRST_SSE_REG;
7916 break;
7917 default:
7918 break;
7919 }
7920 }
7921 return gen_rtx_REG (orig_mode, regno);
7922 }
7923
7924 static rtx
7925 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
7926 enum machine_mode orig_mode, enum machine_mode mode)
7927 {
7928 const_tree fn, fntype;
7929
7930 fn = NULL_TREE;
7931 if (fntype_or_decl && DECL_P (fntype_or_decl))
7932 fn = fntype_or_decl;
7933 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
7934
7935 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
7936 return function_value_ms_64 (orig_mode, mode, valtype);
7937 else if (TARGET_64BIT)
7938 return function_value_64 (orig_mode, mode, valtype);
7939 else
7940 return function_value_32 (orig_mode, mode, fntype, fn);
7941 }
7942
7943 static rtx
7944 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
7945 bool outgoing ATTRIBUTE_UNUSED)
7946 {
7947 enum machine_mode mode, orig_mode;
7948
7949 orig_mode = TYPE_MODE (valtype);
7950 mode = type_natural_mode (valtype, NULL, true);
7951 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
7952 }
7953
7954 /* Pointer function arguments and return values are promoted to
7955 word_mode. */
7956
7957 static enum machine_mode
7958 ix86_promote_function_mode (const_tree type, enum machine_mode mode,
7959 int *punsignedp, const_tree fntype,
7960 int for_return)
7961 {
7962 if (type != NULL_TREE && POINTER_TYPE_P (type))
7963 {
7964 *punsignedp = POINTERS_EXTEND_UNSIGNED;
7965 return word_mode;
7966 }
7967 return default_promote_function_mode (type, mode, punsignedp, fntype,
7968 for_return);
7969 }
7970
7971 /* Return true if a structure, union or array with MODE containing FIELD
7972 should be accessed using BLKmode. */
7973
7974 static bool
7975 ix86_member_type_forces_blk (const_tree field, enum machine_mode mode)
7976 {
7977 /* Union with XFmode must be in BLKmode. */
7978 return (mode == XFmode
7979 && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE
7980 || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE));
7981 }
7982
7983 rtx
7984 ix86_libcall_value (enum machine_mode mode)
7985 {
7986 return ix86_function_value_1 (NULL, NULL, mode, mode);
7987 }
7988
7989 /* Return true iff type is returned in memory. */
7990
7991 static bool
7992 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
7993 {
7994 #ifdef SUBTARGET_RETURN_IN_MEMORY
7995 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
7996 #else
7997 const enum machine_mode mode = type_natural_mode (type, NULL, true);
7998 HOST_WIDE_INT size;
7999
8000 if (TARGET_64BIT)
8001 {
8002 if (ix86_function_type_abi (fntype) == MS_ABI)
8003 {
8004 size = int_size_in_bytes (type);
8005
8006 /* __m128 is returned in xmm0. */
8007 if ((!type || VECTOR_INTEGER_TYPE_P (type)
8008 || INTEGRAL_TYPE_P (type)
8009 || VECTOR_FLOAT_TYPE_P (type))
8010 && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
8011 && !COMPLEX_MODE_P (mode)
8012 && (GET_MODE_SIZE (mode) == 16 || size == 16))
8013 return false;
8014
8015 /* Otherwise, the size must be exactly 1, 2, 4 or 8 bytes. */
8016 return size != 1 && size != 2 && size != 4 && size != 8;
8017 }
8018 else
8019 {
8020 int needed_intregs, needed_sseregs;
8021
8022 return examine_argument (mode, type, 1,
8023 &needed_intregs, &needed_sseregs);
8024 }
8025 }
8026 else
8027 {
8028 if (mode == BLKmode)
8029 return true;
8030
8031 size = int_size_in_bytes (type);
8032
8033 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
8034 return false;
8035
8036 if (VECTOR_MODE_P (mode) || mode == TImode)
8037 {
8038 /* User-created vectors small enough to fit in EAX. */
8039 if (size < 8)
8040 return false;
8041
8042 /* Unless the ABI prescribes otherwise,
8043 MMX/3dNow! values are returned in MM0 if available. */
8044
8045 if (size == 8)
8046 return TARGET_VECT8_RETURNS || !TARGET_MMX;
8047
8048 /* SSE values are returned in XMM0 if available. */
8049 if (size == 16)
8050 return !TARGET_SSE;
8051
8052 /* AVX values are returned in YMM0 if available. */
8053 if (size == 32)
8054 return !TARGET_AVX;
8055
8056 /* AVX512F values are returned in ZMM0 if available. */
8057 if (size == 64)
8058 return !TARGET_AVX512F;
8059 }
8060
8061 if (mode == XFmode)
8062 return false;
8063
8064 if (size > 12)
8065 return true;
8066
8067 /* OImode shouldn't be used directly. */
8068 gcc_assert (mode != OImode);
8069
8070 return false;
8071 }
8072 #endif
8073 }
8074
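/* Illustrative note (not from the original sources, and ignoring
   -fpcc-struct-return style overrides applied elsewhere):

     struct p { double x, y; };     // SysV x86-64: %xmm0/%xmm1, not memory
     struct q { long a; char b; };  // SysV x86-64: %rax/%rdx, not memory
     struct b { char c[32]; };      // memory in every configuration here
     __m128                         // ia32 with SSE: %xmm0, not memory

   When this predicate returns true, the caller passes a hidden pointer to
   the return slot.  */
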
8075 \f
8076 /* Create the va_list data type. */
8077
8078 /* Returns the calling-convention-specific va_list data type.
8079 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
8080
8081 static tree
8082 ix86_build_builtin_va_list_abi (enum calling_abi abi)
8083 {
8084 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
8085
8086 /* For i386 we use a plain pointer to the argument area. */
8087 if (!TARGET_64BIT || abi == MS_ABI)
8088 return build_pointer_type (char_type_node);
8089
8090 record = lang_hooks.types.make_type (RECORD_TYPE);
8091 type_decl = build_decl (BUILTINS_LOCATION,
8092 TYPE_DECL, get_identifier ("__va_list_tag"), record);
8093
8094 f_gpr = build_decl (BUILTINS_LOCATION,
8095 FIELD_DECL, get_identifier ("gp_offset"),
8096 unsigned_type_node);
8097 f_fpr = build_decl (BUILTINS_LOCATION,
8098 FIELD_DECL, get_identifier ("fp_offset"),
8099 unsigned_type_node);
8100 f_ovf = build_decl (BUILTINS_LOCATION,
8101 FIELD_DECL, get_identifier ("overflow_arg_area"),
8102 ptr_type_node);
8103 f_sav = build_decl (BUILTINS_LOCATION,
8104 FIELD_DECL, get_identifier ("reg_save_area"),
8105 ptr_type_node);
8106
8107 va_list_gpr_counter_field = f_gpr;
8108 va_list_fpr_counter_field = f_fpr;
8109
8110 DECL_FIELD_CONTEXT (f_gpr) = record;
8111 DECL_FIELD_CONTEXT (f_fpr) = record;
8112 DECL_FIELD_CONTEXT (f_ovf) = record;
8113 DECL_FIELD_CONTEXT (f_sav) = record;
8114
8115 TYPE_STUB_DECL (record) = type_decl;
8116 TYPE_NAME (record) = type_decl;
8117 TYPE_FIELDS (record) = f_gpr;
8118 DECL_CHAIN (f_gpr) = f_fpr;
8119 DECL_CHAIN (f_fpr) = f_ovf;
8120 DECL_CHAIN (f_ovf) = f_sav;
8121
8122 layout_type (record);
8123
8124 /* The correct type is an array type of one element. */
8125 return build_array_type (record, build_index_type (size_zero_node));
8126 }
8127
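/* Illustrative note (not from the original sources): the record built
   above is the well-known SysV x86-64 va_list layout, roughly

     typedef struct __va_list_tag
     {
       unsigned int gp_offset;    // offset into reg_save_area for the next GPR
       unsigned int fp_offset;    // offset into reg_save_area for the next XMM
       void *overflow_arg_area;   // next stack-passed argument
       void *reg_save_area;       // spilled register arguments
     } __builtin_va_list[1];

   while 32-bit code and the Microsoft ABI just use a plain char * cursor.  */
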
8128 /* Set up the builtin va_list data type and, for 64-bit, the additional
8129 calling-convention-specific va_list data types. */
8130
8131 static tree
8132 ix86_build_builtin_va_list (void)
8133 {
8134 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
8135
8136 /* Initialize abi specific va_list builtin types. */
8137 if (TARGET_64BIT)
8138 {
8139 tree t;
8140 if (ix86_abi == MS_ABI)
8141 {
8142 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
8143 if (TREE_CODE (t) != RECORD_TYPE)
8144 t = build_variant_type_copy (t);
8145 sysv_va_list_type_node = t;
8146 }
8147 else
8148 {
8149 t = ret;
8150 if (TREE_CODE (t) != RECORD_TYPE)
8151 t = build_variant_type_copy (t);
8152 sysv_va_list_type_node = t;
8153 }
8154 if (ix86_abi != MS_ABI)
8155 {
8156 t = ix86_build_builtin_va_list_abi (MS_ABI);
8157 if (TREE_CODE (t) != RECORD_TYPE)
8158 t = build_variant_type_copy (t);
8159 ms_va_list_type_node = t;
8160 }
8161 else
8162 {
8163 t = ret;
8164 if (TREE_CODE (t) != RECORD_TYPE)
8165 t = build_variant_type_copy (t);
8166 ms_va_list_type_node = t;
8167 }
8168 }
8169
8170 return ret;
8171 }
8172
8173 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
8174
8175 static void
8176 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
8177 {
8178 rtx save_area, mem;
8179 alias_set_type set;
8180 int i, max;
8181
8182 /* GPR size of varargs save area. */
8183 if (cfun->va_list_gpr_size)
8184 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
8185 else
8186 ix86_varargs_gpr_size = 0;
8187
8188 /* FPR size of varargs save area. We don't need it if we don't pass
8189 anything in SSE registers. */
8190 if (TARGET_SSE && cfun->va_list_fpr_size)
8191 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
8192 else
8193 ix86_varargs_fpr_size = 0;
8194
8195 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
8196 return;
8197
8198 save_area = frame_pointer_rtx;
8199 set = get_varargs_alias_set ();
8200
8201 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
8202 if (max > X86_64_REGPARM_MAX)
8203 max = X86_64_REGPARM_MAX;
8204
8205 for (i = cum->regno; i < max; i++)
8206 {
8207 mem = gen_rtx_MEM (word_mode,
8208 plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
8209 MEM_NOTRAP_P (mem) = 1;
8210 set_mem_alias_set (mem, set);
8211 emit_move_insn (mem,
8212 gen_rtx_REG (word_mode,
8213 x86_64_int_parameter_registers[i]));
8214 }
8215
8216 if (ix86_varargs_fpr_size)
8217 {
8218 enum machine_mode smode;
8219 rtx label, test;
8220
8221 /* Now emit code to save the SSE registers. The AX parameter contains the
8222 number of SSE parameter registers used to call this function, though all we
8223 actually check here is the zero/non-zero status. */
8224
8225 label = gen_label_rtx ();
8226 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
8227 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
8228 label));
8229
8230 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
8231 we used movdqa (i.e. TImode) instead? Perhaps even better would
8232 be if we could determine the real mode of the data, via a hook
8233 into pass_stdarg. Ignore all that for now. */
8234 smode = V4SFmode;
8235 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
8236 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
8237
8238 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
8239 if (max > X86_64_SSE_REGPARM_MAX)
8240 max = X86_64_SSE_REGPARM_MAX;
8241
8242 for (i = cum->sse_regno; i < max; ++i)
8243 {
8244 mem = plus_constant (Pmode, save_area,
8245 i * 16 + ix86_varargs_gpr_size);
8246 mem = gen_rtx_MEM (smode, mem);
8247 MEM_NOTRAP_P (mem) = 1;
8248 set_mem_alias_set (mem, set);
8249 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
8250
8251 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
8252 }
8253
8254 emit_label (label);
8255 }
8256 }
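
/* With every register live, the save area laid out above is 48 bytes of
   GPRs (X86_64_REGPARM_MAX * 8) followed by 128 bytes of SSE registers
   (X86_64_SSE_REGPARM_MAX * 16), 176 bytes in total; the gp_offset and
   fp_offset fields of the va_list index into it.  */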
8257
8258 static void
8259 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
8260 {
8261 alias_set_type set = get_varargs_alias_set ();
8262 int i;
8263
8264 /* Reset to zero, as there might have been a SysV va_arg used
8265 before. */
8266 ix86_varargs_gpr_size = 0;
8267 ix86_varargs_fpr_size = 0;
8268
8269 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
8270 {
8271 rtx reg, mem;
8272
8273 mem = gen_rtx_MEM (Pmode,
8274 plus_constant (Pmode, virtual_incoming_args_rtx,
8275 i * UNITS_PER_WORD));
8276 MEM_NOTRAP_P (mem) = 1;
8277 set_mem_alias_set (mem, set);
8278
8279 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
8280 emit_move_insn (mem, reg);
8281 }
8282 }
8283
8284 static void
8285 ix86_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
8286 tree type, int *pretend_size ATTRIBUTE_UNUSED,
8287 int no_rtl)
8288 {
8289 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8290 CUMULATIVE_ARGS next_cum;
8291 tree fntype;
8292
8293 /* This argument doesn't appear to be used anymore, which is good,
8294 because the old code here didn't suppress rtl generation. */
8295 gcc_assert (!no_rtl);
8296
8297 if (!TARGET_64BIT)
8298 return;
8299
8300 fntype = TREE_TYPE (current_function_decl);
8301
8302 /* For varargs, we do not want to skip the dummy va_dcl argument.
8303 For stdargs, we do want to skip the last named argument. */
8304 next_cum = *cum;
8305 if (stdarg_p (fntype))
8306 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
8307 true);
8308
8309 if (cum->call_abi == MS_ABI)
8310 setup_incoming_varargs_ms_64 (&next_cum);
8311 else
8312 setup_incoming_varargs_64 (&next_cum);
8313 }
8314
8315 /* Return true if TYPE is a va_list of the plain `char *' kind. */
8316
8317 static bool
8318 is_va_list_char_pointer (tree type)
8319 {
8320 tree canonic;
8321
8322 /* For 32-bit it is always true. */
8323 if (!TARGET_64BIT)
8324 return true;
8325 canonic = ix86_canonical_va_list_type (type);
8326 return (canonic == ms_va_list_type_node
8327 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
8328 }
8329
8330 /* Implement va_start. */
8331
8332 static void
8333 ix86_va_start (tree valist, rtx nextarg)
8334 {
8335 HOST_WIDE_INT words, n_gpr, n_fpr;
8336 tree f_gpr, f_fpr, f_ovf, f_sav;
8337 tree gpr, fpr, ovf, sav, t;
8338 tree type;
8339 rtx ovf_rtx;
8340
8341 if (flag_split_stack
8342 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8343 {
8344 unsigned int scratch_regno;
8345
8346 /* When we are splitting the stack, we can't refer to the stack
8347 arguments using internal_arg_pointer, because they may be on
8348 the old stack. The split stack prologue will arrange to
8349 leave a pointer to the old stack arguments in a scratch
8350 register, which we here copy to a pseudo-register. The split
8351 stack prologue can't set the pseudo-register directly because
8352 it (the prologue) runs before any registers have been saved. */
8353
8354 scratch_regno = split_stack_prologue_scratch_regno ();
8355 if (scratch_regno != INVALID_REGNUM)
8356 {
8357 rtx reg, seq;
8358
8359 reg = gen_reg_rtx (Pmode);
8360 cfun->machine->split_stack_varargs_pointer = reg;
8361
8362 start_sequence ();
8363 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
8364 seq = get_insns ();
8365 end_sequence ();
8366
8367 push_topmost_sequence ();
8368 emit_insn_after (seq, entry_of_function ());
8369 pop_topmost_sequence ();
8370 }
8371 }
8372
8373 /* Only the 64-bit target needs something special. */
8374 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8375 {
8376 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8377 std_expand_builtin_va_start (valist, nextarg);
8378 else
8379 {
8380 rtx va_r, next;
8381
8382 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
8383 next = expand_binop (ptr_mode, add_optab,
8384 cfun->machine->split_stack_varargs_pointer,
8385 crtl->args.arg_offset_rtx,
8386 NULL_RTX, 0, OPTAB_LIB_WIDEN);
8387 convert_move (va_r, next, 0);
8388 }
8389 return;
8390 }
8391
8392 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
8393 f_fpr = DECL_CHAIN (f_gpr);
8394 f_ovf = DECL_CHAIN (f_fpr);
8395 f_sav = DECL_CHAIN (f_ovf);
8396
8397 valist = build_simple_mem_ref (valist);
8398 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
8399 /* The following should be folded into the MEM_REF offset. */
8400 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
8401 f_gpr, NULL_TREE);
8402 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
8403 f_fpr, NULL_TREE);
8404 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
8405 f_ovf, NULL_TREE);
8406 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
8407 f_sav, NULL_TREE);
8408
8409 /* Count number of gp and fp argument registers used. */
8410 words = crtl->args.info.words;
8411 n_gpr = crtl->args.info.regno;
8412 n_fpr = crtl->args.info.sse_regno;
8413
8414 if (cfun->va_list_gpr_size)
8415 {
8416 type = TREE_TYPE (gpr);
8417 t = build2 (MODIFY_EXPR, type,
8418 gpr, build_int_cst (type, n_gpr * 8));
8419 TREE_SIDE_EFFECTS (t) = 1;
8420 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8421 }
8422
8423 if (TARGET_SSE && cfun->va_list_fpr_size)
8424 {
8425 type = TREE_TYPE (fpr);
8426 t = build2 (MODIFY_EXPR, type, fpr,
8427 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
8428 TREE_SIDE_EFFECTS (t) = 1;
8429 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8430 }
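
/* Thus gp_offset is a byte offset into the register save area, advancing
   8 bytes per GPR, while fp_offset starts past the 48-byte GPR block
   (8 * X86_64_REGPARM_MAX) and advances 16 bytes per SSE register.  */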
8431
8432 /* Find the overflow area. */
8433 type = TREE_TYPE (ovf);
8434 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8435 ovf_rtx = crtl->args.internal_arg_pointer;
8436 else
8437 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
8438 t = make_tree (type, ovf_rtx);
8439 if (words != 0)
8440 t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
8441 t = build2 (MODIFY_EXPR, type, ovf, t);
8442 TREE_SIDE_EFFECTS (t) = 1;
8443 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8444
8445 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
8446 {
8447 /* Find the register save area.
8448 The function prologue saves it right above the stack frame. */
8449 type = TREE_TYPE (sav);
8450 t = make_tree (type, frame_pointer_rtx);
8451 if (!ix86_varargs_gpr_size)
8452 t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
8453 t = build2 (MODIFY_EXPR, type, sav, t);
8454 TREE_SIDE_EFFECTS (t) = 1;
8455 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8456 }
8457 }
8458
8459 /* Implement va_arg. */
8460
8461 static tree
8462 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
8463 gimple_seq *post_p)
8464 {
8465 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
8466 tree f_gpr, f_fpr, f_ovf, f_sav;
8467 tree gpr, fpr, ovf, sav, t;
8468 int size, rsize;
8469 tree lab_false, lab_over = NULL_TREE;
8470 tree addr, t2;
8471 rtx container;
8472 int indirect_p = 0;
8473 tree ptrtype;
8474 enum machine_mode nat_mode;
8475 unsigned int arg_boundary;
8476
8477 /* Only the 64-bit target needs something special. */
8478 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8479 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
8480
8481 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
8482 f_fpr = DECL_CHAIN (f_gpr);
8483 f_ovf = DECL_CHAIN (f_fpr);
8484 f_sav = DECL_CHAIN (f_ovf);
8485
8486 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
8487 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
8488 valist = build_va_arg_indirect_ref (valist);
8489 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
8490 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
8491 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
8492
8493 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
8494 if (indirect_p)
8495 type = build_pointer_type (type);
8496 size = int_size_in_bytes (type);
8497 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
8498
8499 nat_mode = type_natural_mode (type, NULL, false);
8500 switch (nat_mode)
8501 {
8502 case V8SFmode:
8503 case V8SImode:
8504 case V32QImode:
8505 case V16HImode:
8506 case V4DFmode:
8507 case V4DImode:
8508 case V16SFmode:
8509 case V16SImode:
8510 case V64QImode:
8511 case V32HImode:
8512 case V8DFmode:
8513 case V8DImode:
8514 /* Unnamed 256-bit and 512-bit vector mode parameters are passed on the stack. */
8515 if (!TARGET_64BIT_MS_ABI)
8516 {
8517 container = NULL;
8518 break;
8519 }
8520
8521 default:
8522 container = construct_container (nat_mode, TYPE_MODE (type),
8523 type, 0, X86_64_REGPARM_MAX,
8524 X86_64_SSE_REGPARM_MAX, intreg,
8525 0);
8526 break;
8527 }
8528
8529 /* Pull the value out of the saved registers. */
8530
8531 addr = create_tmp_var (ptr_type_node, "addr");
8532
8533 if (container)
8534 {
8535 int needed_intregs, needed_sseregs;
8536 bool need_temp;
8537 tree int_addr, sse_addr;
8538
8539 lab_false = create_artificial_label (UNKNOWN_LOCATION);
8540 lab_over = create_artificial_label (UNKNOWN_LOCATION);
8541
8542 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
8543
8544 need_temp = (!REG_P (container)
8545 && ((needed_intregs && TYPE_ALIGN (type) > 64)
8546 || TYPE_ALIGN (type) > 128));
8547
8548 /* In case we are passing a structure, verify that it forms a consecutive
8549 block in the register save area. If not, we need to do moves. */
8550 if (!need_temp && !REG_P (container))
8551 {
8552 /* Verify that all registers are strictly consecutive */
8553 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
8554 {
8555 int i;
8556
8557 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
8558 {
8559 rtx slot = XVECEXP (container, 0, i);
8560 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
8561 || INTVAL (XEXP (slot, 1)) != i * 16)
8562 need_temp = 1;
8563 }
8564 }
8565 else
8566 {
8567 int i;
8568
8569 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
8570 {
8571 rtx slot = XVECEXP (container, 0, i);
8572 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
8573 || INTVAL (XEXP (slot, 1)) != i * 8)
8574 need_temp = 1;
8575 }
8576 }
8577 }
8578 if (!need_temp)
8579 {
8580 int_addr = addr;
8581 sse_addr = addr;
8582 }
8583 else
8584 {
8585 int_addr = create_tmp_var (ptr_type_node, "int_addr");
8586 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
8587 }
8588
8589 /* First ensure that we fit completely in registers. */
8590 if (needed_intregs)
8591 {
8592 t = build_int_cst (TREE_TYPE (gpr),
8593 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
8594 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
8595 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
8596 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
8597 gimplify_and_add (t, pre_p);
8598 }
8599 if (needed_sseregs)
8600 {
8601 t = build_int_cst (TREE_TYPE (fpr),
8602 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
8603 + X86_64_REGPARM_MAX * 8);
8604 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
8605 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
8606 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
8607 gimplify_and_add (t, pre_p);
8608 }
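
/* For illustration, with needed_intregs == 1 and needed_sseregs == 1 the
   two checks above emit roughly

       if (gp_offset >= 48) goto lab_false;
       if (fp_offset >= 176) goto lab_false;

   i.e. we fall through only when enough of the save area is left.  */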
8609
8610 /* Compute index to start of area used for integer regs. */
8611 if (needed_intregs)
8612 {
8613 /* int_addr = gpr + sav; */
8614 t = fold_build_pointer_plus (sav, gpr);
8615 gimplify_assign (int_addr, t, pre_p);
8616 }
8617 if (needed_sseregs)
8618 {
8619 /* sse_addr = fpr + sav; */
8620 t = fold_build_pointer_plus (sav, fpr);
8621 gimplify_assign (sse_addr, t, pre_p);
8622 }
8623 if (need_temp)
8624 {
8625 int i, prev_size = 0;
8626 tree temp = create_tmp_var (type, "va_arg_tmp");
8627
8628 /* addr = &temp; */
8629 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
8630 gimplify_assign (addr, t, pre_p);
8631
8632 for (i = 0; i < XVECLEN (container, 0); i++)
8633 {
8634 rtx slot = XVECEXP (container, 0, i);
8635 rtx reg = XEXP (slot, 0);
8636 enum machine_mode mode = GET_MODE (reg);
8637 tree piece_type;
8638 tree addr_type;
8639 tree daddr_type;
8640 tree src_addr, src;
8641 int src_offset;
8642 tree dest_addr, dest;
8643 int cur_size = GET_MODE_SIZE (mode);
8644
8645 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
8646 prev_size = INTVAL (XEXP (slot, 1));
8647 if (prev_size + cur_size > size)
8648 {
8649 cur_size = size - prev_size;
8650 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
8651 if (mode == BLKmode)
8652 mode = QImode;
8653 }
8654 piece_type = lang_hooks.types.type_for_mode (mode, 1);
8655 if (mode == GET_MODE (reg))
8656 addr_type = build_pointer_type (piece_type);
8657 else
8658 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8659 true);
8660 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8661 true);
8662
8663 if (SSE_REGNO_P (REGNO (reg)))
8664 {
8665 src_addr = sse_addr;
8666 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
8667 }
8668 else
8669 {
8670 src_addr = int_addr;
8671 src_offset = REGNO (reg) * 8;
8672 }
8673 src_addr = fold_convert (addr_type, src_addr);
8674 src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
8675
8676 dest_addr = fold_convert (daddr_type, addr);
8677 dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
8678 if (cur_size == GET_MODE_SIZE (mode))
8679 {
8680 src = build_va_arg_indirect_ref (src_addr);
8681 dest = build_va_arg_indirect_ref (dest_addr);
8682
8683 gimplify_assign (dest, src, pre_p);
8684 }
8685 else
8686 {
8687 tree copy
8688 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
8689 3, dest_addr, src_addr,
8690 size_int (cur_size));
8691 gimplify_and_add (copy, pre_p);
8692 }
8693 prev_size += cur_size;
8694 }
8695 }
8696
8697 if (needed_intregs)
8698 {
8699 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
8700 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
8701 gimplify_assign (gpr, t, pre_p);
8702 }
8703
8704 if (needed_sseregs)
8705 {
8706 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
8707 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
8708 gimplify_assign (fpr, t, pre_p);
8709 }
8710
8711 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
8712
8713 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
8714 }
8715
8716 /* ... otherwise out of the overflow area. */
8717
8718 /* When the caller aligns a parameter on the stack, a parameter
8719 alignment beyond MAX_SUPPORTED_STACK_ALIGNMENT is capped at
8720 MAX_SUPPORTED_STACK_ALIGNMENT. We match the callee here with the
8721 caller. */
8722 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
8723 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
8724 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
8725
8726 /* Care for on-stack alignment if needed. */
8727 if (arg_boundary <= 64 || size == 0)
8728 t = ovf;
8729 else
8730 {
8731 HOST_WIDE_INT align = arg_boundary / 8;
8732 t = fold_build_pointer_plus_hwi (ovf, align - 1);
8733 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
8734 build_int_cst (TREE_TYPE (t), -align));
8735 }
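
/* (ovf + align - 1) & -align rounds the overflow pointer up to the next
   multiple of align; e.g. with a 32-byte boundary an offset of 0x28
   becomes 0x40.  */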
8736
8737 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
8738 gimplify_assign (addr, t, pre_p);
8739
8740 t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
8741 gimplify_assign (unshare_expr (ovf), t, pre_p);
8742
8743 if (container)
8744 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
8745
8746 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
8747 addr = fold_convert (ptrtype, addr);
8748
8749 if (indirect_p)
8750 addr = build_va_arg_indirect_ref (addr);
8751 return build_va_arg_indirect_ref (addr);
8752 }
8753 \f
8754 /* Return true if OPNUM's MEM should be matched
8755 in movabs* patterns. */
8756
8757 bool
8758 ix86_check_movabs (rtx insn, int opnum)
8759 {
8760 rtx set, mem;
8761
8762 set = PATTERN (insn);
8763 if (GET_CODE (set) == PARALLEL)
8764 set = XVECEXP (set, 0, 0);
8765 gcc_assert (GET_CODE (set) == SET);
8766 mem = XEXP (set, opnum);
8767 while (GET_CODE (mem) == SUBREG)
8768 mem = SUBREG_REG (mem);
8769 gcc_assert (MEM_P (mem));
8770 return volatile_ok || !MEM_VOLATILE_P (mem);
8771 }
8772 \f
8773 /* Initialize the table of extra 80387 mathematical constants. */
8774
8775 static void
8776 init_ext_80387_constants (void)
8777 {
8778 static const char * cst[5] =
8779 {
8780 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
8781 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
8782 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
8783 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
8784 "3.1415926535897932385128089594061862044", /* 4: fldpi */
8785 };
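/* These are, respectively, log10(2), ln(2), log2(e), log2(10) and pi,
   i.e. the values loaded by the x87 fldlg2, fldln2, fldl2e, fldl2t and
   fldpi instructions.  */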
8786 int i;
8787
8788 for (i = 0; i < 5; i++)
8789 {
8790 real_from_string (&ext_80387_constants_table[i], cst[i]);
8791 /* Ensure each constant is rounded to XFmode precision. */
8792 real_convert (&ext_80387_constants_table[i],
8793 XFmode, &ext_80387_constants_table[i]);
8794 }
8795
8796 ext_80387_constants_init = 1;
8797 }
8798
8799 /* Return a code 1..9 if the constant X can be loaded with a special x87
8800 instruction, 0 if it cannot, or -1 if X is not an x87 float constant. */
8801
8802 int
8803 standard_80387_constant_p (rtx x)
8804 {
8805 enum machine_mode mode = GET_MODE (x);
8806
8807 REAL_VALUE_TYPE r;
8808
8809 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
8810 return -1;
8811
8812 if (x == CONST0_RTX (mode))
8813 return 1;
8814 if (x == CONST1_RTX (mode))
8815 return 2;
8816
8817 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8818
8819 /* For XFmode constants, try to find a special 80387 instruction when
8820 optimizing for size or on those CPUs that benefit from them. */
8821 if (mode == XFmode
8822 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
8823 {
8824 int i;
8825
8826 if (! ext_80387_constants_init)
8827 init_ext_80387_constants ();
8828
8829 for (i = 0; i < 5; i++)
8830 if (real_identical (&r, &ext_80387_constants_table[i]))
8831 return i + 3;
8832 }
8833
8834 /* Load of the constant -0.0 or -1.0 will be split as
8835 fldz;fchs or fld1;fchs sequence. */
8836 if (real_isnegzero (&r))
8837 return 8;
8838 if (real_identical (&r, &dconstm1))
8839 return 9;
8840
8841 return 0;
8842 }
8843
8844 /* Return the opcode of the special instruction to be used to load
8845 the constant X. */
8846
8847 const char *
8848 standard_80387_constant_opcode (rtx x)
8849 {
8850 switch (standard_80387_constant_p (x))
8851 {
8852 case 1:
8853 return "fldz";
8854 case 2:
8855 return "fld1";
8856 case 3:
8857 return "fldlg2";
8858 case 4:
8859 return "fldln2";
8860 case 5:
8861 return "fldl2e";
8862 case 6:
8863 return "fldl2t";
8864 case 7:
8865 return "fldpi";
8866 case 8:
8867 case 9:
8868 return "#";
8869 default:
8870 gcc_unreachable ();
8871 }
8872 }
8873
8874 /* Return the CONST_DOUBLE representing the 80387 constant that is
8875 loaded by the specified special instruction. The argument IDX
8876 matches the return value from standard_80387_constant_p. */
8877
8878 rtx
8879 standard_80387_constant_rtx (int idx)
8880 {
8881 int i;
8882
8883 if (! ext_80387_constants_init)
8884 init_ext_80387_constants ();
8885
8886 switch (idx)
8887 {
8888 case 3:
8889 case 4:
8890 case 5:
8891 case 6:
8892 case 7:
8893 i = idx - 3;
8894 break;
8895
8896 default:
8897 gcc_unreachable ();
8898 }
8899
8900 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
8901 XFmode);
8902 }
8903
8904 /* Return 1 if X is all zeros, 2 if X is all ones in a supported
8905 SSE/AVX vector mode, and 0 otherwise. */
8906
8907 int
8908 standard_sse_constant_p (rtx x)
8909 {
8910 enum machine_mode mode = GET_MODE (x);
8911
8912 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
8913 return 1;
8914 if (vector_all_ones_operand (x, mode))
8915 switch (mode)
8916 {
8917 case V16QImode:
8918 case V8HImode:
8919 case V4SImode:
8920 case V2DImode:
8921 if (TARGET_SSE2)
8922 return 2;
8923 case V32QImode:
8924 case V16HImode:
8925 case V8SImode:
8926 case V4DImode:
8927 if (TARGET_AVX2)
8928 return 2;
8929 case V64QImode:
8930 case V32HImode:
8931 case V16SImode:
8932 case V8DImode:
8933 if (TARGET_AVX512F)
8934 return 2;
8935 default:
8936 break;
8937 }
8938
8939 return 0;
8940 }
8941
8942 /* Return the opcode of the special instruction to be used to load
8943 the constant X. */
8944
8945 const char *
8946 standard_sse_constant_opcode (rtx insn, rtx x)
8947 {
8948 switch (standard_sse_constant_p (x))
8949 {
8950 case 1:
8951 switch (get_attr_mode (insn))
8952 {
8953 case MODE_XI:
8954 case MODE_V16SF:
8955 return "vpxord\t%g0, %g0, %g0";
8956 case MODE_V8DF:
8957 return "vpxorq\t%g0, %g0, %g0";
8958 case MODE_TI:
8959 return "%vpxor\t%0, %d0";
8960 case MODE_V2DF:
8961 return "%vxorpd\t%0, %d0";
8962 case MODE_V4SF:
8963 return "%vxorps\t%0, %d0";
8964
8965 case MODE_OI:
8966 return "vpxor\t%x0, %x0, %x0";
8967 case MODE_V4DF:
8968 return "vxorpd\t%x0, %x0, %x0";
8969 case MODE_V8SF:
8970 return "vxorps\t%x0, %x0, %x0";
8971
8972 default:
8973 break;
8974 }
8975
8976 case 2:
8977 if (get_attr_mode (insn) == MODE_XI
8978 || get_attr_mode (insn) == MODE_V8DF
8979 || get_attr_mode (insn) == MODE_V16SF)
8980 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
8981 if (TARGET_AVX)
8982 return "vpcmpeqd\t%0, %0, %0";
8983 else
8984 return "pcmpeqd\t%0, %0";
8985
8986 default:
8987 break;
8988 }
8989 gcc_unreachable ();
8990 }
8991
8992 /* Return true if OP contains a symbol reference. */
8993
8994 bool
8995 symbolic_reference_mentioned_p (rtx op)
8996 {
8997 const char *fmt;
8998 int i;
8999
9000 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
9001 return true;
9002
9003 fmt = GET_RTX_FORMAT (GET_CODE (op));
9004 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
9005 {
9006 if (fmt[i] == 'E')
9007 {
9008 int j;
9009
9010 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
9011 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
9012 return true;
9013 }
9014
9015 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
9016 return true;
9017 }
9018
9019 return false;
9020 }
9021
9022 /* Return true if it is appropriate to emit `ret' instructions in the
9023 body of a function. Do this only if the epilogue is simple, needing a
9024 couple of insns. Prior to reloading, we can't tell how many registers
9025 must be saved, so return false then. Return false if there is no frame
9026 marker to de-allocate. */
9027
9028 bool
9029 ix86_can_use_return_insn_p (void)
9030 {
9031 struct ix86_frame frame;
9032
9033 if (! reload_completed || frame_pointer_needed)
9034 return 0;
9035
9036 /* Don't allow more than 32k pop, since that's all we can do
9037 with one instruction. */
9038 if (crtl->args.pops_args && crtl->args.size >= 32768)
9039 return 0;
9040
9041 ix86_compute_frame_layout (&frame);
9042 return (frame.stack_pointer_offset == UNITS_PER_WORD
9043 && (frame.nregs + frame.nsseregs) == 0);
9044 }
9045 \f
9046 /* Value should be nonzero if functions must have frame pointers.
9047 Zero means the frame pointer need not be set up (and parms may
9048 be accessed via the stack pointer) in functions that seem suitable. */
9049
9050 static bool
9051 ix86_frame_pointer_required (void)
9052 {
9053 /* If we accessed previous frames, then the generated code expects
9054 to be able to access the saved ebp value in our frame. */
9055 if (cfun->machine->accesses_prev_frame)
9056 return true;
9057
9058 /* Several x86 OSes need a frame pointer for other reasons,
9059 usually pertaining to setjmp. */
9060 if (SUBTARGET_FRAME_POINTER_REQUIRED)
9061 return true;
9062
9063 /* For older 32-bit runtimes, setjmp requires a valid frame pointer. */
9064 if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
9065 return true;
9066
9067 /* With Win64 SEH, very large frames need a frame pointer, as the maximum
9068 stack allocation is 4GB. */
9069 if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE)
9070 return true;
9071
9072 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
9073 turns off the frame pointer by default. Turn it back on now if
9074 we've not got a leaf function. */
9075 if (TARGET_OMIT_LEAF_FRAME_POINTER
9076 && (!crtl->is_leaf
9077 || ix86_current_function_calls_tls_descriptor))
9078 return true;
9079
9080 if (crtl->profile && !flag_fentry)
9081 return true;
9082
9083 return false;
9084 }
9085
9086 /* Record that the current function accesses previous call frames. */
9087
9088 void
9089 ix86_setup_frame_addresses (void)
9090 {
9091 cfun->machine->accesses_prev_frame = 1;
9092 }
9093 \f
9094 #ifndef USE_HIDDEN_LINKONCE
9095 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
9096 # define USE_HIDDEN_LINKONCE 1
9097 # else
9098 # define USE_HIDDEN_LINKONCE 0
9099 # endif
9100 #endif
9101
9102 static int pic_labels_used;
9103
9104 /* Fill in the label name that should be used for a PC thunk for
9105 the given register. */
9106
9107 static void
9108 get_pc_thunk_name (char name[32], unsigned int regno)
9109 {
9110 gcc_assert (!TARGET_64BIT);
9111
9112 if (USE_HIDDEN_LINKONCE)
9113 sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
9114 else
9115 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
9116 }
9117
9118
9119 /* This function generates the pc thunks used for -fpic: each one loads
9120 its target register with the return address of the caller and then returns. */
9121
9122 static void
9123 ix86_code_end (void)
9124 {
9125 rtx xops[2];
9126 int regno;
9127
9128 for (regno = AX_REG; regno <= SP_REG; regno++)
9129 {
9130 char name[32];
9131 tree decl;
9132
9133 if (!(pic_labels_used & (1 << regno)))
9134 continue;
9135
9136 get_pc_thunk_name (name, regno);
9137
9138 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
9139 get_identifier (name),
9140 build_function_type_list (void_type_node, NULL_TREE));
9141 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
9142 NULL_TREE, void_type_node);
9143 TREE_PUBLIC (decl) = 1;
9144 TREE_STATIC (decl) = 1;
9145 DECL_IGNORED_P (decl) = 1;
9146
9147 #if TARGET_MACHO
9148 if (TARGET_MACHO)
9149 {
9150 switch_to_section (darwin_sections[text_coal_section]);
9151 fputs ("\t.weak_definition\t", asm_out_file);
9152 assemble_name (asm_out_file, name);
9153 fputs ("\n\t.private_extern\t", asm_out_file);
9154 assemble_name (asm_out_file, name);
9155 putc ('\n', asm_out_file);
9156 ASM_OUTPUT_LABEL (asm_out_file, name);
9157 DECL_WEAK (decl) = 1;
9158 }
9159 else
9160 #endif
9161 if (USE_HIDDEN_LINKONCE)
9162 {
9163 DECL_COMDAT_GROUP (decl) = DECL_ASSEMBLER_NAME (decl);
9164
9165 targetm.asm_out.unique_section (decl, 0);
9166 switch_to_section (get_named_section (decl, NULL, 0));
9167
9168 targetm.asm_out.globalize_label (asm_out_file, name);
9169 fputs ("\t.hidden\t", asm_out_file);
9170 assemble_name (asm_out_file, name);
9171 putc ('\n', asm_out_file);
9172 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
9173 }
9174 else
9175 {
9176 switch_to_section (text_section);
9177 ASM_OUTPUT_LABEL (asm_out_file, name);
9178 }
9179
9180 DECL_INITIAL (decl) = make_node (BLOCK);
9181 current_function_decl = decl;
9182 init_function_start (decl);
9183 first_function_block_is_cold = false;
9184 /* Make sure unwind info is emitted for the thunk if needed. */
9185 final_start_function (emit_barrier (), asm_out_file, 1);
9186
9187 /* Pad the stack IP move with 4 instructions' worth of NOPs, i.e. emit
9188 8 NOPs (two NOPs count as one instruction). */
9189 if (TARGET_PAD_SHORT_FUNCTION)
9190 {
9191 int i = 8;
9192
9193 while (i--)
9194 fputs ("\tnop\n", asm_out_file);
9195 }
9196
9197 xops[0] = gen_rtx_REG (Pmode, regno);
9198 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
9199 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
9200 fputs ("\tret\n", asm_out_file);
9201 final_end_function ();
9202 init_insn_lengths ();
9203 free_after_compilation (cfun);
9204 set_cfun (NULL);
9205 current_function_decl = NULL;
9206 }
9207
9208 if (flag_split_stack)
9209 file_end_indicate_split_stack ();
9210 }
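
/* The thunk emitted above for register REG is simply

       __x86.get_pc_thunk.REG:
               mov     (%esp), %REG
               ret

   so calling it leaves the address of the instruction after the call
   (the PIC base) in %REG.  */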
9211
9212 /* Emit code for the SET_GOT patterns. */
9213
9214 const char *
9215 output_set_got (rtx dest, rtx label)
9216 {
9217 rtx xops[3];
9218
9219 xops[0] = dest;
9220
9221 if (TARGET_VXWORKS_RTP && flag_pic)
9222 {
9223 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
9224 xops[2] = gen_rtx_MEM (Pmode,
9225 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
9226 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
9227
9228 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
9229 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
9230 an unadorned address. */
9231 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
9232 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
9233 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
9234 return "";
9235 }
9236
9237 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
9238
9239 if (!flag_pic)
9240 {
9241 if (TARGET_MACHO)
9242 /* We don't need a pic base, we're not producing pic. */
9243 gcc_unreachable ();
9244
9245 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
9246 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
9247 targetm.asm_out.internal_label (asm_out_file, "L",
9248 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
9249 }
9250 else
9251 {
9252 char name[32];
9253 get_pc_thunk_name (name, REGNO (dest));
9254 pic_labels_used |= 1 << REGNO (dest);
9255
9256 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
9257 xops[2] = gen_rtx_MEM (QImode, xops[2]);
9258 output_asm_insn ("call\t%X2", xops);
9259
9260 #if TARGET_MACHO
9261 /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
9262 This is what will be referenced by the Mach-O PIC subsystem. */
9263 if (machopic_should_output_picbase_label () || !label)
9264 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
9265
9266 /* When we are restoring the pic base at the site of a nonlocal label,
9267 and we decided to emit the pic base above, we will still output a
9268 local label used for calculating the correction offset (even though
9269 the offset will be 0 in that case). */
9270 if (label)
9271 targetm.asm_out.internal_label (asm_out_file, "L",
9272 CODE_LABEL_NUMBER (label));
9273 #endif
9274 }
9275
9276 if (!TARGET_MACHO)
9277 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
9278
9279 return "";
9280 }
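
/* With 32-bit -fpic and %ebx as the PIC register, the sequence emitted
   above is typically

       call    __x86.get_pc_thunk.bx
       addl    $_GLOBAL_OFFSET_TABLE_, %ebx

   which leaves the address of the GOT in %ebx.  */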
9281
9282 /* Generate a "push" pattern for input ARG. */
9283
9284 static rtx
9285 gen_push (rtx arg)
9286 {
9287 struct machine_function *m = cfun->machine;
9288
9289 if (m->fs.cfa_reg == stack_pointer_rtx)
9290 m->fs.cfa_offset += UNITS_PER_WORD;
9291 m->fs.sp_offset += UNITS_PER_WORD;
9292
9293 if (REG_P (arg) && GET_MODE (arg) != word_mode)
9294 arg = gen_rtx_REG (word_mode, REGNO (arg));
9295
9296 return gen_rtx_SET (VOIDmode,
9297 gen_rtx_MEM (word_mode,
9298 gen_rtx_PRE_DEC (Pmode,
9299 stack_pointer_rtx)),
9300 arg);
9301 }
9302
9303 /* Generate a "pop" pattern for input ARG. */
9304
9305 static rtx
9306 gen_pop (rtx arg)
9307 {
9308 if (REG_P (arg) && GET_MODE (arg) != word_mode)
9309 arg = gen_rtx_REG (word_mode, REGNO (arg));
9310
9311 return gen_rtx_SET (VOIDmode,
9312 arg,
9313 gen_rtx_MEM (word_mode,
9314 gen_rtx_POST_INC (Pmode,
9315 stack_pointer_rtx)));
9316 }
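
/* For example, on x86-64 gen_push and gen_pop of %rbx produce

       (set (mem:DI (pre_dec:DI (reg:DI sp))) (reg:DI bx))
       (set (reg:DI bx) (mem:DI (post_inc:DI (reg:DI sp))))

   which the push/pop patterns in i386.md match directly.  */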
9317
9318 /* Return the regno of an unused call-clobbered register available for
9319 the entire function, or INVALID_REGNUM if there is none. */
9320
9321 static unsigned int
9322 ix86_select_alt_pic_regnum (void)
9323 {
9324 if (crtl->is_leaf
9325 && !crtl->profile
9326 && !ix86_current_function_calls_tls_descriptor)
9327 {
9328 int i, drap;
9329 /* Can't use the same register for both PIC and DRAP. */
9330 if (crtl->drap_reg)
9331 drap = REGNO (crtl->drap_reg);
9332 else
9333 drap = -1;
9334 for (i = 2; i >= 0; --i)
9335 if (i != drap && !df_regs_ever_live_p (i))
9336 return i;
9337 }
9338
9339 return INVALID_REGNUM;
9340 }
9341
9342 /* Return TRUE if we need to save REGNO. */
9343
9344 static bool
9345 ix86_save_reg (unsigned int regno, bool maybe_eh_return)
9346 {
9347 if (pic_offset_table_rtx
9348 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
9349 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
9350 || crtl->profile
9351 || crtl->calls_eh_return
9352 || crtl->uses_const_pool
9353 || cfun->has_nonlocal_label))
9354 return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
9355
9356 if (crtl->calls_eh_return && maybe_eh_return)
9357 {
9358 unsigned i;
9359 for (i = 0; ; i++)
9360 {
9361 unsigned test = EH_RETURN_DATA_REGNO (i);
9362 if (test == INVALID_REGNUM)
9363 break;
9364 if (test == regno)
9365 return true;
9366 }
9367 }
9368
9369 if (crtl->drap_reg
9370 && regno == REGNO (crtl->drap_reg)
9371 && !cfun->machine->no_drap_save_restore)
9372 return true;
9373
9374 return (df_regs_ever_live_p (regno)
9375 && !call_used_regs[regno]
9376 && !fixed_regs[regno]
9377 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
9378 }
9379
9380 /* Return the number of saved general purpose registers. */
9381
9382 static int
9383 ix86_nsaved_regs (void)
9384 {
9385 int nregs = 0;
9386 int regno;
9387
9388 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9389 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9390 nregs ++;
9391 return nregs;
9392 }
9393
9394 /* Return the number of saved SSE registers. */
9395
9396 static int
9397 ix86_nsaved_sseregs (void)
9398 {
9399 int nregs = 0;
9400 int regno;
9401
9402 if (!TARGET_64BIT_MS_ABI)
9403 return 0;
9404 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9405 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9406 nregs ++;
9407 return nregs;
9408 }
9409
9410 /* Given FROM and TO register numbers, say whether this elimination is
9411 allowed. If stack alignment is needed, we can only replace argument
9412 pointer with hard frame pointer, or replace frame pointer with stack
9413 pointer. Otherwise, frame pointer elimination is automatically
9414 handled and all other eliminations are valid. */
9415
9416 static bool
9417 ix86_can_eliminate (const int from, const int to)
9418 {
9419 if (stack_realign_fp)
9420 return ((from == ARG_POINTER_REGNUM
9421 && to == HARD_FRAME_POINTER_REGNUM)
9422 || (from == FRAME_POINTER_REGNUM
9423 && to == STACK_POINTER_REGNUM));
9424 else
9425 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
9426 }
9427
9428 /* Return the offset between two registers, one to be eliminated, and the other
9429 its replacement, at the start of a routine. */
9430
9431 HOST_WIDE_INT
9432 ix86_initial_elimination_offset (int from, int to)
9433 {
9434 struct ix86_frame frame;
9435 ix86_compute_frame_layout (&frame);
9436
9437 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
9438 return frame.hard_frame_pointer_offset;
9439 else if (from == FRAME_POINTER_REGNUM
9440 && to == HARD_FRAME_POINTER_REGNUM)
9441 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
9442 else
9443 {
9444 gcc_assert (to == STACK_POINTER_REGNUM);
9445
9446 if (from == ARG_POINTER_REGNUM)
9447 return frame.stack_pointer_offset;
9448
9449 gcc_assert (from == FRAME_POINTER_REGNUM);
9450 return frame.stack_pointer_offset - frame.frame_pointer_offset;
9451 }
9452 }
9453
9454 /* In a dynamically-aligned function, we can't know the offset from
9455 stack pointer to frame pointer, so we must ensure that setjmp
9456 eliminates fp against the hard fp (%ebp) rather than trying to
9457 index from %esp up to the top of the frame across a gap that is
9458 of unknown (at compile-time) size. */
9459 static rtx
9460 ix86_builtin_setjmp_frame_value (void)
9461 {
9462 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
9463 }
9464
9465 /* When using -fsplit-stack, the allocation routines set a field in
9466 the TCB to the bottom of the stack plus this much space, measured
9467 in bytes. */
9468
9469 #define SPLIT_STACK_AVAILABLE 256
9470
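/* The offsets computed below grow downward from the incoming stack
   pointer, roughly in this order: return address, optional pushed
   static chain, saved frame pointer, GP register save area, 16-byte
   aligned SSE register save area, va_arg save area, local variables,
   and finally the outgoing argument area; the red zone, when usable,
   is subtracted from the final stack pointer offset.  */
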
9471 /* Fill structure ix86_frame about frame of currently computed function. */
9472
9473 static void
9474 ix86_compute_frame_layout (struct ix86_frame *frame)
9475 {
9476 unsigned HOST_WIDE_INT stack_alignment_needed;
9477 HOST_WIDE_INT offset;
9478 unsigned HOST_WIDE_INT preferred_alignment;
9479 HOST_WIDE_INT size = get_frame_size ();
9480 HOST_WIDE_INT to_allocate;
9481
9482 frame->nregs = ix86_nsaved_regs ();
9483 frame->nsseregs = ix86_nsaved_sseregs ();
9484
9485 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
9486 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
9487
9488 /* The 64-bit MS ABI seems to require the stack alignment to always be 16,
9489 except for function prologues and leaf functions. */
9490 if ((TARGET_64BIT_MS_ABI && preferred_alignment < 16)
9491 && (!crtl->is_leaf || cfun->calls_alloca != 0
9492 || ix86_current_function_calls_tls_descriptor))
9493 {
9494 preferred_alignment = 16;
9495 stack_alignment_needed = 16;
9496 crtl->preferred_stack_boundary = 128;
9497 crtl->stack_alignment_needed = 128;
9498 }
9499
9500 gcc_assert (!size || stack_alignment_needed);
9501 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
9502 gcc_assert (preferred_alignment <= stack_alignment_needed);
9503
9504 /* For SEH we have to limit the amount of code movement into the prologue.
9505 At present we do this via a BLOCKAGE, at which point there's very little
9506 scheduling that can be done, which means that there's very little point
9507 in doing anything except PUSHs. */
9508 if (TARGET_SEH)
9509 cfun->machine->use_fast_prologue_epilogue = false;
9510
9511 /* During a reload iteration the number of registers saved can change.
9512 Recompute the value as needed. Do not recompute when the number of
9513 registers didn't change, as reload makes multiple calls to this function
9514 and does not expect the decision to change within a single iteration. */
9515 else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun))
9516 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
9517 {
9518 int count = frame->nregs;
9519 struct cgraph_node *node = cgraph_get_node (current_function_decl);
9520
9521 cfun->machine->use_fast_prologue_epilogue_nregs = count;
9522
9523 /* The fast prologue uses move instead of push to save registers. This
9524 is significantly longer, but also executes faster as modern hardware
9525 can execute the moves in parallel, but can't do that for push/pop.
9526
9527 Be careful about choosing what prologue to emit: when the function takes
9528 many instructions to execute, we may use the slow version, as we also do
9529 when the function is known to be outside a hot spot (this is known with
9530 feedback only). Weight the size of the function by the number of registers
9531 to save, as it is cheap to use one or two push instructions but very
9532 slow to use many of them. */
9533 if (count)
9534 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
9535 if (node->frequency < NODE_FREQUENCY_NORMAL
9536 || (flag_branch_probabilities
9537 && node->frequency < NODE_FREQUENCY_HOT))
9538 cfun->machine->use_fast_prologue_epilogue = false;
9539 else
9540 cfun->machine->use_fast_prologue_epilogue
9541 = !expensive_function_p (count);
9542 }
9543
9544 frame->save_regs_using_mov
9545 = (TARGET_PROLOGUE_USING_MOVE && cfun->machine->use_fast_prologue_epilogue
9546 /* If static stack checking is enabled and done with probes,
9547 the registers need to be saved before allocating the frame. */
9548 && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);
9549
9550 /* Skip return address. */
9551 offset = UNITS_PER_WORD;
9552
9553 /* Skip pushed static chain. */
9554 if (ix86_static_chain_on_stack)
9555 offset += UNITS_PER_WORD;
9556
9557 /* Skip saved base pointer. */
9558 if (frame_pointer_needed)
9559 offset += UNITS_PER_WORD;
9560 frame->hfp_save_offset = offset;
9561
9562 /* The traditional frame pointer location is at the top of the frame. */
9563 frame->hard_frame_pointer_offset = offset;
9564
9565 /* Register save area */
9566 offset += frame->nregs * UNITS_PER_WORD;
9567 frame->reg_save_offset = offset;
9568
9569 /* On SEH target, registers are pushed just before the frame pointer
9570 location. */
9571 if (TARGET_SEH)
9572 frame->hard_frame_pointer_offset = offset;
9573
9574 /* Align and set SSE register save area. */
9575 if (frame->nsseregs)
9576 {
9577 /* The only ABI that has saved SSE registers (Win64) also has a
9578 16-byte aligned default stack, and thus we don't need to be
9579 within the re-aligned local stack frame to save them. */
9580 gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
9581 offset = (offset + 16 - 1) & -16;
9582 offset += frame->nsseregs * 16;
9583 }
9584 frame->sse_reg_save_offset = offset;
9585
9586 /* The re-aligned stack starts here. Values before this point are not
9587 directly comparable with values below this point. In order to make
9588 sure that no value happens to be the same before and after, force
9589 the alignment computation below to add a non-zero value. */
9590 if (stack_realign_fp)
9591 offset = (offset + stack_alignment_needed) & -stack_alignment_needed;
9592
9593 /* Va-arg area */
9594 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
9595 offset += frame->va_arg_size;
9596
9597 /* Align start of frame for local function. */
9598 if (stack_realign_fp
9599 || offset != frame->sse_reg_save_offset
9600 || size != 0
9601 || !crtl->is_leaf
9602 || cfun->calls_alloca
9603 || ix86_current_function_calls_tls_descriptor)
9604 offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
9605
9606 /* Frame pointer points here. */
9607 frame->frame_pointer_offset = offset;
9608
9609 offset += size;
9610
9611 /* Add the outgoing arguments area. It can be skipped if we eliminated
9612 all the function calls as dead code.
9613 Skipping is, however, impossible when the function calls alloca, since
9614 the alloca expander assumes that the last crtl->outgoing_args_size
9615 bytes of the stack frame are unused. */
9616 if (ACCUMULATE_OUTGOING_ARGS
9617 && (!crtl->is_leaf || cfun->calls_alloca
9618 || ix86_current_function_calls_tls_descriptor))
9619 {
9620 offset += crtl->outgoing_args_size;
9621 frame->outgoing_arguments_size = crtl->outgoing_args_size;
9622 }
9623 else
9624 frame->outgoing_arguments_size = 0;
9625
9626 /* Align stack boundary. Only needed if we're calling another function
9627 or using alloca. */
9628 if (!crtl->is_leaf || cfun->calls_alloca
9629 || ix86_current_function_calls_tls_descriptor)
9630 offset = (offset + preferred_alignment - 1) & -preferred_alignment;
9631
9632 /* We've reached end of stack frame. */
9633 frame->stack_pointer_offset = offset;
9634
9635 /* Size prologue needs to allocate. */
9636 to_allocate = offset - frame->sse_reg_save_offset;
9637
9638 if ((!to_allocate && frame->nregs <= 1)
9639 || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
9640 frame->save_regs_using_mov = false;
9641
9642 if (ix86_using_red_zone ()
9643 && crtl->sp_is_unchanging
9644 && crtl->is_leaf
9645 && !ix86_current_function_calls_tls_descriptor)
9646 {
9647 frame->red_zone_size = to_allocate;
9648 if (frame->save_regs_using_mov)
9649 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
9650 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
9651 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
9652 }
9653 else
9654 frame->red_zone_size = 0;
9655 frame->stack_pointer_offset -= frame->red_zone_size;
9656
9657 /* The SEH frame pointer location is near the bottom of the frame.
9658 This is enforced by the fact that the difference between the
9659 stack pointer and the frame pointer is limited to 240 bytes in
9660 the unwind data structure. */
9661 if (TARGET_SEH)
9662 {
9663 HOST_WIDE_INT diff;
9664
9665 /* If we can leave the frame pointer where it is, do so. Also, returns
9666 the establisher frame for __builtin_frame_address (0). */
9667 diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
9668 if (diff <= SEH_MAX_FRAME_SIZE
9669 && (diff > 240 || (diff & 15) != 0)
9670 && !crtl->accesses_prior_frames)
9671 {
9672 /* Ideally we'd determine what portion of the local stack frame
9673 (within the constraint of the lowest 240) is most heavily used.
9674 But without that complication, simply bias the frame pointer
9675 by 128 bytes so as to maximize the amount of the local stack
9676 frame that is addressable with 8-bit offsets. */
9677 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
9678 }
9679 }
9680 }
9681
9682 /* This is semi-inlined memory_address_length, but simplified
9683 since we know that we're always dealing with reg+offset, and
9684 to avoid having to create and discard all that rtl. */
9685
9686 static inline int
9687 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
9688 {
9689 int len = 4;
9690
9691 if (offset == 0)
9692 {
9693 /* EBP and R13 cannot be encoded without an offset. */
9694 len = (regno == BP_REG || regno == R13_REG);
9695 }
9696 else if (IN_RANGE (offset, -128, 127))
9697 len = 1;
9698
9699 /* ESP and R12 must be encoded with a SIB byte. */
9700 if (regno == SP_REG || regno == R12_REG)
9701 len++;
9702
9703 return len;
9704 }
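
/* For example: 0(%eax) needs no displacement (len 0), 0(%ebp) and
   -8(%ebp) need a disp8 (len 1), 8(%esp) needs a SIB byte plus a disp8
   (len 2), and 512(%eax) needs a disp32 (len 4).  */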
9705
9706 /* Return an RTX that points to CFA_OFFSET within the stack frame.
9707 The valid base registers are taken from CFUN->MACHINE->FS. */
9708
9709 static rtx
9710 choose_baseaddr (HOST_WIDE_INT cfa_offset)
9711 {
9712 const struct machine_function *m = cfun->machine;
9713 rtx base_reg = NULL;
9714 HOST_WIDE_INT base_offset = 0;
9715
9716 if (m->use_fast_prologue_epilogue)
9717 {
9718 /* Choose the base register most likely to allow the most scheduling
9719 opportunities. Generally FP is valid throughout the function,
9720 while DRAP must be reloaded within the epilogue. But choose either
9721 over the SP due to increased encoding size. */
9722
9723 if (m->fs.fp_valid)
9724 {
9725 base_reg = hard_frame_pointer_rtx;
9726 base_offset = m->fs.fp_offset - cfa_offset;
9727 }
9728 else if (m->fs.drap_valid)
9729 {
9730 base_reg = crtl->drap_reg;
9731 base_offset = 0 - cfa_offset;
9732 }
9733 else if (m->fs.sp_valid)
9734 {
9735 base_reg = stack_pointer_rtx;
9736 base_offset = m->fs.sp_offset - cfa_offset;
9737 }
9738 }
9739 else
9740 {
9741 HOST_WIDE_INT toffset;
9742 int len = 16, tlen;
9743
9744 /* Choose the base register with the smallest address encoding.
9745 With a tie, choose FP > DRAP > SP. */
9746 if (m->fs.sp_valid)
9747 {
9748 base_reg = stack_pointer_rtx;
9749 base_offset = m->fs.sp_offset - cfa_offset;
9750 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
9751 }
9752 if (m->fs.drap_valid)
9753 {
9754 toffset = 0 - cfa_offset;
9755 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
9756 if (tlen <= len)
9757 {
9758 base_reg = crtl->drap_reg;
9759 base_offset = toffset;
9760 len = tlen;
9761 }
9762 }
9763 if (m->fs.fp_valid)
9764 {
9765 toffset = m->fs.fp_offset - cfa_offset;
9766 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
9767 if (tlen <= len)
9768 {
9769 base_reg = hard_frame_pointer_rtx;
9770 base_offset = toffset;
9771 len = tlen;
9772 }
9773 }
9774 }
9775 gcc_assert (base_reg != NULL);
9776
9777 return plus_constant (Pmode, base_reg, base_offset);
9778 }
9779
9780 /* Emit code to save registers in the prologue. */
9781
9782 static void
9783 ix86_emit_save_regs (void)
9784 {
9785 unsigned int regno;
9786 rtx insn;
9787
9788 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
9789 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9790 {
9791 insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno)));
9792 RTX_FRAME_RELATED_P (insn) = 1;
9793 }
9794 }
9795
9796 /* Emit a single register save at CFA - CFA_OFFSET. */
9797
9798 static void
9799 ix86_emit_save_reg_using_mov (enum machine_mode mode, unsigned int regno,
9800 HOST_WIDE_INT cfa_offset)
9801 {
9802 struct machine_function *m = cfun->machine;
9803 rtx reg = gen_rtx_REG (mode, regno);
9804 rtx mem, addr, base, insn;
9805
9806 addr = choose_baseaddr (cfa_offset);
9807 mem = gen_frame_mem (mode, addr);
9808
9809 /* For SSE saves, we need to indicate the 128-bit alignment. */
9810 set_mem_align (mem, GET_MODE_ALIGNMENT (mode));
9811
9812 insn = emit_move_insn (mem, reg);
9813 RTX_FRAME_RELATED_P (insn) = 1;
9814
9815 base = addr;
9816 if (GET_CODE (base) == PLUS)
9817 base = XEXP (base, 0);
9818 gcc_checking_assert (REG_P (base));
9819
9820 /* When saving registers into a re-aligned local stack frame, avoid
9821 any tricky guessing by dwarf2out. */
9822 if (m->fs.realigned)
9823 {
9824 gcc_checking_assert (stack_realign_drap);
9825
9826 if (regno == REGNO (crtl->drap_reg))
9827 {
9828 /* A bit of a hack. We force the DRAP register to be saved in
9829 the re-aligned stack frame, which provides us with a copy
9830 of the CFA that will last past the prologue. Install it. */
9831 gcc_checking_assert (cfun->machine->fs.fp_valid);
9832 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
9833 cfun->machine->fs.fp_offset - cfa_offset);
9834 mem = gen_rtx_MEM (mode, addr);
9835 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
9836 }
9837 else
9838 {
9839 /* The frame pointer is a stable reference within the
9840 aligned frame. Use it. */
9841 gcc_checking_assert (cfun->machine->fs.fp_valid);
9842 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
9843 cfun->machine->fs.fp_offset - cfa_offset);
9844 mem = gen_rtx_MEM (mode, addr);
9845 add_reg_note (insn, REG_CFA_EXPRESSION,
9846 gen_rtx_SET (VOIDmode, mem, reg));
9847 }
9848 }
9849
9850 /* The memory may not be relative to the current CFA register,
9851 which means that we may need to generate a new pattern for
9852 use by the unwind info. */
9853 else if (base != m->fs.cfa_reg)
9854 {
9855 addr = plus_constant (Pmode, m->fs.cfa_reg,
9856 m->fs.cfa_offset - cfa_offset);
9857 mem = gen_rtx_MEM (mode, addr);
9858 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
9859 }
9860 }
9861
9862 /* Emit code to save registers using MOV insns.
9863 First register is stored at CFA - CFA_OFFSET. */
9864 static void
9865 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
9866 {
9867 unsigned int regno;
9868
9869 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9870 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9871 {
9872 ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
9873 cfa_offset -= UNITS_PER_WORD;
9874 }
9875 }
9876
9877 /* Emit code to save SSE registers using MOV insns.
9878 First register is stored at CFA - CFA_OFFSET. */
9879 static void
9880 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
9881 {
9882 unsigned int regno;
9883
9884 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9885 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9886 {
9887 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
9888 cfa_offset -= 16;
9889 }
9890 }
9891
9892 static GTY(()) rtx queued_cfa_restores;
9893
9894 /* Add a REG_CFA_RESTORE note for REG to INSN, or queue it until the next
9895 stack manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
9896 Don't add the note if the previously saved value will be left untouched
9897 within the stack red zone until return, as unwinders can find the same
9898 value in the register and on the stack. */
9899
9900 static void
9901 ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
9902 {
9903 if (!crtl->shrink_wrapped
9904 && cfa_offset <= cfun->machine->fs.red_zone_offset)
9905 return;
9906
9907 if (insn)
9908 {
9909 add_reg_note (insn, REG_CFA_RESTORE, reg);
9910 RTX_FRAME_RELATED_P (insn) = 1;
9911 }
9912 else
9913 queued_cfa_restores
9914 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
9915 }
9916
9917 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
9918
9919 static void
9920 ix86_add_queued_cfa_restore_notes (rtx insn)
9921 {
9922 rtx last;
9923 if (!queued_cfa_restores)
9924 return;
9925 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
9926 ;
9927 XEXP (last, 1) = REG_NOTES (insn);
9928 REG_NOTES (insn) = queued_cfa_restores;
9929 queued_cfa_restores = NULL_RTX;
9930 RTX_FRAME_RELATED_P (insn) = 1;
9931 }
9932
9933 /* Expand a prologue or epilogue stack adjustment.
9934 The pattern exists to put a dependency on all ebp-based memory accesses.
9935 STYLE should be negative if instructions should be marked as frame related,
9936 zero if the %r11 register is live and cannot be freely used, and positive
9937 otherwise. */
9938
9939 static void
9940 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
9941 int style, bool set_cfa)
9942 {
9943 struct machine_function *m = cfun->machine;
9944 rtx insn;
9945 bool add_frame_related_expr = false;
9946
9947 if (Pmode == SImode)
9948 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
9949 else if (x86_64_immediate_operand (offset, DImode))
9950 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
9951 else
9952 {
9953 rtx tmp;
9954 /* r11 is used by indirect sibcall return as well, set before the
9955 epilogue and used after the epilogue. */
9956 if (style)
9957 tmp = gen_rtx_REG (DImode, R11_REG);
9958 else
9959 {
9960 gcc_assert (src != hard_frame_pointer_rtx
9961 && dest != hard_frame_pointer_rtx);
9962 tmp = hard_frame_pointer_rtx;
9963 }
9964 insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
9965 if (style < 0)
9966 add_frame_related_expr = true;
9967
9968 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
9969 }
9970
9971 insn = emit_insn (insn);
9972 if (style >= 0)
9973 ix86_add_queued_cfa_restore_notes (insn);
9974
9975 if (set_cfa)
9976 {
9977 rtx r;
9978
9979 gcc_assert (m->fs.cfa_reg == src);
9980 m->fs.cfa_offset += INTVAL (offset);
9981 m->fs.cfa_reg = dest;
9982
9983 r = gen_rtx_PLUS (Pmode, src, offset);
9984 r = gen_rtx_SET (VOIDmode, dest, r);
9985 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
9986 RTX_FRAME_RELATED_P (insn) = 1;
9987 }
9988 else if (style < 0)
9989 {
9990 RTX_FRAME_RELATED_P (insn) = 1;
9991 if (add_frame_related_expr)
9992 {
9993 rtx r = gen_rtx_PLUS (Pmode, src, offset);
9994 r = gen_rtx_SET (VOIDmode, dest, r);
9995 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
9996 }
9997 }
9998
9999 if (dest == stack_pointer_rtx)
10000 {
10001 HOST_WIDE_INT ooffset = m->fs.sp_offset;
10002 bool valid = m->fs.sp_valid;
10003
10004 if (src == hard_frame_pointer_rtx)
10005 {
10006 valid = m->fs.fp_valid;
10007 ooffset = m->fs.fp_offset;
10008 }
10009 else if (src == crtl->drap_reg)
10010 {
10011 valid = m->fs.drap_valid;
10012 ooffset = 0;
10013 }
10014 else
10015 {
10016 /* Else there are two possibilities: SP itself, which we set
10017 up as the default above, or EH_RETURN_STACKADJ_RTX, which is
10018 taken care of by hand along the eh_return path. */
10019 gcc_checking_assert (src == stack_pointer_rtx
10020 || offset == const0_rtx);
10021 }
10022
10023 m->fs.sp_offset = ooffset - INTVAL (offset);
10024 m->fs.sp_valid = valid;
10025 }
10026 }
10027
10028 /* Find an available register to be used as the dynamic realign argument
10029 pointer register. Such a register will be written in the prologue and
10030 used at the beginning of the body, so it must not be
10031 1. a parameter passing register.
10032 2. the GOT pointer.
10033 We reuse the static-chain register if it is available. Otherwise, we
10034 use DI for i386 and R13 for x86-64. We chose R13 since it has a
10035 shorter encoding.
10036
10037 Return: the regno of the chosen register. */
10038
10039 static unsigned int
10040 find_drap_reg (void)
10041 {
10042 tree decl = cfun->decl;
10043
10044 if (TARGET_64BIT)
10045 {
10046 /* Use R13 for a nested function or a function that needs a static chain.
10047 Since a function with a tail call may use any caller-saved
10048 register in the epilogue, DRAP must not use a caller-saved
10049 register in such a case. */
10050 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10051 return R13_REG;
10052
10053 return R10_REG;
10054 }
10055 else
10056 {
10057 /* Use DI for a nested function or a function that needs a static chain.
10058 Since a function with a tail call may use any caller-saved
10059 register in the epilogue, DRAP must not use a caller-saved
10060 register in such a case. */
10061 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10062 return DI_REG;
10063
10064 /* Reuse static chain register if it isn't used for parameter
10065 passing. */
10066 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
10067 {
10068 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
10069 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
10070 return CX_REG;
10071 }
10072 return DI_REG;
10073 }
10074 }
10075
10076 /* Return minimum incoming stack alignment. */
10077
10078 static unsigned int
10079 ix86_minimum_incoming_stack_boundary (bool sibcall)
10080 {
10081 unsigned int incoming_stack_boundary;
10082
10083 /* Prefer the one specified at command line. */
10084 if (ix86_user_incoming_stack_boundary)
10085 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
10086 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack
10087 boundary if -mstackrealign is used, this isn't a sibcall check,
10088 and the estimated stack alignment is 128 bits. */
10089 else if (!sibcall
10090 && !TARGET_64BIT
10091 && ix86_force_align_arg_pointer
10092 && crtl->stack_alignment_estimated == 128)
10093 incoming_stack_boundary = MIN_STACK_BOUNDARY;
10094 else
10095 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
10096
10097 /* Incoming stack alignment can be changed for individual functions
10098 via the force_align_arg_pointer attribute. We use the smallest
10099 incoming stack boundary. */
10100 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
10101 && lookup_attribute (ix86_force_align_arg_pointer_string,
10102 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
10103 incoming_stack_boundary = MIN_STACK_BOUNDARY;
10104
10105 /* The incoming stack frame has to be aligned at least at
10106 parm_stack_boundary. */
10107 if (incoming_stack_boundary < crtl->parm_stack_boundary)
10108 incoming_stack_boundary = crtl->parm_stack_boundary;
10109
10110 /* The stack at the entry of main is aligned by the runtime. We use
10111 the smallest incoming stack boundary. */
10112 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
10113 && DECL_NAME (current_function_decl)
10114 && MAIN_NAME_P (DECL_NAME (current_function_decl))
10115 && DECL_FILE_SCOPE_P (current_function_decl))
10116 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
10117
10118 return incoming_stack_boundary;
10119 }
10120
10121 /* Update incoming stack boundary and estimated stack alignment. */
10122
10123 static void
10124 ix86_update_stack_boundary (void)
10125 {
10126 ix86_incoming_stack_boundary
10127 = ix86_minimum_incoming_stack_boundary (false);
10128
10129 /* x86_64 varargs need 16-byte stack alignment for the register save
10130 area. */
10131 if (TARGET_64BIT
10132 && cfun->stdarg
10133 && crtl->stack_alignment_estimated < 128)
10134 crtl->stack_alignment_estimated = 128;
10135 }
10136
10137 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
10138 needed or an rtx for DRAP otherwise. */
10139
10140 static rtx
10141 ix86_get_drap_rtx (void)
10142 {
10143 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
10144 crtl->need_drap = true;
10145
10146 if (stack_realign_drap)
10147 {
10148 /* Assign DRAP to vDRAP and return vDRAP. */
10149 unsigned int regno = find_drap_reg ();
10150 rtx drap_vreg;
10151 rtx arg_ptr;
10152 rtx seq, insn;
10153
10154 arg_ptr = gen_rtx_REG (Pmode, regno);
10155 crtl->drap_reg = arg_ptr;
10156
10157 start_sequence ();
10158 drap_vreg = copy_to_reg (arg_ptr);
10159 seq = get_insns ();
10160 end_sequence ();
10161
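/* Emit the copy right after the function entry point so that the
   virtual DRAP register is initialized before any use.  */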
10162 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
10163 if (!optimize)
10164 {
10165 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
10166 RTX_FRAME_RELATED_P (insn) = 1;
10167 }
10168 return drap_vreg;
10169 }
10170 else
10171 return NULL;
10172 }
10173
10174 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
10175
10176 static rtx
10177 ix86_internal_arg_pointer (void)
10178 {
10179 return virtual_incoming_args_rtx;
10180 }
10181
10182 struct scratch_reg {
10183 rtx reg;
10184 bool saved;
10185 };
10186
10187 /* Return a short-lived scratch register for use on function entry.
10188 In 32-bit mode, it is valid only after the registers are saved
10189 in the prologue. This register must be released by means of
10190 release_scratch_register_on_entry once it is dead. */
10191
10192 static void
10193 get_scratch_register_on_entry (struct scratch_reg *sr)
10194 {
10195 int regno;
10196
10197 sr->saved = false;
10198
10199 if (TARGET_64BIT)
10200 {
10201 /* We always use R11 in 64-bit mode. */
10202 regno = R11_REG;
10203 }
10204 else
10205 {
10206 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
10207 bool fastcall_p
10208 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10209 bool thiscall_p
10210 = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10211 bool static_chain_p = DECL_STATIC_CHAIN (decl);
10212 int regparm = ix86_function_regparm (fntype, decl);
10213 int drap_regno
10214 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
10215
10216 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
10217 for the static chain register. */
10218 if ((regparm < 1 || (fastcall_p && !static_chain_p))
10219 && drap_regno != AX_REG)
10220 regno = AX_REG;
10221 /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
10222 for the static chain register. */
10223 else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
10224 regno = AX_REG;
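/* edx is free if at most one argument is passed in registers and
   this is not a thiscall function (which uses edx for the static
   chain).  */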
10225 else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
10226 regno = DX_REG;
10227 /* ecx is the static chain register. */
10228 else if (regparm < 3 && !fastcall_p && !thiscall_p
10229 && !static_chain_p
10230 && drap_regno != CX_REG)
10231 regno = CX_REG;
10232 else if (ix86_save_reg (BX_REG, true))
10233 regno = BX_REG;
10234 /* esi is the static chain register. */
10235 else if (!(regparm == 3 && static_chain_p)
10236 && ix86_save_reg (SI_REG, true))
10237 regno = SI_REG;
10238 else if (ix86_save_reg (DI_REG, true))
10239 regno = DI_REG;
10240 else
10241 {
10242 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
10243 sr->saved = true;
10244 }
10245 }
10246
10247 sr->reg = gen_rtx_REG (Pmode, regno);
10248 if (sr->saved)
10249 {
10250 rtx insn = emit_insn (gen_push (sr->reg));
10251 RTX_FRAME_RELATED_P (insn) = 1;
10252 }
10253 }
10254
10255 /* Release a scratch register obtained from the preceding function. */
10256
10257 static void
10258 release_scratch_register_on_entry (struct scratch_reg *sr)
10259 {
10260 if (sr->saved)
10261 {
10262 struct machine_function *m = cfun->machine;
10263 rtx x, insn = emit_insn (gen_pop (sr->reg));
10264
10265 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
10266 RTX_FRAME_RELATED_P (insn) = 1;
10267 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
10268 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
10269 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
10270 m->fs.sp_offset -= UNITS_PER_WORD;
10271 }
10272 }
10273
10274 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
10275
10276 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
10277
10278 static void
10279 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
10280 {
10281 /* We skip the probe for the first interval + a small dope of 4 words and
10282 probe that many bytes past the specified size to maintain a protection
10283 area at the bottom of the stack. */
10284 const int dope = 4 * UNITS_PER_WORD;
10285 rtx size_rtx = GEN_INT (size), last;
10286
10287 /* See if we have a constant small number of probes to generate. If so,
10288 that's the easy case. The run-time loop is made up of 11 insns in the
10289 generic case while the compile-time loop is made up of 3+2*(n-1) insns
10290 for n # of intervals. */
10291 if (size <= 5 * PROBE_INTERVAL)
10292 {
10293 HOST_WIDE_INT i, adjust;
10294 bool first_probe = true;
10295
10296 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
10297 values of N from 1 until it exceeds SIZE. If only one probe is
10298 needed, this will not generate any code. Then adjust and probe
10299 to PROBE_INTERVAL + SIZE. */
10300 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
10301 {
10302 if (first_probe)
10303 {
10304 adjust = 2 * PROBE_INTERVAL + dope;
10305 first_probe = false;
10306 }
10307 else
10308 adjust = PROBE_INTERVAL;
10309
10310 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10311 plus_constant (Pmode, stack_pointer_rtx,
10312 -adjust)));
10313 emit_stack_probe (stack_pointer_rtx);
10314 }
10315
10316 if (first_probe)
10317 adjust = size + PROBE_INTERVAL + dope;
10318 else
10319 adjust = size + PROBE_INTERVAL - i;
10320
10321 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10322 plus_constant (Pmode, stack_pointer_rtx,
10323 -adjust)));
10324 emit_stack_probe (stack_pointer_rtx);
10325
10326 /* Adjust back to account for the additional first interval. */
10327 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10328 plus_constant (Pmode, stack_pointer_rtx,
10329 PROBE_INTERVAL + dope)));
10330 }
10331
10332 /* Otherwise, do the same as above, but in a loop. Note that we must be
10333 extra careful with variables wrapping around because we might be at
10334 the very top (or the very bottom) of the address space and we have
10335 to be able to handle this case properly; in particular, we use an
10336 equality test for the loop condition. */
10337 else
10338 {
10339 HOST_WIDE_INT rounded_size;
10340 struct scratch_reg sr;
10341
10342 get_scratch_register_on_entry (&sr);
10343
10344
10345 /* Step 1: round SIZE to the previous multiple of the interval. */
10346
10347 rounded_size = size & -PROBE_INTERVAL;
10348
10349
10350 /* Step 2: compute initial and final value of the loop counter. */
10351
10352 /* SP = SP_0 + PROBE_INTERVAL. */
10353 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10354 plus_constant (Pmode, stack_pointer_rtx,
10355 - (PROBE_INTERVAL + dope))));
10356
10357 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
10358 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
10359 emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
10360 gen_rtx_PLUS (Pmode, sr.reg,
10361 stack_pointer_rtx)));
10362
10363
10364 /* Step 3: the loop
10365
10366 while (SP != LAST_ADDR)
10367 {
10368 SP = SP + PROBE_INTERVAL
10369 probe at SP
10370 }
10371
10372 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
10373 values of N from 1 until it is equal to ROUNDED_SIZE. */
10374
10375 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
10376
10377
10378 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
10379 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
10380
10381 if (size != rounded_size)
10382 {
10383 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10384 plus_constant (Pmode, stack_pointer_rtx,
10385 rounded_size - size)));
10386 emit_stack_probe (stack_pointer_rtx);
10387 }
10388
10389 /* Adjust back to account for the additional first interval. */
10390 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10391 plus_constant (Pmode, stack_pointer_rtx,
10392 PROBE_INTERVAL + dope)));
10393
10394 release_scratch_register_on_entry (&sr);
10395 }
10396
10397 gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);
10398
10399 /* Even if the stack pointer isn't the CFA register, we need to correctly
10400 describe the adjustments made to it, in particular differentiate the
10401 frame-related ones from the frame-unrelated ones. */
10402 if (size > 0)
10403 {
10404 rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
10405 XVECEXP (expr, 0, 0)
10406 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10407 plus_constant (Pmode, stack_pointer_rtx, -size));
10408 XVECEXP (expr, 0, 1)
10409 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10410 plus_constant (Pmode, stack_pointer_rtx,
10411 PROBE_INTERVAL + dope + size));
10412 add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
10413 RTX_FRAME_RELATED_P (last) = 1;
10414
10415 cfun->machine->fs.sp_offset += size;
10416 }
10417
10418 /* Make sure nothing is scheduled before we are done. */
10419 emit_insn (gen_blockage ());
10420 }
10421
10422 /* Adjust the stack pointer up to REG while probing it. */
10423
10424 const char *
10425 output_adjust_stack_and_probe (rtx reg)
10426 {
10427 static int labelno = 0;
10428 char loop_lab[32], end_lab[32];
10429 rtx xops[2];
10430
10431 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
10432 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
10433
10434 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
10435
10436 /* Jump to END_LAB if SP == LAST_ADDR. */
10437 xops[0] = stack_pointer_rtx;
10438 xops[1] = reg;
10439 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
10440 fputs ("\tje\t", asm_out_file);
10441 assemble_name_raw (asm_out_file, end_lab);
10442 fputc ('\n', asm_out_file);
10443
10444 /* SP = SP + PROBE_INTERVAL. */
10445 xops[1] = GEN_INT (PROBE_INTERVAL);
10446 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
10447
10448 /* Probe at SP. */
10449 xops[1] = const0_rtx;
10450 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
10451
10452 fprintf (asm_out_file, "\tjmp\t");
10453 assemble_name_raw (asm_out_file, loop_lab);
10454 fputc ('\n', asm_out_file);
10455
10456 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
10457
10458 return "";
10459 }
10460
10461 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
10462 inclusive. These are offsets from the current stack pointer. */
10463
10464 static void
10465 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
10466 {
10467 /* See if we have a constant small number of probes to generate. If so,
10468 that's the easy case. The run-time loop is made up of 7 insns in the
10469 generic case while the compile-time loop is made up of n insns for n #
10470 of intervals. */
10471 if (size <= 7 * PROBE_INTERVAL)
10472 {
10473 HOST_WIDE_INT i;
10474
10475 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
10476 it exceeds SIZE. If only one probe is needed, this will not
10477 generate any code. Then probe at FIRST + SIZE. */
10478 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
10479 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
10480 -(first + i)));
10481
10482 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
10483 -(first + size)));
10484 }
10485
10486 /* Otherwise, do the same as above, but in a loop. Note that we must be
10487 extra careful with variables wrapping around because we might be at
10488 the very top (or the very bottom) of the address space and we have
10489 to be able to handle this case properly; in particular, we use an
10490 equality test for the loop condition. */
10491 else
10492 {
10493 HOST_WIDE_INT rounded_size, last;
10494 struct scratch_reg sr;
10495
10496 get_scratch_register_on_entry (&sr);
10497
10498
10499 /* Step 1: round SIZE to the previous multiple of the interval. */
10500
10501 rounded_size = size & -PROBE_INTERVAL;
10502
10503
10504 /* Step 2: compute initial and final value of the loop counter. */
10505
10506 /* TEST_OFFSET = FIRST. */
10507 emit_move_insn (sr.reg, GEN_INT (-first));
10508
10509 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
10510 last = first + rounded_size;
10511
10512
10513 /* Step 3: the loop
10514
10515 while (TEST_ADDR != LAST_ADDR)
10516 {
10517 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
10518 probe at TEST_ADDR
10519 }
10520
10521 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
10522 until it is equal to ROUNDED_SIZE. */
10523
10524 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
10525
10526
10527 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
10528 that SIZE is equal to ROUNDED_SIZE. */
10529
10530 if (size != rounded_size)
10531 emit_stack_probe (plus_constant (Pmode,
10532 gen_rtx_PLUS (Pmode,
10533 stack_pointer_rtx,
10534 sr.reg),
10535 rounded_size - size));
10536
10537 release_scratch_register_on_entry (&sr);
10538 }
10539
10540 /* Make sure nothing is scheduled before we are done. */
10541 emit_insn (gen_blockage ());
10542 }
10543
10544 /* Probe a range of stack addresses from REG to END, inclusive. These are
10545 offsets from the current stack pointer. */
10546
10547 const char *
10548 output_probe_stack_range (rtx reg, rtx end)
10549 {
10550 static int labelno = 0;
10551 char loop_lab[32], end_lab[32];
10552 rtx xops[3];
10553
10554 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
10555 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
10556
10557 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
10558
10559 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
10560 xops[0] = reg;
10561 xops[1] = end;
10562 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
10563 fputs ("\tje\t", asm_out_file);
10564 assemble_name_raw (asm_out_file, end_lab);
10565 fputc ('\n', asm_out_file);
10566
10567 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
10568 xops[1] = GEN_INT (PROBE_INTERVAL);
10569 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
10570
10571 /* Probe at TEST_ADDR. */
10572 xops[0] = stack_pointer_rtx;
10573 xops[1] = reg;
10574 xops[2] = const0_rtx;
10575 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
10576
10577 fprintf (asm_out_file, "\tjmp\t");
10578 assemble_name_raw (asm_out_file, loop_lab);
10579 fputc ('\n', asm_out_file);
10580
10581 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
10582
10583 return "";
10584 }
10585
10586 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
10587 to be generated in the correct form. */
10588 static void
10589 ix86_finalize_stack_realign_flags (void)
10590 {
10591 /* Check if stack realignment is really needed after reload, and
10592 store the result in cfun. */
10593 unsigned int incoming_stack_boundary
10594 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
10595 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
10596 unsigned int stack_realign = (incoming_stack_boundary
10597 < (crtl->is_leaf
10598 ? crtl->max_used_stack_slot_alignment
10599 : crtl->stack_alignment_needed));
10600
10601 if (crtl->stack_realign_finalized)
10602 {
10603 /* After stack_realign_needed is finalized, we can no longer
10604 change it. */
10605 gcc_assert (crtl->stack_realign_needed == stack_realign);
10606 return;
10607 }
10608
10609 /* If the only reason for frame_pointer_needed is that we conservatively
10610 assumed stack realignment might be needed, but in the end nothing that
10611 needed the stack alignment had been spilled, clear frame_pointer_needed
10612 and say we don't need stack realignment. */
10613 if (stack_realign
10614 && frame_pointer_needed
10615 && crtl->is_leaf
10616 && flag_omit_frame_pointer
10617 && crtl->sp_is_unchanging
10618 && !ix86_current_function_calls_tls_descriptor
10619 && !crtl->accesses_prior_frames
10620 && !cfun->calls_alloca
10621 && !crtl->calls_eh_return
10622 && !(flag_stack_check && STACK_CHECK_MOVING_SP)
10623 && !ix86_frame_pointer_required ()
10624 && get_frame_size () == 0
10625 && ix86_nsaved_sseregs () == 0
10626 && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
10627 {
10628 HARD_REG_SET set_up_by_prologue, prologue_used;
10629 basic_block bb;
10630
10631 CLEAR_HARD_REG_SET (prologue_used);
10632 CLEAR_HARD_REG_SET (set_up_by_prologue);
10633 add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
10634 add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
10635 add_to_hard_reg_set (&set_up_by_prologue, Pmode,
10636 HARD_FRAME_POINTER_REGNUM);
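/* Scan the function body: if any remaining insn still requires a
   stack frame, the realignment (and the frame pointer) must be kept
   after all.  */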
10637 FOR_EACH_BB_FN (bb, cfun)
10638 {
10639 rtx insn;
10640 FOR_BB_INSNS (bb, insn)
10641 if (NONDEBUG_INSN_P (insn)
10642 && requires_stack_frame_p (insn, prologue_used,
10643 set_up_by_prologue))
10644 {
10645 crtl->stack_realign_needed = stack_realign;
10646 crtl->stack_realign_finalized = true;
10647 return;
10648 }
10649 }
10650
10651 /* If drap has been set, but it actually isn't live at the start
10652 of the function, there is no reason to set it up. */
10653 if (crtl->drap_reg)
10654 {
10655 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
10656 if (! REGNO_REG_SET_P (DF_LR_IN (bb), REGNO (crtl->drap_reg)))
10657 {
10658 crtl->drap_reg = NULL_RTX;
10659 crtl->need_drap = false;
10660 }
10661 }
10662 else
10663 cfun->machine->no_drap_save_restore = true;
10664
10665 frame_pointer_needed = false;
10666 stack_realign = false;
10667 crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
10668 crtl->stack_alignment_needed = incoming_stack_boundary;
10669 crtl->stack_alignment_estimated = incoming_stack_boundary;
10670 if (crtl->preferred_stack_boundary > incoming_stack_boundary)
10671 crtl->preferred_stack_boundary = incoming_stack_boundary;
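/* Re-run dataflow analysis so that liveness information reflects
   the dropped frame pointer and stack realignment.  */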
10672 df_finish_pass (true);
10673 df_scan_alloc (NULL);
10674 df_scan_blocks ();
10675 df_compute_regs_ever_live (true);
10676 df_analyze ();
10677 }
10678
10679 crtl->stack_realign_needed = stack_realign;
10680 crtl->stack_realign_finalized = true;
10681 }
10682
10683 /* Expand the prologue into a bunch of separate insns. */
10684
10685 void
10686 ix86_expand_prologue (void)
10687 {
10688 struct machine_function *m = cfun->machine;
10689 rtx insn, t;
10690 bool pic_reg_used;
10691 struct ix86_frame frame;
10692 HOST_WIDE_INT allocate;
10693 bool int_registers_saved;
10694 bool sse_registers_saved;
10695
10696 ix86_finalize_stack_realign_flags ();
10697
10698 /* DRAP should not coexist with stack_realign_fp */
10699 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
10700
10701 memset (&m->fs, 0, sizeof (m->fs));
10702
10703 /* Initialize CFA state for before the prologue. */
10704 m->fs.cfa_reg = stack_pointer_rtx;
10705 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
10706
10707 /* Track SP offset to the CFA. We continue tracking this after we've
10708 swapped the CFA register away from SP. In the case of re-alignment
10709 this is fudged; we're interested in offsets within the local frame. */
10710 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
10711 m->fs.sp_valid = true;
10712
10713 ix86_compute_frame_layout (&frame);
10714
10715 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
10716 {
10717 /* We should have already generated an error for any use of
10718 ms_hook on a nested function. */
10719 gcc_checking_assert (!ix86_static_chain_on_stack);
10720
10721 /* Check if profiling is active and we shall use the profiling before
10722 prologue variant. If so, issue a sorry. */
10723 if (crtl->profile && flag_fentry != 0)
10724 sorry ("ms_hook_prologue attribute isn%'t compatible "
10725 "with -mfentry for 32-bit");
10726
10727 /* In ix86_asm_output_function_label we emitted:
10728 8b ff movl.s %edi,%edi
10729 55 push %ebp
10730 8b ec movl.s %esp,%ebp
10731
10732 This matches the hookable function prologue in Win32 API
10733 functions in Microsoft Windows XP Service Pack 2 and newer.
10734 Wine uses this to enable Windows apps to hook the Win32 API
10735 functions provided by Wine.
10736
10737 What that means is that we've already set up the frame pointer. */
10738
10739 if (frame_pointer_needed
10740 && !(crtl->drap_reg && crtl->stack_realign_needed))
10741 {
10742 rtx push, mov;
10743
10744 /* We've decided to use the frame pointer already set up.
10745 Describe this to the unwinder by pretending that both
10746 push and mov insns happen right here.
10747
10748 Putting the unwind info here at the end of the ms_hook
10749 is done so that we can make absolutely certain we get
10750 the required byte sequence at the start of the function,
10751 rather than relying on an assembler that can produce
10752 the exact encoding required.
10753
10754 However it does mean (in the unpatched case) that we have
10755 a 1 insn window where the asynchronous unwind info is
10756 incorrect. However, if we placed the unwind info at
10757 its correct location we would have incorrect unwind info
10758 in the patched case. Which is probably all moot since
10759 I don't expect Wine to generate dwarf2 unwind info for the
10760 system libraries that use this feature. */
10761
10762 insn = emit_insn (gen_blockage ());
10763
10764 push = gen_push (hard_frame_pointer_rtx);
10765 mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
10766 stack_pointer_rtx);
10767 RTX_FRAME_RELATED_P (push) = 1;
10768 RTX_FRAME_RELATED_P (mov) = 1;
10769
10770 RTX_FRAME_RELATED_P (insn) = 1;
10771 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10772 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
10773
10774 /* Note that gen_push incremented m->fs.cfa_offset, even
10775 though we didn't emit the push insn here. */
10776 m->fs.cfa_reg = hard_frame_pointer_rtx;
10777 m->fs.fp_offset = m->fs.cfa_offset;
10778 m->fs.fp_valid = true;
10779 }
10780 else
10781 {
10782 /* The frame pointer is not needed so pop %ebp again.
10783 This leaves us with a pristine state. */
10784 emit_insn (gen_pop (hard_frame_pointer_rtx));
10785 }
10786 }
10787
10788 /* The first insn of a function that accepts its static chain on the
10789 stack is to push the register that would be filled in by a direct
10790 call. This insn will be skipped by the trampoline. */
10791 else if (ix86_static_chain_on_stack)
10792 {
10793 insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
10794 emit_insn (gen_blockage ());
10795
10796 /* We don't want to interpret this push insn as a register save,
10797 only as a stack adjustment. The real copy of the register as
10798 a save will be done later, if needed. */
10799 t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
10800 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
10801 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
10802 RTX_FRAME_RELATED_P (insn) = 1;
10803 }
10804
10805 /* Emit prologue code to adjust stack alignment and set up DRAP, in case
10806 DRAP is needed and stack realignment is really needed after reload. */
10807 if (stack_realign_drap)
10808 {
10809 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
10810
10811 /* Only need to push the parameter pointer reg if it must be preserved across calls. */
10812 if (!call_used_regs[REGNO (crtl->drap_reg)])
10813 {
10814 /* Push arg pointer reg */
10815 insn = emit_insn (gen_push (crtl->drap_reg));
10816 RTX_FRAME_RELATED_P (insn) = 1;
10817 }
10818
10819 /* Grab the argument pointer. */
10820 t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset);
10821 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
10822 RTX_FRAME_RELATED_P (insn) = 1;
10823 m->fs.cfa_reg = crtl->drap_reg;
10824 m->fs.cfa_offset = 0;
10825
10826 /* Align the stack. */
10827 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10828 stack_pointer_rtx,
10829 GEN_INT (-align_bytes)));
10830 RTX_FRAME_RELATED_P (insn) = 1;
10831
10832 /* Replicate the return address on the stack so that the return
10833 address can be reached via the (argp - 1) slot. This is needed
10834 to implement macro RETURN_ADDR_RTX and intrinsic function
10835 expand_builtin_return_addr etc. */
10836 t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD);
10837 t = gen_frame_mem (word_mode, t);
10838 insn = emit_insn (gen_push (t));
10839 RTX_FRAME_RELATED_P (insn) = 1;
10840
10841 /* For the purposes of frame and register save area addressing,
10842 we've started over with a new frame. */
10843 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
10844 m->fs.realigned = true;
10845 }
10846
10847 int_registers_saved = (frame.nregs == 0);
10848 sse_registers_saved = (frame.nsseregs == 0);
10849
10850 if (frame_pointer_needed && !m->fs.fp_valid)
10851 {
10852 /* Note: AT&T enter does NOT have reversed args. Enter is probably
10853 slower on all targets. Also sdb doesn't like it. */
10854 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
10855 RTX_FRAME_RELATED_P (insn) = 1;
10856
10857 /* Push registers now, before setting the frame pointer
10858 on SEH target. */
10859 if (!int_registers_saved
10860 && TARGET_SEH
10861 && !frame.save_regs_using_mov)
10862 {
10863 ix86_emit_save_regs ();
10864 int_registers_saved = true;
10865 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
10866 }
10867
10868 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
10869 {
10870 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
10871 RTX_FRAME_RELATED_P (insn) = 1;
10872
10873 if (m->fs.cfa_reg == stack_pointer_rtx)
10874 m->fs.cfa_reg = hard_frame_pointer_rtx;
10875 m->fs.fp_offset = m->fs.sp_offset;
10876 m->fs.fp_valid = true;
10877 }
10878 }
10879
10880 if (!int_registers_saved)
10881 {
10882 /* If saving registers via PUSH, do so now. */
10883 if (!frame.save_regs_using_mov)
10884 {
10885 ix86_emit_save_regs ();
10886 int_registers_saved = true;
10887 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
10888 }
10889
10890 /* When using the red zone we may start register saving before allocating
10891 the stack frame, saving one cycle of the prologue. However, avoid
10892 doing this if we have to probe the stack; at least on x86_64 the
10893 stack probe can turn into a call that clobbers a red zone location. */
10894 else if (ix86_using_red_zone ()
10895 && (! TARGET_STACK_PROBE
10896 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
10897 {
10898 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
10899 int_registers_saved = true;
10900 }
10901 }
10902
10903 if (stack_realign_fp)
10904 {
10905 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
10906 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
10907
10908 /* The computation of the size of the re-aligned stack frame means
10909 that we must allocate the size of the register save area before
10910 performing the actual alignment. Otherwise we cannot guarantee
10911 that there's enough storage above the realignment point. */
10912 if (m->fs.sp_offset != frame.sse_reg_save_offset)
10913 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10914 GEN_INT (m->fs.sp_offset
10915 - frame.sse_reg_save_offset),
10916 -1, false);
10917
10918 /* Align the stack. */
10919 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10920 stack_pointer_rtx,
10921 GEN_INT (-align_bytes)));
10922
10923 /* For the purposes of register save area addressing, the stack
10924 pointer is no longer valid. As for the value of sp_offset,
10925 see ix86_compute_frame_layout, which we need to match in order
10926 to pass verification of stack_pointer_offset at the end. */
10927 m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
10928 m->fs.sp_valid = false;
10929 }
10930
10931 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
10932
10933 if (flag_stack_usage_info)
10934 {
10935 /* We start to count from ARG_POINTER. */
10936 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
10937
10938 /* If it was realigned, take into account the fake frame. */
10939 if (stack_realign_drap)
10940 {
10941 if (ix86_static_chain_on_stack)
10942 stack_size += UNITS_PER_WORD;
10943
10944 if (!call_used_regs[REGNO (crtl->drap_reg)])
10945 stack_size += UNITS_PER_WORD;
10946
10947 /* This over-estimates by 1 minimal-stack-alignment-unit but
10948 mitigates that by counting in the new return address slot. */
10949 current_function_dynamic_stack_size
10950 += crtl->stack_alignment_needed / BITS_PER_UNIT;
10951 }
10952
10953 current_function_static_stack_size = stack_size;
10954 }
10955
10956 /* On SEH target with very large frame size, allocate an area to save
10957 SSE registers (as the very large allocation won't be described). */
10958 if (TARGET_SEH
10959 && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
10960 && !sse_registers_saved)
10961 {
10962 HOST_WIDE_INT sse_size =
10963 frame.sse_reg_save_offset - frame.reg_save_offset;
10964
10965 gcc_assert (int_registers_saved);
10966
10967 /* No need to do stack checking as the area will be immediately
10968 written. */
10969 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10970 GEN_INT (-sse_size), -1,
10971 m->fs.cfa_reg == stack_pointer_rtx);
10972 allocate -= sse_size;
10973 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
10974 sse_registers_saved = true;
10975 }
10976
10977 /* The stack has already been decremented by the instruction calling us,
10978 so probe if the size is non-negative to preserve the protection area. */
10979 if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
10980 {
10981 /* We expect the registers to be saved when probes are used. */
10982 gcc_assert (int_registers_saved);
10983
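/* Two probing strategies: with STACK_CHECK_MOVING_SP the stack
   pointer itself is moved while probing; otherwise the region below
   the stack pointer is probed without adjusting it.  */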
10984 if (STACK_CHECK_MOVING_SP)
10985 {
10986 if (!(crtl->is_leaf && !cfun->calls_alloca
10987 && allocate <= PROBE_INTERVAL))
10988 {
10989 ix86_adjust_stack_and_probe (allocate);
10990 allocate = 0;
10991 }
10992 }
10993 else
10994 {
10995 HOST_WIDE_INT size = allocate;
10996
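/* The probed offsets must remain representable as 32-bit
   displacements, so clamp the size below 2GB.  */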
10997 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
10998 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
10999
11000 if (TARGET_STACK_PROBE)
11001 {
11002 if (crtl->is_leaf && !cfun->calls_alloca)
11003 {
11004 if (size > PROBE_INTERVAL)
11005 ix86_emit_probe_stack_range (0, size);
11006 }
11007 else
11008 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
11009 }
11010 else
11011 {
11012 if (crtl->is_leaf && !cfun->calls_alloca)
11013 {
11014 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
11015 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT,
11016 size - STACK_CHECK_PROTECT);
11017 }
11018 else
11019 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
11020 }
11021 }
11022 }
11023
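/* Allocate the remaining part of the frame: with a plain stack
   pointer adjustment when possible, otherwise via the stack
   allocation worker below.  */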
11024 if (allocate == 0)
11025 ;
11026 else if (!ix86_target_stack_probe ()
11027 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
11028 {
11029 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11030 GEN_INT (-allocate), -1,
11031 m->fs.cfa_reg == stack_pointer_rtx);
11032 }
11033 else
11034 {
11035 rtx eax = gen_rtx_REG (Pmode, AX_REG);
11036 rtx r10 = NULL;
11037 rtx (*adjust_stack_insn)(rtx, rtx, rtx);
11038 const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
11039 bool eax_live = ix86_eax_live_at_start_p ();
11040 bool r10_live = false;
11041
11042 if (TARGET_64BIT)
11043 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
11044
11045 if (eax_live)
11046 {
11047 insn = emit_insn (gen_push (eax));
11048 allocate -= UNITS_PER_WORD;
11049 /* Note that SEH directives need to continue tracking the stack
11050 pointer even after the frame pointer has been set up. */
11051 if (sp_is_cfa_reg || TARGET_SEH)
11052 {
11053 if (sp_is_cfa_reg)
11054 m->fs.cfa_offset += UNITS_PER_WORD;
11055 RTX_FRAME_RELATED_P (insn) = 1;
11056 }
11057 }
11058
11059 if (r10_live)
11060 {
11061 r10 = gen_rtx_REG (Pmode, R10_REG);
11062 insn = emit_insn (gen_push (r10));
11063 allocate -= UNITS_PER_WORD;
11064 if (sp_is_cfa_reg || TARGET_SEH)
11065 {
11066 if (sp_is_cfa_reg)
11067 m->fs.cfa_offset += UNITS_PER_WORD;
11068 RTX_FRAME_RELATED_P (insn) = 1;
11069 }
11070 }
11071
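/* The allocation worker (see ix86_gen_allocate_stack_worker) expects
   the size in AX; the stack pointer adjustment itself is emitted
   just below.  */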
11072 emit_move_insn (eax, GEN_INT (allocate));
11073 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
11074
11075 /* Use the fact that AX still contains ALLOCATE. */
11076 adjust_stack_insn = (Pmode == DImode
11077 ? gen_pro_epilogue_adjust_stack_di_sub
11078 : gen_pro_epilogue_adjust_stack_si_sub);
11079
11080 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
11081 stack_pointer_rtx, eax));
11082
11083 if (sp_is_cfa_reg || TARGET_SEH)
11084 {
11085 if (sp_is_cfa_reg)
11086 m->fs.cfa_offset += allocate;
11087 RTX_FRAME_RELATED_P (insn) = 1;
11088 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11089 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11090 plus_constant (Pmode, stack_pointer_rtx,
11091 -allocate)));
11092 }
11093 m->fs.sp_offset += allocate;
11094
11095 /* Use stack_pointer_rtx for relative addressing so that code
11096 works for realigned stack, too. */
11097 if (r10_live && eax_live)
11098 {
11099 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11100 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
11101 gen_frame_mem (word_mode, t));
11102 t = plus_constant (Pmode, t, UNITS_PER_WORD);
11103 emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
11104 gen_frame_mem (word_mode, t));
11105 }
11106 else if (eax_live || r10_live)
11107 {
11108 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11109 emit_move_insn (gen_rtx_REG (word_mode,
11110 (eax_live ? AX_REG : R10_REG)),
11111 gen_frame_mem (word_mode, t));
11112 }
11113 }
11114 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
11115
11116 /* If we haven't already set up the frame pointer, do so now. */
11117 if (frame_pointer_needed && !m->fs.fp_valid)
11118 {
11119 insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
11120 GEN_INT (frame.stack_pointer_offset
11121 - frame.hard_frame_pointer_offset));
11122 insn = emit_insn (insn);
11123 RTX_FRAME_RELATED_P (insn) = 1;
11124 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
11125
11126 if (m->fs.cfa_reg == stack_pointer_rtx)
11127 m->fs.cfa_reg = hard_frame_pointer_rtx;
11128 m->fs.fp_offset = frame.hard_frame_pointer_offset;
11129 m->fs.fp_valid = true;
11130 }
11131
11132 if (!int_registers_saved)
11133 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
11134 if (!sse_registers_saved)
11135 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
11136
11137 pic_reg_used = false;
11138 /* We don't use the PIC register for the PE-COFF target. */
11139 if (pic_offset_table_rtx
11140 && !TARGET_PECOFF
11141 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
11142 || crtl->profile))
11143 {
11144 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
11145
11146 if (alt_pic_reg_used != INVALID_REGNUM)
11147 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
11148
11149 pic_reg_used = true;
11150 }
11151
11152 if (pic_reg_used)
11153 {
11154 if (TARGET_64BIT)
11155 {
11156 if (ix86_cmodel == CM_LARGE_PIC)
11157 {
11158 rtx label, tmp_reg;
11159
11160 gcc_assert (Pmode == DImode);
11161 label = gen_label_rtx ();
11162 emit_label (label);
11163 LABEL_PRESERVE_P (label) = 1;
11164 tmp_reg = gen_rtx_REG (Pmode, R11_REG);
11165 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
11166 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
11167 label));
11168 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
11169 insn = emit_insn (ix86_gen_add3 (pic_offset_table_rtx,
11170 pic_offset_table_rtx, tmp_reg));
11171 }
11172 else
11173 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
11174 }
11175 else
11176 {
11177 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
11178 RTX_FRAME_RELATED_P (insn) = 1;
11179 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
11180 }
11181 }
11182
11183 /* In the pic_reg_used case, make sure that the got load isn't deleted
11184 when mcount needs it. Blockage to avoid call movement across mcount
11185 call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
11186 note. */
11187 if (crtl->profile && !flag_fentry && pic_reg_used)
11188 emit_insn (gen_prologue_use (pic_offset_table_rtx));
11189
11190 if (crtl->drap_reg && !crtl->stack_realign_needed)
11191 {
11192 /* vDRAP is set up, but after reload it turns out stack realignment
11193 isn't necessary; here we emit prologue code to set up DRAP
11194 without the stack realignment adjustment. */
11195 t = choose_baseaddr (0);
11196 emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
11197 }
11198
11199 /* Prevent instructions from being scheduled into register save push
11200 sequence when access to the redzone area is done through frame pointer.
11201 The offset between the frame pointer and the stack pointer is calculated
11202 relative to the value of the stack pointer at the end of the function
11203 prologue, and moving instructions that access redzone area via frame
11204 pointer inside push sequence violates this assumption. */
11205 if (frame_pointer_needed && frame.red_zone_size)
11206 emit_insn (gen_memory_blockage ());
11207
11208 /* Emit cld instruction if stringops are used in the function. */
11209 if (TARGET_CLD && ix86_current_function_needs_cld)
11210 emit_insn (gen_cld ());
11211
11212 /* SEH requires that the prologue end within 256 bytes of the start of
11213 the function. Prevent instruction schedules that would extend that.
11214 Further, prevent alloca modifications to the stack pointer from being
11215 combined with prologue modifications. */
11216 if (TARGET_SEH)
11217 emit_insn (gen_prologue_use (stack_pointer_rtx));
11218 }
11219
11220 /* Emit code to restore REG using a POP insn. */
11221
11222 static void
11223 ix86_emit_restore_reg_using_pop (rtx reg)
11224 {
11225 struct machine_function *m = cfun->machine;
11226 rtx insn = emit_insn (gen_pop (reg));
11227
11228 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
11229 m->fs.sp_offset -= UNITS_PER_WORD;
11230
11231 if (m->fs.cfa_reg == crtl->drap_reg
11232 && REGNO (reg) == REGNO (crtl->drap_reg))
11233 {
11234 /* Previously we'd represented the CFA as an expression
11235 like *(%ebp - 8). We've just popped that value from
11236 the stack, which means we need to reset the CFA to
11237 the drap register. This will remain until we restore
11238 the stack pointer. */
11239 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
11240 RTX_FRAME_RELATED_P (insn) = 1;
11241
11242 /* This means that the DRAP register is valid for addressing too. */
11243 m->fs.drap_valid = true;
11244 return;
11245 }
11246
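/* If the CFA is still the stack pointer, the pop moves it up by one
   word; record the corresponding CFA adjustment.  */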
11247 if (m->fs.cfa_reg == stack_pointer_rtx)
11248 {
11249 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
11250 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
11251 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
11252 RTX_FRAME_RELATED_P (insn) = 1;
11253
11254 m->fs.cfa_offset -= UNITS_PER_WORD;
11255 }
11256
11257 /* When the frame pointer is the CFA, and we pop it, we are
11258 swapping back to the stack pointer as the CFA. This happens
11259 for stack frames that don't allocate other data, so we assume
11260 the stack pointer is now pointing at the return address, i.e.
11261 the function entry state, which makes the offset 1 word. */
11262 if (reg == hard_frame_pointer_rtx)
11263 {
11264 m->fs.fp_valid = false;
11265 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
11266 {
11267 m->fs.cfa_reg = stack_pointer_rtx;
11268 m->fs.cfa_offset -= UNITS_PER_WORD;
11269
11270 add_reg_note (insn, REG_CFA_DEF_CFA,
11271 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11272 GEN_INT (m->fs.cfa_offset)));
11273 RTX_FRAME_RELATED_P (insn) = 1;
11274 }
11275 }
11276 }
11277
11278 /* Emit code to restore saved registers using POP insns. */
11279
11280 static void
11281 ix86_emit_restore_regs_using_pop (void)
11282 {
11283 unsigned int regno;
11284
11285 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11286 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
11287 ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
11288 }
11289
11290 /* Emit code and notes for the LEAVE instruction. */
11291
11292 static void
11293 ix86_emit_leave (void)
11294 {
11295 struct machine_function *m = cfun->machine;
11296 rtx insn = emit_insn (ix86_gen_leave ());
11297
11298 ix86_add_queued_cfa_restore_notes (insn);
11299
11300 gcc_assert (m->fs.fp_valid);
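/* LEAVE restores the stack pointer from the frame pointer and pops
   the saved frame pointer, leaving SP one word above the frame
   pointer save slot.  */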
11301 m->fs.sp_valid = true;
11302 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
11303 m->fs.fp_valid = false;
11304
11305 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
11306 {
11307 m->fs.cfa_reg = stack_pointer_rtx;
11308 m->fs.cfa_offset = m->fs.sp_offset;
11309
11310 add_reg_note (insn, REG_CFA_DEF_CFA,
11311 plus_constant (Pmode, stack_pointer_rtx,
11312 m->fs.sp_offset));
11313 RTX_FRAME_RELATED_P (insn) = 1;
11314 }
11315 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
11316 m->fs.fp_offset);
11317 }
11318
11319 /* Emit code to restore saved registers using MOV insns.
11320 First register is restored from CFA - CFA_OFFSET. */
11321 static void
11322 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
11323 bool maybe_eh_return)
11324 {
11325 struct machine_function *m = cfun->machine;
11326 unsigned int regno;
11327
11328 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11329 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
11330 {
11331 rtx reg = gen_rtx_REG (word_mode, regno);
11332 rtx insn, mem;
11333
11334 mem = choose_baseaddr (cfa_offset);
11335 mem = gen_frame_mem (word_mode, mem);
11336 insn = emit_move_insn (reg, mem);
11337
11338 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
11339 {
11340 /* Previously we'd represented the CFA as an expression
11341 like *(%ebp - 8). We've just restored that value from
11342 the stack, which means we need to reset the CFA to
11343 the drap register. This will remain until we restore
11344 the stack pointer. */
11345 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
11346 RTX_FRAME_RELATED_P (insn) = 1;
11347
11348 /* This means that the DRAP register is valid for addressing. */
11349 m->fs.drap_valid = true;
11350 }
11351 else
11352 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
11353
11354 cfa_offset -= UNITS_PER_WORD;
11355 }
11356 }
11357
11358 /* Emit code to restore saved SSE registers using MOV insns.
11359 First register is restored from CFA - CFA_OFFSET. */
11360 static void
11361 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
11362 bool maybe_eh_return)
11363 {
11364 unsigned int regno;
11365
11366 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11367 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
11368 {
11369 rtx reg = gen_rtx_REG (V4SFmode, regno);
11370 rtx mem;
11371
11372 mem = choose_baseaddr (cfa_offset);
11373 mem = gen_rtx_MEM (V4SFmode, mem);
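/* The SSE register save area is kept 16-byte aligned, so an aligned
   load can be used.  */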
11374 set_mem_align (mem, 128);
11375 emit_move_insn (reg, mem);
11376
11377 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
11378
11379 cfa_offset -= 16;
11380 }
11381 }
11382
11383 /* Restore function stack, frame, and registers. */
11384
11385 void
11386 ix86_expand_epilogue (int style)
11387 {
11388 struct machine_function *m = cfun->machine;
11389 struct machine_frame_state frame_state_save = m->fs;
11390 struct ix86_frame frame;
11391 bool restore_regs_via_mov;
11392 bool using_drap;
11393
11394 ix86_finalize_stack_realign_flags ();
11395 ix86_compute_frame_layout (&frame);
11396
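/* The stack pointer remains valid for addressing if no frame pointer
   is needed, or if SP is known to be unchanged and the stack was not
   realigned relative to the frame pointer.  */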
11397 m->fs.sp_valid = (!frame_pointer_needed
11398 || (crtl->sp_is_unchanging
11399 && !stack_realign_fp));
11400 gcc_assert (!m->fs.sp_valid
11401 || m->fs.sp_offset == frame.stack_pointer_offset);
11402
11403 /* The FP must be valid if the frame pointer is present. */
11404 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
11405 gcc_assert (!m->fs.fp_valid
11406 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
11407
11408 /* We must have *some* valid pointer to the stack frame. */
11409 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
11410
11411 /* The DRAP is never valid at this point. */
11412 gcc_assert (!m->fs.drap_valid);
11413
11414 /* See the comment about red zone and frame
11415 pointer usage in ix86_expand_prologue. */
11416 if (frame_pointer_needed && frame.red_zone_size)
11417 emit_insn (gen_memory_blockage ());
11418
11419 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
11420 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
11421
11422 /* Determine the CFA offset of the end of the red-zone. */
11423 m->fs.red_zone_offset = 0;
11424 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
11425 {
11426 /* The red-zone begins below the return address. */
11427 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
11428
11429 /* When the register save area is in the aligned portion of
11430 the stack, determine the maximum runtime displacement that
11431 matches up with the aligned frame. */
11432 if (stack_realign_drap)
11433 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
11434 + UNITS_PER_WORD);
11435 }
11436
11437 /* Special care must be taken for the normal return case of a function
11438 using eh_return: the eax and edx registers are marked as saved, but
11439 not restored along this path. Adjust the save location to match. */
11440 if (crtl->calls_eh_return && style != 2)
11441 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
11442
11443 /* EH_RETURN requires the use of moves to function properly. */
11444 if (crtl->calls_eh_return)
11445 restore_regs_via_mov = true;
11446 /* SEH requires the use of pops to identify the epilogue. */
11447 else if (TARGET_SEH)
11448 restore_regs_via_mov = false;
11449 /* If we're only restoring one register and sp is not valid then
11450 use a move instruction to restore the register, since it's
11451 less work than reloading sp and popping the register. */
11452 else if (!m->fs.sp_valid && frame.nregs <= 1)
11453 restore_regs_via_mov = true;
11454 else if (TARGET_EPILOGUE_USING_MOVE
11455 && cfun->machine->use_fast_prologue_epilogue
11456 && (frame.nregs > 1
11457 || m->fs.sp_offset != frame.reg_save_offset))
11458 restore_regs_via_mov = true;
11459 else if (frame_pointer_needed
11460 && !frame.nregs
11461 && m->fs.sp_offset != frame.reg_save_offset)
11462 restore_regs_via_mov = true;
11463 else if (frame_pointer_needed
11464 && TARGET_USE_LEAVE
11465 && cfun->machine->use_fast_prologue_epilogue
11466 && frame.nregs == 1)
11467 restore_regs_via_mov = true;
11468 else
11469 restore_regs_via_mov = false;
11470
11471 if (restore_regs_via_mov || frame.nsseregs)
11472 {
11473 /* Ensure that the entire register save area is addressable via
11474 the stack pointer, if we will restore via sp. */
11475 if (TARGET_64BIT
11476 && m->fs.sp_offset > 0x7fffffff
11477 && !(m->fs.fp_valid || m->fs.drap_valid)
11478 && (frame.nsseregs + frame.nregs) != 0)
11479 {
11480 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11481 GEN_INT (m->fs.sp_offset
11482 - frame.sse_reg_save_offset),
11483 style,
11484 m->fs.cfa_reg == stack_pointer_rtx);
11485 }
11486 }
11487
11488 /* If there are any SSE registers to restore, then we have to do it
11489 via moves, since there's obviously no pop for SSE regs. */
11490 if (frame.nsseregs)
11491 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
11492 style == 2);
11493
11494 if (restore_regs_via_mov)
11495 {
11496 rtx t;
11497
11498 if (frame.nregs)
11499 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
11500
11501 /* eh_return epilogues need %ecx added to the stack pointer. */
11502 if (style == 2)
11503 {
11504 rtx insn, sa = EH_RETURN_STACKADJ_RTX;
11505
11506 /* Stack align doesn't work with eh_return. */
11507 gcc_assert (!stack_realign_drap);
11508 /* Neither do regparm nested functions. */
11509 gcc_assert (!ix86_static_chain_on_stack);
11510
11511 if (frame_pointer_needed)
11512 {
11513 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
11514 t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
11515 emit_insn (gen_rtx_SET (VOIDmode, sa, t));
11516
11517 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
11518 insn = emit_move_insn (hard_frame_pointer_rtx, t);
11519
11520 /* Note that we use SA as a temporary CFA, as the return
11521 address is at the proper place relative to it. We
11522 pretend this happens at the FP restore insn because
11523 prior to this insn the FP would be stored at the wrong
11524 offset relative to SA, and after this insn we have no
11525 other reasonable register to use for the CFA. We don't
11526 bother resetting the CFA to the SP for the duration of
11527 the return insn. */
11528 add_reg_note (insn, REG_CFA_DEF_CFA,
11529 plus_constant (Pmode, sa, UNITS_PER_WORD));
11530 ix86_add_queued_cfa_restore_notes (insn);
11531 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
11532 RTX_FRAME_RELATED_P (insn) = 1;
11533
11534 m->fs.cfa_reg = sa;
11535 m->fs.cfa_offset = UNITS_PER_WORD;
11536 m->fs.fp_valid = false;
11537
11538 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
11539 const0_rtx, style, false);
11540 }
11541 else
11542 {
11543 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
11544 t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
11545 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
11546 ix86_add_queued_cfa_restore_notes (insn);
11547
11548 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
11549 if (m->fs.cfa_offset != UNITS_PER_WORD)
11550 {
11551 m->fs.cfa_offset = UNITS_PER_WORD;
11552 add_reg_note (insn, REG_CFA_DEF_CFA,
11553 plus_constant (Pmode, stack_pointer_rtx,
11554 UNITS_PER_WORD));
11555 RTX_FRAME_RELATED_P (insn) = 1;
11556 }
11557 }
11558 m->fs.sp_offset = UNITS_PER_WORD;
11559 m->fs.sp_valid = true;
11560 }
11561 }
11562 else
11563 {
11564 /* SEH requires that the function end with (1) a stack adjustment
11565 if necessary, (2) a sequence of pops, and (3) a return or
11566 jump instruction. Prevent insns from the function body from
11567 being scheduled into this sequence. */
11568 if (TARGET_SEH)
11569 {
11570 /* Prevent a catch region from being adjacent to the standard
11571 epilogue sequence. Unfortunately, crtl->uses_eh_lsda and
11572 several other flags that would be interesting to test are
11573 not yet set up. */
11574 if (flag_non_call_exceptions)
11575 emit_insn (gen_nops (const1_rtx));
11576 else
11577 emit_insn (gen_blockage ());
11578 }
11579
11580 /* First step is to deallocate the stack frame so that we can
11581 pop the registers. Also do it on the SEH target for very large
11582 frames, as the emitted instructions aren't allowed by the ABI in
11583 epilogues. */
11584 if (!m->fs.sp_valid
11585 || (TARGET_SEH
11586 && (m->fs.sp_offset - frame.reg_save_offset
11587 >= SEH_MAX_FRAME_SIZE)))
11588 {
11589 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
11590 GEN_INT (m->fs.fp_offset
11591 - frame.reg_save_offset),
11592 style, false);
11593 }
11594 else if (m->fs.sp_offset != frame.reg_save_offset)
11595 {
11596 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11597 GEN_INT (m->fs.sp_offset
11598 - frame.reg_save_offset),
11599 style,
11600 m->fs.cfa_reg == stack_pointer_rtx);
11601 }
11602
11603 ix86_emit_restore_regs_using_pop ();
11604 }
11605
11606 /* If we used a frame pointer and haven't already got rid of it,
11607 then do so now. */
11608 if (m->fs.fp_valid)
11609 {
11610 /* If the stack pointer is valid and pointing at the frame
11611 pointer store address, then we only need a pop. */
11612 if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
11613 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
11614 /* Leave results in shorter dependency chains on CPUs that are
11615 able to grok it fast. */
11616 else if (TARGET_USE_LEAVE
11617 || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun))
11618 || !cfun->machine->use_fast_prologue_epilogue)
11619 ix86_emit_leave ();
11620 else
11621 {
11622 pro_epilogue_adjust_stack (stack_pointer_rtx,
11623 hard_frame_pointer_rtx,
11624 const0_rtx, style, !using_drap);
11625 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
11626 }
11627 }
11628
11629 if (using_drap)
11630 {
11631 int param_ptr_offset = UNITS_PER_WORD;
11632 rtx insn;
11633
11634 gcc_assert (stack_realign_drap);
11635
11636 if (ix86_static_chain_on_stack)
11637 param_ptr_offset += UNITS_PER_WORD;
11638 if (!call_used_regs[REGNO (crtl->drap_reg)])
11639 param_ptr_offset += UNITS_PER_WORD;
11640
11641 insn = emit_insn (gen_rtx_SET
11642 (VOIDmode, stack_pointer_rtx,
11643 gen_rtx_PLUS (Pmode,
11644 crtl->drap_reg,
11645 GEN_INT (-param_ptr_offset))));
11646 m->fs.cfa_reg = stack_pointer_rtx;
11647 m->fs.cfa_offset = param_ptr_offset;
11648 m->fs.sp_offset = param_ptr_offset;
11649 m->fs.realigned = false;
11650
11651 add_reg_note (insn, REG_CFA_DEF_CFA,
11652 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11653 GEN_INT (param_ptr_offset)));
11654 RTX_FRAME_RELATED_P (insn) = 1;
11655
11656 if (!call_used_regs[REGNO (crtl->drap_reg)])
11657 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
11658 }
11659
11660 /* At this point the stack pointer must be valid, and we must have
11661 restored all of the registers. We may not have deallocated the
11662 entire stack frame. We've delayed this until now because it may
11663 be possible to merge the local stack deallocation with the
11664 deallocation forced by ix86_static_chain_on_stack. */
11665 gcc_assert (m->fs.sp_valid);
11666 gcc_assert (!m->fs.fp_valid);
11667 gcc_assert (!m->fs.realigned);
11668 if (m->fs.sp_offset != UNITS_PER_WORD)
11669 {
11670 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11671 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
11672 style, true);
11673 }
11674 else
11675 ix86_add_queued_cfa_restore_notes (get_last_insn ());
11676
11677 /* Sibcall epilogues don't want a return instruction. */
11678 if (style == 0)
11679 {
11680 m->fs = frame_state_save;
11681 return;
11682 }
11683
11684 if (crtl->args.pops_args && crtl->args.size)
11685 {
11686 rtx popc = GEN_INT (crtl->args.pops_args);
11687
11688 /* i386 can only pop 64K bytes. If asked to pop more, pop the return
11689 address, do an explicit add, and jump indirectly to the caller. */
11690
11691 if (crtl->args.pops_args >= 65536)
11692 {
11693 rtx ecx = gen_rtx_REG (SImode, CX_REG);
11694 rtx insn;
11695
11696 /* There is no "pascal" calling convention in any 64-bit ABI. */
11697 gcc_assert (!TARGET_64BIT);
11698
11699 insn = emit_insn (gen_pop (ecx));
11700 m->fs.cfa_offset -= UNITS_PER_WORD;
11701 m->fs.sp_offset -= UNITS_PER_WORD;
11702
11703 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
11704 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
11705 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
11706 add_reg_note (insn, REG_CFA_REGISTER,
11707 gen_rtx_SET (VOIDmode, ecx, pc_rtx));
11708 RTX_FRAME_RELATED_P (insn) = 1;
11709
11710 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11711 popc, -1, true);
11712 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
11713 }
11714 else
11715 emit_jump_insn (gen_simple_return_pop_internal (popc));
11716 }
11717 else
11718 emit_jump_insn (gen_simple_return_internal ());
11719
11720 /* Restore the state back to the state from the prologue,
11721 so that it's correct for the next epilogue. */
11722 m->fs = frame_state_save;
11723 }
11724
11725 /* Reset any state that compiling the function may have modified. */
11726
11727 static void
11728 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
11729 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
11730 {
11731 if (pic_offset_table_rtx)
11732 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
11733 #if TARGET_MACHO
11734 /* Mach-O doesn't support labels at the end of objects, so if
11735 it looks like we might want one, insert a NOP. */
11736 {
11737 rtx insn = get_last_insn ();
11738 rtx deleted_debug_label = NULL_RTX;
11739 while (insn
11740 && NOTE_P (insn)
11741 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
11742 {
11743 /* Don't insert a nop when the only labels seen are
11744 NOTE_INSN_DELETED_DEBUG_LABEL notes; instead set their
11745 CODE_LABEL_NUMBER to -1, otherwise there would be code
11746 generation differences between -g and -g0. */
11747 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
11748 deleted_debug_label = insn;
11749 insn = PREV_INSN (insn);
11750 }
11751 if (insn
11752 && (LABEL_P (insn)
11753 || (NOTE_P (insn)
11754 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
11755 fputs ("\tnop\n", file);
11756 else if (deleted_debug_label)
11757 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
11758 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
11759 CODE_LABEL_NUMBER (insn) = -1;
11760 }
11761 #endif
11762
11763 }
11764
11765 /* Return a scratch register to use in the split stack prologue. The
11766 split stack prologue is used for -fsplit-stack. It consists of the first
11767 instructions in the function, even before the regular prologue.
11768 The scratch register can be any caller-saved register which is not
11769 used for parameters or for the static chain. */
11770
11771 static unsigned int
11772 split_stack_prologue_scratch_regno (void)
11773 {
11774 if (TARGET_64BIT)
11775 return R11_REG;
11776 else
11777 {
11778 bool is_fastcall, is_thiscall;
11779 int regparm;
11780
11781 is_fastcall = (lookup_attribute ("fastcall",
11782 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
11783 != NULL);
11784 is_thiscall = (lookup_attribute ("thiscall",
11785 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
11786 != NULL);
11787 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
11788
11789 if (is_fastcall)
11790 {
11791 if (DECL_STATIC_CHAIN (cfun->decl))
11792 {
11793 sorry ("-fsplit-stack does not support fastcall with "
11794 "nested function");
11795 return INVALID_REGNUM;
11796 }
11797 return AX_REG;
11798 }
11799 else if (is_thiscall)
11800 {
11801 if (!DECL_STATIC_CHAIN (cfun->decl))
11802 return DX_REG;
11803 return AX_REG;
11804 }
11805 else if (regparm < 3)
11806 {
11807 if (!DECL_STATIC_CHAIN (cfun->decl))
11808 return CX_REG;
11809 else
11810 {
11811 if (regparm >= 2)
11812 {
11813 sorry ("-fsplit-stack does not support 2 register "
11814 " parameters for a nested function");
11815 return INVALID_REGNUM;
11816 }
11817 return DX_REG;
11818 }
11819 }
11820 else
11821 {
11822 /* FIXME: We could make this work by pushing a register
11823 around the addition and comparison. */
11824 sorry ("-fsplit-stack does not support 3 register parameters");
11825 return INVALID_REGNUM;
11826 }
11827 }
11828 }
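/* Illustrative summary of the 32-bit choices made above (the code above
   is authoritative; this table is only a reading aid):
     fastcall, no static chain        -> %eax
     thiscall, no static chain        -> %edx
     thiscall, static chain           -> %eax
     regparm <= 2, no static chain    -> %ecx
     regparm <= 1, static chain       -> %edx
     anything else                    -> unsupported, diagnosed with sorry ().  */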
11829
11830 /* A SYMBOL_REF for the function which allocates new stack space for
11831 -fsplit-stack. */
11832
11833 static GTY(()) rtx split_stack_fn;
11834
11835 /* A SYMBOL_REF for the more-stack function to use when using the large
11836 code model. */
11837
11838 static GTY(()) rtx split_stack_fn_large;
11839
11840 /* Handle -fsplit-stack. These are the first instructions in the
11841 function, even before the regular prologue. */
11842
11843 void
11844 ix86_expand_split_stack_prologue (void)
11845 {
11846 struct ix86_frame frame;
11847 HOST_WIDE_INT allocate;
11848 unsigned HOST_WIDE_INT args_size;
11849 rtx label, limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
11850 rtx scratch_reg = NULL_RTX;
11851 rtx varargs_label = NULL_RTX;
11852 rtx fn;
11853
11854 gcc_assert (flag_split_stack && reload_completed);
11855
11856 ix86_finalize_stack_realign_flags ();
11857 ix86_compute_frame_layout (&frame);
11858 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
11859
11860 /* This is the label we will branch to if we have enough stack
11861 space. We expect the basic block reordering pass to reverse this
11862 branch if optimizing, so that we branch in the unlikely case. */
11863 label = gen_label_rtx ();
11864
11865 /* We need to compare the stack pointer minus the frame size with
11866 the stack boundary in the TCB. The stack boundary always gives
11867 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
11868 can compare directly. Otherwise we need to do an addition. */
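/* A hedged sketch of the resulting code (the TCB offsets here are a
   glibc convention, not something this function guarantees): with the
   64-bit stack guard typically at %fs:0x70 (and %gs:0x30 in 32-bit mode),
   the comparison emitted further below is roughly
       cmpq  %fs:0x70, %rsp     ; or a scratch reg holding rsp - frame size
       jae   .Lhave_enough_stack  */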
11869
11870 limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
11871 UNSPEC_STACK_CHECK);
11872 limit = gen_rtx_CONST (Pmode, limit);
11873 limit = gen_rtx_MEM (Pmode, limit);
11874 if (allocate < SPLIT_STACK_AVAILABLE)
11875 current = stack_pointer_rtx;
11876 else
11877 {
11878 unsigned int scratch_regno;
11879 rtx offset;
11880
11881 /* We need a scratch register to hold the stack pointer minus
11882 the required frame size. Since this is the very start of the
11883 function, the scratch register can be any caller-saved
11884 register which is not used for parameters. */
11885 offset = GEN_INT (- allocate);
11886 scratch_regno = split_stack_prologue_scratch_regno ();
11887 if (scratch_regno == INVALID_REGNUM)
11888 return;
11889 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
11890 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
11891 {
11892 /* We don't use ix86_gen_add3 in this case because it will
11893 want to split to lea, but when not optimizing the insn
11894 will not be split after this point. */
11895 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11896 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11897 offset)));
11898 }
11899 else
11900 {
11901 emit_move_insn (scratch_reg, offset);
11902 emit_insn (ix86_gen_add3 (scratch_reg, scratch_reg,
11903 stack_pointer_rtx));
11904 }
11905 current = scratch_reg;
11906 }
11907
11908 ix86_expand_branch (GEU, current, limit, label);
11909 jump_insn = get_last_insn ();
11910 JUMP_LABEL (jump_insn) = label;
11911
11912 /* Mark the jump as very likely to be taken. */
11913 add_int_reg_note (jump_insn, REG_BR_PROB,
11914 REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100);
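/* That is REG_BR_PROB_BASE * 99 / 100, i.e. a 99% probability of the
   branch being taken.  */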
11915
11916 if (split_stack_fn == NULL_RTX)
11917 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
11918 fn = split_stack_fn;
11919
11920 /* Get more stack space. We pass in the desired stack space and the
11921 size of the arguments to copy to the new stack. In 32-bit mode
11922 we push the parameters; __morestack will return on a new stack
11923 anyhow. In 64-bit mode we pass the parameters in r10 and
11924 r11. */
11925 allocate_rtx = GEN_INT (allocate);
11926 args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
11927 call_fusage = NULL_RTX;
11928 if (TARGET_64BIT)
11929 {
11930 rtx reg10, reg11;
11931
11932 reg10 = gen_rtx_REG (Pmode, R10_REG);
11933 reg11 = gen_rtx_REG (Pmode, R11_REG);
11934
11935 /* If this function uses a static chain, it will be in %r10.
11936 Preserve it across the call to __morestack. */
11937 if (DECL_STATIC_CHAIN (cfun->decl))
11938 {
11939 rtx rax;
11940
11941 rax = gen_rtx_REG (word_mode, AX_REG);
11942 emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
11943 use_reg (&call_fusage, rax);
11944 }
11945
11946 if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
11947 && !TARGET_PECOFF)
11948 {
11949 HOST_WIDE_INT argval;
11950
11951 gcc_assert (Pmode == DImode);
11952 /* When using the large model we need to load the address
11953 into a register, and we've run out of registers. So we
11954 switch to a different calling convention, and we call a
11955 different function: __morestack_large_model. We pass the
11956 argument size in the upper 32 bits of r10 and pass the
11957 frame size in the lower 32 bits. */
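/* Worked example (for illustration only): with args_size == 0x20 and
   allocate == 0x1000, r10 is loaded with
   (0x20 << 32) + 0x1000 == 0x0000002000001000.  */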
11958 gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
11959 gcc_assert ((args_size & 0xffffffff) == args_size);
11960
11961 if (split_stack_fn_large == NULL_RTX)
11962 split_stack_fn_large =
11963 gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
11964
11965 if (ix86_cmodel == CM_LARGE_PIC)
11966 {
11967 rtx label, x;
11968
11969 label = gen_label_rtx ();
11970 emit_label (label);
11971 LABEL_PRESERVE_P (label) = 1;
11972 emit_insn (gen_set_rip_rex64 (reg10, label));
11973 emit_insn (gen_set_got_offset_rex64 (reg11, label));
11974 emit_insn (ix86_gen_add3 (reg10, reg10, reg11));
11975 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
11976 UNSPEC_GOT);
11977 x = gen_rtx_CONST (Pmode, x);
11978 emit_move_insn (reg11, x);
11979 x = gen_rtx_PLUS (Pmode, reg10, reg11);
11980 x = gen_const_mem (Pmode, x);
11981 emit_move_insn (reg11, x);
11982 }
11983 else
11984 emit_move_insn (reg11, split_stack_fn_large);
11985
11986 fn = reg11;
11987
11988 argval = ((args_size << 16) << 16) + allocate;
11989 emit_move_insn (reg10, GEN_INT (argval));
11990 }
11991 else
11992 {
11993 emit_move_insn (reg10, allocate_rtx);
11994 emit_move_insn (reg11, GEN_INT (args_size));
11995 use_reg (&call_fusage, reg11);
11996 }
11997
11998 use_reg (&call_fusage, reg10);
11999 }
12000 else
12001 {
12002 emit_insn (gen_push (GEN_INT (args_size)));
12003 emit_insn (gen_push (allocate_rtx));
12004 }
12005 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
12006 GEN_INT (UNITS_PER_WORD), constm1_rtx,
12007 NULL_RTX, false);
12008 add_function_usage_to (call_insn, call_fusage);
12009
12010 /* In order to make call/return prediction work right, we now need
12011 to execute a return instruction. See
12012 libgcc/config/i386/morestack.S for the details on how this works.
12013
12014 For flow purposes gcc must not see this as a return
12015 instruction--we need control flow to continue at the subsequent
12016 label. Therefore, we use an unspec. */
12017 gcc_assert (crtl->args.pops_args < 65536);
12018 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
12019
12020 /* If we are in 64-bit mode and this function uses a static chain,
12021 we saved %r10 in %rax before calling __morestack. */
12022 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
12023 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
12024 gen_rtx_REG (word_mode, AX_REG));
12025
12026 /* If this function calls va_start, we need to store a pointer to
12027 the arguments on the old stack, because they may not have been
12028 all copied to the new stack. At this point the old stack can be
12029 found at the frame pointer value used by __morestack, because
12030 __morestack has set that up before calling back to us. Here we
12031 store that pointer in a scratch register, and in
12032 ix86_expand_prologue we store the scratch register in a stack
12033 slot. */
12034 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12035 {
12036 unsigned int scratch_regno;
12037 rtx frame_reg;
12038 int words;
12039
12040 scratch_regno = split_stack_prologue_scratch_regno ();
12041 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
12042 frame_reg = gen_rtx_REG (Pmode, BP_REG);
12043
12044 /* 64-bit:
12045 fp -> old fp value
12046 return address within this function
12047 return address of caller of this function
12048 stack arguments
12049 So we add three words to get to the stack arguments.
12050
12051 32-bit:
12052 fp -> old fp value
12053 return address within this function
12054 first argument to __morestack
12055 second argument to __morestack
12056 return address of caller of this function
12057 stack arguments
12058 So we add five words to get to the stack arguments.
12059 */
12060 words = TARGET_64BIT ? 3 : 5;
12061 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12062 gen_rtx_PLUS (Pmode, frame_reg,
12063 GEN_INT (words * UNITS_PER_WORD))));
12064
12065 varargs_label = gen_label_rtx ();
12066 emit_jump_insn (gen_jump (varargs_label));
12067 JUMP_LABEL (get_last_insn ()) = varargs_label;
12068
12069 emit_barrier ();
12070 }
12071
12072 emit_label (label);
12073 LABEL_NUSES (label) = 1;
12074
12075 /* If this function calls va_start, we now have to set the scratch
12076 register for the case where we do not call __morestack. In this
12077 case we need to set it based on the stack pointer. */
12078 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12079 {
12080 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12081 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12082 GEN_INT (UNITS_PER_WORD))));
12083
12084 emit_label (varargs_label);
12085 LABEL_NUSES (varargs_label) = 1;
12086 }
12087 }
12088
12089 /* We may have to tell the dataflow pass that the split stack prologue
12090 is initializing a scratch register. */
12091
12092 static void
12093 ix86_live_on_entry (bitmap regs)
12094 {
12095 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12096 {
12097 gcc_assert (flag_split_stack);
12098 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
12099 }
12100 }
12101 \f
12102 /* Extract the parts of an RTL expression that is a valid memory address
12103 for an instruction. Return 0 if the structure of the address is
12104 grossly off. Return -1 if the address contains ASHIFT, so it is not
12105 strictly valid but is still used to compute the length of an lea insn. */
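/* A worked example (illustration only): the address
     (plus (plus (mult (reg B) (const_int 4)) (reg A)) (const_int 8)),
   i.e. 8(%eax,%ebx,4) when A is %eax and B is %ebx, decomposes into
   base = A, index = B, scale = 4, disp = (const_int 8), seg = SEG_DEFAULT.  */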
12106
12107 int
12108 ix86_decompose_address (rtx addr, struct ix86_address *out)
12109 {
12110 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
12111 rtx base_reg, index_reg;
12112 HOST_WIDE_INT scale = 1;
12113 rtx scale_rtx = NULL_RTX;
12114 rtx tmp;
12115 int retval = 1;
12116 enum ix86_address_seg seg = SEG_DEFAULT;
12117
12118 /* Allow zero-extended SImode addresses;
12119 they will be emitted with the addr32 prefix. */
12120 if (TARGET_64BIT && GET_MODE (addr) == DImode)
12121 {
12122 if (GET_CODE (addr) == ZERO_EXTEND
12123 && GET_MODE (XEXP (addr, 0)) == SImode)
12124 {
12125 addr = XEXP (addr, 0);
12126 if (CONST_INT_P (addr))
12127 return 0;
12128 }
12129 else if (GET_CODE (addr) == AND
12130 && const_32bit_mask (XEXP (addr, 1), DImode))
12131 {
12132 addr = simplify_gen_subreg (SImode, XEXP (addr, 0), DImode, 0);
12133 if (addr == NULL_RTX)
12134 return 0;
12135
12136 if (CONST_INT_P (addr))
12137 return 0;
12138 }
12139 }
12140
12141 /* Allow SImode subregs of DImode addresses;
12142 they will be emitted with the addr32 prefix. */
12143 if (TARGET_64BIT && GET_MODE (addr) == SImode)
12144 {
12145 if (GET_CODE (addr) == SUBREG
12146 && GET_MODE (SUBREG_REG (addr)) == DImode)
12147 {
12148 addr = SUBREG_REG (addr);
12149 if (CONST_INT_P (addr))
12150 return 0;
12151 }
12152 }
12153
12154 if (REG_P (addr))
12155 base = addr;
12156 else if (GET_CODE (addr) == SUBREG)
12157 {
12158 if (REG_P (SUBREG_REG (addr)))
12159 base = addr;
12160 else
12161 return 0;
12162 }
12163 else if (GET_CODE (addr) == PLUS)
12164 {
12165 rtx addends[4], op;
12166 int n = 0, i;
12167
12168 op = addr;
12169 do
12170 {
12171 if (n >= 4)
12172 return 0;
12173 addends[n++] = XEXP (op, 1);
12174 op = XEXP (op, 0);
12175 }
12176 while (GET_CODE (op) == PLUS);
12177 if (n >= 4)
12178 return 0;
12179 addends[n] = op;
12180
12181 for (i = n; i >= 0; --i)
12182 {
12183 op = addends[i];
12184 switch (GET_CODE (op))
12185 {
12186 case MULT:
12187 if (index)
12188 return 0;
12189 index = XEXP (op, 0);
12190 scale_rtx = XEXP (op, 1);
12191 break;
12192
12193 case ASHIFT:
12194 if (index)
12195 return 0;
12196 index = XEXP (op, 0);
12197 tmp = XEXP (op, 1);
12198 if (!CONST_INT_P (tmp))
12199 return 0;
12200 scale = INTVAL (tmp);
12201 if ((unsigned HOST_WIDE_INT) scale > 3)
12202 return 0;
12203 scale = 1 << scale;
12204 break;
12205
12206 case ZERO_EXTEND:
12207 op = XEXP (op, 0);
12208 if (GET_CODE (op) != UNSPEC)
12209 return 0;
12210 /* FALLTHRU */
12211
12212 case UNSPEC:
12213 if (XINT (op, 1) == UNSPEC_TP
12214 && TARGET_TLS_DIRECT_SEG_REFS
12215 && seg == SEG_DEFAULT)
12216 seg = DEFAULT_TLS_SEG_REG;
12217 else
12218 return 0;
12219 break;
12220
12221 case SUBREG:
12222 if (!REG_P (SUBREG_REG (op)))
12223 return 0;
12224 /* FALLTHRU */
12225
12226 case REG:
12227 if (!base)
12228 base = op;
12229 else if (!index)
12230 index = op;
12231 else
12232 return 0;
12233 break;
12234
12235 case CONST:
12236 case CONST_INT:
12237 case SYMBOL_REF:
12238 case LABEL_REF:
12239 if (disp)
12240 return 0;
12241 disp = op;
12242 break;
12243
12244 default:
12245 return 0;
12246 }
12247 }
12248 }
12249 else if (GET_CODE (addr) == MULT)
12250 {
12251 index = XEXP (addr, 0); /* index*scale */
12252 scale_rtx = XEXP (addr, 1);
12253 }
12254 else if (GET_CODE (addr) == ASHIFT)
12255 {
12256 /* We're called for lea too, which implements ashift on occasion. */
12257 index = XEXP (addr, 0);
12258 tmp = XEXP (addr, 1);
12259 if (!CONST_INT_P (tmp))
12260 return 0;
12261 scale = INTVAL (tmp);
12262 if ((unsigned HOST_WIDE_INT) scale > 3)
12263 return 0;
12264 scale = 1 << scale;
12265 retval = -1;
12266 }
12267 else
12268 disp = addr; /* displacement */
12269
12270 if (index)
12271 {
12272 if (REG_P (index))
12273 ;
12274 else if (GET_CODE (index) == SUBREG
12275 && REG_P (SUBREG_REG (index)))
12276 ;
12277 else
12278 return 0;
12279 }
12280
12281 /* Extract the integral value of scale. */
12282 if (scale_rtx)
12283 {
12284 if (!CONST_INT_P (scale_rtx))
12285 return 0;
12286 scale = INTVAL (scale_rtx);
12287 }
12288
12289 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
12290 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
12291
12292 /* Avoid useless 0 displacement. */
12293 if (disp == const0_rtx && (base || index))
12294 disp = NULL_RTX;
12295
12296 /* Allow arg pointer and stack pointer as index if there is no scaling. */
12297 if (base_reg && index_reg && scale == 1
12298 && (index_reg == arg_pointer_rtx
12299 || index_reg == frame_pointer_rtx
12300 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
12301 {
12302 rtx tmp;
12303 tmp = base, base = index, index = tmp;
12304 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
12305 }
12306
12307 /* Special case: %ebp cannot be encoded as a base without a displacement.
12308 Similarly %r13. */
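/* E.g. (%ebp) must be emitted as 0(%ebp), and (%r13) as 0(%r13); adding
   the zero displacement below makes the encoding possible.  */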
12309 if (!disp
12310 && base_reg
12311 && (base_reg == hard_frame_pointer_rtx
12312 || base_reg == frame_pointer_rtx
12313 || base_reg == arg_pointer_rtx
12314 || (REG_P (base_reg)
12315 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
12316 || REGNO (base_reg) == R13_REG))))
12317 disp = const0_rtx;
12318
12319 /* Special case: on K6, [%esi] forces the instruction to be vector decoded.
12320 Avoid this by transforming it to [%esi+0].
12321 Reload calls address legitimization without cfun defined, so we need
12322 to test cfun for being non-NULL. */
12323 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
12324 && base_reg && !index_reg && !disp
12325 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
12326 disp = const0_rtx;
12327
12328 /* Special case: encode reg+reg instead of reg*2. */
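/* Illustrative rationale (an x86 encoding detail, not derived from the
   code below): (%eax,%eax) needs no displacement byte, whereas
   (,%eax,2) has no base and therefore requires a full 32-bit
   displacement in the SIB encoding.  */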
12329 if (!base && index && scale == 2)
12330 base = index, base_reg = index_reg, scale = 1;
12331
12332 /* Special case: scaling cannot be encoded without base or displacement. */
12333 if (!base && !disp && index && scale != 1)
12334 disp = const0_rtx;
12335
12336 out->base = base;
12337 out->index = index;
12338 out->disp = disp;
12339 out->scale = scale;
12340 out->seg = seg;
12341
12342 return retval;
12343 }
12344 \f
12345 /* Return the cost of the memory address X.
12346 For i386, it is better to use a complex address than let gcc copy
12347 the address into a reg and make a new pseudo. But not if the address
12348 requires two regs - that would mean more pseudos with longer
12349 lifetimes. */
12350 static int
12351 ix86_address_cost (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED,
12352 addr_space_t as ATTRIBUTE_UNUSED,
12353 bool speed ATTRIBUTE_UNUSED)
12354 {
12355 struct ix86_address parts;
12356 int cost = 1;
12357 int ok = ix86_decompose_address (x, &parts);
12358
12359 gcc_assert (ok);
12360
12361 if (parts.base && GET_CODE (parts.base) == SUBREG)
12362 parts.base = SUBREG_REG (parts.base);
12363 if (parts.index && GET_CODE (parts.index) == SUBREG)
12364 parts.index = SUBREG_REG (parts.index);
12365
12366 /* Attempt to minimize number of registers in the address. */
12367 if ((parts.base
12368 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
12369 || (parts.index
12370 && (!REG_P (parts.index)
12371 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
12372 cost++;
12373
12374 if (parts.base
12375 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
12376 && parts.index
12377 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
12378 && parts.base != parts.index)
12379 cost++;
12380
12381 /* The AMD K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
12382 since its predecode logic can't detect the length of such instructions
12383 and decoding degenerates to vector decoding. Increase the cost of such
12384 addresses here. The penalty is at least 2 cycles. It may be worthwhile
12385 to split such addresses or even refuse them altogether.
12386 
12387 The following addressing modes are affected:
12388 [base+scale*index]
12389 [scale*index+disp]
12390 [base+index]
12391 
12392 The first and last case may be avoidable by explicitly coding a zero
12393 displacement in the memory address, but I don't have an AMD K6 machine
12394 handy to check this theory. */
12395
12396 if (TARGET_K6
12397 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
12398 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
12399 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
12400 cost += 10;
12401
12402 return cost;
12403 }
12404 \f
12405 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
12406 this is used to form addresses to local data when -fPIC is in
12407 use. */
12408
12409 static bool
12410 darwin_local_data_pic (rtx disp)
12411 {
12412 return (GET_CODE (disp) == UNSPEC
12413 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
12414 }
12415
12416 /* Determine if a given RTX is a valid constant. We already know this
12417 satisfies CONSTANT_P. */
12418
12419 static bool
12420 ix86_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
12421 {
12422 switch (GET_CODE (x))
12423 {
12424 case CONST:
12425 x = XEXP (x, 0);
12426
12427 if (GET_CODE (x) == PLUS)
12428 {
12429 if (!CONST_INT_P (XEXP (x, 1)))
12430 return false;
12431 x = XEXP (x, 0);
12432 }
12433
12434 if (TARGET_MACHO && darwin_local_data_pic (x))
12435 return true;
12436
12437 /* Only some unspecs are valid as "constants". */
12438 if (GET_CODE (x) == UNSPEC)
12439 switch (XINT (x, 1))
12440 {
12441 case UNSPEC_GOT:
12442 case UNSPEC_GOTOFF:
12443 case UNSPEC_PLTOFF:
12444 return TARGET_64BIT;
12445 case UNSPEC_TPOFF:
12446 case UNSPEC_NTPOFF:
12447 x = XVECEXP (x, 0, 0);
12448 return (GET_CODE (x) == SYMBOL_REF
12449 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
12450 case UNSPEC_DTPOFF:
12451 x = XVECEXP (x, 0, 0);
12452 return (GET_CODE (x) == SYMBOL_REF
12453 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
12454 default:
12455 return false;
12456 }
12457
12458 /* We must have drilled down to a symbol. */
12459 if (GET_CODE (x) == LABEL_REF)
12460 return true;
12461 if (GET_CODE (x) != SYMBOL_REF)
12462 return false;
12463 /* FALLTHRU */
12464
12465 case SYMBOL_REF:
12466 /* TLS symbols are never valid. */
12467 if (SYMBOL_REF_TLS_MODEL (x))
12468 return false;
12469
12470 /* DLLIMPORT symbols are never valid. */
12471 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
12472 && SYMBOL_REF_DLLIMPORT_P (x))
12473 return false;
12474
12475 #if TARGET_MACHO
12476 /* mdynamic-no-pic */
12477 if (MACHO_DYNAMIC_NO_PIC_P)
12478 return machopic_symbol_defined_p (x);
12479 #endif
12480 break;
12481
12482 case CONST_DOUBLE:
12483 if (GET_MODE (x) == TImode
12484 && x != CONST0_RTX (TImode)
12485 && !TARGET_64BIT)
12486 return false;
12487 break;
12488
12489 case CONST_VECTOR:
12490 if (!standard_sse_constant_p (x))
12491 return false;
12492
12493 default:
12494 break;
12495 }
12496
12497 /* Otherwise we handle everything else in the move patterns. */
12498 return true;
12499 }
12500
12501 /* Determine if it's legal to put X into the constant pool. This
12502 is not possible for the address of thread-local symbols, which
12503 is checked above. */
12504
12505 static bool
12506 ix86_cannot_force_const_mem (enum machine_mode mode, rtx x)
12507 {
12508 /* We can always put integral constants and vectors in memory. */
12509 switch (GET_CODE (x))
12510 {
12511 case CONST_INT:
12512 case CONST_DOUBLE:
12513 case CONST_VECTOR:
12514 return false;
12515
12516 default:
12517 break;
12518 }
12519 return !ix86_legitimate_constant_p (mode, x);
12520 }
12521
12522 /* Nonzero if the symbol is marked as dllimport, or as a stub-variable;
12523 otherwise zero. */
12524
12525 static bool
12526 is_imported_p (rtx x)
12527 {
12528 if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES
12529 || GET_CODE (x) != SYMBOL_REF)
12530 return false;
12531
12532 return SYMBOL_REF_DLLIMPORT_P (x) || SYMBOL_REF_STUBVAR_P (x);
12533 }
12534
12535
12536 /* Nonzero if the constant value X is a legitimate general operand
12537 when generating PIC code. It is given that flag_pic is on and
12538 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
12539
12540 bool
12541 legitimate_pic_operand_p (rtx x)
12542 {
12543 rtx inner;
12544
12545 switch (GET_CODE (x))
12546 {
12547 case CONST:
12548 inner = XEXP (x, 0);
12549 if (GET_CODE (inner) == PLUS
12550 && CONST_INT_P (XEXP (inner, 1)))
12551 inner = XEXP (inner, 0);
12552
12553 /* Only some unspecs are valid as "constants". */
12554 if (GET_CODE (inner) == UNSPEC)
12555 switch (XINT (inner, 1))
12556 {
12557 case UNSPEC_GOT:
12558 case UNSPEC_GOTOFF:
12559 case UNSPEC_PLTOFF:
12560 return TARGET_64BIT;
12561 case UNSPEC_TPOFF:
12562 x = XVECEXP (inner, 0, 0);
12563 return (GET_CODE (x) == SYMBOL_REF
12564 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
12565 case UNSPEC_MACHOPIC_OFFSET:
12566 return legitimate_pic_address_disp_p (x);
12567 default:
12568 return false;
12569 }
12570 /* FALLTHRU */
12571
12572 case SYMBOL_REF:
12573 case LABEL_REF:
12574 return legitimate_pic_address_disp_p (x);
12575
12576 default:
12577 return true;
12578 }
12579 }
12580
12581 /* Determine if a given CONST RTX is a valid memory displacement
12582 in PIC mode. */
12583
12584 bool
12585 legitimate_pic_address_disp_p (rtx disp)
12586 {
12587 bool saw_plus;
12588
12589 /* In 64bit mode we can allow direct addresses of symbols and labels
12590 when they are not dynamic symbols. */
12591 if (TARGET_64BIT)
12592 {
12593 rtx op0 = disp, op1;
12594
12595 switch (GET_CODE (disp))
12596 {
12597 case LABEL_REF:
12598 return true;
12599
12600 case CONST:
12601 if (GET_CODE (XEXP (disp, 0)) != PLUS)
12602 break;
12603 op0 = XEXP (XEXP (disp, 0), 0);
12604 op1 = XEXP (XEXP (disp, 0), 1);
12605 if (!CONST_INT_P (op1)
12606 || INTVAL (op1) >= 16*1024*1024
12607 || INTVAL (op1) < -16*1024*1024)
12608 break;
12609 if (GET_CODE (op0) == LABEL_REF)
12610 return true;
12611 if (GET_CODE (op0) == CONST
12612 && GET_CODE (XEXP (op0, 0)) == UNSPEC
12613 && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
12614 return true;
12615 if (GET_CODE (op0) == UNSPEC
12616 && XINT (op0, 1) == UNSPEC_PCREL)
12617 return true;
12618 if (GET_CODE (op0) != SYMBOL_REF)
12619 break;
12620 /* FALLTHRU */
12621
12622 case SYMBOL_REF:
12623 /* TLS references should always be enclosed in UNSPEC.
12624 A dllimported symbol always needs to be resolved. */
12625 if (SYMBOL_REF_TLS_MODEL (op0)
12626 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op0)))
12627 return false;
12628
12629 if (TARGET_PECOFF)
12630 {
12631 if (is_imported_p (op0))
12632 return true;
12633
12634 if (SYMBOL_REF_FAR_ADDR_P (op0)
12635 || !SYMBOL_REF_LOCAL_P (op0))
12636 break;
12637
12638 /* Function symbols need to be resolved only for
12639 the large model.
12640 For the small model we don't need to resolve anything
12641 here. */
12642 if ((ix86_cmodel != CM_LARGE_PIC
12643 && SYMBOL_REF_FUNCTION_P (op0))
12644 || ix86_cmodel == CM_SMALL_PIC)
12645 return true;
12646 /* Non-external symbols don't need to be resolved for
12647 the large and medium models. */
12648 if ((ix86_cmodel == CM_LARGE_PIC
12649 || ix86_cmodel == CM_MEDIUM_PIC)
12650 && !SYMBOL_REF_EXTERNAL_P (op0))
12651 return true;
12652 }
12653 else if (!SYMBOL_REF_FAR_ADDR_P (op0)
12654 && SYMBOL_REF_LOCAL_P (op0)
12655 && ix86_cmodel != CM_LARGE_PIC)
12656 return true;
12657 break;
12658
12659 default:
12660 break;
12661 }
12662 }
12663 if (GET_CODE (disp) != CONST)
12664 return false;
12665 disp = XEXP (disp, 0);
12666
12667 if (TARGET_64BIT)
12668 {
12669 /* It is not safe to allow PLUS expressions here; rejecting them limits
12670 the allowed reach into GOT tables, but we should not need these anyway. */
12671 if (GET_CODE (disp) != UNSPEC
12672 || (XINT (disp, 1) != UNSPEC_GOTPCREL
12673 && XINT (disp, 1) != UNSPEC_GOTOFF
12674 && XINT (disp, 1) != UNSPEC_PCREL
12675 && XINT (disp, 1) != UNSPEC_PLTOFF))
12676 return false;
12677
12678 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
12679 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
12680 return false;
12681 return true;
12682 }
12683
12684 saw_plus = false;
12685 if (GET_CODE (disp) == PLUS)
12686 {
12687 if (!CONST_INT_P (XEXP (disp, 1)))
12688 return false;
12689 disp = XEXP (disp, 0);
12690 saw_plus = true;
12691 }
12692
12693 if (TARGET_MACHO && darwin_local_data_pic (disp))
12694 return true;
12695
12696 if (GET_CODE (disp) != UNSPEC)
12697 return false;
12698
12699 switch (XINT (disp, 1))
12700 {
12701 case UNSPEC_GOT:
12702 if (saw_plus)
12703 return false;
12704 /* We need to check for both symbols and labels because VxWorks loads
12705 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
12706 details. */
12707 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
12708 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
12709 case UNSPEC_GOTOFF:
12710 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
12711 While the ABI also specifies a 32bit relocation, we don't produce
12712 it in the small PIC model at all. */
12713 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
12714 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
12715 && !TARGET_64BIT)
12716 return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
12717 return false;
12718 case UNSPEC_GOTTPOFF:
12719 case UNSPEC_GOTNTPOFF:
12720 case UNSPEC_INDNTPOFF:
12721 if (saw_plus)
12722 return false;
12723 disp = XVECEXP (disp, 0, 0);
12724 return (GET_CODE (disp) == SYMBOL_REF
12725 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
12726 case UNSPEC_NTPOFF:
12727 disp = XVECEXP (disp, 0, 0);
12728 return (GET_CODE (disp) == SYMBOL_REF
12729 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
12730 case UNSPEC_DTPOFF:
12731 disp = XVECEXP (disp, 0, 0);
12732 return (GET_CODE (disp) == SYMBOL_REF
12733 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
12734 }
12735
12736 return false;
12737 }
12738
12739 /* Our implementation of LEGITIMIZE_RELOAD_ADDRESS. Return true if part
12740 of the address was reloaded, in which case the calling macro should
12741 goto WIN; return false if no replacement is called for and the
12742 original X should be used unchanged. */
12743
12744 bool
12745 ix86_legitimize_reload_address (rtx x,
12746 enum machine_mode mode ATTRIBUTE_UNUSED,
12747 int opnum, int type,
12748 int ind_levels ATTRIBUTE_UNUSED)
12749 {
12750 /* Reload can generate:
12751
12752 (plus:DI (plus:DI (unspec:DI [(const_int 0 [0])] UNSPEC_TP)
12753 (reg:DI 97))
12754 (reg:DI 2 cx))
12755
12756 This RTX is rejected by ix86_legitimate_address_p due to
12757 non-strictness of base register 97. Following this rejection,
12758 reload pushes all three components into separate registers,
12759 creating an invalid memory address RTX.
12760 
12761 The following code reloads only the invalid part of the
12762 memory address RTX. */
12763
12764 if (GET_CODE (x) == PLUS
12765 && REG_P (XEXP (x, 1))
12766 && GET_CODE (XEXP (x, 0)) == PLUS
12767 && REG_P (XEXP (XEXP (x, 0), 1)))
12768 {
12769 rtx base, index;
12770 bool something_reloaded = false;
12771
12772 base = XEXP (XEXP (x, 0), 1);
12773 if (!REG_OK_FOR_BASE_STRICT_P (base))
12774 {
12775 push_reload (base, NULL_RTX, &XEXP (XEXP (x, 0), 1), NULL,
12776 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
12777 opnum, (enum reload_type) type);
12778 something_reloaded = true;
12779 }
12780
12781 index = XEXP (x, 1);
12782 if (!REG_OK_FOR_INDEX_STRICT_P (index))
12783 {
12784 push_reload (index, NULL_RTX, &XEXP (x, 1), NULL,
12785 INDEX_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
12786 opnum, (enum reload_type) type);
12787 something_reloaded = true;
12788 }
12789
12790 gcc_assert (something_reloaded);
12791 return true;
12792 }
12793
12794 return false;
12795 }
12796
12797 /* Determine if OP is a suitable RTX for an address register.
12798 Return the naked register if a register or a register subreg is
12799 found, otherwise return NULL_RTX. */
12800
12801 static rtx
12802 ix86_validate_address_register (rtx op)
12803 {
12804 enum machine_mode mode = GET_MODE (op);
12805
12806 /* Only SImode or DImode registers can form the address. */
12807 if (mode != SImode && mode != DImode)
12808 return NULL_RTX;
12809
12810 if (REG_P (op))
12811 return op;
12812 else if (GET_CODE (op) == SUBREG)
12813 {
12814 rtx reg = SUBREG_REG (op);
12815
12816 if (!REG_P (reg))
12817 return NULL_RTX;
12818
12819 mode = GET_MODE (reg);
12820
12821 /* Don't allow SUBREGs that span more than a word. It can
12822 lead to spill failures when the register is one word out
12823 of a two word structure. */
12824 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
12825 return NULL_RTX;
12826
12827 /* Allow only SUBREGs of non-eliminable hard registers. */
12828 if (register_no_elim_operand (reg, mode))
12829 return reg;
12830 }
12831
12832 /* Op is not a register. */
12833 return NULL_RTX;
12834 }
12835
12836 /* Recognizes RTL expressions that are valid memory addresses for an
12837 instruction. The MODE argument is the machine mode for the MEM
12838 expression that wants to use this address.
12839
12840 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
12841 convert common non-canonical forms to canonical form so that they will
12842 be recognized. */
12843
12844 static bool
12845 ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
12846 rtx addr, bool strict)
12847 {
12848 struct ix86_address parts;
12849 rtx base, index, disp;
12850 HOST_WIDE_INT scale;
12851 enum ix86_address_seg seg;
12852
12853 if (ix86_decompose_address (addr, &parts) <= 0)
12854 /* Decomposition failed. */
12855 return false;
12856
12857 base = parts.base;
12858 index = parts.index;
12859 disp = parts.disp;
12860 scale = parts.scale;
12861 seg = parts.seg;
12862
12863 /* Validate base register. */
12864 if (base)
12865 {
12866 rtx reg = ix86_validate_address_register (base);
12867
12868 if (reg == NULL_RTX)
12869 return false;
12870
12871 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
12872 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
12873 /* Base is not valid. */
12874 return false;
12875 }
12876
12877 /* Validate index register. */
12878 if (index)
12879 {
12880 rtx reg = ix86_validate_address_register (index);
12881
12882 if (reg == NULL_RTX)
12883 return false;
12884
12885 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
12886 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
12887 /* Index is not valid. */
12888 return false;
12889 }
12890
12891 /* Index and base should have the same mode. */
12892 if (base && index
12893 && GET_MODE (base) != GET_MODE (index))
12894 return false;
12895
12896 /* Address override works only on the (%reg) part of %fs:(%reg). */
12897 if (seg != SEG_DEFAULT
12898 && ((base && GET_MODE (base) != word_mode)
12899 || (index && GET_MODE (index) != word_mode)))
12900 return false;
12901
12902 /* Validate scale factor. */
12903 if (scale != 1)
12904 {
12905 if (!index)
12906 /* Scale without index. */
12907 return false;
12908
12909 if (scale != 2 && scale != 4 && scale != 8)
12910 /* Scale is not a valid multiplier. */
12911 return false;
12912 }
12913
12914 /* Validate displacement. */
12915 if (disp)
12916 {
12917 if (GET_CODE (disp) == CONST
12918 && GET_CODE (XEXP (disp, 0)) == UNSPEC
12919 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
12920 switch (XINT (XEXP (disp, 0), 1))
12921 {
12922 /* Refuse GOTOFF and GOT in 64bit mode since they are always 64bit when
12923 used. While the ABI also specifies 32bit relocations, we don't produce
12924 them at all and use IP-relative addressing instead. */
12925 case UNSPEC_GOT:
12926 case UNSPEC_GOTOFF:
12927 gcc_assert (flag_pic);
12928 if (!TARGET_64BIT)
12929 goto is_legitimate_pic;
12930
12931 /* 64bit address unspec. */
12932 return false;
12933
12934 case UNSPEC_GOTPCREL:
12935 case UNSPEC_PCREL:
12936 gcc_assert (flag_pic);
12937 goto is_legitimate_pic;
12938
12939 case UNSPEC_GOTTPOFF:
12940 case UNSPEC_GOTNTPOFF:
12941 case UNSPEC_INDNTPOFF:
12942 case UNSPEC_NTPOFF:
12943 case UNSPEC_DTPOFF:
12944 break;
12945
12946 case UNSPEC_STACK_CHECK:
12947 gcc_assert (flag_split_stack);
12948 break;
12949
12950 default:
12951 /* Invalid address unspec. */
12952 return false;
12953 }
12954
12955 else if (SYMBOLIC_CONST (disp)
12956 && (flag_pic
12957 || (TARGET_MACHO
12958 #if TARGET_MACHO
12959 && MACHOPIC_INDIRECT
12960 && !machopic_operand_p (disp)
12961 #endif
12962 )))
12963 {
12964
12965 is_legitimate_pic:
12966 if (TARGET_64BIT && (index || base))
12967 {
12968 /* foo@dtpoff(%rX) is ok. */
12969 if (GET_CODE (disp) != CONST
12970 || GET_CODE (XEXP (disp, 0)) != PLUS
12971 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
12972 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
12973 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
12974 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
12975 /* Non-constant pic memory reference. */
12976 return false;
12977 }
12978 else if ((!TARGET_MACHO || flag_pic)
12979 && ! legitimate_pic_address_disp_p (disp))
12980 /* Displacement is an invalid pic construct. */
12981 return false;
12982 #if TARGET_MACHO
12983 else if (MACHO_DYNAMIC_NO_PIC_P
12984 && !ix86_legitimate_constant_p (Pmode, disp))
12985 /* The displacement must be referenced via a non_lazy_pointer. */
12986 return false;
12987 #endif
12988
12989 /* This code used to verify that a symbolic pic displacement
12990 includes the pic_offset_table_rtx register.
12991 
12992 While this is a good idea, unfortunately these constructs may
12993 be created by the "adds using lea" optimization for incorrect
12994 code like:
12995 
12996 int a;
12997 int foo(int i)
12998 {
12999 return *(&a+i);
13000 }
13001 
13002 This code is nonsensical, but results in addressing the
13003 GOT table with a pic_offset_table_rtx base. We can't
13004 just refuse it easily, since it gets matched by the
13005 "addsi3" pattern, which later gets split to an lea when the
13006 output register differs from the input. While this
13007 could be handled by a separate addsi pattern for this case
13008 that never results in an lea, disabling this test seems to be
13009 the easier and correct fix for the crash. */
13010 }
13011 else if (GET_CODE (disp) != LABEL_REF
13012 && !CONST_INT_P (disp)
13013 && (GET_CODE (disp) != CONST
13014 || !ix86_legitimate_constant_p (Pmode, disp))
13015 && (GET_CODE (disp) != SYMBOL_REF
13016 || !ix86_legitimate_constant_p (Pmode, disp)))
13017 /* Displacement is not constant. */
13018 return false;
13019 else if (TARGET_64BIT
13020 && !x86_64_immediate_operand (disp, VOIDmode))
13021 /* Displacement is out of range. */
13022 return false;
13023 /* In x32 mode, constant addresses are sign-extended to 64bit, so
13024 we have to reject addresses from 0x80000000 to 0xffffffff. */
13025 else if (TARGET_X32 && !(index || base)
13026 && CONST_INT_P (disp)
13027 && val_signbit_known_set_p (SImode, INTVAL (disp)))
13028 return false;
13029 }
13030
13031 /* Everything looks valid. */
13032 return true;
13033 }
13034
13035 /* Determine if a given RTX is a valid constant address. */
13036
13037 bool
13038 constant_address_p (rtx x)
13039 {
13040 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
13041 }
13042 \f
13043 /* Return a unique alias set for the GOT. */
13044
13045 static alias_set_type
13046 ix86_GOT_alias_set (void)
13047 {
13048 static alias_set_type set = -1;
13049 if (set == -1)
13050 set = new_alias_set ();
13051 return set;
13052 }
13053
13054 /* Return a legitimate reference for ORIG (an address) using the
13055 register REG. If REG is 0, a new pseudo is generated.
13056
13057 There are two types of references that must be handled:
13058
13059 1. Global data references must load the address from the GOT, via
13060 the PIC reg. An insn is emitted to do this load, and the reg is
13061 returned.
13062
13063 2. Static data references, constant pool addresses, and code labels
13064 compute the address as an offset from the GOT, whose base is in
13065 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
13066 differentiate them from global data objects. The returned
13067 address is the PIC reg + an unspec constant.
13068
13069 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
13070 reg also appears in the address. */
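/* Illustrative 32-bit ELF examples of the two cases (assembly shown only
   as a reading aid; this function works at the RTL level):
     global x:  movl x@GOT(%ebx), %reg     ; case 1, load the address from the GOT
     static y:  leal y@GOTOFF(%ebx), %reg  ; case 2, PIC reg + unspec offset  */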
13071
13072 static rtx
13073 legitimize_pic_address (rtx orig, rtx reg)
13074 {
13075 rtx addr = orig;
13076 rtx new_rtx = orig;
13077
13078 #if TARGET_MACHO
13079 if (TARGET_MACHO && !TARGET_64BIT)
13080 {
13081 if (reg == 0)
13082 reg = gen_reg_rtx (Pmode);
13083 /* Use the generic Mach-O PIC machinery. */
13084 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
13085 }
13086 #endif
13087
13088 if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13089 {
13090 rtx tmp = legitimize_pe_coff_symbol (addr, true);
13091 if (tmp)
13092 return tmp;
13093 }
13094
13095 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
13096 new_rtx = addr;
13097 else if (TARGET_64BIT && !TARGET_PECOFF
13098 && ix86_cmodel != CM_SMALL_PIC && gotoff_operand (addr, Pmode))
13099 {
13100 rtx tmpreg;
13101 /* This symbol may be referenced via a displacement from the PIC
13102 base address (@GOTOFF). */
13103
13104 if (reload_in_progress)
13105 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13106 if (GET_CODE (addr) == CONST)
13107 addr = XEXP (addr, 0);
13108 if (GET_CODE (addr) == PLUS)
13109 {
13110 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13111 UNSPEC_GOTOFF);
13112 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13113 }
13114 else
13115 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13116 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13117 if (!reg)
13118 tmpreg = gen_reg_rtx (Pmode);
13119 else
13120 tmpreg = reg;
13121 emit_move_insn (tmpreg, new_rtx);
13122
13123 if (reg != 0)
13124 {
13125 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
13126 tmpreg, 1, OPTAB_DIRECT);
13127 new_rtx = reg;
13128 }
13129 else
13130 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
13131 }
13132 else if (!TARGET_64BIT && !TARGET_PECOFF && gotoff_operand (addr, Pmode))
13133 {
13134 /* This symbol may be referenced via a displacement from the PIC
13135 base address (@GOTOFF). */
13136
13137 if (reload_in_progress)
13138 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13139 if (GET_CODE (addr) == CONST)
13140 addr = XEXP (addr, 0);
13141 if (GET_CODE (addr) == PLUS)
13142 {
13143 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13144 UNSPEC_GOTOFF);
13145 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13146 }
13147 else
13148 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13149 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13150 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13151
13152 if (reg != 0)
13153 {
13154 emit_move_insn (reg, new_rtx);
13155 new_rtx = reg;
13156 }
13157 }
13158 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
13159 /* We can't use @GOTOFF for text labels on VxWorks;
13160 see gotoff_operand. */
13161 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
13162 {
13163 rtx tmp = legitimize_pe_coff_symbol (addr, true);
13164 if (tmp)
13165 return tmp;
13166
13167 /* For x64 PE-COFF there is no GOT table, so we use the address
13168 directly. */
13169 if (TARGET_64BIT && TARGET_PECOFF)
13170 {
13171 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
13172 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13173
13174 if (reg == 0)
13175 reg = gen_reg_rtx (Pmode);
13176 emit_move_insn (reg, new_rtx);
13177 new_rtx = reg;
13178 }
13179 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
13180 {
13181 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
13182 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13183 new_rtx = gen_const_mem (Pmode, new_rtx);
13184 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13185
13186 if (reg == 0)
13187 reg = gen_reg_rtx (Pmode);
13188 /* Use gen_movsi directly, otherwise the address is loaded
13189 into a register for CSE. We don't want to CSE these addresses;
13190 instead we CSE addresses from the GOT table, so skip this. */
13191 emit_insn (gen_movsi (reg, new_rtx));
13192 new_rtx = reg;
13193 }
13194 else
13195 {
13196 /* This symbol must be referenced via a load from the
13197 Global Offset Table (@GOT). */
13198
13199 if (reload_in_progress)
13200 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13201 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
13202 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13203 if (TARGET_64BIT)
13204 new_rtx = force_reg (Pmode, new_rtx);
13205 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13206 new_rtx = gen_const_mem (Pmode, new_rtx);
13207 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13208
13209 if (reg == 0)
13210 reg = gen_reg_rtx (Pmode);
13211 emit_move_insn (reg, new_rtx);
13212 new_rtx = reg;
13213 }
13214 }
13215 else
13216 {
13217 if (CONST_INT_P (addr)
13218 && !x86_64_immediate_operand (addr, VOIDmode))
13219 {
13220 if (reg)
13221 {
13222 emit_move_insn (reg, addr);
13223 new_rtx = reg;
13224 }
13225 else
13226 new_rtx = force_reg (Pmode, addr);
13227 }
13228 else if (GET_CODE (addr) == CONST)
13229 {
13230 addr = XEXP (addr, 0);
13231
13232 /* We must match stuff we generated before. Assume the only
13233 unspecs that can get here are ours. Not that we could do
13234 anything with them anyway.... */
13235 if (GET_CODE (addr) == UNSPEC
13236 || (GET_CODE (addr) == PLUS
13237 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
13238 return orig;
13239 gcc_assert (GET_CODE (addr) == PLUS);
13240 }
13241 if (GET_CODE (addr) == PLUS)
13242 {
13243 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
13244
13245 /* Check first to see if this is a constant offset from a @GOTOFF
13246 symbol reference. */
13247 if (!TARGET_PECOFF && gotoff_operand (op0, Pmode)
13248 && CONST_INT_P (op1))
13249 {
13250 if (!TARGET_64BIT)
13251 {
13252 if (reload_in_progress)
13253 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13254 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
13255 UNSPEC_GOTOFF);
13256 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
13257 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13258 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13259
13260 if (reg != 0)
13261 {
13262 emit_move_insn (reg, new_rtx);
13263 new_rtx = reg;
13264 }
13265 }
13266 else
13267 {
13268 if (INTVAL (op1) < -16*1024*1024
13269 || INTVAL (op1) >= 16*1024*1024)
13270 {
13271 if (!x86_64_immediate_operand (op1, Pmode))
13272 op1 = force_reg (Pmode, op1);
13273 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
13274 }
13275 }
13276 }
13277 else
13278 {
13279 rtx base = legitimize_pic_address (op0, reg);
13280 enum machine_mode mode = GET_MODE (base);
13281 new_rtx
13282 = legitimize_pic_address (op1, base == reg ? NULL_RTX : reg);
13283
13284 if (CONST_INT_P (new_rtx))
13285 {
13286 if (INTVAL (new_rtx) < -16*1024*1024
13287 || INTVAL (new_rtx) >= 16*1024*1024)
13288 {
13289 if (!x86_64_immediate_operand (new_rtx, mode))
13290 new_rtx = force_reg (mode, new_rtx);
13291 new_rtx
13292 = gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
13293 }
13294 else
13295 new_rtx = plus_constant (mode, base, INTVAL (new_rtx));
13296 }
13297 else
13298 {
13299 if (GET_CODE (new_rtx) == PLUS
13300 && CONSTANT_P (XEXP (new_rtx, 1)))
13301 {
13302 base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
13303 new_rtx = XEXP (new_rtx, 1);
13304 }
13305 new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
13306 }
13307 }
13308 }
13309 }
13310 return new_rtx;
13311 }
13312 \f
13313 /* Load the thread pointer. If TO_REG is true, force it into a register. */
13314
13315 static rtx
13316 get_thread_pointer (enum machine_mode tp_mode, bool to_reg)
13317 {
13318 rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
13319
13320 if (GET_MODE (tp) != tp_mode)
13321 {
13322 gcc_assert (GET_MODE (tp) == SImode);
13323 gcc_assert (tp_mode == DImode);
13324
13325 tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
13326 }
13327
13328 if (to_reg)
13329 tp = copy_to_mode_reg (tp_mode, tp);
13330
13331 return tp;
13332 }
13333
13334 /* Construct the SYMBOL_REF for the tls_get_addr function. */
13335
13336 static GTY(()) rtx ix86_tls_symbol;
13337
13338 static rtx
13339 ix86_tls_get_addr (void)
13340 {
13341 if (!ix86_tls_symbol)
13342 {
13343 const char *sym
13344 = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
13345 ? "___tls_get_addr" : "__tls_get_addr");
13346
13347 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
13348 }
13349
13350 if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF)
13351 {
13352 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol),
13353 UNSPEC_PLTOFF);
13354 return gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
13355 gen_rtx_CONST (Pmode, unspec));
13356 }
13357
13358 return ix86_tls_symbol;
13359 }
13360
13361 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
13362
13363 static GTY(()) rtx ix86_tls_module_base_symbol;
13364
13365 rtx
13366 ix86_tls_module_base (void)
13367 {
13368 if (!ix86_tls_module_base_symbol)
13369 {
13370 ix86_tls_module_base_symbol
13371 = gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");
13372
13373 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
13374 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
13375 }
13376
13377 return ix86_tls_module_base_symbol;
13378 }
13379
13380 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
13381 false if we expect this to be used for a memory address and true if
13382 we expect to load the address into a register. */
13383
13384 static rtx
13385 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
13386 {
13387 rtx dest, base, off;
13388 rtx pic = NULL_RTX, tp = NULL_RTX;
13389 enum machine_mode tp_mode = Pmode;
13390 int type;
13391
13392 /* Fall back to the global dynamic model if the toolchain cannot support
13393 the local dynamic model. */
13394 if (TARGET_SUN_TLS && !TARGET_64BIT
13395 && !HAVE_AS_IX86_TLSLDMPLT && !HAVE_AS_IX86_TLSLDM
13396 && model == TLS_MODEL_LOCAL_DYNAMIC)
13397 model = TLS_MODEL_GLOBAL_DYNAMIC;
13398
13399 switch (model)
13400 {
13401 case TLS_MODEL_GLOBAL_DYNAMIC:
13402 dest = gen_reg_rtx (Pmode);
13403
13404 if (!TARGET_64BIT)
13405 {
13406 if (flag_pic && !TARGET_PECOFF)
13407 pic = pic_offset_table_rtx;
13408 else
13409 {
13410 pic = gen_reg_rtx (Pmode);
13411 emit_insn (gen_set_got (pic));
13412 }
13413 }
13414
13415 if (TARGET_GNU2_TLS)
13416 {
13417 if (TARGET_64BIT)
13418 emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
13419 else
13420 emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
13421
13422 tp = get_thread_pointer (Pmode, true);
13423 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
13424
13425 if (GET_MODE (x) != Pmode)
13426 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13427
13428 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
13429 }
13430 else
13431 {
13432 rtx caddr = ix86_tls_get_addr ();
13433
13434 if (TARGET_64BIT)
13435 {
13436 rtx rax = gen_rtx_REG (Pmode, AX_REG);
13437 rtx insns;
13438
13439 start_sequence ();
13440 emit_call_insn
13441 (ix86_gen_tls_global_dynamic_64 (rax, x, caddr));
13442 insns = get_insns ();
13443 end_sequence ();
13444
13445 if (GET_MODE (x) != Pmode)
13446 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13447
13448 RTL_CONST_CALL_P (insns) = 1;
13449 emit_libcall_block (insns, dest, rax, x);
13450 }
13451 else
13452 emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
13453 }
13454 break;
13455
13456 case TLS_MODEL_LOCAL_DYNAMIC:
13457 base = gen_reg_rtx (Pmode);
13458
13459 if (!TARGET_64BIT)
13460 {
13461 if (flag_pic)
13462 pic = pic_offset_table_rtx;
13463 else
13464 {
13465 pic = gen_reg_rtx (Pmode);
13466 emit_insn (gen_set_got (pic));
13467 }
13468 }
13469
13470 if (TARGET_GNU2_TLS)
13471 {
13472 rtx tmp = ix86_tls_module_base ();
13473
13474 if (TARGET_64BIT)
13475 emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
13476 else
13477 emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
13478
13479 tp = get_thread_pointer (Pmode, true);
13480 set_unique_reg_note (get_last_insn (), REG_EQUAL,
13481 gen_rtx_MINUS (Pmode, tmp, tp));
13482 }
13483 else
13484 {
13485 rtx caddr = ix86_tls_get_addr ();
13486
13487 if (TARGET_64BIT)
13488 {
13489 rtx rax = gen_rtx_REG (Pmode, AX_REG);
13490 rtx insns, eqv;
13491
13492 start_sequence ();
13493 emit_call_insn
13494 (ix86_gen_tls_local_dynamic_base_64 (rax, caddr));
13495 insns = get_insns ();
13496 end_sequence ();
13497
13498 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
13499 share the LD_BASE result with other LD model accesses. */
13500 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
13501 UNSPEC_TLS_LD_BASE);
13502
13503 RTL_CONST_CALL_P (insns) = 1;
13504 emit_libcall_block (insns, base, rax, eqv);
13505 }
13506 else
13507 emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
13508 }
13509
13510 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
13511 off = gen_rtx_CONST (Pmode, off);
13512
13513 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
13514
13515 if (TARGET_GNU2_TLS)
13516 {
13517 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
13518
13519 if (GET_MODE (x) != Pmode)
13520 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13521
13522 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
13523 }
13524 break;
13525
13526 case TLS_MODEL_INITIAL_EXEC:
13527 if (TARGET_64BIT)
13528 {
13529 if (TARGET_SUN_TLS && !TARGET_X32)
13530 {
13531 /* The Sun linker took the AMD64 TLS spec literally
13532 and can only handle %rax as the destination of the
13533 initial-exec code sequence. */
13534
13535 dest = gen_reg_rtx (DImode);
13536 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
13537 return dest;
13538 }
13539
13540 /* Generate DImode references to avoid %fs:(%reg32)
13541 problems and linker IE->LE relaxation bug. */
13542 tp_mode = DImode;
13543 pic = NULL;
13544 type = UNSPEC_GOTNTPOFF;
13545 }
13546 else if (flag_pic)
13547 {
13548 if (reload_in_progress)
13549 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13550 pic = pic_offset_table_rtx;
13551 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
13552 }
13553 else if (!TARGET_ANY_GNU_TLS)
13554 {
13555 pic = gen_reg_rtx (Pmode);
13556 emit_insn (gen_set_got (pic));
13557 type = UNSPEC_GOTTPOFF;
13558 }
13559 else
13560 {
13561 pic = NULL;
13562 type = UNSPEC_INDNTPOFF;
13563 }
13564
13565 off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
13566 off = gen_rtx_CONST (tp_mode, off);
13567 if (pic)
13568 off = gen_rtx_PLUS (tp_mode, pic, off);
13569 off = gen_const_mem (tp_mode, off);
13570 set_mem_alias_set (off, ix86_GOT_alias_set ());
13571
13572 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
13573 {
13574 base = get_thread_pointer (tp_mode,
13575 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
13576 off = force_reg (tp_mode, off);
13577 return gen_rtx_PLUS (tp_mode, base, off);
13578 }
13579 else
13580 {
13581 base = get_thread_pointer (Pmode, true);
13582 dest = gen_reg_rtx (Pmode);
13583 emit_insn (ix86_gen_sub3 (dest, base, off));
13584 }
13585 break;
13586
13587 case TLS_MODEL_LOCAL_EXEC:
13588 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
13589 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
13590 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
13591 off = gen_rtx_CONST (Pmode, off);
13592
13593 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
13594 {
13595 base = get_thread_pointer (Pmode,
13596 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
13597 return gen_rtx_PLUS (Pmode, base, off);
13598 }
13599 else
13600 {
13601 base = get_thread_pointer (Pmode, true);
13602 dest = gen_reg_rtx (Pmode);
13603 emit_insn (ix86_gen_sub3 (dest, base, off));
13604 }
13605 break;
13606
13607 default:
13608 gcc_unreachable ();
13609 }
13610
13611 return dest;
13612 }
13613
13614 /* Create or return the unique __imp_DECL dllimport symbol corresponding
13615 to symbol DECL if BEIMPORT is true. Otherwise create or return the
13616 unique refptr-DECL symbol corresponding to symbol DECL. */
13617
13618 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
13619 htab_t dllimport_map;
13620
13621 static tree
13622 get_dllimport_decl (tree decl, bool beimport)
13623 {
13624 struct tree_map *h, in;
13625 void **loc;
13626 const char *name;
13627 const char *prefix;
13628 size_t namelen, prefixlen;
13629 char *imp_name;
13630 tree to;
13631 rtx rtl;
13632
13633 if (!dllimport_map)
13634 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
13635
13636 in.hash = htab_hash_pointer (decl);
13637 in.base.from = decl;
13638 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
13639 h = (struct tree_map *) *loc;
13640 if (h)
13641 return h->to;
13642
13643 *loc = h = ggc_alloc_tree_map ();
13644 h->hash = in.hash;
13645 h->base.from = decl;
13646 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
13647 VAR_DECL, NULL, ptr_type_node);
13648 DECL_ARTIFICIAL (to) = 1;
13649 DECL_IGNORED_P (to) = 1;
13650 DECL_EXTERNAL (to) = 1;
13651 TREE_READONLY (to) = 1;
13652
13653 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
13654 name = targetm.strip_name_encoding (name);
13655 if (beimport)
13656 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
13657 ? "*__imp_" : "*__imp__";
13658 else
13659 prefix = user_label_prefix[0] == 0 ? "*.refptr." : "*refptr.";
13660 namelen = strlen (name);
13661 prefixlen = strlen (prefix);
13662 imp_name = (char *) alloca (namelen + prefixlen + 1);
13663 memcpy (imp_name, prefix, prefixlen);
13664 memcpy (imp_name + prefixlen, name, namelen + 1);
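/* Illustrative sketch only (not taken from the surrounding sources): with an
empty user_label_prefix, a dllimport reference to "foo" is built here as
"*__imp_foo", while a refptr stub is built as "*.refptr.foo"; the leading
'*' asks the assembler-name machinery to emit the string verbatim, without
prepending another user label prefix. */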
13665
13666 name = ggc_alloc_string (imp_name, namelen + prefixlen);
13667 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
13668 SET_SYMBOL_REF_DECL (rtl, to);
13669 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL | SYMBOL_FLAG_STUBVAR;
13670 if (!beimport)
13671 {
13672 SYMBOL_REF_FLAGS (rtl) |= SYMBOL_FLAG_EXTERNAL;
13673 #ifdef SUB_TARGET_RECORD_STUB
13674 SUB_TARGET_RECORD_STUB (name);
13675 #endif
13676 }
13677
13678 rtl = gen_const_mem (Pmode, rtl);
13679 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
13680
13681 SET_DECL_RTL (to, rtl);
13682 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
13683
13684 return to;
13685 }
13686
13687 /* Expand SYMBOL into its corresponding far-address symbol.
13688 WANT_REG is true if we require the result be a register. */
13689
13690 static rtx
13691 legitimize_pe_coff_extern_decl (rtx symbol, bool want_reg)
13692 {
13693 tree imp_decl;
13694 rtx x;
13695
13696 gcc_assert (SYMBOL_REF_DECL (symbol));
13697 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), false);
13698
13699 x = DECL_RTL (imp_decl);
13700 if (want_reg)
13701 x = force_reg (Pmode, x);
13702 return x;
13703 }
13704
13705 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
13706 true if we require the result be a register. */
13707
13708 static rtx
13709 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
13710 {
13711 tree imp_decl;
13712 rtx x;
13713
13714 gcc_assert (SYMBOL_REF_DECL (symbol));
13715 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), true);
13716
13717 x = DECL_RTL (imp_decl);
13718 if (want_reg)
13719 x = force_reg (Pmode, x);
13720 return x;
13721 }
13722
13723 /* Expand ADDR into its corresponding dllimport or refptr symbol. INREG
13724 is true if we require the result be a register. */
13725
13726 static rtx
13727 legitimize_pe_coff_symbol (rtx addr, bool inreg)
13728 {
13729 if (!TARGET_PECOFF)
13730 return NULL_RTX;
13731
13732 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13733 {
13734 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
13735 return legitimize_dllimport_symbol (addr, inreg);
13736 if (GET_CODE (addr) == CONST
13737 && GET_CODE (XEXP (addr, 0)) == PLUS
13738 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
13739 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
13740 {
13741 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), inreg);
13742 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
13743 }
13744 }
13745
13746 if (ix86_cmodel != CM_LARGE_PIC && ix86_cmodel != CM_MEDIUM_PIC)
13747 return NULL_RTX;
13748 if (GET_CODE (addr) == SYMBOL_REF
13749 && !is_imported_p (addr)
13750 && SYMBOL_REF_EXTERNAL_P (addr)
13751 && SYMBOL_REF_DECL (addr))
13752 return legitimize_pe_coff_extern_decl (addr, inreg);
13753
13754 if (GET_CODE (addr) == CONST
13755 && GET_CODE (XEXP (addr, 0)) == PLUS
13756 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
13757 && !is_imported_p (XEXP (XEXP (addr, 0), 0))
13758 && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr, 0), 0))
13759 && SYMBOL_REF_DECL (XEXP (XEXP (addr, 0), 0)))
13760 {
13761 rtx t = legitimize_pe_coff_extern_decl (XEXP (XEXP (addr, 0), 0), inreg);
13762 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
13763 }
13764 return NULL_RTX;
13765 }
13766
13767 /* Try machine-dependent ways of modifying an illegitimate address
13768 to be legitimate. If we find one, return the new, valid address.
13769 This macro is used in only one place: `memory_address' in explow.c.
13770
13771 OLDX is the address as it was before break_out_memory_refs was called.
13772 In some cases it is useful to look at this to decide what needs to be done.
13773
13774 It is always safe for this macro to do nothing. It exists to recognize
13775 opportunities to optimize the output.
13776
13777 For the 80386, we handle X+REG by loading X into a register R and
13778 using R+REG. R will go in a general reg and indexing will be used.
13779 However, if REG is a broken-out memory address or multiplication,
13780 nothing needs to be done because REG can certainly go in a general reg.
13781
13782 When -fpic is used, special handling is needed for symbolic references.
13783 See comments by legitimize_pic_address in i386.c for details. */
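/* Illustrative example of the canonicalization performed below: an address
such as (plus (ashift (reg) (const_int 2)) (reg)) is rewritten into
(plus (mult (reg) (const_int 4)) (reg)), matching the scaled-index forms
that ix86_legitimate_address_p accepts. */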
13784
13785 static rtx
13786 ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
13787 enum machine_mode mode)
13788 {
13789 int changed = 0;
13790 unsigned log;
13791
13792 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
13793 if (log)
13794 return legitimize_tls_address (x, (enum tls_model) log, false);
13795 if (GET_CODE (x) == CONST
13796 && GET_CODE (XEXP (x, 0)) == PLUS
13797 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
13798 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
13799 {
13800 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
13801 (enum tls_model) log, false);
13802 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
13803 }
13804
13805 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13806 {
13807 rtx tmp = legitimize_pe_coff_symbol (x, true);
13808 if (tmp)
13809 return tmp;
13810 }
13811
13812 if (flag_pic && SYMBOLIC_CONST (x))
13813 return legitimize_pic_address (x, 0);
13814
13815 #if TARGET_MACHO
13816 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
13817 return machopic_indirect_data_reference (x, 0);
13818 #endif
13819
13820 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
13821 if (GET_CODE (x) == ASHIFT
13822 && CONST_INT_P (XEXP (x, 1))
13823 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
13824 {
13825 changed = 1;
13826 log = INTVAL (XEXP (x, 1));
13827 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
13828 GEN_INT (1 << log));
13829 }
13830
13831 if (GET_CODE (x) == PLUS)
13832 {
13833 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
13834
13835 if (GET_CODE (XEXP (x, 0)) == ASHIFT
13836 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
13837 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
13838 {
13839 changed = 1;
13840 log = INTVAL (XEXP (XEXP (x, 0), 1));
13841 XEXP (x, 0) = gen_rtx_MULT (Pmode,
13842 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
13843 GEN_INT (1 << log));
13844 }
13845
13846 if (GET_CODE (XEXP (x, 1)) == ASHIFT
13847 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
13848 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
13849 {
13850 changed = 1;
13851 log = INTVAL (XEXP (XEXP (x, 1), 1));
13852 XEXP (x, 1) = gen_rtx_MULT (Pmode,
13853 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
13854 GEN_INT (1 << log));
13855 }
13856
13857 /* Put multiply first if it isn't already. */
13858 if (GET_CODE (XEXP (x, 1)) == MULT)
13859 {
13860 rtx tmp = XEXP (x, 0);
13861 XEXP (x, 0) = XEXP (x, 1);
13862 XEXP (x, 1) = tmp;
13863 changed = 1;
13864 }
13865
13866 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
13867 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
13868 created by virtual register instantiation, register elimination, and
13869 similar optimizations. */
13870 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
13871 {
13872 changed = 1;
13873 x = gen_rtx_PLUS (Pmode,
13874 gen_rtx_PLUS (Pmode, XEXP (x, 0),
13875 XEXP (XEXP (x, 1), 0)),
13876 XEXP (XEXP (x, 1), 1));
13877 }
13878
13879 /* Canonicalize
13880 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
13881 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
13882 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
13883 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
13884 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
13885 && CONSTANT_P (XEXP (x, 1)))
13886 {
13887 rtx constant;
13888 rtx other = NULL_RTX;
13889
13890 if (CONST_INT_P (XEXP (x, 1)))
13891 {
13892 constant = XEXP (x, 1);
13893 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
13894 }
13895 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
13896 {
13897 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
13898 other = XEXP (x, 1);
13899 }
13900 else
13901 constant = 0;
13902
13903 if (constant)
13904 {
13905 changed = 1;
13906 x = gen_rtx_PLUS (Pmode,
13907 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
13908 XEXP (XEXP (XEXP (x, 0), 1), 0)),
13909 plus_constant (Pmode, other,
13910 INTVAL (constant)));
13911 }
13912 }
13913
13914 if (changed && ix86_legitimate_address_p (mode, x, false))
13915 return x;
13916
13917 if (GET_CODE (XEXP (x, 0)) == MULT)
13918 {
13919 changed = 1;
13920 XEXP (x, 0) = copy_addr_to_reg (XEXP (x, 0));
13921 }
13922
13923 if (GET_CODE (XEXP (x, 1)) == MULT)
13924 {
13925 changed = 1;
13926 XEXP (x, 1) = copy_addr_to_reg (XEXP (x, 1));
13927 }
13928
13929 if (changed
13930 && REG_P (XEXP (x, 1))
13931 && REG_P (XEXP (x, 0)))
13932 return x;
13933
13934 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
13935 {
13936 changed = 1;
13937 x = legitimize_pic_address (x, 0);
13938 }
13939
13940 if (changed && ix86_legitimate_address_p (mode, x, false))
13941 return x;
13942
13943 if (REG_P (XEXP (x, 0)))
13944 {
13945 rtx temp = gen_reg_rtx (Pmode);
13946 rtx val = force_operand (XEXP (x, 1), temp);
13947 if (val != temp)
13948 {
13949 val = convert_to_mode (Pmode, val, 1);
13950 emit_move_insn (temp, val);
13951 }
13952
13953 XEXP (x, 1) = temp;
13954 return x;
13955 }
13956
13957 else if (REG_P (XEXP (x, 1)))
13958 {
13959 rtx temp = gen_reg_rtx (Pmode);
13960 rtx val = force_operand (XEXP (x, 0), temp);
13961 if (val != temp)
13962 {
13963 val = convert_to_mode (Pmode, val, 1);
13964 emit_move_insn (temp, val);
13965 }
13966
13967 XEXP (x, 0) = temp;
13968 return x;
13969 }
13970 }
13971
13972 return x;
13973 }
13974 \f
13975 /* Print an integer constant expression in assembler syntax. Addition
13976 and subtraction are the only arithmetic that may appear in these
13977 expressions. FILE is the stdio stream to write to, X is the rtx, and
13978 CODE is the operand print code from the output string. */
13979
13980 static void
13981 output_pic_addr_const (FILE *file, rtx x, int code)
13982 {
13983 char buf[256];
13984
13985 switch (GET_CODE (x))
13986 {
13987 case PC:
13988 gcc_assert (flag_pic);
13989 putc ('.', file);
13990 break;
13991
13992 case SYMBOL_REF:
13993 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
13994 output_addr_const (file, x);
13995 else
13996 {
13997 const char *name = XSTR (x, 0);
13998
13999 /* Mark the decl as referenced so that cgraph will
14000 output the function. */
14001 if (SYMBOL_REF_DECL (x))
14002 mark_decl_referenced (SYMBOL_REF_DECL (x));
14003
14004 #if TARGET_MACHO
14005 if (MACHOPIC_INDIRECT
14006 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
14007 name = machopic_indirection_name (x, /*stub_p=*/true);
14008 #endif
14009 assemble_name (file, name);
14010 }
14011 if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF)
14012 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
14013 fputs ("@PLT", file);
14014 break;
14015
14016 case LABEL_REF:
14017 x = XEXP (x, 0);
14018 /* FALLTHRU */
14019 case CODE_LABEL:
14020 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
14021 assemble_name (asm_out_file, buf);
14022 break;
14023
14024 case CONST_INT:
14025 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
14026 break;
14027
14028 case CONST:
14029 /* This used to output parentheses around the expression,
14030 but that does not work on the 386 (either ATT or BSD assembler). */
14031 output_pic_addr_const (file, XEXP (x, 0), code);
14032 break;
14033
14034 case CONST_DOUBLE:
14035 if (GET_MODE (x) == VOIDmode)
14036 {
14037 /* We can use %d if the number is <32 bits and positive. */
14038 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
14039 fprintf (file, "0x%lx%08lx",
14040 (unsigned long) CONST_DOUBLE_HIGH (x),
14041 (unsigned long) CONST_DOUBLE_LOW (x));
14042 else
14043 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
14044 }
14045 else
14046 /* We can't handle floating point constants;
14047 TARGET_PRINT_OPERAND must handle them. */
14048 output_operand_lossage ("floating constant misused");
14049 break;
14050
14051 case PLUS:
14052 /* Some assemblers need integer constants to appear first. */
14053 if (CONST_INT_P (XEXP (x, 0)))
14054 {
14055 output_pic_addr_const (file, XEXP (x, 0), code);
14056 putc ('+', file);
14057 output_pic_addr_const (file, XEXP (x, 1), code);
14058 }
14059 else
14060 {
14061 gcc_assert (CONST_INT_P (XEXP (x, 1)));
14062 output_pic_addr_const (file, XEXP (x, 1), code);
14063 putc ('+', file);
14064 output_pic_addr_const (file, XEXP (x, 0), code);
14065 }
14066 break;
14067
14068 case MINUS:
14069 if (!TARGET_MACHO)
14070 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
14071 output_pic_addr_const (file, XEXP (x, 0), code);
14072 putc ('-', file);
14073 output_pic_addr_const (file, XEXP (x, 1), code);
14074 if (!TARGET_MACHO)
14075 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
14076 break;
14077
14078 case UNSPEC:
14079 if (XINT (x, 1) == UNSPEC_STACK_CHECK)
14080 {
14081 bool f = i386_asm_output_addr_const_extra (file, x);
14082 gcc_assert (f);
14083 break;
14084 }
14085
14086 gcc_assert (XVECLEN (x, 0) == 1);
14087 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
14088 switch (XINT (x, 1))
14089 {
14090 case UNSPEC_GOT:
14091 fputs ("@GOT", file);
14092 break;
14093 case UNSPEC_GOTOFF:
14094 fputs ("@GOTOFF", file);
14095 break;
14096 case UNSPEC_PLTOFF:
14097 fputs ("@PLTOFF", file);
14098 break;
14099 case UNSPEC_PCREL:
14100 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14101 "(%rip)" : "[rip]", file);
14102 break;
14103 case UNSPEC_GOTPCREL:
14104 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14105 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
14106 break;
14107 case UNSPEC_GOTTPOFF:
14108 /* FIXME: This might be @TPOFF in Sun ld too. */
14109 fputs ("@gottpoff", file);
14110 break;
14111 case UNSPEC_TPOFF:
14112 fputs ("@tpoff", file);
14113 break;
14114 case UNSPEC_NTPOFF:
14115 if (TARGET_64BIT)
14116 fputs ("@tpoff", file);
14117 else
14118 fputs ("@ntpoff", file);
14119 break;
14120 case UNSPEC_DTPOFF:
14121 fputs ("@dtpoff", file);
14122 break;
14123 case UNSPEC_GOTNTPOFF:
14124 if (TARGET_64BIT)
14125 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14126 "@gottpoff(%rip)": "@gottpoff[rip]", file);
14127 else
14128 fputs ("@gotntpoff", file);
14129 break;
14130 case UNSPEC_INDNTPOFF:
14131 fputs ("@indntpoff", file);
14132 break;
14133 #if TARGET_MACHO
14134 case UNSPEC_MACHOPIC_OFFSET:
14135 putc ('-', file);
14136 machopic_output_function_base_name (file);
14137 break;
14138 #endif
14139 default:
14140 output_operand_lossage ("invalid UNSPEC as operand");
14141 break;
14142 }
14143 break;
14144
14145 default:
14146 output_operand_lossage ("invalid expression as operand");
14147 }
14148 }
14149
14150 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
14151 We need to emit DTP-relative relocations. */
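/* For example (assuming the common ELF spelling of ASM_LONG as ".long"),
a size-8 request for symbol "foo" comes out roughly as
.long foo@dtpoff, 0
i.e. the 32-bit DTP-relative offset padded with a zero upper word. */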
14152
14153 static void ATTRIBUTE_UNUSED
14154 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
14155 {
14156 fputs (ASM_LONG, file);
14157 output_addr_const (file, x);
14158 fputs ("@dtpoff", file);
14159 switch (size)
14160 {
14161 case 4:
14162 break;
14163 case 8:
14164 fputs (", 0", file);
14165 break;
14166 default:
14167 gcc_unreachable ();
14168 }
14169 }
14170
14171 /* Return true if X is a representation of the PIC register. This copes
14172 with calls from ix86_find_base_term, where the register might have
14173 been replaced by a cselib value. */
14174
14175 static bool
14176 ix86_pic_register_p (rtx x)
14177 {
14178 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
14179 return (pic_offset_table_rtx
14180 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
14181 else
14182 return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
14183 }
14184
14185 /* Helper function for ix86_delegitimize_address.
14186 Attempt to delegitimize TLS local-exec accesses. */
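/* Sketch of the typical case: a %fs:/%gs:-based access whose displacement
wraps (unspec [x] UNSPEC_NTPOFF) is mapped back to the bare SYMBOL_REF
for x, with any base or scaled index terms re-attached around it. */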
14187
14188 static rtx
14189 ix86_delegitimize_tls_address (rtx orig_x)
14190 {
14191 rtx x = orig_x, unspec;
14192 struct ix86_address addr;
14193
14194 if (!TARGET_TLS_DIRECT_SEG_REFS)
14195 return orig_x;
14196 if (MEM_P (x))
14197 x = XEXP (x, 0);
14198 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
14199 return orig_x;
14200 if (ix86_decompose_address (x, &addr) == 0
14201 || addr.seg != DEFAULT_TLS_SEG_REG
14202 || addr.disp == NULL_RTX
14203 || GET_CODE (addr.disp) != CONST)
14204 return orig_x;
14205 unspec = XEXP (addr.disp, 0);
14206 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
14207 unspec = XEXP (unspec, 0);
14208 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
14209 return orig_x;
14210 x = XVECEXP (unspec, 0, 0);
14211 gcc_assert (GET_CODE (x) == SYMBOL_REF);
14212 if (unspec != XEXP (addr.disp, 0))
14213 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
14214 if (addr.index)
14215 {
14216 rtx idx = addr.index;
14217 if (addr.scale != 1)
14218 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
14219 x = gen_rtx_PLUS (Pmode, idx, x);
14220 }
14221 if (addr.base)
14222 x = gen_rtx_PLUS (Pmode, addr.base, x);
14223 if (MEM_P (orig_x))
14224 x = replace_equiv_address_nv (orig_x, x);
14225 return x;
14226 }
14227
14228 /* In the name of slightly smaller debug output, and to cater to
14229 general assembler lossage, recognize PIC+GOTOFF and turn it back
14230 into a direct symbol reference.
14231
14232 On Darwin, this is necessary to avoid a crash, because Darwin
14233 has a different PIC label for each routine but the DWARF debugging
14234 information is not associated with any particular routine, so it's
14235 necessary to remove references to the PIC label from RTL stored by
14236 the DWARF output code. */
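/* Typical -m32 case, as a rough sketch: (plus (reg:SI ebx)
(const (unspec [foo] UNSPEC_GOTOFF))) is turned back into the plain
(symbol_ref "foo") so the debug output refers to the symbol directly. */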
14237
14238 static rtx
14239 ix86_delegitimize_address (rtx x)
14240 {
14241 rtx orig_x = delegitimize_mem_from_attrs (x);
14242 /* addend is NULL or some rtx if x is something+GOTOFF where
14243 something doesn't include the PIC register. */
14244 rtx addend = NULL_RTX;
14245 /* reg_addend is NULL or a multiple of some register. */
14246 rtx reg_addend = NULL_RTX;
14247 /* const_addend is NULL or a const_int. */
14248 rtx const_addend = NULL_RTX;
14249 /* This is the result, or NULL. */
14250 rtx result = NULL_RTX;
14251
14252 x = orig_x;
14253
14254 if (MEM_P (x))
14255 x = XEXP (x, 0);
14256
14257 if (TARGET_64BIT)
14258 {
14259 if (GET_CODE (x) == CONST
14260 && GET_CODE (XEXP (x, 0)) == PLUS
14261 && GET_MODE (XEXP (x, 0)) == Pmode
14262 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
14263 && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
14264 && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
14265 {
14266 rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
14267 x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
14268 if (MEM_P (orig_x))
14269 x = replace_equiv_address_nv (orig_x, x);
14270 return x;
14271 }
14272
14273 if (GET_CODE (x) == CONST
14274 && GET_CODE (XEXP (x, 0)) == UNSPEC
14275 && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL
14276 || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)
14277 && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL))
14278 {
14279 x = XVECEXP (XEXP (x, 0), 0, 0);
14280 if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
14281 {
14282 x = simplify_gen_subreg (GET_MODE (orig_x), x,
14283 GET_MODE (x), 0);
14284 if (x == NULL_RTX)
14285 return orig_x;
14286 }
14287 return x;
14288 }
14289
14290 if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC)
14291 return ix86_delegitimize_tls_address (orig_x);
14292
14293 /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
14294 and -mcmodel=medium -fpic. */
14295 }
14296
14297 if (GET_CODE (x) != PLUS
14298 || GET_CODE (XEXP (x, 1)) != CONST)
14299 return ix86_delegitimize_tls_address (orig_x);
14300
14301 if (ix86_pic_register_p (XEXP (x, 0)))
14302 /* %ebx + GOT/GOTOFF */
14303 ;
14304 else if (GET_CODE (XEXP (x, 0)) == PLUS)
14305 {
14306 /* %ebx + %reg * scale + GOT/GOTOFF */
14307 reg_addend = XEXP (x, 0);
14308 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
14309 reg_addend = XEXP (reg_addend, 1);
14310 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
14311 reg_addend = XEXP (reg_addend, 0);
14312 else
14313 {
14314 reg_addend = NULL_RTX;
14315 addend = XEXP (x, 0);
14316 }
14317 }
14318 else
14319 addend = XEXP (x, 0);
14320
14321 x = XEXP (XEXP (x, 1), 0);
14322 if (GET_CODE (x) == PLUS
14323 && CONST_INT_P (XEXP (x, 1)))
14324 {
14325 const_addend = XEXP (x, 1);
14326 x = XEXP (x, 0);
14327 }
14328
14329 if (GET_CODE (x) == UNSPEC
14330 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
14331 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))
14332 || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC
14333 && !MEM_P (orig_x) && !addend)))
14334 result = XVECEXP (x, 0, 0);
14335
14336 if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x)
14337 && !MEM_P (orig_x))
14338 result = XVECEXP (x, 0, 0);
14339
14340 if (! result)
14341 return ix86_delegitimize_tls_address (orig_x);
14342
14343 if (const_addend)
14344 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
14345 if (reg_addend)
14346 result = gen_rtx_PLUS (Pmode, reg_addend, result);
14347 if (addend)
14348 {
14349 /* If the rest of original X doesn't involve the PIC register, add
14350 addend and subtract pic_offset_table_rtx. This can happen e.g.
14351 for code like:
14352 leal (%ebx, %ecx, 4), %ecx
14353 ...
14354 movl foo@GOTOFF(%ecx), %edx
14355 in which case we return (%ecx - %ebx) + foo. */
14356 if (pic_offset_table_rtx)
14357 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
14358 pic_offset_table_rtx),
14359 result);
14360 else
14361 return orig_x;
14362 }
14363 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
14364 {
14365 result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
14366 if (result == NULL_RTX)
14367 return orig_x;
14368 }
14369 return result;
14370 }
14371
14372 /* If X is a machine specific address (i.e. a symbol or label being
14373 referenced as a displacement from the GOT implemented using an
14374 UNSPEC), then return the base term. Otherwise return X. */
14375
14376 rtx
14377 ix86_find_base_term (rtx x)
14378 {
14379 rtx term;
14380
14381 if (TARGET_64BIT)
14382 {
14383 if (GET_CODE (x) != CONST)
14384 return x;
14385 term = XEXP (x, 0);
14386 if (GET_CODE (term) == PLUS
14387 && (CONST_INT_P (XEXP (term, 1))
14388 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
14389 term = XEXP (term, 0);
14390 if (GET_CODE (term) != UNSPEC
14391 || (XINT (term, 1) != UNSPEC_GOTPCREL
14392 && XINT (term, 1) != UNSPEC_PCREL))
14393 return x;
14394
14395 return XVECEXP (term, 0, 0);
14396 }
14397
14398 return ix86_delegitimize_address (x);
14399 }
14400 \f
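/* Print to FILE the condition-code suffix ("e", "ne", "g", "b", ...)
implied by comparison CODE in mode MODE. REVERSE asks for the reversed
condition; FP selects the fcmov-style spellings ("nbe", "nb", "u", "nu")
where they differ from the integer ones. */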
14401 static void
14402 put_condition_code (enum rtx_code code, enum machine_mode mode, bool reverse,
14403 bool fp, FILE *file)
14404 {
14405 const char *suffix;
14406
14407 if (mode == CCFPmode || mode == CCFPUmode)
14408 {
14409 code = ix86_fp_compare_code_to_integer (code);
14410 mode = CCmode;
14411 }
14412 if (reverse)
14413 code = reverse_condition (code);
14414
14415 switch (code)
14416 {
14417 case EQ:
14418 switch (mode)
14419 {
14420 case CCAmode:
14421 suffix = "a";
14422 break;
14423
14424 case CCCmode:
14425 suffix = "c";
14426 break;
14427
14428 case CCOmode:
14429 suffix = "o";
14430 break;
14431
14432 case CCSmode:
14433 suffix = "s";
14434 break;
14435
14436 default:
14437 suffix = "e";
14438 }
14439 break;
14440 case NE:
14441 switch (mode)
14442 {
14443 case CCAmode:
14444 suffix = "na";
14445 break;
14446
14447 case CCCmode:
14448 suffix = "nc";
14449 break;
14450
14451 case CCOmode:
14452 suffix = "no";
14453 break;
14454
14455 case CCSmode:
14456 suffix = "ns";
14457 break;
14458
14459 default:
14460 suffix = "ne";
14461 }
14462 break;
14463 case GT:
14464 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
14465 suffix = "g";
14466 break;
14467 case GTU:
14468 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
14469 Those same assemblers have the same but opposite lossage on cmov. */
14470 if (mode == CCmode)
14471 suffix = fp ? "nbe" : "a";
14472 else
14473 gcc_unreachable ();
14474 break;
14475 case LT:
14476 switch (mode)
14477 {
14478 case CCNOmode:
14479 case CCGOCmode:
14480 suffix = "s";
14481 break;
14482
14483 case CCmode:
14484 case CCGCmode:
14485 suffix = "l";
14486 break;
14487
14488 default:
14489 gcc_unreachable ();
14490 }
14491 break;
14492 case LTU:
14493 if (mode == CCmode)
14494 suffix = "b";
14495 else if (mode == CCCmode)
14496 suffix = "c";
14497 else
14498 gcc_unreachable ();
14499 break;
14500 case GE:
14501 switch (mode)
14502 {
14503 case CCNOmode:
14504 case CCGOCmode:
14505 suffix = "ns";
14506 break;
14507
14508 case CCmode:
14509 case CCGCmode:
14510 suffix = "ge";
14511 break;
14512
14513 default:
14514 gcc_unreachable ();
14515 }
14516 break;
14517 case GEU:
14518 if (mode == CCmode)
14519 suffix = fp ? "nb" : "ae";
14520 else if (mode == CCCmode)
14521 suffix = "nc";
14522 else
14523 gcc_unreachable ();
14524 break;
14525 case LE:
14526 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
14527 suffix = "le";
14528 break;
14529 case LEU:
14530 if (mode == CCmode)
14531 suffix = "be";
14532 else
14533 gcc_unreachable ();
14534 break;
14535 case UNORDERED:
14536 suffix = fp ? "u" : "p";
14537 break;
14538 case ORDERED:
14539 suffix = fp ? "nu" : "np";
14540 break;
14541 default:
14542 gcc_unreachable ();
14543 }
14544 fputs (suffix, file);
14545 }
14546
14547 /* Print the name of register X to FILE based on its machine mode and number.
14548 If CODE is 'w', pretend the mode is HImode.
14549 If CODE is 'b', pretend the mode is QImode.
14550 If CODE is 'k', pretend the mode is SImode.
14551 If CODE is 'q', pretend the mode is DImode.
14552 If CODE is 'x', pretend the mode is V4SFmode.
14553 If CODE is 't', pretend the mode is V8SFmode.
14554 If CODE is 'g', pretend the mode is V16SFmode.
14555 If CODE is 'h', pretend the reg is the 'high' byte register.
14556 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
14557 If CODE is 'd', duplicate the operand for AVX instruction.
14558 */
14559
14560 void
14561 print_reg (rtx x, int code, FILE *file)
14562 {
14563 const char *reg;
14564 unsigned int regno;
14565 bool duplicated = code == 'd' && TARGET_AVX;
14566
14567 if (ASSEMBLER_DIALECT == ASM_ATT)
14568 putc ('%', file);
14569
14570 if (x == pc_rtx)
14571 {
14572 gcc_assert (TARGET_64BIT);
14573 fputs ("rip", file);
14574 return;
14575 }
14576
14577 regno = true_regnum (x);
14578 gcc_assert (regno != ARG_POINTER_REGNUM
14579 && regno != FRAME_POINTER_REGNUM
14580 && regno != FLAGS_REG
14581 && regno != FPSR_REG
14582 && regno != FPCR_REG);
14583
14584 if (code == 'w' || MMX_REG_P (x))
14585 code = 2;
14586 else if (code == 'b')
14587 code = 1;
14588 else if (code == 'k')
14589 code = 4;
14590 else if (code == 'q')
14591 code = 8;
14592 else if (code == 'y')
14593 code = 3;
14594 else if (code == 'h')
14595 code = 0;
14596 else if (code == 'x')
14597 code = 16;
14598 else if (code == 't')
14599 code = 32;
14600 else if (code == 'g')
14601 code = 64;
14602 else
14603 code = GET_MODE_SIZE (GET_MODE (x));
14604
14605 /* Irritatingly, AMD extended registers use a different naming convention
14606 from the normal registers: "r%d[bwd]" */
14607 if (REX_INT_REGNO_P (regno))
14608 {
14609 gcc_assert (TARGET_64BIT);
14610 putc ('r', file);
14611 fprint_ul (file, regno - FIRST_REX_INT_REG + 8);
14612 switch (code)
14613 {
14614 case 0:
14615 error ("extended registers have no high halves");
14616 break;
14617 case 1:
14618 putc ('b', file);
14619 break;
14620 case 2:
14621 putc ('w', file);
14622 break;
14623 case 4:
14624 putc ('d', file);
14625 break;
14626 case 8:
14627 /* no suffix */
14628 break;
14629 default:
14630 error ("unsupported operand size for extended register");
14631 break;
14632 }
14633 return;
14634 }
14635
14636 reg = NULL;
14637 switch (code)
14638 {
14639 case 3:
14640 if (STACK_TOP_P (x))
14641 {
14642 reg = "st(0)";
14643 break;
14644 }
14645 /* FALLTHRU */
14646 case 8:
14647 case 4:
14648 case 12:
14649 if (! ANY_FP_REG_P (x))
14650 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
14651 /* FALLTHRU */
14652 case 16:
14653 case 2:
14654 normal:
14655 reg = hi_reg_name[regno];
14656 break;
14657 case 1:
14658 if (regno >= ARRAY_SIZE (qi_reg_name))
14659 goto normal;
14660 reg = qi_reg_name[regno];
14661 break;
14662 case 0:
14663 if (regno >= ARRAY_SIZE (qi_high_reg_name))
14664 goto normal;
14665 reg = qi_high_reg_name[regno];
14666 break;
14667 case 32:
14668 if (SSE_REG_P (x))
14669 {
14670 gcc_assert (!duplicated);
14671 putc ('y', file);
14672 fputs (hi_reg_name[regno] + 1, file);
14673 return;
14674 }
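/* FALLTHRU */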
14675 case 64:
14676 if (SSE_REG_P (x))
14677 {
14678 gcc_assert (!duplicated);
14679 putc ('z', file);
14680 fputs (hi_reg_name[REGNO (x)] + 1, file);
14681 return;
14682 }
14683 break;
14684 default:
14685 gcc_unreachable ();
14686 }
14687
14688 fputs (reg, file);
14689 if (duplicated)
14690 {
14691 if (ASSEMBLER_DIALECT == ASM_ATT)
14692 fprintf (file, ", %%%s", reg);
14693 else
14694 fprintf (file, ", %s", reg);
14695 }
14696 }
14697
14698 /* Locate some local-dynamic symbol still in use by this function
14699 so that we can print its name in some tls_local_dynamic_base
14700 pattern. */
14701
14702 static int
14703 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
14704 {
14705 rtx x = *px;
14706
14707 if (GET_CODE (x) == SYMBOL_REF
14708 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
14709 {
14710 cfun->machine->some_ld_name = XSTR (x, 0);
14711 return 1;
14712 }
14713
14714 return 0;
14715 }
14716
14717 static const char *
14718 get_some_local_dynamic_name (void)
14719 {
14720 rtx insn;
14721
14722 if (cfun->machine->some_ld_name)
14723 return cfun->machine->some_ld_name;
14724
14725 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
14726 if (NONDEBUG_INSN_P (insn)
14727 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
14728 return cfun->machine->some_ld_name;
14729
14730 return NULL;
14731 }
14732
14733 /* Meaning of CODE:
14734 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
14735 C -- print opcode suffix for set/cmov insn.
14736 c -- like C, but print reversed condition
14737 F,f -- likewise, but for floating-point.
14738 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
14739 otherwise nothing
14740 R -- print embedded rounding and sae.
14741 r -- print only sae.
14742 z -- print the opcode suffix for the size of the current operand.
14743 Z -- likewise, with special suffixes for x87 instructions.
14744 * -- print a star (in certain assembler syntax)
14745 A -- print an absolute memory reference.
14746 E -- print address with DImode register names if TARGET_64BIT.
14747 w -- print the operand as if it's a "word" (HImode) even if it isn't.
14748 s -- print a shift double count, followed by the assembler's argument
14749 delimiter.
14750 b -- print the QImode name of the register for the indicated operand.
14751 %b0 would print %al if operands[0] is reg 0.
14752 w -- likewise, print the HImode name of the register.
14753 k -- likewise, print the SImode name of the register.
14754 q -- likewise, print the DImode name of the register.
14755 x -- likewise, print the V4SFmode name of the register.
14756 t -- likewise, print the V8SFmode name of the register.
14757 g -- likewise, print the V16SFmode name of the register.
14758 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
14759 y -- print "st(0)" instead of "st" as a register.
14760 d -- print duplicated register operand for AVX instruction.
14761 D -- print condition for SSE cmp instruction.
14762 P -- if PIC, print an @PLT suffix.
14763 p -- print raw symbol name.
14764 X -- don't print any sort of PIC '@' suffix for a symbol.
14765 & -- print some in-use local-dynamic symbol name.
14766 H -- print a memory address offset by 8; used for sse high-parts
14767 Y -- print condition for XOP pcom* instruction.
14768 + -- print a branch hint as 'cs' or 'ds' prefix
14769 ; -- print a semicolon (after prefixes due to bug in older gas).
14770 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
14771 @ -- print a segment register of thread base pointer load
14772 ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
14773 */
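/* Usage sketch (the real templates live in the machine description; the
exact insn shown here is only illustrative): in a template such as
"add%z0\t{%1, %0|%0, %1}", %z0 expands to the size suffix of operand 0
("l" for an SImode operand), and the {att|intel} braces select the
operand order for the active assembler dialect. */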
14774
14775 void
14776 ix86_print_operand (FILE *file, rtx x, int code)
14777 {
14778 if (code)
14779 {
14780 switch (code)
14781 {
14782 case 'A':
14783 switch (ASSEMBLER_DIALECT)
14784 {
14785 case ASM_ATT:
14786 putc ('*', file);
14787 break;
14788
14789 case ASM_INTEL:
14790 /* Intel syntax. For absolute addresses, registers should not
14791 be surrounded by brackets. */
14792 if (!REG_P (x))
14793 {
14794 putc ('[', file);
14795 ix86_print_operand (file, x, 0);
14796 putc (']', file);
14797 return;
14798 }
14799 break;
14800
14801 default:
14802 gcc_unreachable ();
14803 }
14804
14805 ix86_print_operand (file, x, 0);
14806 return;
14807
14808 case 'E':
14809 /* Wrap address in an UNSPEC to declare special handling. */
14810 if (TARGET_64BIT)
14811 x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);
14812
14813 output_address (x);
14814 return;
14815
14816 case 'L':
14817 if (ASSEMBLER_DIALECT == ASM_ATT)
14818 putc ('l', file);
14819 return;
14820
14821 case 'W':
14822 if (ASSEMBLER_DIALECT == ASM_ATT)
14823 putc ('w', file);
14824 return;
14825
14826 case 'B':
14827 if (ASSEMBLER_DIALECT == ASM_ATT)
14828 putc ('b', file);
14829 return;
14830
14831 case 'Q':
14832 if (ASSEMBLER_DIALECT == ASM_ATT)
14833 putc ('l', file);
14834 return;
14835
14836 case 'S':
14837 if (ASSEMBLER_DIALECT == ASM_ATT)
14838 putc ('s', file);
14839 return;
14840
14841 case 'T':
14842 if (ASSEMBLER_DIALECT == ASM_ATT)
14843 putc ('t', file);
14844 return;
14845
14846 case 'O':
14847 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14848 if (ASSEMBLER_DIALECT != ASM_ATT)
14849 return;
14850
14851 switch (GET_MODE_SIZE (GET_MODE (x)))
14852 {
14853 case 2:
14854 putc ('w', file);
14855 break;
14856
14857 case 4:
14858 putc ('l', file);
14859 break;
14860
14861 case 8:
14862 putc ('q', file);
14863 break;
14864
14865 default:
14866 output_operand_lossage
14867 ("invalid operand size for operand code 'O'");
14868 return;
14869 }
14870
14871 putc ('.', file);
14872 #endif
14873 return;
14874
14875 case 'z':
14876 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
14877 {
14878 /* Opcodes don't get size suffixes if using Intel opcodes. */
14879 if (ASSEMBLER_DIALECT == ASM_INTEL)
14880 return;
14881
14882 switch (GET_MODE_SIZE (GET_MODE (x)))
14883 {
14884 case 1:
14885 putc ('b', file);
14886 return;
14887
14888 case 2:
14889 putc ('w', file);
14890 return;
14891
14892 case 4:
14893 putc ('l', file);
14894 return;
14895
14896 case 8:
14897 putc ('q', file);
14898 return;
14899
14900 default:
14901 output_operand_lossage
14902 ("invalid operand size for operand code 'z'");
14903 return;
14904 }
14905 }
14906
14907 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14908 warning
14909 (0, "non-integer operand used with operand code 'z'");
14910 /* FALLTHRU */
14911
14912 case 'Z':
14913 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
14914 if (ASSEMBLER_DIALECT == ASM_INTEL)
14915 return;
14916
14917 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
14918 {
14919 switch (GET_MODE_SIZE (GET_MODE (x)))
14920 {
14921 case 2:
14922 #ifdef HAVE_AS_IX86_FILDS
14923 putc ('s', file);
14924 #endif
14925 return;
14926
14927 case 4:
14928 putc ('l', file);
14929 return;
14930
14931 case 8:
14932 #ifdef HAVE_AS_IX86_FILDQ
14933 putc ('q', file);
14934 #else
14935 fputs ("ll", file);
14936 #endif
14937 return;
14938
14939 default:
14940 break;
14941 }
14942 }
14943 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14944 {
14945 /* 387 opcodes don't get size suffixes
14946 if the operands are registers. */
14947 if (STACK_REG_P (x))
14948 return;
14949
14950 switch (GET_MODE_SIZE (GET_MODE (x)))
14951 {
14952 case 4:
14953 putc ('s', file);
14954 return;
14955
14956 case 8:
14957 putc ('l', file);
14958 return;
14959
14960 case 12:
14961 case 16:
14962 putc ('t', file);
14963 return;
14964
14965 default:
14966 break;
14967 }
14968 }
14969 else
14970 {
14971 output_operand_lossage
14972 ("invalid operand type used with operand code 'Z'");
14973 return;
14974 }
14975
14976 output_operand_lossage
14977 ("invalid operand size for operand code 'Z'");
14978 return;
14979
14980 case 'd':
14981 case 'b':
14982 case 'w':
14983 case 'k':
14984 case 'q':
14985 case 'h':
14986 case 't':
14987 case 'g':
14988 case 'y':
14989 case 'x':
14990 case 'X':
14991 case 'P':
14992 case 'p':
14993 break;
14994
14995 case 's':
14996 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
14997 {
14998 ix86_print_operand (file, x, 0);
14999 fputs (", ", file);
15000 }
15001 return;
15002
15003 case 'Y':
15004 switch (GET_CODE (x))
15005 {
15006 case NE:
15007 fputs ("neq", file);
15008 break;
15009 case EQ:
15010 fputs ("eq", file);
15011 break;
15012 case GE:
15013 case GEU:
15014 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
15015 break;
15016 case GT:
15017 case GTU:
15018 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
15019 break;
15020 case LE:
15021 case LEU:
15022 fputs ("le", file);
15023 break;
15024 case LT:
15025 case LTU:
15026 fputs ("lt", file);
15027 break;
15028 case UNORDERED:
15029 fputs ("unord", file);
15030 break;
15031 case ORDERED:
15032 fputs ("ord", file);
15033 break;
15034 case UNEQ:
15035 fputs ("ueq", file);
15036 break;
15037 case UNGE:
15038 fputs ("nlt", file);
15039 break;
15040 case UNGT:
15041 fputs ("nle", file);
15042 break;
15043 case UNLE:
15044 fputs ("ule", file);
15045 break;
15046 case UNLT:
15047 fputs ("ult", file);
15048 break;
15049 case LTGT:
15050 fputs ("une", file);
15051 break;
15052 default:
15053 output_operand_lossage ("operand is not a condition code, "
15054 "invalid operand code 'Y'");
15055 return;
15056 }
15057 return;
15058
15059 case 'D':
15060 /* Little bit of braindamage here. The SSE compare instructions
15061 use completely different names for the comparisons than the
15062 fp conditional moves do. */
15063 switch (GET_CODE (x))
15064 {
15065 case UNEQ:
15066 if (TARGET_AVX)
15067 {
15068 fputs ("eq_us", file);
15069 break;
15070 }
15071 case EQ:
15072 fputs ("eq", file);
15073 break;
15074 case UNLT:
15075 if (TARGET_AVX)
15076 {
15077 fputs ("nge", file);
15078 break;
15079 }
15080 case LT:
15081 fputs ("lt", file);
15082 break;
15083 case UNLE:
15084 if (TARGET_AVX)
15085 {
15086 fputs ("ngt", file);
15087 break;
15088 }
15089 case LE:
15090 fputs ("le", file);
15091 break;
15092 case UNORDERED:
15093 fputs ("unord", file);
15094 break;
15095 case LTGT:
15096 if (TARGET_AVX)
15097 {
15098 fputs ("neq_oq", file);
15099 break;
15100 }
15101 case NE:
15102 fputs ("neq", file);
15103 break;
15104 case GE:
15105 if (TARGET_AVX)
15106 {
15107 fputs ("ge", file);
15108 break;
15109 }
15110 case UNGE:
15111 fputs ("nlt", file);
15112 break;
15113 case GT:
15114 if (TARGET_AVX)
15115 {
15116 fputs ("gt", file);
15117 break;
15118 }
15119 case UNGT:
15120 fputs ("nle", file);
15121 break;
15122 case ORDERED:
15123 fputs ("ord", file);
15124 break;
15125 default:
15126 output_operand_lossage ("operand is not a condition code, "
15127 "invalid operand code 'D'");
15128 return;
15129 }
15130 return;
15131
15132 case 'F':
15133 case 'f':
15134 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
15135 if (ASSEMBLER_DIALECT == ASM_ATT)
15136 putc ('.', file);
15137 #endif
15138
15139 case 'C':
15140 case 'c':
15141 if (!COMPARISON_P (x))
15142 {
15143 output_operand_lossage ("operand is not a condition code, "
15144 "invalid operand code '%c'", code);
15145 return;
15146 }
15147 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
15148 code == 'c' || code == 'f',
15149 code == 'F' || code == 'f',
15150 file);
15151 return;
15152
15153 case 'H':
15154 if (!offsettable_memref_p (x))
15155 {
15156 output_operand_lossage ("operand is not an offsettable memory "
15157 "reference, invalid operand code 'H'");
15158 return;
15159 }
15160 /* It doesn't actually matter what mode we use here, as we're
15161 only going to use this for printing. */
15162 x = adjust_address_nv (x, DImode, 8);
15163 /* Output 'qword ptr' for intel assembler dialect. */
15164 if (ASSEMBLER_DIALECT == ASM_INTEL)
15165 code = 'q';
15166 break;
15167
15168 case 'K':
15169 gcc_assert (CONST_INT_P (x));
15170
15171 if (INTVAL (x) & IX86_HLE_ACQUIRE)
15172 #ifdef HAVE_AS_IX86_HLE
15173 fputs ("xacquire ", file);
15174 #else
15175 fputs ("\n" ASM_BYTE "0xf2\n\t", file);
15176 #endif
15177 else if (INTVAL (x) & IX86_HLE_RELEASE)
15178 #ifdef HAVE_AS_IX86_HLE
15179 fputs ("xrelease ", file);
15180 #else
15181 fputs ("\n" ASM_BYTE "0xf3\n\t", file);
15182 #endif
15183 /* We do not want to print the value of the operand. */
15184 return;
15185
15186 case 'N':
15187 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
15188 fputs ("{z}", file);
15189 return;
15190
15191 case 'r':
15192 gcc_assert (CONST_INT_P (x));
15193 gcc_assert (INTVAL (x) == ROUND_SAE);
15194
15195 if (ASSEMBLER_DIALECT == ASM_INTEL)
15196 fputs (", ", file);
15197
15198 fputs ("{sae}", file);
15199
15200 if (ASSEMBLER_DIALECT == ASM_ATT)
15201 fputs (", ", file);
15202
15203 return;
15204
15205 case 'R':
15206 gcc_assert (CONST_INT_P (x));
15207
15208 if (ASSEMBLER_DIALECT == ASM_INTEL)
15209 fputs (", ", file);
15210
15211 switch (INTVAL (x))
15212 {
15213 case ROUND_NEAREST_INT | ROUND_SAE:
15214 fputs ("{rn-sae}", file);
15215 break;
15216 case ROUND_NEG_INF | ROUND_SAE:
15217 fputs ("{rd-sae}", file);
15218 break;
15219 case ROUND_POS_INF | ROUND_SAE:
15220 fputs ("{ru-sae}", file);
15221 break;
15222 case ROUND_ZERO | ROUND_SAE:
15223 fputs ("{rz-sae}", file);
15224 break;
15225 default:
15226 gcc_unreachable ();
15227 }
15228
15229 if (ASSEMBLER_DIALECT == ASM_ATT)
15230 fputs (", ", file);
15231
15232 return;
15233
15234 case '*':
15235 if (ASSEMBLER_DIALECT == ASM_ATT)
15236 putc ('*', file);
15237 return;
15238
15239 case '&':
15240 {
15241 const char *name = get_some_local_dynamic_name ();
15242 if (name == NULL)
15243 output_operand_lossage ("'%%&' used without any "
15244 "local dynamic TLS references");
15245 else
15246 assemble_name (file, name);
15247 return;
15248 }
15249
15250 case '+':
15251 {
15252 rtx x;
15253
15254 if (!optimize
15255 || optimize_function_for_size_p (cfun)
15256 || !TARGET_BRANCH_PREDICTION_HINTS)
15257 return;
15258
15259 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
15260 if (x)
15261 {
15262 int pred_val = XINT (x, 0);
15263
15264 if (pred_val < REG_BR_PROB_BASE * 45 / 100
15265 || pred_val > REG_BR_PROB_BASE * 55 / 100)
15266 {
15267 bool taken = pred_val > REG_BR_PROB_BASE / 2;
15268 bool cputaken
15269 = final_forward_branch_p (current_output_insn) == 0;
15270
15271 /* Emit hints only in the case where the default branch prediction
15272 heuristics would fail. */
15273 if (taken != cputaken)
15274 {
15275 /* We use 3e (DS) prefix for taken branches and
15276 2e (CS) prefix for not taken branches. */
15277 if (taken)
15278 fputs ("ds ; ", file);
15279 else
15280 fputs ("cs ; ", file);
15281 }
15282 }
15283 }
15284 return;
15285 }
15286
15287 case ';':
15288 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
15289 putc (';', file);
15290 #endif
15291 return;
15292
15293 case '@':
15294 if (ASSEMBLER_DIALECT == ASM_ATT)
15295 putc ('%', file);
15296
15297 /* The kernel uses a different segment register for performance
15298 reasons; a system call would not have to trash the userspace
15299 segment register, which would be expensive. */
15300 if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
15301 fputs ("fs", file);
15302 else
15303 fputs ("gs", file);
15304 return;
15305
15306 case '~':
15307 putc (TARGET_AVX2 ? 'i' : 'f', file);
15308 return;
15309
15310 case '^':
15311 if (TARGET_64BIT && Pmode != word_mode)
15312 fputs ("addr32 ", file);
15313 return;
15314
15315 default:
15316 output_operand_lossage ("invalid operand code '%c'", code);
15317 }
15318 }
15319
15320 if (REG_P (x))
15321 print_reg (x, code, file);
15322
15323 else if (MEM_P (x))
15324 {
15325 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
15326 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
15327 && GET_MODE (x) != BLKmode)
15328 {
15329 const char * size;
15330 switch (GET_MODE_SIZE (GET_MODE (x)))
15331 {
15332 case 1: size = "BYTE"; break;
15333 case 2: size = "WORD"; break;
15334 case 4: size = "DWORD"; break;
15335 case 8: size = "QWORD"; break;
15336 case 12: size = "TBYTE"; break;
15337 case 16:
15338 if (GET_MODE (x) == XFmode)
15339 size = "TBYTE";
15340 else
15341 size = "XMMWORD";
15342 break;
15343 case 32: size = "YMMWORD"; break;
15344 case 64: size = "ZMMWORD"; break;
15345 default:
15346 gcc_unreachable ();
15347 }
15348
15349 /* Check for explicit size override (codes 'b', 'w', 'k',
15350 'q' and 'x') */
15351 if (code == 'b')
15352 size = "BYTE";
15353 else if (code == 'w')
15354 size = "WORD";
15355 else if (code == 'k')
15356 size = "DWORD";
15357 else if (code == 'q')
15358 size = "QWORD";
15359 else if (code == 'x')
15360 size = "XMMWORD";
15361
15362 fputs (size, file);
15363 fputs (" PTR ", file);
15364 }
15365
15366 x = XEXP (x, 0);
15367 /* Avoid (%rip) for call operands. */
15368 if (CONSTANT_ADDRESS_P (x) && code == 'P'
15369 && !CONST_INT_P (x))
15370 output_addr_const (file, x);
15371 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
15372 output_operand_lossage ("invalid constraints for operand");
15373 else
15374 output_address (x);
15375 }
15376
15377 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
15378 {
15379 REAL_VALUE_TYPE r;
15380 long l;
15381
15382 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15383 REAL_VALUE_TO_TARGET_SINGLE (r, l);
15384
15385 if (ASSEMBLER_DIALECT == ASM_ATT)
15386 putc ('$', file);
15387 /* Sign extend 32bit SFmode immediate to 8 bytes. */
15388 if (code == 'q')
15389 fprintf (file, "0x%08" HOST_LONG_LONG_FORMAT "x",
15390 (unsigned long long) (int) l);
15391 else
15392 fprintf (file, "0x%08x", (unsigned int) l);
15393 }
15394
15395 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
15396 {
15397 REAL_VALUE_TYPE r;
15398 long l[2];
15399
15400 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15401 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
15402
15403 if (ASSEMBLER_DIALECT == ASM_ATT)
15404 putc ('$', file);
15405 fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
15406 }
15407
15408 /* These float cases don't actually occur as immediate operands. */
15409 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == XFmode)
15410 {
15411 char dstr[30];
15412
15413 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
15414 fputs (dstr, file);
15415 }
15416
15417 else
15418 {
15419 /* We have patterns that allow zero sets of memory, for instance.
15420 In 64-bit mode, we should probably support all 8-byte vectors,
15421 since we can in fact encode that into an immediate. */
15422 if (GET_CODE (x) == CONST_VECTOR)
15423 {
15424 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
15425 x = const0_rtx;
15426 }
15427
15428 if (code != 'P' && code != 'p')
15429 {
15430 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
15431 {
15432 if (ASSEMBLER_DIALECT == ASM_ATT)
15433 putc ('$', file);
15434 }
15435 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
15436 || GET_CODE (x) == LABEL_REF)
15437 {
15438 if (ASSEMBLER_DIALECT == ASM_ATT)
15439 putc ('$', file);
15440 else
15441 fputs ("OFFSET FLAT:", file);
15442 }
15443 }
15444 if (CONST_INT_P (x))
15445 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
15446 else if (flag_pic || MACHOPIC_INDIRECT)
15447 output_pic_addr_const (file, x, code);
15448 else
15449 output_addr_const (file, x);
15450 }
15451 }
15452
15453 static bool
15454 ix86_print_operand_punct_valid_p (unsigned char code)
15455 {
15456 return (code == '@' || code == '*' || code == '+' || code == '&'
15457 || code == ';' || code == '~' || code == '^');
15458 }
15459 \f
15460 /* Print a memory operand whose address is ADDR. */
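/* The same decomposed address is rendered as "disp(%base,%index,scale)"
in AT&T syntax and roughly as "[base+index*scale+disp]" in Intel syntax
by the code below. */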
15461
15462 static void
15463 ix86_print_operand_address (FILE *file, rtx addr)
15464 {
15465 struct ix86_address parts;
15466 rtx base, index, disp;
15467 int scale;
15468 int ok;
15469 bool vsib = false;
15470 int code = 0;
15471
15472 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
15473 {
15474 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
15475 gcc_assert (parts.index == NULL_RTX);
15476 parts.index = XVECEXP (addr, 0, 1);
15477 parts.scale = INTVAL (XVECEXP (addr, 0, 2));
15478 addr = XVECEXP (addr, 0, 0);
15479 vsib = true;
15480 }
15481 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
15482 {
15483 gcc_assert (TARGET_64BIT);
15484 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
15485 code = 'q';
15486 }
15487 else
15488 ok = ix86_decompose_address (addr, &parts);
15489
15490 gcc_assert (ok);
15491
15492 base = parts.base;
15493 index = parts.index;
15494 disp = parts.disp;
15495 scale = parts.scale;
15496
15497 switch (parts.seg)
15498 {
15499 case SEG_DEFAULT:
15500 break;
15501 case SEG_FS:
15502 case SEG_GS:
15503 if (ASSEMBLER_DIALECT == ASM_ATT)
15504 putc ('%', file);
15505 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
15506 break;
15507 default:
15508 gcc_unreachable ();
15509 }
15510
15511 /* Use one byte shorter RIP relative addressing for 64bit mode. */
15512 if (TARGET_64BIT && !base && !index)
15513 {
15514 rtx symbol = disp;
15515
15516 if (GET_CODE (disp) == CONST
15517 && GET_CODE (XEXP (disp, 0)) == PLUS
15518 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
15519 symbol = XEXP (XEXP (disp, 0), 0);
15520
15521 if (GET_CODE (symbol) == LABEL_REF
15522 || (GET_CODE (symbol) == SYMBOL_REF
15523 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
15524 base = pc_rtx;
15525 }
15526 if (!base && !index)
15527 {
15528 /* A displacement-only address requires special attention. */
15529
15530 if (CONST_INT_P (disp))
15531 {
15532 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
15533 fputs ("ds:", file);
15534 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
15535 }
15536 else if (flag_pic)
15537 output_pic_addr_const (file, disp, 0);
15538 else
15539 output_addr_const (file, disp);
15540 }
15541 else
15542 {
15543 /* Print SImode register names to force addr32 prefix. */
15544 if (SImode_address_operand (addr, VOIDmode))
15545 {
15546 #ifdef ENABLE_CHECKING
15547 gcc_assert (TARGET_64BIT);
15548 switch (GET_CODE (addr))
15549 {
15550 case SUBREG:
15551 gcc_assert (GET_MODE (addr) == SImode);
15552 gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
15553 break;
15554 case ZERO_EXTEND:
15555 case AND:
15556 gcc_assert (GET_MODE (addr) == DImode);
15557 break;
15558 default:
15559 gcc_unreachable ();
15560 }
15561 #endif
15562 gcc_assert (!code);
15563 code = 'k';
15564 }
15565 else if (code == 0
15566 && TARGET_X32
15567 && disp
15568 && CONST_INT_P (disp)
15569 && INTVAL (disp) < -16*1024*1024)
15570 {
15571 /* X32 runs in 64-bit mode, where displacement, DISP, in
15572 address DISP(%r64), is encoded as 32-bit immediate sign-
15573 extended from 32-bit to 64-bit. For -0x40000300(%r64),
15574 address is %r64 + 0xffffffffbffffd00. When %r64 <
15575 0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
15576 which is invalid for x32. The correct address is %r64
15577 - 0x40000300 == 0xf7ffdd64. To properly encode
15578 -0x40000300(%r64) for x32, we zero-extend negative
15579 displacement by forcing addr32 prefix which truncates
15580 0xfffffffff7ffdd64 to 0xf7ffdd64. In theory, we should
15581 zero-extend all negative displacements, including -1(%rsp).
15582 However, for small negative displacements, sign-extension
15583 won't cause overflow. We only zero-extend negative
15584 displacements if they are < -16*1024*1024, which is also used
15585 to check legitimate address displacements for PIC. */
15586 code = 'k';
15587 }
15588
15589 if (ASSEMBLER_DIALECT == ASM_ATT)
15590 {
15591 if (disp)
15592 {
15593 if (flag_pic)
15594 output_pic_addr_const (file, disp, 0);
15595 else if (GET_CODE (disp) == LABEL_REF)
15596 output_asm_label (disp);
15597 else
15598 output_addr_const (file, disp);
15599 }
15600
15601 putc ('(', file);
15602 if (base)
15603 print_reg (base, code, file);
15604 if (index)
15605 {
15606 putc (',', file);
15607 print_reg (index, vsib ? 0 : code, file);
15608 if (scale != 1 || vsib)
15609 fprintf (file, ",%d", scale);
15610 }
15611 putc (')', file);
15612 }
15613 else
15614 {
15615 rtx offset = NULL_RTX;
15616
15617 if (disp)
15618 {
15619 /* Pull out the offset of a symbol; print any symbol itself. */
15620 if (GET_CODE (disp) == CONST
15621 && GET_CODE (XEXP (disp, 0)) == PLUS
15622 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
15623 {
15624 offset = XEXP (XEXP (disp, 0), 1);
15625 disp = gen_rtx_CONST (VOIDmode,
15626 XEXP (XEXP (disp, 0), 0));
15627 }
15628
15629 if (flag_pic)
15630 output_pic_addr_const (file, disp, 0);
15631 else if (GET_CODE (disp) == LABEL_REF)
15632 output_asm_label (disp);
15633 else if (CONST_INT_P (disp))
15634 offset = disp;
15635 else
15636 output_addr_const (file, disp);
15637 }
15638
15639 putc ('[', file);
15640 if (base)
15641 {
15642 print_reg (base, code, file);
15643 if (offset)
15644 {
15645 if (INTVAL (offset) >= 0)
15646 putc ('+', file);
15647 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
15648 }
15649 }
15650 else if (offset)
15651 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
15652 else
15653 putc ('0', file);
15654
15655 if (index)
15656 {
15657 putc ('+', file);
15658 print_reg (index, vsib ? 0 : code, file);
15659 if (scale != 1 || vsib)
15660 fprintf (file, "*%d", scale);
15661 }
15662 putc (']', file);
15663 }
15664 }
15665 }
15666
15667 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
15668
15669 static bool
15670 i386_asm_output_addr_const_extra (FILE *file, rtx x)
15671 {
15672 rtx op;
15673
15674 if (GET_CODE (x) != UNSPEC)
15675 return false;
15676
15677 op = XVECEXP (x, 0, 0);
15678 switch (XINT (x, 1))
15679 {
15680 case UNSPEC_GOTTPOFF:
15681 output_addr_const (file, op);
15682 /* FIXME: This might be @TPOFF in Sun ld. */
15683 fputs ("@gottpoff", file);
15684 break;
15685 case UNSPEC_TPOFF:
15686 output_addr_const (file, op);
15687 fputs ("@tpoff", file);
15688 break;
15689 case UNSPEC_NTPOFF:
15690 output_addr_const (file, op);
15691 if (TARGET_64BIT)
15692 fputs ("@tpoff", file);
15693 else
15694 fputs ("@ntpoff", file);
15695 break;
15696 case UNSPEC_DTPOFF:
15697 output_addr_const (file, op);
15698 fputs ("@dtpoff", file);
15699 break;
15700 case UNSPEC_GOTNTPOFF:
15701 output_addr_const (file, op);
15702 if (TARGET_64BIT)
15703 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
15704 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
15705 else
15706 fputs ("@gotntpoff", file);
15707 break;
15708 case UNSPEC_INDNTPOFF:
15709 output_addr_const (file, op);
15710 fputs ("@indntpoff", file);
15711 break;
15712 #if TARGET_MACHO
15713 case UNSPEC_MACHOPIC_OFFSET:
15714 output_addr_const (file, op);
15715 putc ('-', file);
15716 machopic_output_function_base_name (file);
15717 break;
15718 #endif
15719
15720 case UNSPEC_STACK_CHECK:
15721 {
15722 int offset;
15723
15724 gcc_assert (flag_split_stack);
15725
15726 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
15727 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
15728 #else
15729 gcc_unreachable ();
15730 #endif
15731
15732 fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
15733 }
15734 break;
15735
15736 default:
15737 return false;
15738 }
15739
15740 return true;
15741 }
15742 \f
15743 /* Split one or more double-mode RTL references into pairs of half-mode
15744 references. The RTL can be REG, offsettable MEM, integer constant, or
15745 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
15746 split and "num" is its length. lo_half and hi_half are output arrays
15747 that parallel "operands". */
15748
15749 void
15750 split_double_mode (enum machine_mode mode, rtx operands[],
15751 int num, rtx lo_half[], rtx hi_half[])
15752 {
15753 enum machine_mode half_mode;
15754 unsigned int byte;
15755
15756 switch (mode)
15757 {
15758 case TImode:
15759 half_mode = DImode;
15760 break;
15761 case DImode:
15762 half_mode = SImode;
15763 break;
15764 default:
15765 gcc_unreachable ();
15766 }
15767
15768 byte = GET_MODE_SIZE (half_mode);
15769
15770 while (num--)
15771 {
15772 rtx op = operands[num];
15773
15774 /* simplify_subreg refuses to split volatile memory addresses,
15775 but we still have to handle them. */
15776 if (MEM_P (op))
15777 {
15778 lo_half[num] = adjust_address (op, half_mode, 0);
15779 hi_half[num] = adjust_address (op, half_mode, byte);
15780 }
15781 else
15782 {
15783 lo_half[num] = simplify_gen_subreg (half_mode, op,
15784 GET_MODE (op) == VOIDmode
15785 ? mode : GET_MODE (op), 0);
15786 hi_half[num] = simplify_gen_subreg (half_mode, op,
15787 GET_MODE (op) == VOIDmode
15788 ? mode : GET_MODE (op), byte);
15789 }
15790 }
15791 }
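
/* A minimal usage sketch (illustrative only; the operand array and its
   length are the usual md-expander conventions, not taken from one
   particular caller):

     rtx lo[2], hi[2];
     split_double_mode (DImode, operands, 2, lo, hi);
     emit_move_insn (lo[0], lo[1]);
     emit_move_insn (hi[0], hi[1]);

   For a MEM the two halves are the same address at offsets 0 and
   GET_MODE_SIZE (half_mode); for a REG or a constant they are built
   with simplify_gen_subreg.  */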
15792 \f
15793 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
15794 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
15795 is the expression of the binary operation. The output may either be
15796 emitted here, or returned to the caller, like all output_* functions.
15797
15798 There is no guarantee that the operands are the same mode, as they
15799 might be within FLOAT or FLOAT_EXTEND expressions. */
15800
15801 #ifndef SYSV386_COMPAT
15802 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
15803 wants to fix the assemblers because that causes incompatibility
15804 with gcc. No-one wants to fix gcc because that causes
15805 incompatibility with assemblers... You can use the option of
15806 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
15807 #define SYSV386_COMPAT 1
15808 #endif
15809
15810 const char *
15811 output_387_binary_op (rtx insn, rtx *operands)
15812 {
15813 static char buf[40];
15814 const char *p;
15815 const char *ssep;
15816 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
15817
15818 #ifdef ENABLE_CHECKING
15819 /* Even if we do not want to check the inputs, this documents input
15820 constraints, which helps in understanding the following code. */
15821 if (STACK_REG_P (operands[0])
15822 && ((REG_P (operands[1])
15823 && REGNO (operands[0]) == REGNO (operands[1])
15824 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
15825 || (REG_P (operands[2])
15826 && REGNO (operands[0]) == REGNO (operands[2])
15827 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
15828 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
15829 ; /* ok */
15830 else
15831 gcc_assert (is_sse);
15832 #endif
15833
15834 switch (GET_CODE (operands[3]))
15835 {
15836 case PLUS:
15837 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15838 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15839 p = "fiadd";
15840 else
15841 p = "fadd";
15842 ssep = "vadd";
15843 break;
15844
15845 case MINUS:
15846 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15847 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15848 p = "fisub";
15849 else
15850 p = "fsub";
15851 ssep = "vsub";
15852 break;
15853
15854 case MULT:
15855 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15856 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15857 p = "fimul";
15858 else
15859 p = "fmul";
15860 ssep = "vmul";
15861 break;
15862
15863 case DIV:
15864 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15865 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15866 p = "fidiv";
15867 else
15868 p = "fdiv";
15869 ssep = "vdiv";
15870 break;
15871
15872 default:
15873 gcc_unreachable ();
15874 }
15875
15876 if (is_sse)
15877 {
15878 if (TARGET_AVX)
15879 {
15880 strcpy (buf, ssep);
15881 if (GET_MODE (operands[0]) == SFmode)
15882 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
15883 else
15884 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
15885 }
15886 else
15887 {
15888 strcpy (buf, ssep + 1);
15889 if (GET_MODE (operands[0]) == SFmode)
15890 strcat (buf, "ss\t{%2, %0|%0, %2}");
15891 else
15892 strcat (buf, "sd\t{%2, %0|%0, %2}");
15893 }
15894 return buf;
15895 }
15896 strcpy (buf, p);
15897
15898 switch (GET_CODE (operands[3]))
15899 {
15900 case MULT:
15901 case PLUS:
15902 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
15903 {
15904 rtx temp = operands[2];
15905 operands[2] = operands[1];
15906 operands[1] = temp;
15907 }
15908
15909 /* We know operands[0] == operands[1]. */
15910
15911 if (MEM_P (operands[2]))
15912 {
15913 p = "%Z2\t%2";
15914 break;
15915 }
15916
15917 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
15918 {
15919 if (STACK_TOP_P (operands[0]))
15920 /* How is it that we are storing to a dead operand[2]?
15921 Well, presumably operands[1] is dead too. We can't
15922 store the result to st(0) as st(0) gets popped on this
15923 instruction. Instead store to operands[2] (which I
15924 think has to be st(1)). st(1) will be popped later.
15925 gcc <= 2.8.1 didn't have this check and generated
15926 assembly code that the Unixware assembler rejected. */
15927 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
15928 else
15929 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
15930 break;
15931 }
15932
15933 if (STACK_TOP_P (operands[0]))
15934 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
15935 else
15936 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
15937 break;
15938
15939 case MINUS:
15940 case DIV:
15941 if (MEM_P (operands[1]))
15942 {
15943 p = "r%Z1\t%1";
15944 break;
15945 }
15946
15947 if (MEM_P (operands[2]))
15948 {
15949 p = "%Z2\t%2";
15950 break;
15951 }
15952
15953 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
15954 {
15955 #if SYSV386_COMPAT
15956 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
15957 derived assemblers, confusingly reverse the direction of
15958 the operation for fsub{r} and fdiv{r} when the
15959 destination register is not st(0). The Intel assembler
15960 doesn't have this brain damage. Read !SYSV386_COMPAT to
15961 figure out what the hardware really does. */
15962 if (STACK_TOP_P (operands[0]))
15963 p = "{p\t%0, %2|rp\t%2, %0}";
15964 else
15965 p = "{rp\t%2, %0|p\t%0, %2}";
15966 #else
15967 if (STACK_TOP_P (operands[0]))
15968 /* As above for fmul/fadd, we can't store to st(0). */
15969 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
15970 else
15971 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
15972 #endif
15973 break;
15974 }
15975
15976 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
15977 {
15978 #if SYSV386_COMPAT
15979 if (STACK_TOP_P (operands[0]))
15980 p = "{rp\t%0, %1|p\t%1, %0}";
15981 else
15982 p = "{p\t%1, %0|rp\t%0, %1}";
15983 #else
15984 if (STACK_TOP_P (operands[0]))
15985 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
15986 else
15987 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
15988 #endif
15989 break;
15990 }
15991
15992 if (STACK_TOP_P (operands[0]))
15993 {
15994 if (STACK_TOP_P (operands[1]))
15995 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
15996 else
15997 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
15998 break;
15999 }
16000 else if (STACK_TOP_P (operands[1]))
16001 {
16002 #if SYSV386_COMPAT
16003 p = "{\t%1, %0|r\t%0, %1}";
16004 #else
16005 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
16006 #endif
16007 }
16008 else
16009 {
16010 #if SYSV386_COMPAT
16011 p = "{r\t%2, %0|\t%0, %2}";
16012 #else
16013 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
16014 #endif
16015 }
16016 break;
16017
16018 default:
16019 gcc_unreachable ();
16020 }
16021
16022 strcat (buf, p);
16023 return buf;
16024 }
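
/* Illustrative examples of the templates built above (a sketch, not an
   exhaustive list): for (plus:DF x y) with AVX enabled the result is
   "vaddsd\t{%2, %1, %0|%0, %1, %2}"; for plain SSE and SFmode it is
   "addss\t{%2, %0|%0, %2}"; for the x87 path with a memory operands[2]
   it is e.g. "fmul%Z2\t%2".  */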
16025
16026 /* Check if a 256bit AVX register is referenced inside of EXP. */
16027
16028 static int
16029 ix86_check_avx256_register (rtx *pexp, void *data ATTRIBUTE_UNUSED)
16030 {
16031 rtx exp = *pexp;
16032
16033 if (GET_CODE (exp) == SUBREG)
16034 exp = SUBREG_REG (exp);
16035
16036 if (REG_P (exp)
16037 && VALID_AVX256_REG_OR_OI_MODE (GET_MODE (exp)))
16038 return 1;
16039
16040 return 0;
16041 }
16042
16043 /* Return needed mode for entity in optimize_mode_switching pass. */
16044
16045 static int
16046 ix86_avx_u128_mode_needed (rtx insn)
16047 {
16048 if (CALL_P (insn))
16049 {
16050 rtx link;
16051
16052 /* Needed mode is set to AVX_U128_CLEAN if there are
16053 no 256bit modes used in function arguments. */
16054 for (link = CALL_INSN_FUNCTION_USAGE (insn);
16055 link;
16056 link = XEXP (link, 1))
16057 {
16058 if (GET_CODE (XEXP (link, 0)) == USE)
16059 {
16060 rtx arg = XEXP (XEXP (link, 0), 0);
16061
16062 if (ix86_check_avx256_register (&arg, NULL))
16063 return AVX_U128_DIRTY;
16064 }
16065 }
16066
16067 return AVX_U128_CLEAN;
16068 }
16069
16070 /* Require DIRTY mode if a 256bit AVX register is referenced. Hardware
16071 changes state only when a 256bit register is written to, but we need
16072 to prevent the compiler from moving the optimal insertion point above
16073 an eventual read from a 256bit register. */
16074 if (for_each_rtx (&PATTERN (insn), ix86_check_avx256_register, NULL))
16075 return AVX_U128_DIRTY;
16076
16077 return AVX_U128_ANY;
16078 }
16079
16080 /* Return mode that i387 must be switched into
16081 prior to the execution of insn. */
16082
16083 static int
16084 ix86_i387_mode_needed (int entity, rtx insn)
16085 {
16086 enum attr_i387_cw mode;
16087
16088 /* The mode UNINITIALIZED is used to store the control word after a
16089 function call or ASM pattern. The mode ANY specifies that the function
16090 has no requirements on the control word and makes no changes in the
16091 bits we are interested in. */
16092
16093 if (CALL_P (insn)
16094 || (NONJUMP_INSN_P (insn)
16095 && (asm_noperands (PATTERN (insn)) >= 0
16096 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
16097 return I387_CW_UNINITIALIZED;
16098
16099 if (recog_memoized (insn) < 0)
16100 return I387_CW_ANY;
16101
16102 mode = get_attr_i387_cw (insn);
16103
16104 switch (entity)
16105 {
16106 case I387_TRUNC:
16107 if (mode == I387_CW_TRUNC)
16108 return mode;
16109 break;
16110
16111 case I387_FLOOR:
16112 if (mode == I387_CW_FLOOR)
16113 return mode;
16114 break;
16115
16116 case I387_CEIL:
16117 if (mode == I387_CW_CEIL)
16118 return mode;
16119 break;
16120
16121 case I387_MASK_PM:
16122 if (mode == I387_CW_MASK_PM)
16123 return mode;
16124 break;
16125
16126 default:
16127 gcc_unreachable ();
16128 }
16129
16130 return I387_CW_ANY;
16131 }
16132
16133 /* Return mode that entity must be switched into
16134 prior to the execution of insn. */
16135
16136 int
16137 ix86_mode_needed (int entity, rtx insn)
16138 {
16139 switch (entity)
16140 {
16141 case AVX_U128:
16142 return ix86_avx_u128_mode_needed (insn);
16143 case I387_TRUNC:
16144 case I387_FLOOR:
16145 case I387_CEIL:
16146 case I387_MASK_PM:
16147 return ix86_i387_mode_needed (entity, insn);
16148 default:
16149 gcc_unreachable ();
16150 }
16151 return 0;
16152 }
16153
16154 /* Check if a 256bit AVX register is referenced in stores. */
16155
16156 static void
16157 ix86_check_avx256_stores (rtx dest, const_rtx set ATTRIBUTE_UNUSED, void *data)
16158 {
16159 if (ix86_check_avx256_register (&dest, NULL))
16160 {
16161 bool *used = (bool *) data;
16162 *used = true;
16163 }
16164 }
16165
16166 /* Calculate the mode of the upper 128 bits of the AVX registers after the insn. */
16167
16168 static int
16169 ix86_avx_u128_mode_after (int mode, rtx insn)
16170 {
16171 rtx pat = PATTERN (insn);
16172
16173 if (vzeroupper_operation (pat, VOIDmode)
16174 || vzeroall_operation (pat, VOIDmode))
16175 return AVX_U128_CLEAN;
16176
16177 /* We know that the state is clean after a CALL insn if no 256bit
16178 register is used for the function return value. */
16179 if (CALL_P (insn))
16180 {
16181 bool avx_reg256_found = false;
16182 note_stores (pat, ix86_check_avx256_stores, &avx_reg256_found);
16183
16184 return avx_reg256_found ? AVX_U128_DIRTY : AVX_U128_CLEAN;
16185 }
16186
16187 /* Otherwise, return current mode. Remember that if insn
16188 references AVX 256bit registers, the mode was already changed
16189 to DIRTY from MODE_NEEDED. */
16190 return mode;
16191 }
16192
16193 /* Return the mode that an insn results in. */
16194
16195 int
16196 ix86_mode_after (int entity, int mode, rtx insn)
16197 {
16198 switch (entity)
16199 {
16200 case AVX_U128:
16201 return ix86_avx_u128_mode_after (mode, insn);
16202 case I387_TRUNC:
16203 case I387_FLOOR:
16204 case I387_CEIL:
16205 case I387_MASK_PM:
16206 return mode;
16207 default:
16208 gcc_unreachable ();
16209 }
16210 }
16211
16212 static int
16213 ix86_avx_u128_mode_entry (void)
16214 {
16215 tree arg;
16216
16217 /* Entry mode is set to AVX_U128_DIRTY if there are
16218 256bit modes used in function arguments. */
16219 for (arg = DECL_ARGUMENTS (current_function_decl); arg;
16220 arg = TREE_CHAIN (arg))
16221 {
16222 rtx incoming = DECL_INCOMING_RTL (arg);
16223
16224 if (incoming && ix86_check_avx256_register (&incoming, NULL))
16225 return AVX_U128_DIRTY;
16226 }
16227
16228 return AVX_U128_CLEAN;
16229 }
16230
16231 /* Return a mode that ENTITY is assumed to be
16232 switched to at function entry. */
16233
16234 int
16235 ix86_mode_entry (int entity)
16236 {
16237 switch (entity)
16238 {
16239 case AVX_U128:
16240 return ix86_avx_u128_mode_entry ();
16241 case I387_TRUNC:
16242 case I387_FLOOR:
16243 case I387_CEIL:
16244 case I387_MASK_PM:
16245 return I387_CW_ANY;
16246 default:
16247 gcc_unreachable ();
16248 }
16249 }
16250
16251 static int
16252 ix86_avx_u128_mode_exit (void)
16253 {
16254 rtx reg = crtl->return_rtx;
16255
16256 /* Exit mode is set to AVX_U128_DIRTY if there are
16257 256bit modes used for the function return value. */
16258 if (reg && ix86_check_avx256_register (&reg, NULL))
16259 return AVX_U128_DIRTY;
16260
16261 return AVX_U128_CLEAN;
16262 }
16263
16264 /* Return a mode that ENTITY is assumed to be
16265 switched to at function exit. */
16266
16267 int
16268 ix86_mode_exit (int entity)
16269 {
16270 switch (entity)
16271 {
16272 case AVX_U128:
16273 return ix86_avx_u128_mode_exit ();
16274 case I387_TRUNC:
16275 case I387_FLOOR:
16276 case I387_CEIL:
16277 case I387_MASK_PM:
16278 return I387_CW_ANY;
16279 default:
16280 gcc_unreachable ();
16281 }
16282 }
16283
16284 /* Output code to initialize control word copies used by trunc?f?i and
16285 rounding patterns. CURRENT_MODE is set to current control word,
16286 while NEW_MODE is set to new control word. */
16287
16288 static void
16289 emit_i387_cw_initialization (int mode)
16290 {
16291 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
16292 rtx new_mode;
16293
16294 enum ix86_stack_slot slot;
16295
16296 rtx reg = gen_reg_rtx (HImode);
16297
16298 emit_insn (gen_x86_fnstcw_1 (stored_mode));
16299 emit_move_insn (reg, copy_rtx (stored_mode));
16300
16301 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
16302 || optimize_insn_for_size_p ())
16303 {
16304 switch (mode)
16305 {
16306 case I387_CW_TRUNC:
16307 /* round toward zero (truncate) */
16308 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
16309 slot = SLOT_CW_TRUNC;
16310 break;
16311
16312 case I387_CW_FLOOR:
16313 /* round down toward -oo */
16314 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
16315 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
16316 slot = SLOT_CW_FLOOR;
16317 break;
16318
16319 case I387_CW_CEIL:
16320 /* round up toward +oo */
16321 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
16322 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
16323 slot = SLOT_CW_CEIL;
16324 break;
16325
16326 case I387_CW_MASK_PM:
16327 /* mask precision exception for nearbyint() */
16328 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
16329 slot = SLOT_CW_MASK_PM;
16330 break;
16331
16332 default:
16333 gcc_unreachable ();
16334 }
16335 }
16336 else
16337 {
16338 switch (mode)
16339 {
16340 case I387_CW_TRUNC:
16341 /* round toward zero (truncate) */
16342 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
16343 slot = SLOT_CW_TRUNC;
16344 break;
16345
16346 case I387_CW_FLOOR:
16347 /* round down toward -oo */
16348 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
16349 slot = SLOT_CW_FLOOR;
16350 break;
16351
16352 case I387_CW_CEIL:
16353 /* round up toward +oo */
16354 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
16355 slot = SLOT_CW_CEIL;
16356 break;
16357
16358 case I387_CW_MASK_PM:
16359 /* mask precision exception for nearbyint() */
16360 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
16361 slot = SLOT_CW_MASK_PM;
16362 break;
16363
16364 default:
16365 gcc_unreachable ();
16366 }
16367 }
16368
16369 gcc_assert (slot < MAX_386_STACK_LOCALS);
16370
16371 new_mode = assign_386_stack_local (HImode, slot);
16372 emit_move_insn (new_mode, reg);
16373 }
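
/* For reference, the magic constants above map onto the x87 control word
   layout: bits 10-11 are the rounding control field (00 = round to
   nearest, 01 = round down, 10 = round up, 11 = truncate toward zero)
   and bit 5 is the precision exception mask.  Hence OR-ing 0x0c00
   selects truncation, clearing the field and then OR-ing 0x0400 or
   0x0800 selects floor or ceil, and OR-ing 0x0020 masks the precision
   exception for nearbyint.  */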
16374
16375 /* Emit vzeroupper. */
16376
16377 void
16378 ix86_avx_emit_vzeroupper (HARD_REG_SET regs_live)
16379 {
16380 int i;
16381
16382 /* Cancel automatic vzeroupper insertion if there are
16383 live call-saved SSE registers at the insertion point. */
16384
16385 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
16386 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
16387 return;
16388
16389 if (TARGET_64BIT)
16390 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
16391 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
16392 return;
16393
16394 emit_insn (gen_avx_vzeroupper ());
16395 }
16396
16397 /* Generate one or more insns to set ENTITY to MODE. */
16398
16399 void
16400 ix86_emit_mode_set (int entity, int mode, HARD_REG_SET regs_live)
16401 {
16402 switch (entity)
16403 {
16404 case AVX_U128:
16405 if (mode == AVX_U128_CLEAN)
16406 ix86_avx_emit_vzeroupper (regs_live);
16407 break;
16408 case I387_TRUNC:
16409 case I387_FLOOR:
16410 case I387_CEIL:
16411 case I387_MASK_PM:
16412 if (mode != I387_CW_ANY
16413 && mode != I387_CW_UNINITIALIZED)
16414 emit_i387_cw_initialization (mode);
16415 break;
16416 default:
16417 gcc_unreachable ();
16418 }
16419 }
16420
16421 /* Output code for INSN to convert a float to a signed int. OPERANDS
16422 are the insn operands. The output may be [HSD]Imode and the input
16423 operand may be [SDX]Fmode. */
16424
16425 const char *
16426 output_fix_trunc (rtx insn, rtx *operands, bool fisttp)
16427 {
16428 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
16429 int dimode_p = GET_MODE (operands[0]) == DImode;
16430 int round_mode = get_attr_i387_cw (insn);
16431
16432 /* Jump through a hoop or two for DImode, since the hardware has no
16433 non-popping instruction. We used to do this a different way, but
16434 that was somewhat fragile and broke with post-reload splitters. */
16435 if ((dimode_p || fisttp) && !stack_top_dies)
16436 output_asm_insn ("fld\t%y1", operands);
16437
16438 gcc_assert (STACK_TOP_P (operands[1]));
16439 gcc_assert (MEM_P (operands[0]));
16440 gcc_assert (GET_MODE (operands[1]) != TFmode);
16441
16442 if (fisttp)
16443 output_asm_insn ("fisttp%Z0\t%0", operands);
16444 else
16445 {
16446 if (round_mode != I387_CW_ANY)
16447 output_asm_insn ("fldcw\t%3", operands);
16448 if (stack_top_dies || dimode_p)
16449 output_asm_insn ("fistp%Z0\t%0", operands);
16450 else
16451 output_asm_insn ("fist%Z0\t%0", operands);
16452 if (round_mode != I387_CW_ANY)
16453 output_asm_insn ("fldcw\t%2", operands);
16454 }
16455
16456 return "";
16457 }
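
/* A sketch of the sequence this typically emits for a DImode truncation
   on a target without fisttp (the operand numbers follow the pattern
   that supplies the two control-word stack slots):

     fld     %st(0)          ; only when the value must survive
     fldcw   %3              ; switch to truncation rounding
     fistpll %0
     fldcw   %2              ; restore the saved control word
 */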
16458
16459 /* Output code for x87 ffreep insn. The OPNO argument, which may only
16460 have the values zero or one, indicates the ffreep insn's operand
16461 from the OPERANDS array. */
16462
16463 static const char *
16464 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
16465 {
16466 if (TARGET_USE_FFREEP)
16467 #ifdef HAVE_AS_IX86_FFREEP
16468 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
16469 #else
16470 {
16471 static char retval[32];
16472 int regno = REGNO (operands[opno]);
16473
16474 gcc_assert (STACK_REGNO_P (regno));
16475
16476 regno -= FIRST_STACK_REG;
16477
16478 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
16479 return retval;
16480 }
16481 #endif
16482
16483 return opno ? "fstp\t%y1" : "fstp\t%y0";
16484 }
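
/* Note on the fallback above: when the assembler does not know the
   ffreep mnemonic, the ASM_SHORT directive emits the 16-bit word
   0xc<regno>df, whose little-endian byte order is 0xdf 0xc0+regno --
   exactly the machine encoding of ffreep %st(regno).  */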
16485
16486
16487 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
16488 should be used. UNORDERED_P is true when fucom should be used. */
16489
16490 const char *
16491 output_fp_compare (rtx insn, rtx *operands, bool eflags_p, bool unordered_p)
16492 {
16493 int stack_top_dies;
16494 rtx cmp_op0, cmp_op1;
16495 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
16496
16497 if (eflags_p)
16498 {
16499 cmp_op0 = operands[0];
16500 cmp_op1 = operands[1];
16501 }
16502 else
16503 {
16504 cmp_op0 = operands[1];
16505 cmp_op1 = operands[2];
16506 }
16507
16508 if (is_sse)
16509 {
16510 if (GET_MODE (operands[0]) == SFmode)
16511 if (unordered_p)
16512 return "%vucomiss\t{%1, %0|%0, %1}";
16513 else
16514 return "%vcomiss\t{%1, %0|%0, %1}";
16515 else
16516 if (unordered_p)
16517 return "%vucomisd\t{%1, %0|%0, %1}";
16518 else
16519 return "%vcomisd\t{%1, %0|%0, %1}";
16520 }
16521
16522 gcc_assert (STACK_TOP_P (cmp_op0));
16523
16524 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
16525
16526 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
16527 {
16528 if (stack_top_dies)
16529 {
16530 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
16531 return output_387_ffreep (operands, 1);
16532 }
16533 else
16534 return "ftst\n\tfnstsw\t%0";
16535 }
16536
16537 if (STACK_REG_P (cmp_op1)
16538 && stack_top_dies
16539 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
16540 && REGNO (cmp_op1) != FIRST_STACK_REG)
16541 {
16542 /* If the top of the 387 stack dies, and the other operand
16543 is also a stack register that dies, then this must be a
16544 `fcompp' float compare. */
16545
16546 if (eflags_p)
16547 {
16548 /* There is no double popping fcomi variant. Fortunately,
16549 eflags is immune from the fstp's cc clobbering. */
16550 if (unordered_p)
16551 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
16552 else
16553 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
16554 return output_387_ffreep (operands, 0);
16555 }
16556 else
16557 {
16558 if (unordered_p)
16559 return "fucompp\n\tfnstsw\t%0";
16560 else
16561 return "fcompp\n\tfnstsw\t%0";
16562 }
16563 }
16564 else
16565 {
16566 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
16567
16568 static const char * const alt[16] =
16569 {
16570 "fcom%Z2\t%y2\n\tfnstsw\t%0",
16571 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
16572 "fucom%Z2\t%y2\n\tfnstsw\t%0",
16573 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
16574
16575 "ficom%Z2\t%y2\n\tfnstsw\t%0",
16576 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
16577 NULL,
16578 NULL,
16579
16580 "fcomi\t{%y1, %0|%0, %y1}",
16581 "fcomip\t{%y1, %0|%0, %y1}",
16582 "fucomi\t{%y1, %0|%0, %y1}",
16583 "fucomip\t{%y1, %0|%0, %y1}",
16584
16585 NULL,
16586 NULL,
16587 NULL,
16588 NULL
16589 };
16590
16591 int mask;
16592 const char *ret;
16593
16594 mask = eflags_p << 3;
16595 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
16596 mask |= unordered_p << 1;
16597 mask |= stack_top_dies;
16598
16599 gcc_assert (mask < 16);
16600 ret = alt[mask];
16601 gcc_assert (ret);
16602
16603 return ret;
16604 }
16605 }
16606
16607 void
16608 ix86_output_addr_vec_elt (FILE *file, int value)
16609 {
16610 const char *directive = ASM_LONG;
16611
16612 #ifdef ASM_QUAD
16613 if (TARGET_LP64)
16614 directive = ASM_QUAD;
16615 #else
16616 gcc_assert (!TARGET_64BIT);
16617 #endif
16618
16619 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
16620 }
16621
16622 void
16623 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
16624 {
16625 const char *directive = ASM_LONG;
16626
16627 #ifdef ASM_QUAD
16628 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
16629 directive = ASM_QUAD;
16630 #else
16631 gcc_assert (!TARGET_64BIT);
16632 #endif
16633 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
16634 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
16635 fprintf (file, "%s%s%d-%s%d\n",
16636 directive, LPREFIX, value, LPREFIX, rel);
16637 else if (HAVE_AS_GOTOFF_IN_DATA)
16638 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
16639 #if TARGET_MACHO
16640 else if (TARGET_MACHO)
16641 {
16642 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
16643 machopic_output_function_base_name (file);
16644 putc ('\n', file);
16645 }
16646 #endif
16647 else
16648 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
16649 GOT_SYMBOL_NAME, LPREFIX, value);
16650 }
16651 \f
16652 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
16653 for the target. */
16654
16655 void
16656 ix86_expand_clear (rtx dest)
16657 {
16658 rtx tmp;
16659
16660 /* We play register width games, which are only valid after reload. */
16661 gcc_assert (reload_completed);
16662
16663 /* Avoid HImode and its attendant prefix byte. */
16664 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
16665 dest = gen_rtx_REG (SImode, REGNO (dest));
16666 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
16667
16668 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
16669 if (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ())
16670 {
16671 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
16672 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
16673 }
16674
16675 emit_insn (tmp);
16676 }
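
/* The PARALLEL with a FLAGS_REG clobber above selects the "xor reg, reg"
   form, which is shorter and breaks dependencies but clobbers EFLAGS;
   the plain SET is kept only when TARGET_USE_MOV0 applies and the insn
   is not being optimized for speed, and then assembles as
   "mov $0, reg".  */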
16677
16678 /* X is an unchanging MEM. If it is a constant pool reference, return
16679 the constant pool rtx, else NULL. */
16680
16681 rtx
16682 maybe_get_pool_constant (rtx x)
16683 {
16684 x = ix86_delegitimize_address (XEXP (x, 0));
16685
16686 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
16687 return get_pool_constant (x);
16688
16689 return NULL_RTX;
16690 }
16691
16692 void
16693 ix86_expand_move (enum machine_mode mode, rtx operands[])
16694 {
16695 rtx op0, op1;
16696 enum tls_model model;
16697
16698 op0 = operands[0];
16699 op1 = operands[1];
16700
16701 if (GET_CODE (op1) == SYMBOL_REF)
16702 {
16703 rtx tmp;
16704
16705 model = SYMBOL_REF_TLS_MODEL (op1);
16706 if (model)
16707 {
16708 op1 = legitimize_tls_address (op1, model, true);
16709 op1 = force_operand (op1, op0);
16710 if (op1 == op0)
16711 return;
16712 op1 = convert_to_mode (mode, op1, 1);
16713 }
16714 else if ((tmp = legitimize_pe_coff_symbol (op1, false)) != NULL_RTX)
16715 op1 = tmp;
16716 }
16717 else if (GET_CODE (op1) == CONST
16718 && GET_CODE (XEXP (op1, 0)) == PLUS
16719 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
16720 {
16721 rtx addend = XEXP (XEXP (op1, 0), 1);
16722 rtx symbol = XEXP (XEXP (op1, 0), 0);
16723 rtx tmp;
16724
16725 model = SYMBOL_REF_TLS_MODEL (symbol);
16726 if (model)
16727 tmp = legitimize_tls_address (symbol, model, true);
16728 else
16729 tmp = legitimize_pe_coff_symbol (symbol, true);
16730
16731 if (tmp)
16732 {
16733 tmp = force_operand (tmp, NULL);
16734 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
16735 op0, 1, OPTAB_DIRECT);
16736 if (tmp == op0)
16737 return;
16738 op1 = convert_to_mode (mode, tmp, 1);
16739 }
16740 }
16741
16742 if ((flag_pic || MACHOPIC_INDIRECT)
16743 && symbolic_operand (op1, mode))
16744 {
16745 if (TARGET_MACHO && !TARGET_64BIT)
16746 {
16747 #if TARGET_MACHO
16748 /* dynamic-no-pic */
16749 if (MACHOPIC_INDIRECT)
16750 {
16751 rtx temp = ((reload_in_progress
16752 || ((op0 && REG_P (op0))
16753 && mode == Pmode))
16754 ? op0 : gen_reg_rtx (Pmode));
16755 op1 = machopic_indirect_data_reference (op1, temp);
16756 if (MACHOPIC_PURE)
16757 op1 = machopic_legitimize_pic_address (op1, mode,
16758 temp == op1 ? 0 : temp);
16759 }
16760 if (op0 != op1 && GET_CODE (op0) != MEM)
16761 {
16762 rtx insn = gen_rtx_SET (VOIDmode, op0, op1);
16763 emit_insn (insn);
16764 return;
16765 }
16766 if (GET_CODE (op0) == MEM)
16767 op1 = force_reg (Pmode, op1);
16768 else
16769 {
16770 rtx temp = op0;
16771 if (GET_CODE (temp) != REG)
16772 temp = gen_reg_rtx (Pmode);
16773 temp = legitimize_pic_address (op1, temp);
16774 if (temp == op0)
16775 return;
16776 op1 = temp;
16777 }
16778 /* dynamic-no-pic */
16779 #endif
16780 }
16781 else
16782 {
16783 if (MEM_P (op0))
16784 op1 = force_reg (mode, op1);
16785 else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
16786 {
16787 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
16788 op1 = legitimize_pic_address (op1, reg);
16789 if (op0 == op1)
16790 return;
16791 op1 = convert_to_mode (mode, op1, 1);
16792 }
16793 }
16794 }
16795 else
16796 {
16797 if (MEM_P (op0)
16798 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
16799 || !push_operand (op0, mode))
16800 && MEM_P (op1))
16801 op1 = force_reg (mode, op1);
16802
16803 if (push_operand (op0, mode)
16804 && ! general_no_elim_operand (op1, mode))
16805 op1 = copy_to_mode_reg (mode, op1);
16806
16807 /* Force large constants in 64bit compilation into register
16808 to get them CSEed. */
16809 if (can_create_pseudo_p ()
16810 && (mode == DImode) && TARGET_64BIT
16811 && immediate_operand (op1, mode)
16812 && !x86_64_zext_immediate_operand (op1, VOIDmode)
16813 && !register_operand (op0, mode)
16814 && optimize)
16815 op1 = copy_to_mode_reg (mode, op1);
16816
16817 if (can_create_pseudo_p ()
16818 && FLOAT_MODE_P (mode)
16819 && GET_CODE (op1) == CONST_DOUBLE)
16820 {
16821 /* If we are loading a floating point constant to a register,
16822 force the value to memory now, since we'll get better code
16823 out the back end. */
16824
16825 op1 = validize_mem (force_const_mem (mode, op1));
16826 if (!register_operand (op0, mode))
16827 {
16828 rtx temp = gen_reg_rtx (mode);
16829 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
16830 emit_move_insn (op0, temp);
16831 return;
16832 }
16833 }
16834 }
16835
16836 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
16837 }
16838
16839 void
16840 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
16841 {
16842 rtx op0 = operands[0], op1 = operands[1];
16843 unsigned int align = GET_MODE_ALIGNMENT (mode);
16844
16845 if (push_operand (op0, VOIDmode))
16846 op0 = emit_move_resolve_push (mode, op0);
16847
16848 /* Force constants other than zero into memory. We do not know how
16849 the instructions used to build constants modify the upper 64 bits
16850 of the register; once we have that information we may be able
16851 to handle some of them more efficiently. */
16852 if (can_create_pseudo_p ()
16853 && register_operand (op0, mode)
16854 && (CONSTANT_P (op1)
16855 || (GET_CODE (op1) == SUBREG
16856 && CONSTANT_P (SUBREG_REG (op1))))
16857 && !standard_sse_constant_p (op1))
16858 op1 = validize_mem (force_const_mem (mode, op1));
16859
16860 /* We need to check memory alignment for SSE mode since attributes
16861 can make operands unaligned. */
16862 if (can_create_pseudo_p ()
16863 && SSE_REG_MODE_P (mode)
16864 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
16865 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
16866 {
16867 rtx tmp[2];
16868
16869 /* ix86_expand_vector_move_misalign() does not like constants ... */
16870 if (CONSTANT_P (op1)
16871 || (GET_CODE (op1) == SUBREG
16872 && CONSTANT_P (SUBREG_REG (op1))))
16873 op1 = validize_mem (force_const_mem (mode, op1));
16874
16875 /* ... nor both arguments in memory. */
16876 if (!register_operand (op0, mode)
16877 && !register_operand (op1, mode))
16878 op1 = force_reg (mode, op1);
16879
16880 tmp[0] = op0; tmp[1] = op1;
16881 ix86_expand_vector_move_misalign (mode, tmp);
16882 return;
16883 }
16884
16885 /* Make operand1 a register if it isn't already. */
16886 if (can_create_pseudo_p ()
16887 && !register_operand (op0, mode)
16888 && !register_operand (op1, mode))
16889 {
16890 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
16891 return;
16892 }
16893
16894 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
16895 }
16896
16897 /* Split 32-byte AVX unaligned load and store if needed. */
16898
16899 static void
16900 ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
16901 {
16902 rtx m;
16903 rtx (*extract) (rtx, rtx, rtx);
16904 rtx (*load_unaligned) (rtx, rtx);
16905 rtx (*store_unaligned) (rtx, rtx);
16906 enum machine_mode mode;
16907
16908 switch (GET_MODE (op0))
16909 {
16910 default:
16911 gcc_unreachable ();
16912 case V32QImode:
16913 extract = gen_avx_vextractf128v32qi;
16914 load_unaligned = gen_avx_loaddquv32qi;
16915 store_unaligned = gen_avx_storedquv32qi;
16916 mode = V16QImode;
16917 break;
16918 case V8SFmode:
16919 extract = gen_avx_vextractf128v8sf;
16920 load_unaligned = gen_avx_loadups256;
16921 store_unaligned = gen_avx_storeups256;
16922 mode = V4SFmode;
16923 break;
16924 case V4DFmode:
16925 extract = gen_avx_vextractf128v4df;
16926 load_unaligned = gen_avx_loadupd256;
16927 store_unaligned = gen_avx_storeupd256;
16928 mode = V2DFmode;
16929 break;
16930 }
16931
16932 if (MEM_P (op1))
16933 {
16934 if (TARGET_AVX256_SPLIT_UNALIGNED_LOAD)
16935 {
16936 rtx r = gen_reg_rtx (mode);
16937 m = adjust_address (op1, mode, 0);
16938 emit_move_insn (r, m);
16939 m = adjust_address (op1, mode, 16);
16940 r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
16941 emit_move_insn (op0, r);
16942 }
16943 /* Normal *mov<mode>_internal pattern will handle
16944 unaligned loads just fine if misaligned_operand
16945 is true, and without the UNSPEC it can be combined
16946 with arithmetic instructions. */
16947 else if (misaligned_operand (op1, GET_MODE (op1)))
16948 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
16949 else
16950 emit_insn (load_unaligned (op0, op1));
16951 }
16952 else if (MEM_P (op0))
16953 {
16954 if (TARGET_AVX256_SPLIT_UNALIGNED_STORE)
16955 {
16956 m = adjust_address (op0, mode, 0);
16957 emit_insn (extract (m, op1, const0_rtx));
16958 m = adjust_address (op0, mode, 16);
16959 emit_insn (extract (m, op1, const1_rtx));
16960 }
16961 else
16962 emit_insn (store_unaligned (op0, op1));
16963 }
16964 else
16965 gcc_unreachable ();
16966 }
16967
16968 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
16969 straight to ix86_expand_vector_move. */
16970 /* Code generation for scalar reg-reg moves of single and double precision data:
16971 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
16972 movaps reg, reg
16973 else
16974 movss reg, reg
16975 if (x86_sse_partial_reg_dependency == true)
16976 movapd reg, reg
16977 else
16978 movsd reg, reg
16979
16980 Code generation for scalar loads of double precision data:
16981 if (x86_sse_split_regs == true)
16982 movlpd mem, reg (gas syntax)
16983 else
16984 movsd mem, reg
16985
16986 Code generation for unaligned packed loads of single precision data
16987 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
16988 if (x86_sse_unaligned_move_optimal)
16989 movups mem, reg
16990
16991 if (x86_sse_partial_reg_dependency == true)
16992 {
16993 xorps reg, reg
16994 movlps mem, reg
16995 movhps mem+8, reg
16996 }
16997 else
16998 {
16999 movlps mem, reg
17000 movhps mem+8, reg
17001 }
17002
17003 Code generation for unaligned packed loads of double precision data
17004 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
17005 if (x86_sse_unaligned_move_optimal)
17006 movupd mem, reg
17007
17008 if (x86_sse_split_regs == true)
17009 {
17010 movlpd mem, reg
17011 movhpd mem+8, reg
17012 }
17013 else
17014 {
17015 movsd mem, reg
17016 movhpd mem+8, reg
17017 }
17018 */
17019
17020 void
17021 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
17022 {
17023 rtx op0, op1, orig_op0 = NULL_RTX, m;
17024 rtx (*load_unaligned) (rtx, rtx);
17025 rtx (*store_unaligned) (rtx, rtx);
17026
17027 op0 = operands[0];
17028 op1 = operands[1];
17029
17030 if (GET_MODE_SIZE (mode) == 64)
17031 {
17032 switch (GET_MODE_CLASS (mode))
17033 {
17034 case MODE_VECTOR_INT:
17035 case MODE_INT:
17036 if (GET_MODE (op0) != V16SImode)
17037 {
17038 if (!MEM_P (op0))
17039 {
17040 orig_op0 = op0;
17041 op0 = gen_reg_rtx (V16SImode);
17042 }
17043 else
17044 op0 = gen_lowpart (V16SImode, op0);
17045 }
17046 op1 = gen_lowpart (V16SImode, op1);
17047 /* FALLTHRU */
17048
17049 case MODE_VECTOR_FLOAT:
17050 switch (GET_MODE (op0))
17051 {
17052 default:
17053 gcc_unreachable ();
17054 case V16SImode:
17055 load_unaligned = gen_avx512f_loaddquv16si;
17056 store_unaligned = gen_avx512f_storedquv16si;
17057 break;
17058 case V16SFmode:
17059 load_unaligned = gen_avx512f_loadups512;
17060 store_unaligned = gen_avx512f_storeups512;
17061 break;
17062 case V8DFmode:
17063 load_unaligned = gen_avx512f_loadupd512;
17064 store_unaligned = gen_avx512f_storeupd512;
17065 break;
17066 }
17067
17068 if (MEM_P (op1))
17069 emit_insn (load_unaligned (op0, op1));
17070 else if (MEM_P (op0))
17071 emit_insn (store_unaligned (op0, op1));
17072 else
17073 gcc_unreachable ();
17074 if (orig_op0)
17075 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17076 break;
17077
17078 default:
17079 gcc_unreachable ();
17080 }
17081
17082 return;
17083 }
17084
17085 if (TARGET_AVX
17086 && GET_MODE_SIZE (mode) == 32)
17087 {
17088 switch (GET_MODE_CLASS (mode))
17089 {
17090 case MODE_VECTOR_INT:
17091 case MODE_INT:
17092 if (GET_MODE (op0) != V32QImode)
17093 {
17094 if (!MEM_P (op0))
17095 {
17096 orig_op0 = op0;
17097 op0 = gen_reg_rtx (V32QImode);
17098 }
17099 else
17100 op0 = gen_lowpart (V32QImode, op0);
17101 }
17102 op1 = gen_lowpart (V32QImode, op1);
17103 /* FALLTHRU */
17104
17105 case MODE_VECTOR_FLOAT:
17106 ix86_avx256_split_vector_move_misalign (op0, op1);
17107 if (orig_op0)
17108 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17109 break;
17110
17111 default:
17112 gcc_unreachable ();
17113 }
17114
17115 return;
17116 }
17117
17118 if (MEM_P (op1))
17119 {
17120 /* Normal *mov<mode>_internal pattern will handle
17121 unaligned loads just fine if misaligned_operand
17122 is true, and without the UNSPEC it can be combined
17123 with arithmetic instructions. */
17124 if (TARGET_AVX
17125 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
17126 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
17127 && misaligned_operand (op1, GET_MODE (op1)))
17128 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17129 /* ??? If we have typed data, then it would appear that using
17130 movdqu is the only way to get unaligned data loaded with
17131 integer type. */
17132 else if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17133 {
17134 if (GET_MODE (op0) != V16QImode)
17135 {
17136 orig_op0 = op0;
17137 op0 = gen_reg_rtx (V16QImode);
17138 }
17139 op1 = gen_lowpart (V16QImode, op1);
17140 /* We will eventually emit movups based on insn attributes. */
17141 emit_insn (gen_sse2_loaddquv16qi (op0, op1));
17142 if (orig_op0)
17143 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17144 }
17145 else if (TARGET_SSE2 && mode == V2DFmode)
17146 {
17147 rtx zero;
17148
17149 if (TARGET_AVX
17150 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17151 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17152 || optimize_insn_for_size_p ())
17153 {
17154 /* We will eventually emit movups based on insn attributes. */
17155 emit_insn (gen_sse2_loadupd (op0, op1));
17156 return;
17157 }
17158
17159 /* When SSE registers are split into halves, we can avoid
17160 writing to the top half twice. */
17161 if (TARGET_SSE_SPLIT_REGS)
17162 {
17163 emit_clobber (op0);
17164 zero = op0;
17165 }
17166 else
17167 {
17168 /* ??? Not sure about the best option for the Intel chips.
17169 The following would seem to satisfy; the register is
17170 entirely cleared, breaking the dependency chain. We
17171 then store to the upper half, with a dependency depth
17172 of one. A rumor has it that Intel recommends two movsd
17173 followed by an unpacklpd, but this is unconfirmed. And
17174 given that the dependency depth of the unpacklpd would
17175 still be one, I'm not sure why this would be better. */
17176 zero = CONST0_RTX (V2DFmode);
17177 }
17178
17179 m = adjust_address (op1, DFmode, 0);
17180 emit_insn (gen_sse2_loadlpd (op0, zero, m));
17181 m = adjust_address (op1, DFmode, 8);
17182 emit_insn (gen_sse2_loadhpd (op0, op0, m));
17183 }
17184 else
17185 {
17186 rtx t;
17187
17188 if (TARGET_AVX
17189 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17190 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17191 || optimize_insn_for_size_p ())
17192 {
17193 if (GET_MODE (op0) != V4SFmode)
17194 {
17195 orig_op0 = op0;
17196 op0 = gen_reg_rtx (V4SFmode);
17197 }
17198 op1 = gen_lowpart (V4SFmode, op1);
17199 emit_insn (gen_sse_loadups (op0, op1));
17200 if (orig_op0)
17201 emit_move_insn (orig_op0,
17202 gen_lowpart (GET_MODE (orig_op0), op0));
17203 return;
17204 }
17205
17206 if (mode != V4SFmode)
17207 t = gen_reg_rtx (V4SFmode);
17208 else
17209 t = op0;
17210
17211 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
17212 emit_move_insn (t, CONST0_RTX (V4SFmode));
17213 else
17214 emit_clobber (t);
17215
17216 m = adjust_address (op1, V2SFmode, 0);
17217 emit_insn (gen_sse_loadlps (t, t, m));
17218 m = adjust_address (op1, V2SFmode, 8);
17219 emit_insn (gen_sse_loadhps (t, t, m));
17220 if (mode != V4SFmode)
17221 emit_move_insn (op0, gen_lowpart (mode, t));
17222 }
17223 }
17224 else if (MEM_P (op0))
17225 {
17226 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17227 {
17228 op0 = gen_lowpart (V16QImode, op0);
17229 op1 = gen_lowpart (V16QImode, op1);
17230 /* We will eventually emit movups based on insn attributes. */
17231 emit_insn (gen_sse2_storedquv16qi (op0, op1));
17232 }
17233 else if (TARGET_SSE2 && mode == V2DFmode)
17234 {
17235 if (TARGET_AVX
17236 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17237 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17238 || optimize_insn_for_size_p ())
17239 /* We will eventually emit movups based on insn attributes. */
17240 emit_insn (gen_sse2_storeupd (op0, op1));
17241 else
17242 {
17243 m = adjust_address (op0, DFmode, 0);
17244 emit_insn (gen_sse2_storelpd (m, op1));
17245 m = adjust_address (op0, DFmode, 8);
17246 emit_insn (gen_sse2_storehpd (m, op1));
17247 }
17248 }
17249 else
17250 {
17251 if (mode != V4SFmode)
17252 op1 = gen_lowpart (V4SFmode, op1);
17253
17254 if (TARGET_AVX
17255 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17256 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17257 || optimize_insn_for_size_p ())
17258 {
17259 op0 = gen_lowpart (V4SFmode, op0);
17260 emit_insn (gen_sse_storeups (op0, op1));
17261 }
17262 else
17263 {
17264 m = adjust_address (op0, V2SFmode, 0);
17265 emit_insn (gen_sse_storelps (m, op1));
17266 m = adjust_address (op0, V2SFmode, 8);
17267 emit_insn (gen_sse_storehps (m, op1));
17268 }
17269 }
17270 }
17271 else
17272 gcc_unreachable ();
17273 }
17274
17275 /* Helper function of ix86_fixup_binary_operands to canonicalize
17276 operand order. Returns true if the operands should be swapped. */
17277
17278 static bool
17279 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
17280 rtx operands[])
17281 {
17282 rtx dst = operands[0];
17283 rtx src1 = operands[1];
17284 rtx src2 = operands[2];
17285
17286 /* If the operation is not commutative, we can't do anything. */
17287 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
17288 return false;
17289
17290 /* Highest priority is that src1 should match dst. */
17291 if (rtx_equal_p (dst, src1))
17292 return false;
17293 if (rtx_equal_p (dst, src2))
17294 return true;
17295
17296 /* Next highest priority is that immediate constants come second. */
17297 if (immediate_operand (src2, mode))
17298 return false;
17299 if (immediate_operand (src1, mode))
17300 return true;
17301
17302 /* Lowest priority is that memory references should come second. */
17303 if (MEM_P (src2))
17304 return false;
17305 if (MEM_P (src1))
17306 return true;
17307
17308 return false;
17309 }
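
/* A small illustration (hypothetical operand values): for a commutative
   PLUS with operands { r1, mem, r1 }, dst matches src2, so the function
   returns true and the caller swaps src1/src2; the insn then has the
   two-address form r1 = r1 + mem.  Likewise an immediate or a memory
   reference in src1 is pushed into the second slot.  */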
17310
17311
17312 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
17313 destination to use for the operation. If different from the true
17314 destination in operands[0], a copy operation will be required. */
17315
17316 rtx
17317 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
17318 rtx operands[])
17319 {
17320 rtx dst = operands[0];
17321 rtx src1 = operands[1];
17322 rtx src2 = operands[2];
17323
17324 /* Canonicalize operand order. */
17325 if (ix86_swap_binary_operands_p (code, mode, operands))
17326 {
17327 rtx temp;
17328
17329 /* It is invalid to swap operands of different modes. */
17330 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
17331
17332 temp = src1;
17333 src1 = src2;
17334 src2 = temp;
17335 }
17336
17337 /* Both source operands cannot be in memory. */
17338 if (MEM_P (src1) && MEM_P (src2))
17339 {
17340 /* Optimization: Only read from memory once. */
17341 if (rtx_equal_p (src1, src2))
17342 {
17343 src2 = force_reg (mode, src2);
17344 src1 = src2;
17345 }
17346 else if (rtx_equal_p (dst, src1))
17347 src2 = force_reg (mode, src2);
17348 else
17349 src1 = force_reg (mode, src1);
17350 }
17351
17352 /* If the destination is memory, and we do not have matching source
17353 operands, do things in registers. */
17354 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
17355 dst = gen_reg_rtx (mode);
17356
17357 /* Source 1 cannot be a constant. */
17358 if (CONSTANT_P (src1))
17359 src1 = force_reg (mode, src1);
17360
17361 /* Source 1 cannot be a non-matching memory. */
17362 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
17363 src1 = force_reg (mode, src1);
17364
17365 /* Improve address combine. */
17366 if (code == PLUS
17367 && GET_MODE_CLASS (mode) == MODE_INT
17368 && MEM_P (src2))
17369 src2 = force_reg (mode, src2);
17370
17371 operands[1] = src1;
17372 operands[2] = src2;
17373 return dst;
17374 }
17375
17376 /* Similarly, but assume that the destination has already been
17377 set up properly. */
17378
17379 void
17380 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
17381 enum machine_mode mode, rtx operands[])
17382 {
17383 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
17384 gcc_assert (dst == operands[0]);
17385 }
17386
17387 /* Attempt to expand a binary operator. Make the expansion closer to the
17388 actual machine than just general_operand, which would allow 3 separate
17389 memory references (one output, two input) in a single insn. */
17390
17391 void
17392 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
17393 rtx operands[])
17394 {
17395 rtx src1, src2, dst, op, clob;
17396
17397 dst = ix86_fixup_binary_operands (code, mode, operands);
17398 src1 = operands[1];
17399 src2 = operands[2];
17400
17401 /* Emit the instruction. */
17402
17403 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
17404 if (reload_in_progress)
17405 {
17406 /* Reload doesn't know about the flags register, and doesn't know that
17407 it doesn't want to clobber it. We can only do this with PLUS. */
17408 gcc_assert (code == PLUS);
17409 emit_insn (op);
17410 }
17411 else if (reload_completed
17412 && code == PLUS
17413 && !rtx_equal_p (dst, src1))
17414 {
17415 /* This is going to be an LEA; avoid splitting it later. */
17416 emit_insn (op);
17417 }
17418 else
17419 {
17420 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17421 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
17422 }
17423
17424 /* Fix up the destination if needed. */
17425 if (dst != operands[0])
17426 emit_move_insn (operands[0], dst);
17427 }
17428
17429 /* Expand vector logical operation CODE (AND, IOR, XOR) in MODE with
17430 the given OPERANDS. */
17431
17432 void
17433 ix86_expand_vector_logical_operator (enum rtx_code code, enum machine_mode mode,
17434 rtx operands[])
17435 {
17436 rtx op1 = NULL_RTX, op2 = NULL_RTX;
17437 if (GET_CODE (operands[1]) == SUBREG)
17438 {
17439 op1 = operands[1];
17440 op2 = operands[2];
17441 }
17442 else if (GET_CODE (operands[2]) == SUBREG)
17443 {
17444 op1 = operands[2];
17445 op2 = operands[1];
17446 }
17447 /* Optimize (__m128i) d | (__m128i) e and similar code,
17448 when d and e are float vectors, into a float vector logical
17449 insn. In C/C++ without using intrinsics there is no other way
17450 to express a vector logical operation on float vectors than
17451 to cast them temporarily to integer vectors. */
17452 if (op1
17453 && !TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17454 && ((GET_CODE (op2) == SUBREG || GET_CODE (op2) == CONST_VECTOR))
17455 && GET_MODE_CLASS (GET_MODE (SUBREG_REG (op1))) == MODE_VECTOR_FLOAT
17456 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op1))) == GET_MODE_SIZE (mode)
17457 && SUBREG_BYTE (op1) == 0
17458 && (GET_CODE (op2) == CONST_VECTOR
17459 || (GET_MODE (SUBREG_REG (op1)) == GET_MODE (SUBREG_REG (op2))
17460 && SUBREG_BYTE (op2) == 0))
17461 && can_create_pseudo_p ())
17462 {
17463 rtx dst;
17464 switch (GET_MODE (SUBREG_REG (op1)))
17465 {
17466 case V4SFmode:
17467 case V8SFmode:
17468 case V2DFmode:
17469 case V4DFmode:
17470 dst = gen_reg_rtx (GET_MODE (SUBREG_REG (op1)));
17471 if (GET_CODE (op2) == CONST_VECTOR)
17472 {
17473 op2 = gen_lowpart (GET_MODE (dst), op2);
17474 op2 = force_reg (GET_MODE (dst), op2);
17475 }
17476 else
17477 {
17478 op1 = operands[1];
17479 op2 = SUBREG_REG (operands[2]);
17480 if (!nonimmediate_operand (op2, GET_MODE (dst)))
17481 op2 = force_reg (GET_MODE (dst), op2);
17482 }
17483 op1 = SUBREG_REG (op1);
17484 if (!nonimmediate_operand (op1, GET_MODE (dst)))
17485 op1 = force_reg (GET_MODE (dst), op1);
17486 emit_insn (gen_rtx_SET (VOIDmode, dst,
17487 gen_rtx_fmt_ee (code, GET_MODE (dst),
17488 op1, op2)));
17489 emit_move_insn (operands[0], gen_lowpart (mode, dst));
17490 return;
17491 default:
17492 break;
17493 }
17494 }
17495 if (!nonimmediate_operand (operands[1], mode))
17496 operands[1] = force_reg (mode, operands[1]);
17497 if (!nonimmediate_operand (operands[2], mode))
17498 operands[2] = force_reg (mode, operands[2]);
17499 ix86_fixup_binary_operands_no_copy (code, mode, operands);
17500 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
17501 gen_rtx_fmt_ee (code, mode, operands[1],
17502 operands[2])));
17503 }
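
/* User-level shape of the code the SUBREG special case above targets
   (illustrative; relies on the usual vector extensions):

     __m128 d, e;
     __m128i t = (__m128i) d | (__m128i) e;

   Instead of bouncing the values through the integer domain, the IOR is
   rewritten on the V4SF operands so that it can become a single
   orps-style instruction.  */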
17504
17505 /* Return TRUE or FALSE depending on whether the binary operator meets the
17506 appropriate constraints. */
17507
17508 bool
17509 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
17510 rtx operands[3])
17511 {
17512 rtx dst = operands[0];
17513 rtx src1 = operands[1];
17514 rtx src2 = operands[2];
17515
17516 /* Both source operands cannot be in memory. */
17517 if (MEM_P (src1) && MEM_P (src2))
17518 return false;
17519
17520 /* Canonicalize operand order for commutative operators. */
17521 if (ix86_swap_binary_operands_p (code, mode, operands))
17522 {
17523 rtx temp = src1;
17524 src1 = src2;
17525 src2 = temp;
17526 }
17527
17528 /* If the destination is memory, we must have a matching source operand. */
17529 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
17530 return false;
17531
17532 /* Source 1 cannot be a constant. */
17533 if (CONSTANT_P (src1))
17534 return false;
17535
17536 /* Source 1 cannot be a non-matching memory. */
17537 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
17538 /* Support "andhi/andsi/anddi" as a zero-extending move. */
17539 return (code == AND
17540 && (mode == HImode
17541 || mode == SImode
17542 || (TARGET_64BIT && mode == DImode))
17543 && satisfies_constraint_L (src2));
17544
17545 return true;
17546 }
17547
17548 /* Attempt to expand a unary operator. Make the expansion closer to the
17549 actual machine than just general_operand, which would allow 2 separate
17550 memory references (one output, one input) in a single insn. */
17551
17552 void
17553 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
17554 rtx operands[])
17555 {
17556 int matching_memory;
17557 rtx src, dst, op, clob;
17558
17559 dst = operands[0];
17560 src = operands[1];
17561
17562 /* If the destination is memory, and we do not have matching source
17563 operands, do things in registers. */
17564 matching_memory = 0;
17565 if (MEM_P (dst))
17566 {
17567 if (rtx_equal_p (dst, src))
17568 matching_memory = 1;
17569 else
17570 dst = gen_reg_rtx (mode);
17571 }
17572
17573 /* When source operand is memory, destination must match. */
17574 if (MEM_P (src) && !matching_memory)
17575 src = force_reg (mode, src);
17576
17577 /* Emit the instruction. */
17578
17579 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
17580 if (reload_in_progress || code == NOT)
17581 {
17582 /* Reload doesn't know about the flags register, and doesn't know that
17583 it doesn't want to clobber it. */
17584 gcc_assert (code == NOT);
17585 emit_insn (op);
17586 }
17587 else
17588 {
17589 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17590 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
17591 }
17592
17593 /* Fix up the destination if needed. */
17594 if (dst != operands[0])
17595 emit_move_insn (operands[0], dst);
17596 }
17597
17598 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
17599 divisor are within the range [0-255]. */
17600
17601 void
17602 ix86_split_idivmod (enum machine_mode mode, rtx operands[],
17603 bool signed_p)
17604 {
17605 rtx end_label, qimode_label;
17606 rtx insn, div, mod;
17607 rtx scratch, tmp0, tmp1, tmp2;
17608 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
17609 rtx (*gen_zero_extend) (rtx, rtx);
17610 rtx (*gen_test_ccno_1) (rtx, rtx);
17611
17612 switch (mode)
17613 {
17614 case SImode:
17615 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
17616 gen_test_ccno_1 = gen_testsi_ccno_1;
17617 gen_zero_extend = gen_zero_extendqisi2;
17618 break;
17619 case DImode:
17620 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
17621 gen_test_ccno_1 = gen_testdi_ccno_1;
17622 gen_zero_extend = gen_zero_extendqidi2;
17623 break;
17624 default:
17625 gcc_unreachable ();
17626 }
17627
17628 end_label = gen_label_rtx ();
17629 qimode_label = gen_label_rtx ();
17630
17631 scratch = gen_reg_rtx (mode);
17632
17633 /* Use 8bit unsigned divmod if dividend and divisor are within
17634 the range [0-255]. */
17635 emit_move_insn (scratch, operands[2]);
17636 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
17637 scratch, 1, OPTAB_DIRECT);
17638 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
17639 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
17640 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
17641 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
17642 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
17643 pc_rtx);
17644 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
17645 predict_jump (REG_BR_PROB_BASE * 50 / 100);
17646 JUMP_LABEL (insn) = qimode_label;
17647
17648 /* Generate original signed/unsigned divmod. */
17649 div = gen_divmod4_1 (operands[0], operands[1],
17650 operands[2], operands[3]);
17651 emit_insn (div);
17652
17653 /* Branch to the end. */
17654 emit_jump_insn (gen_jump (end_label));
17655 emit_barrier ();
17656
17657 /* Generate 8bit unsigned divide. */
17658 emit_label (qimode_label);
17659 /* Don't use operands[0] for result of 8bit divide since not all
17660 registers support QImode ZERO_EXTRACT. */
17661 tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
17662 tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
17663 tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
17664 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
17665
17666 if (signed_p)
17667 {
17668 div = gen_rtx_DIV (SImode, operands[2], operands[3]);
17669 mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
17670 }
17671 else
17672 {
17673 div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
17674 mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
17675 }
17676
17677 /* Extract remainder from AH. */
17678 tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
17679 if (REG_P (operands[1]))
17680 insn = emit_move_insn (operands[1], tmp1);
17681 else
17682 {
17683 /* Need a new scratch register since the old one has result
17684 of 8bit divide. */
17685 scratch = gen_reg_rtx (mode);
17686 emit_move_insn (scratch, tmp1);
17687 insn = emit_move_insn (operands[1], scratch);
17688 }
17689 set_unique_reg_note (insn, REG_EQUAL, mod);
17690
17691 /* Zero extend quotient from AL. */
17692 tmp1 = gen_lowpart (QImode, tmp0);
17693 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
17694 set_unique_reg_note (insn, REG_EQUAL, div);
17695
17696 emit_label (end_label);
17697 }
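
/* For illustration, the code emitted above for an SImode unsigned divmod
   is roughly the following (register choice and scheduling are only
   indicative; the signed variant uses idivl on the wide path):

	movl	%esi, %ecx		# scratch = dividend
	orl	%edi, %ecx		# scratch |= divisor
	testl	$-256, %ecx		# any bit above the low 8 set?
	je	.Lqimode
	xorl	%edx, %edx
	movl	%esi, %eax
	divl	%edi			# full 32-bit divide
	jmp	.Lend
.Lqimode:
	movzbl	%sil, %eax
	divb	%dil			# AL = quotient, AH = remainder
	movzbl	%ah, %edx		# remainder extracted from AH
	movzbl	%al, %eax		# quotient zero-extended from AL
.Lend:
 */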
17698
17699 /* Whether it is OK to emit CFI directives when emitting asm code. */
17700
17701 bool
17702 ix86_emit_cfi ()
17703 {
17704 return dwarf2out_do_cfi_asm ();
17705 }
17706
17707 #define LEA_MAX_STALL (3)
17708 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
17709
17710 /* Increase given DISTANCE in half-cycles according to
17711 dependencies between PREV and NEXT instructions.
17712 Add 1 half-cycle if there is no dependency and
17713 go to the next cycle if there is some dependency. */
17714
17715 static unsigned int
17716 increase_distance (rtx prev, rtx next, unsigned int distance)
17717 {
17718 df_ref *use_rec;
17719 df_ref *def_rec;
17720
17721 if (!prev || !next)
17722 return distance + (distance & 1) + 2;
17723
17724 if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
17725 return distance + 1;
17726
17727 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
17728 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
17729 if (!DF_REF_IS_ARTIFICIAL (*def_rec)
17730 && DF_REF_REGNO (*use_rec) == DF_REF_REGNO (*def_rec))
17731 return distance + (distance & 1) + 2;
17732
17733 return distance + 1;
17734 }
17735
17736 /* Function checks if instruction INSN defines register number
17737 REGNO1 or REGNO2. */
17738
17739 static bool
17740 insn_defines_reg (unsigned int regno1, unsigned int regno2,
17741 rtx insn)
17742 {
17743 df_ref *def_rec;
17744
17745 for (def_rec = DF_INSN_DEFS (insn); *def_rec; def_rec++)
17746 if (DF_REF_REG_DEF_P (*def_rec)
17747 && !DF_REF_IS_ARTIFICIAL (*def_rec)
17748 && (regno1 == DF_REF_REGNO (*def_rec)
17749 || regno2 == DF_REF_REGNO (*def_rec)))
17750 {
17751 return true;
17752 }
17753
17754 return false;
17755 }
17756
17757 /* Function checks if instruction INSN uses register number
17758 REGNO as part of an address expression. */
17759
17760 static bool
17761 insn_uses_reg_mem (unsigned int regno, rtx insn)
17762 {
17763 df_ref *use_rec;
17764
17765 for (use_rec = DF_INSN_USES (insn); *use_rec; use_rec++)
17766 if (DF_REF_REG_MEM_P (*use_rec) && regno == DF_REF_REGNO (*use_rec))
17767 return true;
17768
17769 return false;
17770 }
17771
17772 /* Search backward for non-agu definition of register number REGNO1
17773 or register number REGNO2 in basic block starting from instruction
17774 START up to head of basic block or instruction INSN.
17775
17776 Function puts true value into *FOUND var if definition was found
17777 and false otherwise.
17778
17779 Distance in half-cycles between START and found instruction or head
17780 of BB is added to DISTANCE and returned. */
17781
17782 static int
17783 distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
17784 rtx insn, int distance,
17785 rtx start, bool *found)
17786 {
17787 basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
17788 rtx prev = start;
17789 rtx next = NULL;
17790
17791 *found = false;
17792
17793 while (prev
17794 && prev != insn
17795 && distance < LEA_SEARCH_THRESHOLD)
17796 {
17797 if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
17798 {
17799 distance = increase_distance (prev, next, distance);
17800 if (insn_defines_reg (regno1, regno2, prev))
17801 {
17802 if (recog_memoized (prev) < 0
17803 || get_attr_type (prev) != TYPE_LEA)
17804 {
17805 *found = true;
17806 return distance;
17807 }
17808 }
17809
17810 next = prev;
17811 }
17812 if (prev == BB_HEAD (bb))
17813 break;
17814
17815 prev = PREV_INSN (prev);
17816 }
17817
17818 return distance;
17819 }
17820
17821 /* Search backward for non-agu definition of register number REGNO1
17822 or register number REGNO2 in INSN's basic block until
17823 1. Pass LEA_SEARCH_THRESHOLD instructions, or
17824 2. Reach neighbour BBs boundary, or
17825 3. Reach agu definition.
17826 Returns the distance between the non-agu definition point and INSN.
17827 If no definition point, returns -1. */
17828
17829 static int
17830 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
17831 rtx insn)
17832 {
17833 basic_block bb = BLOCK_FOR_INSN (insn);
17834 int distance = 0;
17835 bool found = false;
17836
17837 if (insn != BB_HEAD (bb))
17838 distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
17839 distance, PREV_INSN (insn),
17840 &found);
17841
17842 if (!found && distance < LEA_SEARCH_THRESHOLD)
17843 {
17844 edge e;
17845 edge_iterator ei;
17846 bool simple_loop = false;
17847
17848 FOR_EACH_EDGE (e, ei, bb->preds)
17849 if (e->src == bb)
17850 {
17851 simple_loop = true;
17852 break;
17853 }
17854
17855 if (simple_loop)
17856 distance = distance_non_agu_define_in_bb (regno1, regno2,
17857 insn, distance,
17858 BB_END (bb), &found);
17859 else
17860 {
17861 int shortest_dist = -1;
17862 bool found_in_bb = false;
17863
17864 FOR_EACH_EDGE (e, ei, bb->preds)
17865 {
17866 int bb_dist
17867 = distance_non_agu_define_in_bb (regno1, regno2,
17868 insn, distance,
17869 BB_END (e->src),
17870 &found_in_bb);
17871 if (found_in_bb)
17872 {
17873 if (shortest_dist < 0)
17874 shortest_dist = bb_dist;
17875 else if (bb_dist > 0)
17876 shortest_dist = MIN (bb_dist, shortest_dist);
17877
17878 found = true;
17879 }
17880 }
17881
17882 distance = shortest_dist;
17883 }
17884 }
17885
17886 /* get_attr_type may modify recog data. We want to make sure
17887 that recog data is valid for instruction INSN, on which
17888 distance_non_agu_define is called. INSN is unchanged here. */
17889 extract_insn_cached (insn);
17890
17891 if (!found)
17892 return -1;
17893
17894 return distance >> 1;
17895 }
17896
17897 /* Return the distance in half-cycles between INSN and the next
17898 insn that uses register number REGNO in a memory address, added
17899 to DISTANCE.  Return -1 if REGNO is set.
17900
17901 Put true value into *FOUND if register usage was found and
17902 false otherwise.
17903 Put true value into *REDEFINED if register redefinition was
17904 found and false otherwise. */
17905
17906 static int
17907 distance_agu_use_in_bb (unsigned int regno,
17908 rtx insn, int distance, rtx start,
17909 bool *found, bool *redefined)
17910 {
17911 basic_block bb = NULL;
17912 rtx next = start;
17913 rtx prev = NULL;
17914
17915 *found = false;
17916 *redefined = false;
17917
17918 if (start != NULL_RTX)
17919 {
17920 bb = BLOCK_FOR_INSN (start);
17921 if (start != BB_HEAD (bb))
17922 /* If insn and start belong to the same bb, set prev to insn,
17923 so the call to increase_distance will increase the distance
17924 between insns by 1. */
17925 prev = insn;
17926 }
17927
17928 while (next
17929 && next != insn
17930 && distance < LEA_SEARCH_THRESHOLD)
17931 {
17932 if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
17933 {
17934 distance = increase_distance(prev, next, distance);
17935 if (insn_uses_reg_mem (regno, next))
17936 {
17937 /* Return DISTANCE if OP0 is used in memory
17938 address in NEXT. */
17939 *found = true;
17940 return distance;
17941 }
17942
17943 if (insn_defines_reg (regno, INVALID_REGNUM, next))
17944 {
17945 /* Return -1 if OP0 is set in NEXT. */
17946 *redefined = true;
17947 return -1;
17948 }
17949
17950 prev = next;
17951 }
17952
17953 if (next == BB_END (bb))
17954 break;
17955
17956 next = NEXT_INSN (next);
17957 }
17958
17959 return distance;
17960 }
17961
17962 /* Return the distance between INSN and the next insn that uses
17963 register number REGNO0 in a memory address.  Return -1 if no such
17964 use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
17965
17966 static int
17967 distance_agu_use (unsigned int regno0, rtx insn)
17968 {
17969 basic_block bb = BLOCK_FOR_INSN (insn);
17970 int distance = 0;
17971 bool found = false;
17972 bool redefined = false;
17973
17974 if (insn != BB_END (bb))
17975 distance = distance_agu_use_in_bb (regno0, insn, distance,
17976 NEXT_INSN (insn),
17977 &found, &redefined);
17978
17979 if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
17980 {
17981 edge e;
17982 edge_iterator ei;
17983 bool simple_loop = false;
17984
17985 FOR_EACH_EDGE (e, ei, bb->succs)
17986 if (e->dest == bb)
17987 {
17988 simple_loop = true;
17989 break;
17990 }
17991
17992 if (simple_loop)
17993 distance = distance_agu_use_in_bb (regno0, insn,
17994 distance, BB_HEAD (bb),
17995 &found, &redefined);
17996 else
17997 {
17998 int shortest_dist = -1;
17999 bool found_in_bb = false;
18000 bool redefined_in_bb = false;
18001
18002 FOR_EACH_EDGE (e, ei, bb->succs)
18003 {
18004 int bb_dist
18005 = distance_agu_use_in_bb (regno0, insn,
18006 distance, BB_HEAD (e->dest),
18007 &found_in_bb, &redefined_in_bb);
18008 if (found_in_bb)
18009 {
18010 if (shortest_dist < 0)
18011 shortest_dist = bb_dist;
18012 else if (bb_dist > 0)
18013 shortest_dist = MIN (bb_dist, shortest_dist);
18014
18015 found = true;
18016 }
18017 }
18018
18019 distance = shortest_dist;
18020 }
18021 }
18022
18023 if (!found || redefined)
18024 return -1;
18025
18026 return distance >> 1;
18027 }
18028
18029 /* Define this macro to tune LEA priority vs ADD; it takes effect when
18030 there is a dilemma of choosing LEA or ADD.
18031 Negative value: ADD is preferred over LEA
18032 Zero: Neutral
18033 Positive value: LEA is preferred over ADD. */
18034 #define IX86_LEA_PRIORITY 0
18035
18036 /* Return true if usage of lea INSN has performance advantage
18037 over a sequence of instructions.  The instruction sequence has
18038 SPLIT_COST cycles higher latency than lea latency. */
18039
18040 static bool
18041 ix86_lea_outperforms (rtx insn, unsigned int regno0, unsigned int regno1,
18042 unsigned int regno2, int split_cost, bool has_scale)
18043 {
18044 int dist_define, dist_use;
18045
18046 /* For Silvermont, if a 2-source or 3-source LEA is used for a
18047 non-destructive destination, or in order to get the ability to
18048 use SCALE, the use of LEA is justified. */
18049 if (TARGET_SILVERMONT || TARGET_INTEL)
18050 {
18051 if (has_scale)
18052 return true;
18053 if (split_cost < 1)
18054 return false;
18055 if (regno0 == regno1 || regno0 == regno2)
18056 return false;
18057 return true;
18058 }
18059
18060 dist_define = distance_non_agu_define (regno1, regno2, insn);
18061 dist_use = distance_agu_use (regno0, insn);
18062
18063 if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
18064 {
18065 /* If there is no non-AGU operand definition, no AGU
18066 operand usage and the split cost is 0, then both the lea
18067 and non-lea variants have the same priority.  Currently
18068 we prefer lea for 64-bit code and non-lea for 32-bit
18069 code. */
18070 if (dist_use < 0 && split_cost == 0)
18071 return TARGET_64BIT || IX86_LEA_PRIORITY;
18072 else
18073 return true;
18074 }
18075
18076 /* The longer the distance to the definition, the more preferable
18077 the lea is.  Adjust the distance to take the splitting cost and
18078 the lea priority into account. */
18079 dist_define += split_cost + IX86_LEA_PRIORITY;
18080
18081 /* If there is no use in a memory address then we just check
18082 that the split cost exceeds the AGU stall. */
18083 if (dist_use < 0)
18084 return dist_define > LEA_MAX_STALL;
18085
18086 /* If this insn has both backward non-agu dependence and forward
18087 agu dependence, the one with the shorter distance takes effect. */
18088 return dist_define >= dist_use;
18089 }
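
/* A worked example of the heuristic above (the numbers are illustrative):
   suppose the last non-AGU definition of an input register is one cycle
   before the lea (dist_define = 1), the split sequence costs one extra
   cycle (split_cost = 1) and IX86_LEA_PRIORITY is 0; dist_define then
   becomes 2.  If the lea result feeds a memory address 3 cycles later
   (dist_use = 3), the test 2 >= 3 fails and the lea is split.  If no
   address use is found (dist_use < 0), the test becomes
   2 > LEA_MAX_STALL, which also fails, so the lea is split as well.  */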
18090
18091 /* Return true if it is legal to clobber flags by INSN and
18092 false otherwise. */
18093
18094 static bool
18095 ix86_ok_to_clobber_flags (rtx insn)
18096 {
18097 basic_block bb = BLOCK_FOR_INSN (insn);
18098 df_ref *use;
18099 bitmap live;
18100
18101 while (insn)
18102 {
18103 if (NONDEBUG_INSN_P (insn))
18104 {
18105 for (use = DF_INSN_USES (insn); *use; use++)
18106 if (DF_REF_REG_USE_P (*use) && DF_REF_REGNO (*use) == FLAGS_REG)
18107 return false;
18108
18109 if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
18110 return true;
18111 }
18112
18113 if (insn == BB_END (bb))
18114 break;
18115
18116 insn = NEXT_INSN (insn);
18117 }
18118
18119 live = df_get_live_out(bb);
18120 return !REGNO_REG_SET_P (live, FLAGS_REG);
18121 }
18122
18123 /* Return true if we need to split op0 = op1 + op2 into a sequence of
18124 move and add to avoid AGU stalls. */
18125
18126 bool
18127 ix86_avoid_lea_for_add (rtx insn, rtx operands[])
18128 {
18129 unsigned int regno0, regno1, regno2;
18130
18131 /* Check if we need to optimize. */
18132 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18133 return false;
18134
18135 /* Check it is correct to split here. */
18136 if (!ix86_ok_to_clobber_flags(insn))
18137 return false;
18138
18139 regno0 = true_regnum (operands[0]);
18140 regno1 = true_regnum (operands[1]);
18141 regno2 = true_regnum (operands[2]);
18142
18143 /* We need to split only adds with a non-destructive
18144 destination operand. */
18145 if (regno0 == regno1 || regno0 == regno2)
18146 return false;
18147 else
18148 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false);
18149 }
18150
18151 /* Return true if we should emit lea instruction instead of mov
18152 instruction. */
18153
18154 bool
18155 ix86_use_lea_for_mov (rtx insn, rtx operands[])
18156 {
18157 unsigned int regno0, regno1;
18158
18159 /* Check if we need to optimize. */
18160 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18161 return false;
18162
18163 /* Use lea for reg to reg moves only. */
18164 if (!REG_P (operands[0]) || !REG_P (operands[1]))
18165 return false;
18166
18167 regno0 = true_regnum (operands[0]);
18168 regno1 = true_regnum (operands[1]);
18169
18170 return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false);
18171 }
18172
18173 /* Return true if we need to split lea into a sequence of
18174 instructions to avoid AGU stalls. */
18175
18176 bool
18177 ix86_avoid_lea_for_addr (rtx insn, rtx operands[])
18178 {
18179 unsigned int regno0, regno1, regno2;
18180 int split_cost;
18181 struct ix86_address parts;
18182 int ok;
18183
18184 /* Check we need to optimize. */
18185 if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun))
18186 return false;
18187
18188 /* The "at least two components" test below might not catch simple
18189 move or zero extension insns if parts.base is non-NULL and parts.disp
18190 is const0_rtx as the only components in the address, e.g. if the
18191 register is %rbp or %r13. As this test is much cheaper and moves or
18192 zero extensions are the common case, do this check first. */
18193 if (REG_P (operands[1])
18194 || (SImode_address_operand (operands[1], VOIDmode)
18195 && REG_P (XEXP (operands[1], 0))))
18196 return false;
18197
18198 /* Check if it is OK to split here. */
18199 if (!ix86_ok_to_clobber_flags (insn))
18200 return false;
18201
18202 ok = ix86_decompose_address (operands[1], &parts);
18203 gcc_assert (ok);
18204
18205 /* There should be at least two components in the address. */
18206 if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
18207 + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
18208 return false;
18209
18210 /* We should not split into an add if a non-legitimate PIC
18211 operand is used as the displacement. */
18212 if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
18213 return false;
18214
18215 regno0 = true_regnum (operands[0]);
18216 regno1 = INVALID_REGNUM;
18217 regno2 = INVALID_REGNUM;
18218
18219 if (parts.base)
18220 regno1 = true_regnum (parts.base);
18221 if (parts.index)
18222 regno2 = true_regnum (parts.index);
18223
18224 split_cost = 0;
18225
18226 /* Compute how many cycles we will add to execution time
18227 if we split the lea into a sequence of instructions. */
18228 if (parts.base || parts.index)
18229 {
18230 /* Have to use a mov instruction if the non-destructive
18231 destination form is used. */
18232 if (regno1 != regno0 && regno2 != regno0)
18233 split_cost += 1;
18234
18235 /* Have to add index to base if both exist. */
18236 if (parts.base && parts.index)
18237 split_cost += 1;
18238
18239 /* Have to use shift and adds if scale is 2 or greater. */
18240 if (parts.scale > 1)
18241 {
18242 if (regno0 != regno1)
18243 split_cost += 1;
18244 else if (regno2 == regno0)
18245 split_cost += 4;
18246 else
18247 split_cost += parts.scale;
18248 }
18249
18250 /* Have to use add instruction with immediate if
18251 disp is non zero. */
18252 if (parts.disp && parts.disp != const0_rtx)
18253 split_cost += 1;
18254
18255 /* Subtract the price of lea. */
18256 split_cost -= 1;
18257 }
18258
18259 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
18260 parts.scale > 1);
18261 }
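
/* For illustration, consider splitting "lea 8(%rbx,%rcx,4), %rax" where
   %rax is distinct from both inputs.  The computation above gives +1 for
   the mov into the destination, +1 for adding base and index, +1 for the
   shift implementing the scale (regno0 != regno1), +1 for adding the
   displacement, and -1 for the lea that is no longer emitted, i.e.
   split_cost = 3 extra cycles, which ix86_lea_outperforms then weighs
   against the expected AGU stall.  */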
18262
18263 /* Emit x86 binary operator CODE in mode MODE, where the first operand
18264 matches the destination.  The RTX includes a clobber of FLAGS_REG. */
18265
18266 static void
18267 ix86_emit_binop (enum rtx_code code, enum machine_mode mode,
18268 rtx dst, rtx src)
18269 {
18270 rtx op, clob;
18271
18272 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, dst, src));
18273 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
18274
18275 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
18276 }
18277
18278 /* Return true if the definition of REGNO1 is nearest to INSN. */
18279
18280 static bool
18281 find_nearest_reg_def (rtx insn, int regno1, int regno2)
18282 {
18283 rtx prev = insn;
18284 rtx start = BB_HEAD (BLOCK_FOR_INSN (insn));
18285
18286 if (insn == start)
18287 return false;
18288 while (prev && prev != start)
18289 {
18290 if (!INSN_P (prev) || !NONDEBUG_INSN_P (prev))
18291 {
18292 prev = PREV_INSN (prev);
18293 continue;
18294 }
18295 if (insn_defines_reg (regno1, INVALID_REGNUM, prev))
18296 return true;
18297 else if (insn_defines_reg (regno2, INVALID_REGNUM, prev))
18298 return false;
18299 prev = PREV_INSN (prev);
18300 }
18301
18302 /* None of the regs is defined in the bb. */
18303 return false;
18304 }
18305
18306 /* Split lea instructions into a sequence of instructions
18307 which are executed on the ALU to avoid AGU stalls.
18308 It is assumed that it is allowed to clobber the flags register
18309 at the position of the lea. */
18310
18311 void
18312 ix86_split_lea_for_addr (rtx insn, rtx operands[], enum machine_mode mode)
18313 {
18314 unsigned int regno0, regno1, regno2;
18315 struct ix86_address parts;
18316 rtx target, tmp;
18317 int ok, adds;
18318
18319 ok = ix86_decompose_address (operands[1], &parts);
18320 gcc_assert (ok);
18321
18322 target = gen_lowpart (mode, operands[0]);
18323
18324 regno0 = true_regnum (target);
18325 regno1 = INVALID_REGNUM;
18326 regno2 = INVALID_REGNUM;
18327
18328 if (parts.base)
18329 {
18330 parts.base = gen_lowpart (mode, parts.base);
18331 regno1 = true_regnum (parts.base);
18332 }
18333
18334 if (parts.index)
18335 {
18336 parts.index = gen_lowpart (mode, parts.index);
18337 regno2 = true_regnum (parts.index);
18338 }
18339
18340 if (parts.disp)
18341 parts.disp = gen_lowpart (mode, parts.disp);
18342
18343 if (parts.scale > 1)
18344 {
18345 /* Case r1 = r1 + ... */
18346 if (regno1 == regno0)
18347 {
18348 /* If we have a case r1 = r1 + C * r2 then we
18349 should use multiplication which is very
18350 expensive.  Assume the cost model is wrong if we
18351 get such a case here. */
18352 gcc_assert (regno2 != regno0);
18353
18354 for (adds = parts.scale; adds > 0; adds--)
18355 ix86_emit_binop (PLUS, mode, target, parts.index);
18356 }
18357 else
18358 {
18359 /* r1 = r2 + r3 * C case. Need to move r3 into r1. */
18360 if (regno0 != regno2)
18361 emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));
18362
18363 /* Use shift for scaling. */
18364 ix86_emit_binop (ASHIFT, mode, target,
18365 GEN_INT (exact_log2 (parts.scale)));
18366
18367 if (parts.base)
18368 ix86_emit_binop (PLUS, mode, target, parts.base);
18369
18370 if (parts.disp && parts.disp != const0_rtx)
18371 ix86_emit_binop (PLUS, mode, target, parts.disp);
18372 }
18373 }
18374 else if (!parts.base && !parts.index)
18375 {
18376 gcc_assert(parts.disp);
18377 emit_insn (gen_rtx_SET (VOIDmode, target, parts.disp));
18378 }
18379 else
18380 {
18381 if (!parts.base)
18382 {
18383 if (regno0 != regno2)
18384 emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));
18385 }
18386 else if (!parts.index)
18387 {
18388 if (regno0 != regno1)
18389 emit_insn (gen_rtx_SET (VOIDmode, target, parts.base));
18390 }
18391 else
18392 {
18393 if (regno0 == regno1)
18394 tmp = parts.index;
18395 else if (regno0 == regno2)
18396 tmp = parts.base;
18397 else
18398 {
18399 rtx tmp1;
18400
18401 /* Find better operand for SET instruction, depending
18402 on which definition is farther from the insn. */
18403 if (find_nearest_reg_def (insn, regno1, regno2))
18404 tmp = parts.index, tmp1 = parts.base;
18405 else
18406 tmp = parts.base, tmp1 = parts.index;
18407
18408 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18409
18410 if (parts.disp && parts.disp != const0_rtx)
18411 ix86_emit_binop (PLUS, mode, target, parts.disp);
18412
18413 ix86_emit_binop (PLUS, mode, target, tmp1);
18414 return;
18415 }
18416
18417 ix86_emit_binop (PLUS, mode, target, tmp);
18418 }
18419
18420 if (parts.disp && parts.disp != const0_rtx)
18421 ix86_emit_binop (PLUS, mode, target, parts.disp);
18422 }
18423 }
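
/* As an illustrative example, for "lea 8(%rbx,%rcx,4), %rax" with a
   destination distinct from both inputs the splitter above emits
   (modulo register allocation):

	movq	%rcx, %rax		# copy the index
	salq	$2, %rax		# scale by shifting
	addq	%rbx, %rax		# add the base
	addq	$8, %rax		# add the displacement
 */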
18424
18425 /* Return true if it is ok to optimize an ADD operation to LEA
18426 operation to avoid flag register consumption.  For most processors,
18427 ADD is faster than LEA.  For processors like BONNELL, if the
18428 destination register of LEA holds an actual address which will be
18429 used soon, LEA is better; otherwise ADD is better. */
18430
18431 bool
18432 ix86_lea_for_add_ok (rtx insn, rtx operands[])
18433 {
18434 unsigned int regno0 = true_regnum (operands[0]);
18435 unsigned int regno1 = true_regnum (operands[1]);
18436 unsigned int regno2 = true_regnum (operands[2]);
18437
18438 /* If a = b + c, (a!=b && a!=c), must use lea form. */
18439 if (regno0 != regno1 && regno0 != regno2)
18440 return true;
18441
18442 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18443 return false;
18444
18445 return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false);
18446 }
18447
18448 /* Return true if destination reg of SET_BODY is shift count of
18449 USE_BODY. */
18450
18451 static bool
18452 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
18453 {
18454 rtx set_dest;
18455 rtx shift_rtx;
18456 int i;
18457
18458 /* Retrieve destination of SET_BODY. */
18459 switch (GET_CODE (set_body))
18460 {
18461 case SET:
18462 set_dest = SET_DEST (set_body);
18463 if (!set_dest || !REG_P (set_dest))
18464 return false;
18465 break;
18466 case PARALLEL:
18467 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
18468 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
18469 use_body))
18470 return true;
18471 default:
18472 return false;
18473 break;
18474 }
18475
18476 /* Retrieve shift count of USE_BODY. */
18477 switch (GET_CODE (use_body))
18478 {
18479 case SET:
18480 shift_rtx = XEXP (use_body, 1);
18481 break;
18482 case PARALLEL:
18483 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
18484 if (ix86_dep_by_shift_count_body (set_body,
18485 XVECEXP (use_body, 0, i)))
18486 return true;
18487 default:
18488 return false;
18489 break;
18490 }
18491
18492 if (shift_rtx
18493 && (GET_CODE (shift_rtx) == ASHIFT
18494 || GET_CODE (shift_rtx) == LSHIFTRT
18495 || GET_CODE (shift_rtx) == ASHIFTRT
18496 || GET_CODE (shift_rtx) == ROTATE
18497 || GET_CODE (shift_rtx) == ROTATERT))
18498 {
18499 rtx shift_count = XEXP (shift_rtx, 1);
18500
18501 /* Return true if shift count is dest of SET_BODY. */
18502 if (REG_P (shift_count))
18503 {
18504 /* Add check since it can be invoked before register
18505 allocation in pre-reload schedule. */
18506 if (reload_completed
18507 && true_regnum (set_dest) == true_regnum (shift_count))
18508 return true;
18509 else if (REGNO(set_dest) == REGNO(shift_count))
18510 return true;
18511 }
18512 }
18513
18514 return false;
18515 }
18516
18517 /* Return true if destination reg of SET_INSN is shift count of
18518 USE_INSN. */
18519
18520 bool
18521 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
18522 {
18523 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
18524 PATTERN (use_insn));
18525 }
18526
18527 /* Return TRUE or FALSE depending on whether the unary operator meets the
18528 appropriate constraints. */
18529
18530 bool
18531 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
18532 enum machine_mode mode ATTRIBUTE_UNUSED,
18533 rtx operands[2])
18534 {
18535 /* If one of the operands is memory, source and destination must match. */
18536 if ((MEM_P (operands[0])
18537 || MEM_P (operands[1]))
18538 && ! rtx_equal_p (operands[0], operands[1]))
18539 return false;
18540 return true;
18541 }
18542
18543 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
18544 are ok, keeping in mind the possible movddup alternative. */
18545
18546 bool
18547 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
18548 {
18549 if (MEM_P (operands[0]))
18550 return rtx_equal_p (operands[0], operands[1 + high]);
18551 if (MEM_P (operands[1]) && MEM_P (operands[2]))
18552 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
18553 return true;
18554 }
18555
18556 /* Post-reload splitter for converting an SF or DFmode value in an
18557 SSE register into an unsigned SImode value. */
18558
18559 void
18560 ix86_split_convert_uns_si_sse (rtx operands[])
18561 {
18562 enum machine_mode vecmode;
18563 rtx value, large, zero_or_two31, input, two31, x;
18564
18565 large = operands[1];
18566 zero_or_two31 = operands[2];
18567 input = operands[3];
18568 two31 = operands[4];
18569 vecmode = GET_MODE (large);
18570 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
18571
18572 /* Load up the value into the low element. We must ensure that the other
18573 elements are valid floats -- zero is the easiest such value. */
18574 if (MEM_P (input))
18575 {
18576 if (vecmode == V4SFmode)
18577 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
18578 else
18579 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
18580 }
18581 else
18582 {
18583 input = gen_rtx_REG (vecmode, REGNO (input));
18584 emit_move_insn (value, CONST0_RTX (vecmode));
18585 if (vecmode == V4SFmode)
18586 emit_insn (gen_sse_movss (value, value, input));
18587 else
18588 emit_insn (gen_sse2_movsd (value, value, input));
18589 }
18590
18591 emit_move_insn (large, two31);
18592 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
18593
18594 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
18595 emit_insn (gen_rtx_SET (VOIDmode, large, x));
18596
18597 x = gen_rtx_AND (vecmode, zero_or_two31, large);
18598 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
18599
18600 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
18601 emit_insn (gen_rtx_SET (VOIDmode, value, x));
18602
18603 large = gen_rtx_REG (V4SImode, REGNO (large));
18604 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
18605
18606 x = gen_rtx_REG (V4SImode, REGNO (value));
18607 if (vecmode == V4SFmode)
18608 emit_insn (gen_fix_truncv4sfv4si2 (x, value));
18609 else
18610 emit_insn (gen_sse2_cvttpd2dq (x, value));
18611 value = x;
18612
18613 emit_insn (gen_xorv4si3 (value, value, large));
18614 }
18615
18616 /* Convert an unsigned DImode value into a DFmode, using only SSE.
18617 Expects the 64-bit DImode to be supplied in a pair of integral
18618 registers. Requires SSE2; will use SSE3 if available. For x86_32,
18619 -mfpmath=sse, !optimize_size only. */
18620
18621 void
18622 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
18623 {
18624 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
18625 rtx int_xmm, fp_xmm;
18626 rtx biases, exponents;
18627 rtx x;
18628
18629 int_xmm = gen_reg_rtx (V4SImode);
18630 if (TARGET_INTER_UNIT_MOVES_TO_VEC)
18631 emit_insn (gen_movdi_to_sse (int_xmm, input));
18632 else if (TARGET_SSE_SPLIT_REGS)
18633 {
18634 emit_clobber (int_xmm);
18635 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
18636 }
18637 else
18638 {
18639 x = gen_reg_rtx (V2DImode);
18640 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
18641 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
18642 }
18643
18644 x = gen_rtx_CONST_VECTOR (V4SImode,
18645 gen_rtvec (4, GEN_INT (0x43300000UL),
18646 GEN_INT (0x45300000UL),
18647 const0_rtx, const0_rtx));
18648 exponents = validize_mem (force_const_mem (V4SImode, x));
18649
18650 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
18651 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
18652
18653 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
18654 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
18655 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
18656 (0x1.0p84 + double(fp_value_hi_xmm)).
18657 Note these exponents differ by 32. */
18658
18659 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
18660
18661 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
18662 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
18663 real_ldexp (&bias_lo_rvt, &dconst1, 52);
18664 real_ldexp (&bias_hi_rvt, &dconst1, 84);
18665 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
18666 x = const_double_from_real_value (bias_hi_rvt, DFmode);
18667 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
18668 biases = validize_mem (force_const_mem (V2DFmode, biases));
18669 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
18670
18671 /* Add the upper and lower DFmode values together. */
18672 if (TARGET_SSE3)
18673 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
18674 else
18675 {
18676 x = copy_to_mode_reg (V2DFmode, fp_xmm);
18677 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
18678 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
18679 }
18680
18681 ix86_expand_vector_extract (false, target, fp_xmm, 0);
18682 }
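
/* To see why this works, write input = 2^32 * hi + lo.  Pairing the low
   word with the exponent pattern 0x43300000 yields the double
   2^52 + lo exactly, and pairing the high word with 0x45300000 yields
   2^84 + 2^32 * hi exactly.  Subtracting the bias vector {2^52, 2^84}
   leaves {lo, 2^32 * hi}, and the final add produces 2^32 * hi + lo
   rounded once to DFmode, i.e. the desired unsigned conversion.  */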
18683
18684 /* Not used, but eases macroization of patterns. */
18685 void
18686 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
18687 rtx input ATTRIBUTE_UNUSED)
18688 {
18689 gcc_unreachable ();
18690 }
18691
18692 /* Convert an unsigned SImode value into a DFmode value.  Currently only used
18693 for SSE, but applicable anywhere. */
18694
18695 void
18696 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
18697 {
18698 REAL_VALUE_TYPE TWO31r;
18699 rtx x, fp;
18700
18701 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
18702 NULL, 1, OPTAB_DIRECT);
18703
18704 fp = gen_reg_rtx (DFmode);
18705 emit_insn (gen_floatsidf2 (fp, x));
18706
18707 real_ldexp (&TWO31r, &dconst1, 31);
18708 x = const_double_from_real_value (TWO31r, DFmode);
18709
18710 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
18711 if (x != target)
18712 emit_move_insn (target, x);
18713 }
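
/* For example, input = 0xffffffff: the SImode addition wraps to
   0x7fffffff, which converts (signed) to 2147483647.0, and adding 0x1p31
   gives 4294967295.0.  Likewise input = 0 wraps to 0x80000000 = -2^31,
   and -2147483648.0 + 0x1p31 = 0.0.  All values in [0, 2^32) are exact
   because DFmode has more than 32 bits of precision.  */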
18714
18715 /* Convert a signed DImode value into a DFmode. Only used for SSE in
18716 32-bit mode; otherwise we have a direct convert instruction. */
18717
18718 void
18719 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
18720 {
18721 REAL_VALUE_TYPE TWO32r;
18722 rtx fp_lo, fp_hi, x;
18723
18724 fp_lo = gen_reg_rtx (DFmode);
18725 fp_hi = gen_reg_rtx (DFmode);
18726
18727 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
18728
18729 real_ldexp (&TWO32r, &dconst1, 32);
18730 x = const_double_from_real_value (TWO32r, DFmode);
18731 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
18732
18733 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
18734
18735 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
18736 0, OPTAB_DIRECT);
18737 if (x != target)
18738 emit_move_insn (target, x);
18739 }
18740
18741 /* Convert an unsigned SImode value into an SFmode value, using only SSE.
18742 For x86_32, -mfpmath=sse, !optimize_size only. */
18743 void
18744 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
18745 {
18746 REAL_VALUE_TYPE ONE16r;
18747 rtx fp_hi, fp_lo, int_hi, int_lo, x;
18748
18749 real_ldexp (&ONE16r, &dconst1, 16);
18750 x = const_double_from_real_value (ONE16r, SFmode);
18751 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
18752 NULL, 0, OPTAB_DIRECT);
18753 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
18754 NULL, 0, OPTAB_DIRECT);
18755 fp_hi = gen_reg_rtx (SFmode);
18756 fp_lo = gen_reg_rtx (SFmode);
18757 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
18758 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
18759 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
18760 0, OPTAB_DIRECT);
18761 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
18762 0, OPTAB_DIRECT);
18763 if (!rtx_equal_p (target, fp_hi))
18764 emit_move_insn (target, fp_hi);
18765 }
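
/* For example, input = 0xdeadbeef splits into int_hi = 0xdead (57005)
   and int_lo = 0xbeef (48879).  Both halves convert to SFmode exactly,
   the multiply by 0x1p16 is exact, and the final addition rounds
   57005 * 65536 + 48879 = 3735928559 just once, so the result matches a
   single correctly rounded unsigned conversion.  */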
18766
18767 /* floatunsv{4,8}siv{4,8}sf2 expander. Expand code to convert
18768 a vector of unsigned ints VAL to a vector of floats TARGET. */
18769
18770 void
18771 ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val)
18772 {
18773 rtx tmp[8];
18774 REAL_VALUE_TYPE TWO16r;
18775 enum machine_mode intmode = GET_MODE (val);
18776 enum machine_mode fltmode = GET_MODE (target);
18777 rtx (*cvt) (rtx, rtx);
18778
18779 if (intmode == V4SImode)
18780 cvt = gen_floatv4siv4sf2;
18781 else
18782 cvt = gen_floatv8siv8sf2;
18783 tmp[0] = ix86_build_const_vector (intmode, 1, GEN_INT (0xffff));
18784 tmp[0] = force_reg (intmode, tmp[0]);
18785 tmp[1] = expand_simple_binop (intmode, AND, val, tmp[0], NULL_RTX, 1,
18786 OPTAB_DIRECT);
18787 tmp[2] = expand_simple_binop (intmode, LSHIFTRT, val, GEN_INT (16),
18788 NULL_RTX, 1, OPTAB_DIRECT);
18789 tmp[3] = gen_reg_rtx (fltmode);
18790 emit_insn (cvt (tmp[3], tmp[1]));
18791 tmp[4] = gen_reg_rtx (fltmode);
18792 emit_insn (cvt (tmp[4], tmp[2]));
18793 real_ldexp (&TWO16r, &dconst1, 16);
18794 tmp[5] = const_double_from_real_value (TWO16r, SFmode);
18795 tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5]));
18796 tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1,
18797 OPTAB_DIRECT);
18798 tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1,
18799 OPTAB_DIRECT);
18800 if (tmp[7] != target)
18801 emit_move_insn (target, tmp[7]);
18802 }
18803
18804 /* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
18805 pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
18806 This is done by doing just signed conversion if < 0x1p31, and otherwise by
18807 subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards. */
18808
18809 rtx
18810 ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp)
18811 {
18812 REAL_VALUE_TYPE TWO31r;
18813 rtx two31r, tmp[4];
18814 enum machine_mode mode = GET_MODE (val);
18815 enum machine_mode scalarmode = GET_MODE_INNER (mode);
18816 enum machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? V8SImode : V4SImode;
18817 rtx (*cmp) (rtx, rtx, rtx, rtx);
18818 int i;
18819
18820 for (i = 0; i < 3; i++)
18821 tmp[i] = gen_reg_rtx (mode);
18822 real_ldexp (&TWO31r, &dconst1, 31);
18823 two31r = const_double_from_real_value (TWO31r, scalarmode);
18824 two31r = ix86_build_const_vector (mode, 1, two31r);
18825 two31r = force_reg (mode, two31r);
18826 switch (mode)
18827 {
18828 case V8SFmode: cmp = gen_avx_maskcmpv8sf3; break;
18829 case V4SFmode: cmp = gen_sse_maskcmpv4sf3; break;
18830 case V4DFmode: cmp = gen_avx_maskcmpv4df3; break;
18831 case V2DFmode: cmp = gen_sse2_maskcmpv2df3; break;
18832 default: gcc_unreachable ();
18833 }
18834 tmp[3] = gen_rtx_LE (mode, two31r, val);
18835 emit_insn (cmp (tmp[0], two31r, val, tmp[3]));
18836 tmp[1] = expand_simple_binop (mode, AND, tmp[0], two31r, tmp[1],
18837 0, OPTAB_DIRECT);
18838 if (intmode == V4SImode || TARGET_AVX2)
18839 *xorp = expand_simple_binop (intmode, ASHIFT,
18840 gen_lowpart (intmode, tmp[0]),
18841 GEN_INT (31), NULL_RTX, 0,
18842 OPTAB_DIRECT);
18843 else
18844 {
18845 rtx two31 = GEN_INT ((unsigned HOST_WIDE_INT) 1 << 31);
18846 two31 = ix86_build_const_vector (intmode, 1, two31);
18847 *xorp = expand_simple_binop (intmode, AND,
18848 gen_lowpart (intmode, tmp[0]),
18849 two31, NULL_RTX, 0,
18850 OPTAB_DIRECT);
18851 }
18852 return expand_simple_binop (mode, MINUS, val, tmp[1], tmp[2],
18853 0, OPTAB_DIRECT);
18854 }
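
/* For example, a lane holding 3000000000.0 compares >= 0x1p31, so the
   mask selects 0x1p31, the subtraction leaves 852516352.0 for the signed
   fix_trunc pattern, and the caller xors the lane with 0x80000000 from
   *XORP to recover 3000000000.  Lanes below 0x1p31 subtract zero and get
   a zero xor mask, so they pass through the signed path unchanged.  */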
18855
18856 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
18857 then replicate the value for all elements of the vector
18858 register. */
18859
18860 rtx
18861 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
18862 {
18863 int i, n_elt;
18864 rtvec v;
18865 enum machine_mode scalar_mode;
18866
18867 switch (mode)
18868 {
18869 case V64QImode:
18870 case V32QImode:
18871 case V16QImode:
18872 case V32HImode:
18873 case V16HImode:
18874 case V8HImode:
18875 case V16SImode:
18876 case V8SImode:
18877 case V4SImode:
18878 case V8DImode:
18879 case V4DImode:
18880 case V2DImode:
18881 gcc_assert (vect);
18882 case V16SFmode:
18883 case V8SFmode:
18884 case V4SFmode:
18885 case V8DFmode:
18886 case V4DFmode:
18887 case V2DFmode:
18888 n_elt = GET_MODE_NUNITS (mode);
18889 v = rtvec_alloc (n_elt);
18890 scalar_mode = GET_MODE_INNER (mode);
18891
18892 RTVEC_ELT (v, 0) = value;
18893
18894 for (i = 1; i < n_elt; ++i)
18895 RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);
18896
18897 return gen_rtx_CONST_VECTOR (mode, v);
18898
18899 default:
18900 gcc_unreachable ();
18901 }
18902 }
18903
18904 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
18905 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
18906 for an SSE register. If VECT is true, then replicate the mask for
18907 all elements of the vector register. If INVERT is true, then create
18908 a mask excluding the sign bit. */
18909
18910 rtx
18911 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
18912 {
18913 enum machine_mode vec_mode, imode;
18914 HOST_WIDE_INT hi, lo;
18915 int shift = 63;
18916 rtx v;
18917 rtx mask;
18918
18919 /* Find the sign bit, sign extended to 2*HWI. */
18920 switch (mode)
18921 {
18922 case V16SImode:
18923 case V16SFmode:
18924 case V8SImode:
18925 case V4SImode:
18926 case V8SFmode:
18927 case V4SFmode:
18928 vec_mode = mode;
18929 mode = GET_MODE_INNER (mode);
18930 imode = SImode;
18931 lo = 0x80000000, hi = lo < 0;
18932 break;
18933
18934 case V8DImode:
18935 case V4DImode:
18936 case V2DImode:
18937 case V8DFmode:
18938 case V4DFmode:
18939 case V2DFmode:
18940 vec_mode = mode;
18941 mode = GET_MODE_INNER (mode);
18942 imode = DImode;
18943 if (HOST_BITS_PER_WIDE_INT >= 64)
18944 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
18945 else
18946 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
18947 break;
18948
18949 case TImode:
18950 case TFmode:
18951 vec_mode = VOIDmode;
18952 if (HOST_BITS_PER_WIDE_INT >= 64)
18953 {
18954 imode = TImode;
18955 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
18956 }
18957 else
18958 {
18959 rtvec vec;
18960
18961 imode = DImode;
18962 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
18963
18964 if (invert)
18965 {
18966 lo = ~lo, hi = ~hi;
18967 v = constm1_rtx;
18968 }
18969 else
18970 v = const0_rtx;
18971
18972 mask = immed_double_const (lo, hi, imode);
18973
18974 vec = gen_rtvec (2, v, mask);
18975 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
18976 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
18977
18978 return v;
18979 }
18980 break;
18981
18982 default:
18983 gcc_unreachable ();
18984 }
18985
18986 if (invert)
18987 lo = ~lo, hi = ~hi;
18988
18989 /* Force this value into the low part of a fp vector constant. */
18990 mask = immed_double_const (lo, hi, imode);
18991 mask = gen_lowpart (mode, mask);
18992
18993 if (vec_mode == VOIDmode)
18994 return force_reg (mode, mask);
18995
18996 v = ix86_build_const_vector (vec_mode, vect, mask);
18997 return force_reg (vec_mode, v);
18998 }
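
/* For instance, for V4SFmode this returns the vector constant
   { 0x80000000, 0x80000000, 0x80000000, 0x80000000 } (only the low
   element is set when VECT is false), and with INVERT it returns the
   complement { 0x7fffffff, ... }, which keeps everything except the
   sign bit.  The DFmode variants use 0x8000000000000000 per element.  */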
18999
19000 /* Generate code for floating point ABS or NEG. */
19001
19002 void
19003 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
19004 rtx operands[])
19005 {
19006 rtx mask, set, dst, src;
19007 bool use_sse = false;
19008 bool vector_mode = VECTOR_MODE_P (mode);
19009 enum machine_mode vmode = mode;
19010
19011 if (vector_mode)
19012 use_sse = true;
19013 else if (mode == TFmode)
19014 use_sse = true;
19015 else if (TARGET_SSE_MATH)
19016 {
19017 use_sse = SSE_FLOAT_MODE_P (mode);
19018 if (mode == SFmode)
19019 vmode = V4SFmode;
19020 else if (mode == DFmode)
19021 vmode = V2DFmode;
19022 }
19023
19024 /* NEG and ABS performed with SSE use bitwise mask operations.
19025 Create the appropriate mask now. */
19026 if (use_sse)
19027 mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
19028 else
19029 mask = NULL_RTX;
19030
19031 dst = operands[0];
19032 src = operands[1];
19033
19034 set = gen_rtx_fmt_e (code, mode, src);
19035 set = gen_rtx_SET (VOIDmode, dst, set);
19036
19037 if (mask)
19038 {
19039 rtx use, clob;
19040 rtvec par;
19041
19042 use = gen_rtx_USE (VOIDmode, mask);
19043 if (vector_mode)
19044 par = gen_rtvec (2, set, use);
19045 else
19046 {
19047 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
19048 par = gen_rtvec (3, set, use, clob);
19049 }
19050 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
19051 }
19052 else
19053 emit_insn (set);
19054 }
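
/* In the SSE case the parallel emitted above is eventually resolved into
   a single bitwise operation on the sign bit, e.g. for SFmode with
   -mfpmath=sse (illustrative; .LC0/.LC1 stand for the mask constants
   built above):

	xorps	.LC0(%rip), %xmm0	# NEG: flip the sign bit
	andps	.LC1(%rip), %xmm0	# ABS: clear the sign bit
 */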
19055
19056 /* Expand a copysign operation. Special case operand 0 being a constant. */
19057
19058 void
19059 ix86_expand_copysign (rtx operands[])
19060 {
19061 enum machine_mode mode, vmode;
19062 rtx dest, op0, op1, mask, nmask;
19063
19064 dest = operands[0];
19065 op0 = operands[1];
19066 op1 = operands[2];
19067
19068 mode = GET_MODE (dest);
19069
19070 if (mode == SFmode)
19071 vmode = V4SFmode;
19072 else if (mode == DFmode)
19073 vmode = V2DFmode;
19074 else
19075 vmode = mode;
19076
19077 if (GET_CODE (op0) == CONST_DOUBLE)
19078 {
19079 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
19080
19081 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
19082 op0 = simplify_unary_operation (ABS, mode, op0, mode);
19083
19084 if (mode == SFmode || mode == DFmode)
19085 {
19086 if (op0 == CONST0_RTX (mode))
19087 op0 = CONST0_RTX (vmode);
19088 else
19089 {
19090 rtx v = ix86_build_const_vector (vmode, false, op0);
19091
19092 op0 = force_reg (vmode, v);
19093 }
19094 }
19095 else if (op0 != CONST0_RTX (mode))
19096 op0 = force_reg (mode, op0);
19097
19098 mask = ix86_build_signbit_mask (vmode, 0, 0);
19099
19100 if (mode == SFmode)
19101 copysign_insn = gen_copysignsf3_const;
19102 else if (mode == DFmode)
19103 copysign_insn = gen_copysigndf3_const;
19104 else
19105 copysign_insn = gen_copysigntf3_const;
19106
19107 emit_insn (copysign_insn (dest, op0, op1, mask));
19108 }
19109 else
19110 {
19111 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
19112
19113 nmask = ix86_build_signbit_mask (vmode, 0, 1);
19114 mask = ix86_build_signbit_mask (vmode, 0, 0);
19115
19116 if (mode == SFmode)
19117 copysign_insn = gen_copysignsf3_var;
19118 else if (mode == DFmode)
19119 copysign_insn = gen_copysigndf3_var;
19120 else
19121 copysign_insn = gen_copysigntf3_var;
19122
19123 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
19124 }
19125 }
19126
19127 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
19128 be a constant, and so has already been expanded into a vector constant. */
19129
19130 void
19131 ix86_split_copysign_const (rtx operands[])
19132 {
19133 enum machine_mode mode, vmode;
19134 rtx dest, op0, mask, x;
19135
19136 dest = operands[0];
19137 op0 = operands[1];
19138 mask = operands[3];
19139
19140 mode = GET_MODE (dest);
19141 vmode = GET_MODE (mask);
19142
19143 dest = simplify_gen_subreg (vmode, dest, mode, 0);
19144 x = gen_rtx_AND (vmode, dest, mask);
19145 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19146
19147 if (op0 != CONST0_RTX (vmode))
19148 {
19149 x = gen_rtx_IOR (vmode, dest, op0);
19150 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19151 }
19152 }
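
/* In other words, since the _const pattern ties the sign-source operand
   to the destination, the result is (op1 & sign-bit mask) | |constant|.
   For example, copysign (3.5, y) with y negative ANDs y down to the bare
   sign bit and then ORs in the vector constant holding 3.5, giving
   -3.5.  */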
19153
19154 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
19155 so we have to do two masks. */
19156
19157 void
19158 ix86_split_copysign_var (rtx operands[])
19159 {
19160 enum machine_mode mode, vmode;
19161 rtx dest, scratch, op0, op1, mask, nmask, x;
19162
19163 dest = operands[0];
19164 scratch = operands[1];
19165 op0 = operands[2];
19166 op1 = operands[3];
19167 nmask = operands[4];
19168 mask = operands[5];
19169
19170 mode = GET_MODE (dest);
19171 vmode = GET_MODE (mask);
19172
19173 if (rtx_equal_p (op0, op1))
19174 {
19175 /* Shouldn't happen often (it's useless, obviously), but when it does
19176 we'd generate incorrect code if we continue below. */
19177 emit_move_insn (dest, op0);
19178 return;
19179 }
19180
19181 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
19182 {
19183 gcc_assert (REGNO (op1) == REGNO (scratch));
19184
19185 x = gen_rtx_AND (vmode, scratch, mask);
19186 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
19187
19188 dest = mask;
19189 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19190 x = gen_rtx_NOT (vmode, dest);
19191 x = gen_rtx_AND (vmode, x, op0);
19192 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19193 }
19194 else
19195 {
19196 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
19197 {
19198 x = gen_rtx_AND (vmode, scratch, mask);
19199 }
19200 else /* alternative 2,4 */
19201 {
19202 gcc_assert (REGNO (mask) == REGNO (scratch));
19203 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
19204 x = gen_rtx_AND (vmode, scratch, op1);
19205 }
19206 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
19207
19208 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
19209 {
19210 dest = simplify_gen_subreg (vmode, op0, mode, 0);
19211 x = gen_rtx_AND (vmode, dest, nmask);
19212 }
19213 else /* alternative 3,4 */
19214 {
19215 gcc_assert (REGNO (nmask) == REGNO (dest));
19216 dest = nmask;
19217 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19218 x = gen_rtx_AND (vmode, dest, op0);
19219 }
19220 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19221 }
19222
19223 x = gen_rtx_IOR (vmode, dest, scratch);
19224 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19225 }
19226
19227 /* Return TRUE or FALSE depending on whether the first SET in INSN
19228 has source and destination with matching CC modes, and that the
19229 CC mode is at least as constrained as REQ_MODE. */
19230
19231 bool
19232 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
19233 {
19234 rtx set;
19235 enum machine_mode set_mode;
19236
19237 set = PATTERN (insn);
19238 if (GET_CODE (set) == PARALLEL)
19239 set = XVECEXP (set, 0, 0);
19240 gcc_assert (GET_CODE (set) == SET);
19241 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
19242
19243 set_mode = GET_MODE (SET_DEST (set));
19244 switch (set_mode)
19245 {
19246 case CCNOmode:
19247 if (req_mode != CCNOmode
19248 && (req_mode != CCmode
19249 || XEXP (SET_SRC (set), 1) != const0_rtx))
19250 return false;
19251 break;
19252 case CCmode:
19253 if (req_mode == CCGCmode)
19254 return false;
19255 /* FALLTHRU */
19256 case CCGCmode:
19257 if (req_mode == CCGOCmode || req_mode == CCNOmode)
19258 return false;
19259 /* FALLTHRU */
19260 case CCGOCmode:
19261 if (req_mode == CCZmode)
19262 return false;
19263 /* FALLTHRU */
19264 case CCZmode:
19265 break;
19266
19267 case CCAmode:
19268 case CCCmode:
19269 case CCOmode:
19270 case CCSmode:
19271 if (set_mode != req_mode)
19272 return false;
19273 break;
19274
19275 default:
19276 gcc_unreachable ();
19277 }
19278
19279 return GET_MODE (SET_SRC (set)) == set_mode;
19280 }
19281
19282 /* Generate insn patterns to do an integer compare of OPERANDS. */
19283
19284 static rtx
19285 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
19286 {
19287 enum machine_mode cmpmode;
19288 rtx tmp, flags;
19289
19290 cmpmode = SELECT_CC_MODE (code, op0, op1);
19291 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
19292
19293 /* This is very simple, but making the interface the same as in the
19294 FP case makes the rest of the code easier. */
19295 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
19296 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
19297
19298 /* Return the test that should be put into the flags user, i.e.
19299 the bcc, scc, or cmov instruction. */
19300 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
19301 }
19302
19303 /* Figure out whether to use ordered or unordered fp comparisons.
19304 Return the appropriate mode to use. */
19305
19306 enum machine_mode
19307 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
19308 {
19309 /* ??? In order to make all comparisons reversible, we do all comparisons
19310 non-trapping when compiling for IEEE.  Once gcc is able to distinguish
19311 the trapping and nontrapping forms of comparisons, we can make inequality
19312 comparisons trapping again, since it results in better code when using
19313 FCOM based compares. */
19314 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
19315 }
19316
19317 enum machine_mode
19318 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
19319 {
19320 enum machine_mode mode = GET_MODE (op0);
19321
19322 if (SCALAR_FLOAT_MODE_P (mode))
19323 {
19324 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
19325 return ix86_fp_compare_mode (code);
19326 }
19327
19328 switch (code)
19329 {
19330 /* Only zero flag is needed. */
19331 case EQ: /* ZF=0 */
19332 case NE: /* ZF!=0 */
19333 return CCZmode;
19334 /* Codes needing carry flag. */
19335 case GEU: /* CF=0 */
19336 case LTU: /* CF=1 */
19337 /* Detect overflow checks. They need just the carry flag. */
19338 if (GET_CODE (op0) == PLUS
19339 && rtx_equal_p (op1, XEXP (op0, 0)))
19340 return CCCmode;
19341 else
19342 return CCmode;
19343 case GTU: /* CF=0 & ZF=0 */
19344 case LEU: /* CF=1 | ZF=1 */
19345 return CCmode;
19346 /* Codes possibly doable only with sign flag when
19347 comparing against zero. */
19348 case GE: /* SF=OF or SF=0 */
19349 case LT: /* SF<>OF or SF=1 */
19350 if (op1 == const0_rtx)
19351 return CCGOCmode;
19352 else
19353 /* For other cases Carry flag is not required. */
19354 return CCGCmode;
19355 /* Codes doable only with the sign flag when comparing
19356 against zero, but for which we lack a jump instruction,
19357 so we need to use relational tests against the overflow
19358 flag, which thus needs to be zero. */
19359 case GT: /* ZF=0 & SF=OF */
19360 case LE: /* ZF=1 | SF<>OF */
19361 if (op1 == const0_rtx)
19362 return CCNOmode;
19363 else
19364 return CCGCmode;
19365 /* The strcmp pattern does (use flags), and combine may ask us for the
19366 proper mode. */
19367 case USE:
19368 return CCmode;
19369 default:
19370 gcc_unreachable ();
19371 }
19372 }
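
/* A few concrete instances of the mapping above: "x == 0" needs only the
   zero flag and gets CCZmode; a signed "x < 0" compared against
   const0_rtx gets CCGOCmode; and an unsigned overflow check of the form
   "a + b < a" matches the PLUS case and gets CCCmode, so it can be done
   with a single test of the carry flag.  */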
19373
19374 /* Return the fixed registers used for condition codes. */
19375
19376 static bool
19377 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
19378 {
19379 *p1 = FLAGS_REG;
19380 *p2 = FPSR_REG;
19381 return true;
19382 }
19383
19384 /* If two condition code modes are compatible, return a condition code
19385 mode which is compatible with both. Otherwise, return
19386 VOIDmode. */
19387
19388 static enum machine_mode
19389 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
19390 {
19391 if (m1 == m2)
19392 return m1;
19393
19394 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
19395 return VOIDmode;
19396
19397 if ((m1 == CCGCmode && m2 == CCGOCmode)
19398 || (m1 == CCGOCmode && m2 == CCGCmode))
19399 return CCGCmode;
19400
19401 if (m1 == CCZmode && (m2 == CCGCmode || m2 == CCGOCmode))
19402 return m2;
19403 else if (m2 == CCZmode && (m1 == CCGCmode || m1 == CCGOCmode))
19404 return m1;
19405
19406 switch (m1)
19407 {
19408 default:
19409 gcc_unreachable ();
19410
19411 case CCmode:
19412 case CCGCmode:
19413 case CCGOCmode:
19414 case CCNOmode:
19415 case CCAmode:
19416 case CCCmode:
19417 case CCOmode:
19418 case CCSmode:
19419 case CCZmode:
19420 switch (m2)
19421 {
19422 default:
19423 return VOIDmode;
19424
19425 case CCmode:
19426 case CCGCmode:
19427 case CCGOCmode:
19428 case CCNOmode:
19429 case CCAmode:
19430 case CCCmode:
19431 case CCOmode:
19432 case CCSmode:
19433 case CCZmode:
19434 return CCmode;
19435 }
19436
19437 case CCFPmode:
19438 case CCFPUmode:
19439 /* These are only compatible with themselves, which we already
19440 checked above. */
19441 return VOIDmode;
19442 }
19443 }
19444
19445
19446 /* Return a comparison we can do that is equivalent to
19447 swap_condition (code), apart possibly from orderedness.
19448 But, never change orderedness if TARGET_IEEE_FP, returning
19449 UNKNOWN in that case if necessary. */
19450
19451 static enum rtx_code
19452 ix86_fp_swap_condition (enum rtx_code code)
19453 {
19454 switch (code)
19455 {
19456 case GT: /* GTU - CF=0 & ZF=0 */
19457 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
19458 case GE: /* GEU - CF=0 */
19459 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
19460 case UNLT: /* LTU - CF=1 */
19461 return TARGET_IEEE_FP ? UNKNOWN : GT;
19462 case UNLE: /* LEU - CF=1 | ZF=1 */
19463 return TARGET_IEEE_FP ? UNKNOWN : GE;
19464 default:
19465 return swap_condition (code);
19466 }
19467 }
19468
19469 /* Return cost of comparison CODE using the best strategy for performance.
19470 All of the following functions use the number of instructions as the cost metric.
19471 In future this should be tweaked to compute bytes for optimize_size and
19472 take into account performance of various instructions on various CPUs. */
19473
19474 static int
19475 ix86_fp_comparison_cost (enum rtx_code code)
19476 {
19477 int arith_cost;
19478
19479 /* The cost of code using bit-twiddling on %ah. */
19480 switch (code)
19481 {
19482 case UNLE:
19483 case UNLT:
19484 case LTGT:
19485 case GT:
19486 case GE:
19487 case UNORDERED:
19488 case ORDERED:
19489 case UNEQ:
19490 arith_cost = 4;
19491 break;
19492 case LT:
19493 case NE:
19494 case EQ:
19495 case UNGE:
19496 arith_cost = TARGET_IEEE_FP ? 5 : 4;
19497 break;
19498 case LE:
19499 case UNGT:
19500 arith_cost = TARGET_IEEE_FP ? 6 : 4;
19501 break;
19502 default:
19503 gcc_unreachable ();
19504 }
19505
19506 switch (ix86_fp_comparison_strategy (code))
19507 {
19508 case IX86_FPCMP_COMI:
19509 return arith_cost > 4 ? 3 : 2;
19510 case IX86_FPCMP_SAHF:
19511 return arith_cost > 4 ? 4 : 3;
19512 default:
19513 return arith_cost;
19514 }
19515 }
19516
19517 /* Return the strategy to use for floating-point comparisons.  We assume that fcomi is always
19518 preferable where available, since that is also true when looking at size
19519 (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
19520
19521 enum ix86_fpcmp_strategy
19522 ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED)
19523 {
19524 /* Do fcomi/sahf based test when profitable. */
19525
19526 if (TARGET_CMOVE)
19527 return IX86_FPCMP_COMI;
19528
19529 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
19530 return IX86_FPCMP_SAHF;
19531
19532 return IX86_FPCMP_ARITH;
19533 }
19534
19535 /* Swap, force into registers, or otherwise massage the two operands
19536 to a fp comparison. The operands are updated in place; the new
19537 comparison code is returned. */
19538
19539 static enum rtx_code
19540 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
19541 {
19542 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
19543 rtx op0 = *pop0, op1 = *pop1;
19544 enum machine_mode op_mode = GET_MODE (op0);
19545 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
19546
19547 /* All of the unordered compare instructions only work on registers.
19548 The same is true of the fcomi compare instructions. The XFmode
19549 compare instructions require registers except when comparing
19550 against zero or when converting operand 1 from fixed point to
19551 floating point. */
19552
19553 if (!is_sse
19554 && (fpcmp_mode == CCFPUmode
19555 || (op_mode == XFmode
19556 && ! (standard_80387_constant_p (op0) == 1
19557 || standard_80387_constant_p (op1) == 1)
19558 && GET_CODE (op1) != FLOAT)
19559 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
19560 {
19561 op0 = force_reg (op_mode, op0);
19562 op1 = force_reg (op_mode, op1);
19563 }
19564 else
19565 {
19566 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
19567 things around if they appear profitable, otherwise force op0
19568 into a register. */
19569
19570 if (standard_80387_constant_p (op0) == 0
19571 || (MEM_P (op0)
19572 && ! (standard_80387_constant_p (op1) == 0
19573 || MEM_P (op1))))
19574 {
19575 enum rtx_code new_code = ix86_fp_swap_condition (code);
19576 if (new_code != UNKNOWN)
19577 {
19578 rtx tmp;
19579 tmp = op0, op0 = op1, op1 = tmp;
19580 code = new_code;
19581 }
19582 }
19583
19584 if (!REG_P (op0))
19585 op0 = force_reg (op_mode, op0);
19586
19587 if (CONSTANT_P (op1))
19588 {
19589 int tmp = standard_80387_constant_p (op1);
19590 if (tmp == 0)
19591 op1 = validize_mem (force_const_mem (op_mode, op1));
19592 else if (tmp == 1)
19593 {
19594 if (TARGET_CMOVE)
19595 op1 = force_reg (op_mode, op1);
19596 }
19597 else
19598 op1 = force_reg (op_mode, op1);
19599 }
19600 }
19601
19602 /* Try to rearrange the comparison to make it cheaper. */
19603 if (ix86_fp_comparison_cost (code)
19604 > ix86_fp_comparison_cost (swap_condition (code))
19605 && (REG_P (op1) || can_create_pseudo_p ()))
19606 {
19607 rtx tmp;
19608 tmp = op0, op0 = op1, op1 = tmp;
19609 code = swap_condition (code);
19610 if (!REG_P (op0))
19611 op0 = force_reg (op_mode, op0);
19612 }
19613
19614 *pop0 = op0;
19615 *pop1 = op1;
19616 return code;
19617 }
19618
19619 /* Convert the comparison codes we use to represent FP comparisons to the
19620 integer code that will result in a proper branch. Return UNKNOWN if no
19621 such code is available. */
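/* The mapping below works because fcomi (and fnstsw+sahf) put C0 into CF
   and C3 into ZF, so a floating-point comparison looks to the integer
   flags exactly like an unsigned comparison.  */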
19622
19623 enum rtx_code
19624 ix86_fp_compare_code_to_integer (enum rtx_code code)
19625 {
19626 switch (code)
19627 {
19628 case GT:
19629 return GTU;
19630 case GE:
19631 return GEU;
19632 case ORDERED:
19633 case UNORDERED:
19634 return code;
19636 case UNEQ:
19637 return EQ;
19639 case UNLT:
19640 return LTU;
19642 case UNLE:
19643 return LEU;
19645 case LTGT:
19646 return NE;
19648 default:
19649 return UNKNOWN;
19650 }
19651 }
19652
19653 /* Generate insn patterns to do a floating point compare of OPERANDS. */
19654
19655 static rtx
19656 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
19657 {
19658 enum machine_mode fpcmp_mode, intcmp_mode;
19659 rtx tmp, tmp2;
19660
19661 fpcmp_mode = ix86_fp_compare_mode (code);
19662 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
19663
19664 /* Do fcomi/sahf based test when profitable. */
19665 switch (ix86_fp_comparison_strategy (code))
19666 {
19667 case IX86_FPCMP_COMI:
19668 intcmp_mode = fpcmp_mode;
19669 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
19670 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
19671 tmp);
19672 emit_insn (tmp);
19673 break;
19674
19675 case IX86_FPCMP_SAHF:
19676 intcmp_mode = fpcmp_mode;
19677 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
19678 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
19679 tmp);
19680
19681 if (!scratch)
19682 scratch = gen_reg_rtx (HImode);
19683 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
19684 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
19685 break;
19686
19687 case IX86_FPCMP_ARITH:
19688 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
19689 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
19690 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
19691 if (!scratch)
19692 scratch = gen_reg_rtx (HImode);
19693 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
19694
19695 /* In the unordered case, we have to check C2 for NaNs, which
19696 doesn't happen to work out to anything nice combination-wise.
19697 So do some bit twiddling on the value we've got in AH to come
19698 up with an appropriate set of condition codes. */
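/* After fnstsw, AH holds the FPU condition bits: C0 is 0x01, C2 is 0x04
   and C3 is 0x40, so the 0x45 masks below select C0|C2|C3 and 0x40 tests
   C3 alone.  A compare sets C0 for "less", C3 for "equal", clears all
   three for "greater" and sets all three for "unordered".  */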
19699
19700 intcmp_mode = CCNOmode;
19701 switch (code)
19702 {
19703 case GT:
19704 case UNGT:
19705 if (code == GT || !TARGET_IEEE_FP)
19706 {
19707 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
19708 code = EQ;
19709 }
19710 else
19711 {
19712 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19713 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
19714 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
19715 intcmp_mode = CCmode;
19716 code = GEU;
19717 }
19718 break;
19719 case LT:
19720 case UNLT:
19721 if (code == LT && TARGET_IEEE_FP)
19722 {
19723 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19724 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
19725 intcmp_mode = CCmode;
19726 code = EQ;
19727 }
19728 else
19729 {
19730 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
19731 code = NE;
19732 }
19733 break;
19734 case GE:
19735 case UNGE:
19736 if (code == GE || !TARGET_IEEE_FP)
19737 {
19738 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
19739 code = EQ;
19740 }
19741 else
19742 {
19743 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19744 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
19745 code = NE;
19746 }
19747 break;
19748 case LE:
19749 case UNLE:
19750 if (code == LE && TARGET_IEEE_FP)
19751 {
19752 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19753 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
19754 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
19755 intcmp_mode = CCmode;
19756 code = LTU;
19757 }
19758 else
19759 {
19760 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
19761 code = NE;
19762 }
19763 break;
19764 case EQ:
19765 case UNEQ:
19766 if (code == EQ && TARGET_IEEE_FP)
19767 {
19768 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19769 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
19770 intcmp_mode = CCmode;
19771 code = EQ;
19772 }
19773 else
19774 {
19775 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
19776 code = NE;
19777 }
19778 break;
19779 case NE:
19780 case LTGT:
19781 if (code == NE && TARGET_IEEE_FP)
19782 {
19783 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19784 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
19785 GEN_INT (0x40)));
19786 code = NE;
19787 }
19788 else
19789 {
19790 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
19791 code = EQ;
19792 }
19793 break;
19794
19795 case UNORDERED:
19796 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
19797 code = NE;
19798 break;
19799 case ORDERED:
19800 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
19801 code = EQ;
19802 break;
19803
19804 default:
19805 gcc_unreachable ();
19806 }
19807 break;
19808
19809 default:
19810 gcc_unreachable ();
19811 }
19812
19813 /* Return the test that should be put into the flags user, i.e.
19814 the bcc, scc, or cmov instruction. */
19815 return gen_rtx_fmt_ee (code, VOIDmode,
19816 gen_rtx_REG (intcmp_mode, FLAGS_REG),
19817 const0_rtx);
19818 }
19819
19820 static rtx
19821 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
19822 {
19823 rtx ret;
19824
19825 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
19826 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
19827
19828 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
19829 {
19830 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
19831 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
19832 }
19833 else
19834 ret = ix86_expand_int_compare (code, op0, op1);
19835
19836 return ret;
19837 }
19838
19839 void
19840 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
19841 {
19842 enum machine_mode mode = GET_MODE (op0);
19843 rtx tmp;
19844
19845 switch (mode)
19846 {
19847 case SFmode:
19848 case DFmode:
19849 case XFmode:
19850 case QImode:
19851 case HImode:
19852 case SImode:
19853 simple:
19854 tmp = ix86_expand_compare (code, op0, op1);
19855 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
19856 gen_rtx_LABEL_REF (VOIDmode, label),
19857 pc_rtx);
19858 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
19859 return;
19860
19861 case DImode:
19862 if (TARGET_64BIT)
19863 goto simple;
19864 case TImode:
19865 /* Expand DImode branch into multiple compare+branch. */
19866 {
19867 rtx lo[2], hi[2], label2;
19868 enum rtx_code code1, code2, code3;
19869 enum machine_mode submode;
19870
19871 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
19872 {
19873 tmp = op0, op0 = op1, op1 = tmp;
19874 code = swap_condition (code);
19875 }
19876
19877 split_double_mode (mode, &op0, 1, lo+0, hi+0);
19878 split_double_mode (mode, &op1, 1, lo+1, hi+1);
19879
19880 submode = mode == DImode ? SImode : DImode;
19881
19882 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
19883 avoid two branches. This costs one extra insn, so disable when
19884 optimizing for size. */
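/* The OR of the two XORs is zero exactly when both halves are equal, so a
   single EQ/NE branch on the combined value is sufficient.  */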
19885
19886 if ((code == EQ || code == NE)
19887 && (!optimize_insn_for_size_p ()
19888 || hi[1] == const0_rtx || lo[1] == const0_rtx))
19889 {
19890 rtx xor0, xor1;
19891
19892 xor1 = hi[0];
19893 if (hi[1] != const0_rtx)
19894 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
19895 NULL_RTX, 0, OPTAB_WIDEN);
19896
19897 xor0 = lo[0];
19898 if (lo[1] != const0_rtx)
19899 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
19900 NULL_RTX, 0, OPTAB_WIDEN);
19901
19902 tmp = expand_binop (submode, ior_optab, xor1, xor0,
19903 NULL_RTX, 0, OPTAB_WIDEN);
19904
19905 ix86_expand_branch (code, tmp, const0_rtx, label);
19906 return;
19907 }
19908
19909 /* Otherwise, if we are doing a less-than or greater-or-equal-than
19910 comparison, op1 is a constant, and its low word is zero, then we can
19911 just examine the high word. Similarly for a low word of -1 and
19912 less-or-equal-than or greater-than. */
19913
19914 if (CONST_INT_P (hi[1]))
19915 switch (code)
19916 {
19917 case LT: case LTU: case GE: case GEU:
19918 if (lo[1] == const0_rtx)
19919 {
19920 ix86_expand_branch (code, hi[0], hi[1], label);
19921 return;
19922 }
19923 break;
19924 case LE: case LEU: case GT: case GTU:
19925 if (lo[1] == constm1_rtx)
19926 {
19927 ix86_expand_branch (code, hi[0], hi[1], label);
19928 return;
19929 }
19930 break;
19931 default:
19932 break;
19933 }
19934
19935 /* Otherwise, we need two or three jumps. */
19936
19937 label2 = gen_label_rtx ();
19938
19939 code1 = code;
19940 code2 = swap_condition (code);
19941 code3 = unsigned_condition (code);
19942
19943 switch (code)
19944 {
19945 case LT: case GT: case LTU: case GTU:
19946 break;
19947
19948 case LE: code1 = LT; code2 = GT; break;
19949 case GE: code1 = GT; code2 = LT; break;
19950 case LEU: code1 = LTU; code2 = GTU; break;
19951 case GEU: code1 = GTU; code2 = LTU; break;
19952
19953 case EQ: code1 = UNKNOWN; code2 = NE; break;
19954 case NE: code2 = UNKNOWN; break;
19955
19956 default:
19957 gcc_unreachable ();
19958 }
19959
19960 /*
19961 * a < b =>
19962 * if (hi(a) < hi(b)) goto true;
19963 * if (hi(a) > hi(b)) goto false;
19964 * if (lo(a) < lo(b)) goto true;
19965 * false:
19966 */
19967
19968 if (code1 != UNKNOWN)
19969 ix86_expand_branch (code1, hi[0], hi[1], label);
19970 if (code2 != UNKNOWN)
19971 ix86_expand_branch (code2, hi[0], hi[1], label2);
19972
19973 ix86_expand_branch (code3, lo[0], lo[1], label);
19974
19975 if (code2 != UNKNOWN)
19976 emit_label (label2);
19977 return;
19978 }
19979
19980 default:
19981 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
19982 goto simple;
19983 }
19984 }
19985
19986 /* Split branch based on floating point condition. */
19987 void
19988 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
19989 rtx target1, rtx target2, rtx tmp)
19990 {
19991 rtx condition;
19992 rtx i;
19993
19994 if (target2 != pc_rtx)
19995 {
19996 rtx tmp = target2;
19997 code = reverse_condition_maybe_unordered (code);
19998 target2 = target1;
19999 target1 = tmp;
20000 }
20001
20002 condition = ix86_expand_fp_compare (code, op1, op2,
20003 tmp);
20004
20005 i = emit_jump_insn (gen_rtx_SET
20006 (VOIDmode, pc_rtx,
20007 gen_rtx_IF_THEN_ELSE (VOIDmode,
20008 condition, target1, target2)));
20009 if (split_branch_probability >= 0)
20010 add_int_reg_note (i, REG_BR_PROB, split_branch_probability);
20011 }
20012
20013 void
20014 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
20015 {
20016 rtx ret;
20017
20018 gcc_assert (GET_MODE (dest) == QImode);
20019
20020 ret = ix86_expand_compare (code, op0, op1);
20021 PUT_MODE (ret, QImode);
20022 emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
20023 }
20024
20025 /* Expand a comparison setting or clearing the carry flag. Return true when
20026 successful and set *POP to the comparison for the operation. */
20027 static bool
20028 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
20029 {
20030 enum machine_mode mode =
20031 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
20032
20033 /* Do not handle double-mode compares that go through a special path. */
20034 if (mode == (TARGET_64BIT ? TImode : DImode))
20035 return false;
20036
20037 if (SCALAR_FLOAT_MODE_P (mode))
20038 {
20039 rtx compare_op, compare_seq;
20040
20041 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
20042
20043 /* Shortcut: the following common codes never translate
20044 into carry-flag compares. */
20045 if (code == EQ || code == NE || code == UNEQ || code == LTGT
20046 || code == ORDERED || code == UNORDERED)
20047 return false;
20048
20049 /* These comparisons require the zero flag; swap the operands so they won't. */
20050 if ((code == GT || code == UNLE || code == LE || code == UNGT)
20051 && !TARGET_IEEE_FP)
20052 {
20053 rtx tmp = op0;
20054 op0 = op1;
20055 op1 = tmp;
20056 code = swap_condition (code);
20057 }
20058
20059 /* Try to expand the comparison and verify that we end up with
20060 a carry-flag-based comparison. This fails only when we decide to
20061 expand the comparison using arithmetic, which is not a common
20062 scenario. */
20063 start_sequence ();
20064 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
20065 compare_seq = get_insns ();
20066 end_sequence ();
20067
20068 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
20069 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
20070 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
20071 else
20072 code = GET_CODE (compare_op);
20073
20074 if (code != LTU && code != GEU)
20075 return false;
20076
20077 emit_insn (compare_seq);
20078 *pop = compare_op;
20079 return true;
20080 }
20081
20082 if (!INTEGRAL_MODE_P (mode))
20083 return false;
20084
20085 switch (code)
20086 {
20087 case LTU:
20088 case GEU:
20089 break;
20090
20091 /* Convert a==0 into (unsigned)a<1. */
20092 case EQ:
20093 case NE:
20094 if (op1 != const0_rtx)
20095 return false;
20096 op1 = const1_rtx;
20097 code = (code == EQ ? LTU : GEU);
20098 break;
20099
20100 /* Convert a>b into b<a or a>=b+1. */
20101 case GTU:
20102 case LEU:
20103 if (CONST_INT_P (op1))
20104 {
20105 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
20106 /* Bail out on overflow. We can still swap the operands, but that
20107 would force loading the constant into a register. */
20108 if (op1 == const0_rtx
20109 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
20110 return false;
20111 code = (code == GTU ? GEU : LTU);
20112 }
20113 else
20114 {
20115 rtx tmp = op1;
20116 op1 = op0;
20117 op0 = tmp;
20118 code = (code == GTU ? LTU : GEU);
20119 }
20120 break;
20121
20122 /* Convert a>=0 into (unsigned)a<0x80000000. */
20123 case LT:
20124 case GE:
20125 if (mode == DImode || op1 != const0_rtx)
20126 return false;
20127 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20128 code = (code == LT ? GEU : LTU);
20129 break;
20130 case LE:
20131 case GT:
20132 if (mode == DImode || op1 != constm1_rtx)
20133 return false;
20134 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20135 code = (code == LE ? GEU : LTU);
20136 break;
20137
20138 default:
20139 return false;
20140 }
20141 /* Swapping operands may cause a constant to appear as the first operand. */
20142 if (!nonimmediate_operand (op0, VOIDmode))
20143 {
20144 if (!can_create_pseudo_p ())
20145 return false;
20146 op0 = force_reg (mode, op0);
20147 }
20148 *pop = ix86_expand_compare (code, op0, op1);
20149 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
20150 return true;
20151 }
20152
20153 bool
20154 ix86_expand_int_movcc (rtx operands[])
20155 {
20156 enum rtx_code code = GET_CODE (operands[1]), compare_code;
20157 rtx compare_seq, compare_op;
20158 enum machine_mode mode = GET_MODE (operands[0]);
20159 bool sign_bit_compare_p = false;
20160 rtx op0 = XEXP (operands[1], 0);
20161 rtx op1 = XEXP (operands[1], 1);
20162
20163 if (GET_MODE (op0) == TImode
20164 || (GET_MODE (op0) == DImode
20165 && !TARGET_64BIT))
20166 return false;
20167
20168 start_sequence ();
20169 compare_op = ix86_expand_compare (code, op0, op1);
20170 compare_seq = get_insns ();
20171 end_sequence ();
20172
20173 compare_code = GET_CODE (compare_op);
20174
20175 if ((op1 == const0_rtx && (code == GE || code == LT))
20176 || (op1 == constm1_rtx && (code == GT || code == LE)))
20177 sign_bit_compare_p = true;
20178
20179 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
20180 HImode insns, we'd be swallowed in word prefix ops. */
20181
20182 if ((mode != HImode || TARGET_FAST_PREFIX)
20183 && (mode != (TARGET_64BIT ? TImode : DImode))
20184 && CONST_INT_P (operands[2])
20185 && CONST_INT_P (operands[3]))
20186 {
20187 rtx out = operands[0];
20188 HOST_WIDE_INT ct = INTVAL (operands[2]);
20189 HOST_WIDE_INT cf = INTVAL (operands[3]);
20190 HOST_WIDE_INT diff;
20191
20192 diff = ct - cf;
20193 /* Sign-bit compares are better done using shifts than by using
20194 sbb. */
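/* For a sign-bit compare, emit_store_flag below can produce the -1/0 mask
   directly with an arithmetic right shift, avoiding the compare + sbb
   pair.  */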
20195 if (sign_bit_compare_p
20196 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
20197 {
20198 /* Detect overlap between destination and compare sources. */
20199 rtx tmp = out;
20200
20201 if (!sign_bit_compare_p)
20202 {
20203 rtx flags;
20204 bool fpcmp = false;
20205
20206 compare_code = GET_CODE (compare_op);
20207
20208 flags = XEXP (compare_op, 0);
20209
20210 if (GET_MODE (flags) == CCFPmode
20211 || GET_MODE (flags) == CCFPUmode)
20212 {
20213 fpcmp = true;
20214 compare_code
20215 = ix86_fp_compare_code_to_integer (compare_code);
20216 }
20217
20218 /* To simplify the rest of the code, restrict to the GEU case. */
20219 if (compare_code == LTU)
20220 {
20221 HOST_WIDE_INT tmp = ct;
20222 ct = cf;
20223 cf = tmp;
20224 compare_code = reverse_condition (compare_code);
20225 code = reverse_condition (code);
20226 }
20227 else
20228 {
20229 if (fpcmp)
20230 PUT_CODE (compare_op,
20231 reverse_condition_maybe_unordered
20232 (GET_CODE (compare_op)));
20233 else
20234 PUT_CODE (compare_op,
20235 reverse_condition (GET_CODE (compare_op)));
20236 }
20237 diff = ct - cf;
20238
20239 if (reg_overlap_mentioned_p (out, op0)
20240 || reg_overlap_mentioned_p (out, op1))
20241 tmp = gen_reg_rtx (mode);
20242
20243 if (mode == DImode)
20244 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
20245 else
20246 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
20247 flags, compare_op));
20248 }
20249 else
20250 {
20251 if (code == GT || code == GE)
20252 code = reverse_condition (code);
20253 else
20254 {
20255 HOST_WIDE_INT tmp = ct;
20256 ct = cf;
20257 cf = tmp;
20258 diff = ct - cf;
20259 }
20260 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
20261 }
20262
20263 if (diff == 1)
20264 {
20265 /*
20266 * cmpl op0,op1
20267 * sbbl dest,dest
20268 * [addl dest, ct]
20269 *
20270 * Size 5 - 8.
20271 */
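/* sbb dest,dest leaves -1 in dest when the carry flag is set and 0
   otherwise; since diff == 1 means cf == ct - 1, adding ct turns that
   -1/0 mask into cf or ct.  */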
20272 if (ct)
20273 tmp = expand_simple_binop (mode, PLUS,
20274 tmp, GEN_INT (ct),
20275 copy_rtx (tmp), 1, OPTAB_DIRECT);
20276 }
20277 else if (cf == -1)
20278 {
20279 /*
20280 * cmpl op0,op1
20281 * sbbl dest,dest
20282 * orl $ct, dest
20283 *
20284 * Size 8.
20285 */
20286 tmp = expand_simple_binop (mode, IOR,
20287 tmp, GEN_INT (ct),
20288 copy_rtx (tmp), 1, OPTAB_DIRECT);
20289 }
20290 else if (diff == -1 && ct)
20291 {
20292 /*
20293 * cmpl op0,op1
20294 * sbbl dest,dest
20295 * notl dest
20296 * [addl dest, cf]
20297 *
20298 * Size 8 - 11.
20299 */
20300 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
20301 if (cf)
20302 tmp = expand_simple_binop (mode, PLUS,
20303 copy_rtx (tmp), GEN_INT (cf),
20304 copy_rtx (tmp), 1, OPTAB_DIRECT);
20305 }
20306 else
20307 {
20308 /*
20309 * cmpl op0,op1
20310 * sbbl dest,dest
20311 * [notl dest]
20312 * andl cf - ct, dest
20313 * [addl dest, ct]
20314 *
20315 * Size 8 - 11.
20316 */
20317
20318 if (cf == 0)
20319 {
20320 cf = ct;
20321 ct = 0;
20322 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
20323 }
20324
20325 tmp = expand_simple_binop (mode, AND,
20326 copy_rtx (tmp),
20327 gen_int_mode (cf - ct, mode),
20328 copy_rtx (tmp), 1, OPTAB_DIRECT);
20329 if (ct)
20330 tmp = expand_simple_binop (mode, PLUS,
20331 copy_rtx (tmp), GEN_INT (ct),
20332 copy_rtx (tmp), 1, OPTAB_DIRECT);
20333 }
20334
20335 if (!rtx_equal_p (tmp, out))
20336 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
20337
20338 return true;
20339 }
20340
20341 if (diff < 0)
20342 {
20343 enum machine_mode cmp_mode = GET_MODE (op0);
20344
20345 HOST_WIDE_INT tmp;
20346 tmp = ct, ct = cf, cf = tmp;
20347 diff = -diff;
20348
20349 if (SCALAR_FLOAT_MODE_P (cmp_mode))
20350 {
20351 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
20352
20353 /* We may be reversing an unordered compare to a normal compare, which
20354 is not valid in general (we may convert a non-trapping condition
20355 to a trapping one); however, on i386 we currently emit all
20356 comparisons unordered. */
20357 compare_code = reverse_condition_maybe_unordered (compare_code);
20358 code = reverse_condition_maybe_unordered (code);
20359 }
20360 else
20361 {
20362 compare_code = reverse_condition (compare_code);
20363 code = reverse_condition (code);
20364 }
20365 }
20366
20367 compare_code = UNKNOWN;
20368 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
20369 && CONST_INT_P (op1))
20370 {
20371 if (op1 == const0_rtx
20372 && (code == LT || code == GE))
20373 compare_code = code;
20374 else if (op1 == constm1_rtx)
20375 {
20376 if (code == LE)
20377 compare_code = LT;
20378 else if (code == GT)
20379 compare_code = GE;
20380 }
20381 }
20382
20383 /* Optimize dest = (op0 < 0) ? -1 : cf. */
20384 if (compare_code != UNKNOWN
20385 && GET_MODE (op0) == GET_MODE (out)
20386 && (cf == -1 || ct == -1))
20387 {
20388 /* If the lea code below could be used, only optimize
20389 if it results in a 2-insn sequence. */
20390
20391 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
20392 || diff == 3 || diff == 5 || diff == 9)
20393 || (compare_code == LT && ct == -1)
20394 || (compare_code == GE && cf == -1))
20395 {
20396 /*
20397 * notl op1 (if necessary)
20398 * sarl $31, op1
20399 * orl cf, op1
20400 */
20401 if (ct != -1)
20402 {
20403 cf = ct;
20404 ct = -1;
20405 code = reverse_condition (code);
20406 }
20407
20408 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
20409
20410 out = expand_simple_binop (mode, IOR,
20411 out, GEN_INT (cf),
20412 out, 1, OPTAB_DIRECT);
20413 if (out != operands[0])
20414 emit_move_insn (operands[0], out);
20415
20416 return true;
20417 }
20418 }
20419
20420
20421 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
20422 || diff == 3 || diff == 5 || diff == 9)
20423 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
20424 && (mode != DImode
20425 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
20426 {
20427 /*
20428 * xorl dest,dest
20429 * cmpl op1,op2
20430 * setcc dest
20431 * lea cf(dest*(ct-cf)),dest
20432 *
20433 * Size 14.
20434 *
20435 * This also catches the degenerate setcc-only case.
20436 */
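/* With dest holding 0 or 1 from setcc, dest * diff + cf evaluates to cf
   or ct.  A single lea can form x, 2x, 4x and 8x via the scale factor,
   and 3x, 5x and 9x as base + index * scale, which is why diff is
   restricted to 1, 2, 3, 4, 5, 8 or 9 above.  */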
20437
20438 rtx tmp;
20439 int nops;
20440
20441 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
20442
20443 nops = 0;
20444 /* On x86_64 the lea instruction operates on Pmode, so we need
20445 to do the arithmetic in the proper mode to match. */
20446 if (diff == 1)
20447 tmp = copy_rtx (out);
20448 else
20449 {
20450 rtx out1;
20451 out1 = copy_rtx (out);
20452 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
20453 nops++;
20454 if (diff & 1)
20455 {
20456 tmp = gen_rtx_PLUS (mode, tmp, out1);
20457 nops++;
20458 }
20459 }
20460 if (cf != 0)
20461 {
20462 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
20463 nops++;
20464 }
20465 if (!rtx_equal_p (tmp, out))
20466 {
20467 if (nops == 1)
20468 out = force_operand (tmp, copy_rtx (out));
20469 else
20470 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
20471 }
20472 if (!rtx_equal_p (out, operands[0]))
20473 emit_move_insn (operands[0], copy_rtx (out));
20474
20475 return true;
20476 }
20477
20478 /*
20479 * General case: Jumpful:
20480 * xorl dest,dest cmpl op1, op2
20481 * cmpl op1, op2 movl ct, dest
20482 * setcc dest jcc 1f
20483 * decl dest movl cf, dest
20484 * andl (cf-ct),dest 1:
20485 * addl ct,dest
20486 *
20487 * Size 20. Size 14.
20488 *
20489 * This is reasonably steep, but branch mispredict costs are
20490 * high on modern cpus, so consider failing only if optimizing
20491 * for space.
20492 */
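/* setcc gives 0 or 1; decrementing turns that into -1 or 0, so ANDing
   with (cf - ct) and then adding ct produces ct for the true case and cf
   for the false case without a branch.  */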
20493
20494 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
20495 && BRANCH_COST (optimize_insn_for_speed_p (),
20496 false) >= 2)
20497 {
20498 if (cf == 0)
20499 {
20500 enum machine_mode cmp_mode = GET_MODE (op0);
20501
20502 cf = ct;
20503 ct = 0;
20504
20505 if (SCALAR_FLOAT_MODE_P (cmp_mode))
20506 {
20507 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
20508
20509 /* We may be reversing an unordered compare to a normal compare,
20510 which is not valid in general (we may convert a non-trapping
20511 condition to a trapping one); however, on i386 we currently
20512 emit all comparisons unordered. */
20513 code = reverse_condition_maybe_unordered (code);
20514 }
20515 else
20516 {
20517 code = reverse_condition (code);
20518 if (compare_code != UNKNOWN)
20519 compare_code = reverse_condition (compare_code);
20520 }
20521 }
20522
20523 if (compare_code != UNKNOWN)
20524 {
20525 /* notl op1 (if needed)
20526 sarl $31, op1
20527 andl (cf-ct), op1
20528 addl ct, op1
20529
20530 For x < 0 (resp. x <= -1) there will be no notl,
20531 so if possible swap the constants to get rid of the
20532 complement.
20533 True/false will be -1/0 while code below (store flag
20534 followed by decrement) is 0/-1, so the constants need
20535 to be exchanged once more. */
20536
20537 if (compare_code == GE || !cf)
20538 {
20539 code = reverse_condition (code);
20540 compare_code = LT;
20541 }
20542 else
20543 {
20544 HOST_WIDE_INT tmp = cf;
20545 cf = ct;
20546 ct = tmp;
20547 }
20548
20549 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
20550 }
20551 else
20552 {
20553 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
20554
20555 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
20556 constm1_rtx,
20557 copy_rtx (out), 1, OPTAB_DIRECT);
20558 }
20559
20560 out = expand_simple_binop (mode, AND, copy_rtx (out),
20561 gen_int_mode (cf - ct, mode),
20562 copy_rtx (out), 1, OPTAB_DIRECT);
20563 if (ct)
20564 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
20565 copy_rtx (out), 1, OPTAB_DIRECT);
20566 if (!rtx_equal_p (out, operands[0]))
20567 emit_move_insn (operands[0], copy_rtx (out));
20568
20569 return true;
20570 }
20571 }
20572
20573 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
20574 {
20575 /* Try a few things more with specific constants and a variable. */
20576
20577 optab op;
20578 rtx var, orig_out, out, tmp;
20579
20580 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
20581 return false;
20582
20583 /* If one of the two operands is an interesting constant, load a
20584 constant with the above and mask it in with a logical operation. */
20585
20586 if (CONST_INT_P (operands[2]))
20587 {
20588 var = operands[3];
20589 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
20590 operands[3] = constm1_rtx, op = and_optab;
20591 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
20592 operands[3] = const0_rtx, op = ior_optab;
20593 else
20594 return false;
20595 }
20596 else if (CONST_INT_P (operands[3]))
20597 {
20598 var = operands[2];
20599 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
20600 operands[2] = constm1_rtx, op = and_optab;
20601 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
20602 operands[2] = const0_rtx, op = ior_optab;
20603 else
20604 return false;
20605 }
20606 else
20607 return false;
20608
20609 orig_out = operands[0];
20610 tmp = gen_reg_rtx (mode);
20611 operands[0] = tmp;
20612
20613 /* Recurse to get the constant loaded. */
20614 if (ix86_expand_int_movcc (operands) == 0)
20615 return false;
20616
20617 /* Mask in the interesting variable. */
20618 out = expand_binop (mode, op, var, tmp, orig_out, 0,
20619 OPTAB_WIDEN);
20620 if (!rtx_equal_p (out, orig_out))
20621 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
20622
20623 return true;
20624 }
20625
20626 /*
20627 * For comparison with above,
20628 *
20629 * movl cf,dest
20630 * movl ct,tmp
20631 * cmpl op1,op2
20632 * cmovcc tmp,dest
20633 *
20634 * Size 15.
20635 */
20636
20637 if (! nonimmediate_operand (operands[2], mode))
20638 operands[2] = force_reg (mode, operands[2]);
20639 if (! nonimmediate_operand (operands[3], mode))
20640 operands[3] = force_reg (mode, operands[3]);
20641
20642 if (! register_operand (operands[2], VOIDmode)
20643 && (mode == QImode
20644 || ! register_operand (operands[3], VOIDmode)))
20645 operands[2] = force_reg (mode, operands[2]);
20646
20647 if (mode == QImode
20648 && ! register_operand (operands[3], VOIDmode))
20649 operands[3] = force_reg (mode, operands[3]);
20650
20651 emit_insn (compare_seq);
20652 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
20653 gen_rtx_IF_THEN_ELSE (mode,
20654 compare_op, operands[2],
20655 operands[3])));
20656 return true;
20657 }
20658
20659 /* Swap, force into registers, or otherwise massage the two operands
20660 to an sse comparison with a mask result. Thus we differ a bit from
20661 ix86_prepare_fp_compare_args which expects to produce a flags result.
20662
20663 The DEST operand exists to help determine whether to commute commutative
20664 operators. The POP0/POP1 operands are updated in place. The new
20665 comparison code is returned, or UNKNOWN if not implementable. */
20666
20667 static enum rtx_code
20668 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
20669 rtx *pop0, rtx *pop1)
20670 {
20671 rtx tmp;
20672
20673 switch (code)
20674 {
20675 case LTGT:
20676 case UNEQ:
20677 /* AVX supports all the needed comparisons. */
20678 if (TARGET_AVX)
20679 break;
20680 /* We have no LTGT as an operator. We could implement it with
20681 NE & ORDERED, but this requires an extra temporary. It's
20682 not clear that it's worth it. */
20683 return UNKNOWN;
20684
20685 case LT:
20686 case LE:
20687 case UNGT:
20688 case UNGE:
20689 /* These are supported directly. */
20690 break;
20691
20692 case EQ:
20693 case NE:
20694 case UNORDERED:
20695 case ORDERED:
20696 /* AVX has 3 operand comparisons, no need to swap anything. */
20697 if (TARGET_AVX)
20698 break;
20699 /* For commutative operators, try to canonicalize the destination
20700 operand to be first in the comparison - this helps reload to
20701 avoid extra moves. */
20702 if (!dest || !rtx_equal_p (dest, *pop1))
20703 break;
20704 /* FALLTHRU */
20705
20706 case GE:
20707 case GT:
20708 case UNLE:
20709 case UNLT:
20710 /* These are not supported directly before AVX, and furthermore
20711 ix86_expand_sse_fp_minmax only optimizes LT/UNGE. Swap the
20712 comparison operands to transform into something that is
20713 supported. */
20714 tmp = *pop0;
20715 *pop0 = *pop1;
20716 *pop1 = tmp;
20717 code = swap_condition (code);
20718 break;
20719
20720 default:
20721 gcc_unreachable ();
20722 }
20723
20724 return code;
20725 }
20726
20727 /* Detect conditional moves that exactly match min/max operational
20728 semantics. Note that this is IEEE safe, as long as we don't
20729 interchange the operands.
20730
20731 Returns FALSE if this conditional move doesn't match a MIN/MAX,
20732 and TRUE if the operation is successful and instructions are emitted. */
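/* The SSE min/max instructions return the second operand when the
   operands are unordered or compare equal, which matches the behavior of
   "a < b ? a : b"; hence only the LT form (and UNGE with the arms
   swapped) is recognized here.  */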
20733
20734 static bool
20735 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
20736 rtx cmp_op1, rtx if_true, rtx if_false)
20737 {
20738 enum machine_mode mode;
20739 bool is_min;
20740 rtx tmp;
20741
20742 if (code == LT)
20743 ;
20744 else if (code == UNGE)
20745 {
20746 tmp = if_true;
20747 if_true = if_false;
20748 if_false = tmp;
20749 }
20750 else
20751 return false;
20752
20753 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
20754 is_min = true;
20755 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
20756 is_min = false;
20757 else
20758 return false;
20759
20760 mode = GET_MODE (dest);
20761
20762 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
20763 but MODE may be a vector mode and thus not appropriate. */
20764 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
20765 {
20766 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
20767 rtvec v;
20768
20769 if_true = force_reg (mode, if_true);
20770 v = gen_rtvec (2, if_true, if_false);
20771 tmp = gen_rtx_UNSPEC (mode, v, u);
20772 }
20773 else
20774 {
20775 code = is_min ? SMIN : SMAX;
20776 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
20777 }
20778
20779 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
20780 return true;
20781 }
20782
20783 /* Expand an sse vector comparison. Return the register with the result. */
20784
20785 static rtx
20786 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
20787 rtx op_true, rtx op_false)
20788 {
20789 enum machine_mode mode = GET_MODE (dest);
20790 enum machine_mode cmp_ops_mode = GET_MODE (cmp_op0);
20791
20792 /* In the general case the mode of the comparison result can differ from the mode of the operands. */
20793 enum machine_mode cmp_mode;
20794
20795 /* In AVX512F the result of comparison is an integer mask. */
20796 bool maskcmp = false;
20797 rtx x;
20798
20799 if (GET_MODE_SIZE (cmp_ops_mode) == 64)
20800 {
20801 cmp_mode = mode_for_size (GET_MODE_NUNITS (cmp_ops_mode), MODE_INT, 0);
20802 gcc_assert (cmp_mode != BLKmode);
20803
20804 maskcmp = true;
20805 }
20806 else
20807 cmp_mode = cmp_ops_mode;
20808
20809
20810 cmp_op0 = force_reg (cmp_ops_mode, cmp_op0);
20811 if (!nonimmediate_operand (cmp_op1, cmp_ops_mode))
20812 cmp_op1 = force_reg (cmp_ops_mode, cmp_op1);
20813
20814 if (optimize
20815 || reg_overlap_mentioned_p (dest, op_true)
20816 || reg_overlap_mentioned_p (dest, op_false))
20817 dest = gen_reg_rtx (maskcmp ? cmp_mode : mode);
20818
20819 /* Compare patterns for integer modes are represented as unspecs in AVX512F only. */
20820 if (maskcmp && (code == GT || code == EQ))
20821 {
20822 rtx (*gen)(rtx, rtx, rtx);
20823
20824 switch (cmp_ops_mode)
20825 {
20826 case V16SImode:
20827 gen = code == GT ? gen_avx512f_gtv16si3 : gen_avx512f_eqv16si3_1;
20828 break;
20829 case V8DImode:
20830 gen = code == GT ? gen_avx512f_gtv8di3 : gen_avx512f_eqv8di3_1;
20831 break;
20832 default:
20833 gen = NULL;
20834 }
20835
20836 if (gen)
20837 {
20838 emit_insn (gen (dest, cmp_op0, cmp_op1));
20839 return dest;
20840 }
20841 }
20842 x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1);
20843
20844 if (cmp_mode != mode && !maskcmp)
20845 {
20846 x = force_reg (cmp_ops_mode, x);
20847 convert_move (dest, x, false);
20848 }
20849 else
20850 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20851
20852 return dest;
20853 }
20854
20855 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
20856 operations. This is used for both scalar and vector conditional moves. */
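/* The comparison result is a per-element mask of all ones or all zeros,
   so without a blend instruction the move can be expanded as
   dest = (cmp & op_true) | (~cmp & op_false).  */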
20857
20858 static void
20859 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
20860 {
20861 enum machine_mode mode = GET_MODE (dest);
20862 enum machine_mode cmpmode = GET_MODE (cmp);
20863
20864 /* In AVX512F the result of comparison is an integer mask. */
20865 bool maskcmp = (mode != cmpmode && TARGET_AVX512F);
20866
20867 rtx t2, t3, x;
20868
20869 if (vector_all_ones_operand (op_true, mode)
20870 && rtx_equal_p (op_false, CONST0_RTX (mode))
20871 && !maskcmp)
20872 {
20873 emit_insn (gen_rtx_SET (VOIDmode, dest, cmp));
20874 }
20875 else if (op_false == CONST0_RTX (mode)
20876 && !maskcmp)
20877 {
20878 op_true = force_reg (mode, op_true);
20879 x = gen_rtx_AND (mode, cmp, op_true);
20880 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20881 }
20882 else if (op_true == CONST0_RTX (mode)
20883 && !maskcmp)
20884 {
20885 op_false = force_reg (mode, op_false);
20886 x = gen_rtx_NOT (mode, cmp);
20887 x = gen_rtx_AND (mode, x, op_false);
20888 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20889 }
20890 else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode)
20891 && !maskcmp)
20892 {
20893 op_false = force_reg (mode, op_false);
20894 x = gen_rtx_IOR (mode, cmp, op_false);
20895 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20896 }
20897 else if (TARGET_XOP
20898 && !maskcmp)
20899 {
20900 op_true = force_reg (mode, op_true);
20901
20902 if (!nonimmediate_operand (op_false, mode))
20903 op_false = force_reg (mode, op_false);
20904
20905 emit_insn (gen_rtx_SET (mode, dest,
20906 gen_rtx_IF_THEN_ELSE (mode, cmp,
20907 op_true,
20908 op_false)));
20909 }
20910 else
20911 {
20912 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
20913 rtx d = dest;
20914
20915 if (!nonimmediate_operand (op_true, mode))
20916 op_true = force_reg (mode, op_true);
20917
20918 op_false = force_reg (mode, op_false);
20919
20920 switch (mode)
20921 {
20922 case V4SFmode:
20923 if (TARGET_SSE4_1)
20924 gen = gen_sse4_1_blendvps;
20925 break;
20926 case V2DFmode:
20927 if (TARGET_SSE4_1)
20928 gen = gen_sse4_1_blendvpd;
20929 break;
20930 case V16QImode:
20931 case V8HImode:
20932 case V4SImode:
20933 case V2DImode:
20934 if (TARGET_SSE4_1)
20935 {
20936 gen = gen_sse4_1_pblendvb;
20937 if (mode != V16QImode)
20938 d = gen_reg_rtx (V16QImode);
20939 op_false = gen_lowpart (V16QImode, op_false);
20940 op_true = gen_lowpart (V16QImode, op_true);
20941 cmp = gen_lowpart (V16QImode, cmp);
20942 }
20943 break;
20944 case V8SFmode:
20945 if (TARGET_AVX)
20946 gen = gen_avx_blendvps256;
20947 break;
20948 case V4DFmode:
20949 if (TARGET_AVX)
20950 gen = gen_avx_blendvpd256;
20951 break;
20952 case V32QImode:
20953 case V16HImode:
20954 case V8SImode:
20955 case V4DImode:
20956 if (TARGET_AVX2)
20957 {
20958 gen = gen_avx2_pblendvb;
20959 if (mode != V32QImode)
20960 d = gen_reg_rtx (V32QImode);
20961 op_false = gen_lowpart (V32QImode, op_false);
20962 op_true = gen_lowpart (V32QImode, op_true);
20963 cmp = gen_lowpart (V32QImode, cmp);
20964 }
20965 break;
20966
20967 case V16SImode:
20968 gen = gen_avx512f_blendmv16si;
20969 break;
20970 case V8DImode:
20971 gen = gen_avx512f_blendmv8di;
20972 break;
20973 case V8DFmode:
20974 gen = gen_avx512f_blendmv8df;
20975 break;
20976 case V16SFmode:
20977 gen = gen_avx512f_blendmv16sf;
20978 break;
20979
20980 default:
20981 break;
20982 }
20983
20984 if (gen != NULL)
20985 {
20986 emit_insn (gen (d, op_false, op_true, cmp));
20987 if (d != dest)
20988 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
20989 }
20990 else
20991 {
20992 op_true = force_reg (mode, op_true);
20993
20994 t2 = gen_reg_rtx (mode);
20995 if (optimize)
20996 t3 = gen_reg_rtx (mode);
20997 else
20998 t3 = dest;
20999
21000 x = gen_rtx_AND (mode, op_true, cmp);
21001 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
21002
21003 x = gen_rtx_NOT (mode, cmp);
21004 x = gen_rtx_AND (mode, x, op_false);
21005 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
21006
21007 x = gen_rtx_IOR (mode, t3, t2);
21008 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21009 }
21010 }
21011 }
21012
21013 /* Expand a floating-point conditional move. Return true if successful. */
21014
21015 bool
21016 ix86_expand_fp_movcc (rtx operands[])
21017 {
21018 enum machine_mode mode = GET_MODE (operands[0]);
21019 enum rtx_code code = GET_CODE (operands[1]);
21020 rtx tmp, compare_op;
21021 rtx op0 = XEXP (operands[1], 0);
21022 rtx op1 = XEXP (operands[1], 1);
21023
21024 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
21025 {
21026 enum machine_mode cmode;
21027
21028 /* Since we've no cmove for sse registers, don't force bad register
21029 allocation just to gain access to it. Deny movcc when the
21030 comparison mode doesn't match the move mode. */
21031 cmode = GET_MODE (op0);
21032 if (cmode == VOIDmode)
21033 cmode = GET_MODE (op1);
21034 if (cmode != mode)
21035 return false;
21036
21037 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
21038 if (code == UNKNOWN)
21039 return false;
21040
21041 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
21042 operands[2], operands[3]))
21043 return true;
21044
21045 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
21046 operands[2], operands[3]);
21047 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
21048 return true;
21049 }
21050
21051 if (GET_MODE (op0) == TImode
21052 || (GET_MODE (op0) == DImode
21053 && !TARGET_64BIT))
21054 return false;
21055
21056 /* The floating point conditional move instructions don't directly
21057 support conditions resulting from a signed integer comparison. */
21058
21059 compare_op = ix86_expand_compare (code, op0, op1);
21060 if (!fcmov_comparison_operator (compare_op, VOIDmode))
21061 {
21062 tmp = gen_reg_rtx (QImode);
21063 ix86_expand_setcc (tmp, code, op0, op1);
21064
21065 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
21066 }
21067
21068 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
21069 gen_rtx_IF_THEN_ELSE (mode, compare_op,
21070 operands[2], operands[3])));
21071
21072 return true;
21073 }
21074
21075 /* Expand a floating-point vector conditional move; a vcond operation
21076 rather than a movcc operation. */
21077
21078 bool
21079 ix86_expand_fp_vcond (rtx operands[])
21080 {
21081 enum rtx_code code = GET_CODE (operands[3]);
21082 rtx cmp;
21083
21084 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
21085 &operands[4], &operands[5]);
21086 if (code == UNKNOWN)
21087 {
21088 rtx temp;
21089 switch (GET_CODE (operands[3]))
21090 {
21091 case LTGT:
21092 temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
21093 operands[5], operands[0], operands[0]);
21094 cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
21095 operands[5], operands[1], operands[2]);
21096 code = AND;
21097 break;
21098 case UNEQ:
21099 temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
21100 operands[5], operands[0], operands[0]);
21101 cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
21102 operands[5], operands[1], operands[2]);
21103 code = IOR;
21104 break;
21105 default:
21106 gcc_unreachable ();
21107 }
21108 cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
21109 OPTAB_DIRECT);
21110 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
21111 return true;
21112 }
21113
21114 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
21115 operands[5], operands[1], operands[2]))
21116 return true;
21117
21118 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
21119 operands[1], operands[2]);
21120 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
21121 return true;
21122 }
21123
21124 /* Expand a signed/unsigned integral vector conditional move. */
21125
21126 bool
21127 ix86_expand_int_vcond (rtx operands[])
21128 {
21129 enum machine_mode data_mode = GET_MODE (operands[0]);
21130 enum machine_mode mode = GET_MODE (operands[4]);
21131 enum rtx_code code = GET_CODE (operands[3]);
21132 bool negate = false;
21133 rtx x, cop0, cop1;
21134
21135 cop0 = operands[4];
21136 cop1 = operands[5];
21137
21138 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
21139 and x < 0 ? 1 : 0 into (unsigned) x >> 31. */
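/* An arithmetic right shift by the sign-bit position broadcasts the sign
   bit to every bit of the element (yielding -1 or 0), while a logical
   right shift leaves just the sign bit (yielding 1 or 0).  */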
21140 if ((code == LT || code == GE)
21141 && data_mode == mode
21142 && cop1 == CONST0_RTX (mode)
21143 && operands[1 + (code == LT)] == CONST0_RTX (data_mode)
21144 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) > 1
21145 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) <= 8
21146 && (GET_MODE_SIZE (data_mode) == 16
21147 || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32)))
21148 {
21149 rtx negop = operands[2 - (code == LT)];
21150 int shift = GET_MODE_BITSIZE (GET_MODE_INNER (data_mode)) - 1;
21151 if (negop == CONST1_RTX (data_mode))
21152 {
21153 rtx res = expand_simple_binop (mode, LSHIFTRT, cop0, GEN_INT (shift),
21154 operands[0], 1, OPTAB_DIRECT);
21155 if (res != operands[0])
21156 emit_move_insn (operands[0], res);
21157 return true;
21158 }
21159 else if (GET_MODE_INNER (data_mode) != DImode
21160 && vector_all_ones_operand (negop, data_mode))
21161 {
21162 rtx res = expand_simple_binop (mode, ASHIFTRT, cop0, GEN_INT (shift),
21163 operands[0], 0, OPTAB_DIRECT);
21164 if (res != operands[0])
21165 emit_move_insn (operands[0], res);
21166 return true;
21167 }
21168 }
21169
21170 if (!nonimmediate_operand (cop1, mode))
21171 cop1 = force_reg (mode, cop1);
21172 if (!general_operand (operands[1], data_mode))
21173 operands[1] = force_reg (data_mode, operands[1]);
21174 if (!general_operand (operands[2], data_mode))
21175 operands[2] = force_reg (data_mode, operands[2]);
21176
21177 /* XOP supports all of the comparisons on all 128-bit vector int types. */
21178 if (TARGET_XOP
21179 && (mode == V16QImode || mode == V8HImode
21180 || mode == V4SImode || mode == V2DImode))
21181 ;
21182 else
21183 {
21184 /* Canonicalize the comparison to EQ, GT, GTU. */
21185 switch (code)
21186 {
21187 case EQ:
21188 case GT:
21189 case GTU:
21190 break;
21191
21192 case NE:
21193 case LE:
21194 case LEU:
21195 code = reverse_condition (code);
21196 negate = true;
21197 break;
21198
21199 case GE:
21200 case GEU:
21201 code = reverse_condition (code);
21202 negate = true;
21203 /* FALLTHRU */
21204
21205 case LT:
21206 case LTU:
21207 code = swap_condition (code);
21208 x = cop0, cop0 = cop1, cop1 = x;
21209 break;
21210
21211 default:
21212 gcc_unreachable ();
21213 }
21214
21215 /* Only SSE4.1/SSE4.2 supports V2DImode. */
21216 if (mode == V2DImode)
21217 {
21218 switch (code)
21219 {
21220 case EQ:
21221 /* SSE4.1 supports EQ. */
21222 if (!TARGET_SSE4_1)
21223 return false;
21224 break;
21225
21226 case GT:
21227 case GTU:
21228 /* SSE4.2 supports GT/GTU. */
21229 if (!TARGET_SSE4_2)
21230 return false;
21231 break;
21232
21233 default:
21234 gcc_unreachable ();
21235 }
21236 }
21237
21238 /* Unsigned parallel compare is not supported by the hardware.
21239 Play some tricks to turn this into a signed comparison
21240 against 0. */
21241 if (code == GTU)
21242 {
21243 cop0 = force_reg (mode, cop0);
21244
21245 switch (mode)
21246 {
21247 case V16SImode:
21248 case V8DImode:
21249 case V8SImode:
21250 case V4DImode:
21251 case V4SImode:
21252 case V2DImode:
21253 {
21254 rtx t1, t2, mask;
21255 rtx (*gen_sub3) (rtx, rtx, rtx);
21256
21257 switch (mode)
21258 {
21259 case V16SImode: gen_sub3 = gen_subv16si3; break;
21260 case V8DImode: gen_sub3 = gen_subv8di3; break;
21261 case V8SImode: gen_sub3 = gen_subv8si3; break;
21262 case V4DImode: gen_sub3 = gen_subv4di3; break;
21263 case V4SImode: gen_sub3 = gen_subv4si3; break;
21264 case V2DImode: gen_sub3 = gen_subv2di3; break;
21265 default:
21266 gcc_unreachable ();
21267 }
21268 /* Subtract (-(INT MAX) - 1) from both operands to make
21269 them signed. */
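/* Flipping the sign bit of both operands preserves their relative order,
   so x >u y holds exactly when (x - 0x80..0) >s (y - 0x80..0), and the
   signed pcmpgt patterns can then be used.  */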
21270 mask = ix86_build_signbit_mask (mode, true, false);
21271 t1 = gen_reg_rtx (mode);
21272 emit_insn (gen_sub3 (t1, cop0, mask));
21273
21274 t2 = gen_reg_rtx (mode);
21275 emit_insn (gen_sub3 (t2, cop1, mask));
21276
21277 cop0 = t1;
21278 cop1 = t2;
21279 code = GT;
21280 }
21281 break;
21282
21283 case V32QImode:
21284 case V16HImode:
21285 case V16QImode:
21286 case V8HImode:
21287 /* Perform a parallel unsigned saturating subtraction. */
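/* The saturating difference cop0 - cop1 is zero exactly when
   cop0 <=u cop1, so comparing it for equality with zero computes LEU and
   flipping NEGATE turns that into the GTU we need.  */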
21288 x = gen_reg_rtx (mode);
21289 emit_insn (gen_rtx_SET (VOIDmode, x,
21290 gen_rtx_US_MINUS (mode, cop0, cop1)));
21291
21292 cop0 = x;
21293 cop1 = CONST0_RTX (mode);
21294 code = EQ;
21295 negate = !negate;
21296 break;
21297
21298 default:
21299 gcc_unreachable ();
21300 }
21301 }
21302 }
21303
21304 /* Allow the comparison to be done in one mode, but the movcc to
21305 happen in another mode. */
21306 if (data_mode == mode)
21307 {
21308 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
21309 operands[1+negate], operands[2-negate]);
21310 }
21311 else
21312 {
21313 gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode));
21314 x = ix86_expand_sse_cmp (gen_reg_rtx (mode), code, cop0, cop1,
21315 operands[1+negate], operands[2-negate]);
21316 if (GET_MODE (x) == mode)
21317 x = gen_lowpart (data_mode, x);
21318 }
21319
21320 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
21321 operands[2-negate]);
21322 return true;
21323 }
21324
21325 static bool
21326 ix86_expand_vec_perm_vpermi2 (rtx target, rtx op0, rtx mask, rtx op1)
21327 {
21328 enum machine_mode mode = GET_MODE (op0);
21329 switch (mode)
21330 {
21331 case V16SImode:
21332 emit_insn (gen_avx512f_vpermi2varv16si3 (target, op0,
21333 force_reg (V16SImode, mask),
21334 op1));
21335 return true;
21336 case V16SFmode:
21337 emit_insn (gen_avx512f_vpermi2varv16sf3 (target, op0,
21338 force_reg (V16SImode, mask),
21339 op1));
21340 return true;
21341 case V8DImode:
21342 emit_insn (gen_avx512f_vpermi2varv8di3 (target, op0,
21343 force_reg (V8DImode, mask), op1));
21344 return true;
21345 case V8DFmode:
21346 emit_insn (gen_avx512f_vpermi2varv8df3 (target, op0,
21347 force_reg (V8DImode, mask), op1));
21348 return true;
21349 default:
21350 return false;
21351 }
21352 }
21353
21354 /* Expand a variable vector permutation. */
21355
21356 void
21357 ix86_expand_vec_perm (rtx operands[])
21358 {
21359 rtx target = operands[0];
21360 rtx op0 = operands[1];
21361 rtx op1 = operands[2];
21362 rtx mask = operands[3];
21363 rtx t1, t2, t3, t4, t5, t6, t7, t8, vt, vt2, vec[32];
21364 enum machine_mode mode = GET_MODE (op0);
21365 enum machine_mode maskmode = GET_MODE (mask);
21366 int w, e, i;
21367 bool one_operand_shuffle = rtx_equal_p (op0, op1);
21368
21369 /* Number of elements in the vector. */
21370 w = GET_MODE_NUNITS (mode);
21371 e = GET_MODE_UNIT_SIZE (mode);
21372 gcc_assert (w <= 64);
21373
21374 if (ix86_expand_vec_perm_vpermi2 (target, op0, mask, op1))
21375 return;
21376
21377 if (TARGET_AVX2)
21378 {
21379 if (mode == V4DImode || mode == V4DFmode || mode == V16HImode)
21380 {
21381 /* Unfortunately, the VPERMQ and VPERMPD instructions only support
21382 a constant shuffle operand. With a tiny bit of effort we can
21383 use VPERMD instead. A re-interpretation stall for V4DFmode is
21384 unfortunate but there's no avoiding it.
21385 Similarly, for V16HImode we don't have instructions for variable
21386 shuffling, while for V32QImode we can, after preparing suitable
21387 masks, use vpshufb; vpshufb; vpermq; vpor. */
21388
21389 if (mode == V16HImode)
21390 {
21391 maskmode = mode = V32QImode;
21392 w = 32;
21393 e = 1;
21394 }
21395 else
21396 {
21397 maskmode = mode = V8SImode;
21398 w = 8;
21399 e = 4;
21400 }
21401 t1 = gen_reg_rtx (maskmode);
21402
21403 /* Replicate the low bits of the V4DImode mask into V8SImode:
21404 mask = { A B C D }
21405 t1 = { A A B B C C D D }. */
21406 for (i = 0; i < w / 2; ++i)
21407 vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2);
21408 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21409 vt = force_reg (maskmode, vt);
21410 mask = gen_lowpart (maskmode, mask);
21411 if (maskmode == V8SImode)
21412 emit_insn (gen_avx2_permvarv8si (t1, mask, vt));
21413 else
21414 emit_insn (gen_avx2_pshufbv32qi3 (t1, mask, vt));
21415
21416 /* Multiply the shuffle indices by two. */
21417 t1 = expand_simple_binop (maskmode, PLUS, t1, t1, t1, 1,
21418 OPTAB_DIRECT);
21419
21420 /* Add one to the odd shuffle indices:
21421 t1 = { A*2, A*2+1, B*2, B*2+1, ... }. */
21422 for (i = 0; i < w / 2; ++i)
21423 {
21424 vec[i * 2] = const0_rtx;
21425 vec[i * 2 + 1] = const1_rtx;
21426 }
21427 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21428 vt = validize_mem (force_const_mem (maskmode, vt));
21429 t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1,
21430 OPTAB_DIRECT);
21431
21432 /* Continue as if V8SImode (resp. V32QImode) was used initially. */
21433 operands[3] = mask = t1;
21434 target = gen_reg_rtx (mode);
21435 op0 = gen_lowpart (mode, op0);
21436 op1 = gen_lowpart (mode, op1);
21437 }
21438
21439 switch (mode)
21440 {
21441 case V8SImode:
21442 /* The VPERMD and VPERMPS instructions already properly ignore
21443 the high bits of the shuffle elements. No need for us to
21444 perform an AND ourselves. */
21445 if (one_operand_shuffle)
21446 {
21447 emit_insn (gen_avx2_permvarv8si (target, op0, mask));
21448 if (target != operands[0])
21449 emit_move_insn (operands[0],
21450 gen_lowpart (GET_MODE (operands[0]), target));
21451 }
21452 else
21453 {
21454 t1 = gen_reg_rtx (V8SImode);
21455 t2 = gen_reg_rtx (V8SImode);
21456 emit_insn (gen_avx2_permvarv8si (t1, op0, mask));
21457 emit_insn (gen_avx2_permvarv8si (t2, op1, mask));
21458 goto merge_two;
21459 }
21460 return;
21461
21462 case V8SFmode:
21463 mask = gen_lowpart (V8SImode, mask);
21464 if (one_operand_shuffle)
21465 emit_insn (gen_avx2_permvarv8sf (target, op0, mask));
21466 else
21467 {
21468 t1 = gen_reg_rtx (V8SFmode);
21469 t2 = gen_reg_rtx (V8SFmode);
21470 emit_insn (gen_avx2_permvarv8sf (t1, op0, mask));
21471 emit_insn (gen_avx2_permvarv8sf (t2, op1, mask));
21472 goto merge_two;
21473 }
21474 return;
21475
21476 case V4SImode:
21477 /* By combining the two 128-bit input vectors into one 256-bit
21478 input vector, we can use VPERMD and VPERMPS for the full
21479 two-operand shuffle. */
21480 t1 = gen_reg_rtx (V8SImode);
21481 t2 = gen_reg_rtx (V8SImode);
21482 emit_insn (gen_avx_vec_concatv8si (t1, op0, op1));
21483 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
21484 emit_insn (gen_avx2_permvarv8si (t1, t1, t2));
21485 emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx));
21486 return;
21487
21488 case V4SFmode:
21489 t1 = gen_reg_rtx (V8SFmode);
21490 t2 = gen_reg_rtx (V8SImode);
21491 mask = gen_lowpart (V4SImode, mask);
21492 emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1));
21493 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
21494 emit_insn (gen_avx2_permvarv8sf (t1, t1, t2));
21495 emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx));
21496 return;
21497
21498 case V32QImode:
21499 t1 = gen_reg_rtx (V32QImode);
21500 t2 = gen_reg_rtx (V32QImode);
21501 t3 = gen_reg_rtx (V32QImode);
21502 vt2 = GEN_INT (128);
21503 for (i = 0; i < 32; i++)
21504 vec[i] = vt2;
21505 vt = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
21506 vt = force_reg (V32QImode, vt);
21507 for (i = 0; i < 32; i++)
21508 vec[i] = i < 16 ? vt2 : const0_rtx;
21509 vt2 = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
21510 vt2 = force_reg (V32QImode, vt2);
21511 /* From mask create two adjusted masks, which contain the same
21512 bits as mask in the low 7 bits of each vector element.
21513 The first mask will have the most significant bit clear
21514 if it requests element from the same 128-bit lane
21515 and MSB set if it requests element from the other 128-bit lane.
21516 The second mask will have the opposite values of the MSB,
21517 and additionally will have its 128-bit lanes swapped.
21518 E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
21519 t1 { 07 92 9e 09 ... | 17 19 85 1f ... } and
21520 t3 { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
21521 stands for other 12 bytes. */
21522 /* The bit whether element is from the same lane or the other
21523 lane is bit 4, so shift it up by 3 to the MSB position. */
21524 t5 = gen_reg_rtx (V4DImode);
21525 emit_insn (gen_ashlv4di3 (t5, gen_lowpart (V4DImode, mask),
21526 GEN_INT (3)));
21527 /* Clear MSB bits from the mask just in case it had them set. */
21528 emit_insn (gen_avx2_andnotv32qi3 (t2, vt, mask));
21529 /* After this t1 will have MSB set for elements from other lane. */
21530 emit_insn (gen_xorv32qi3 (t1, gen_lowpart (V32QImode, t5), vt2));
21531 /* Clear bits other than MSB. */
21532 emit_insn (gen_andv32qi3 (t1, t1, vt));
21533 /* Or in the lower bits from mask into t3. */
21534 emit_insn (gen_iorv32qi3 (t3, t1, t2));
21535 /* And invert MSB bits in t1, so MSB is set for elements from the same
21536 lane. */
21537 emit_insn (gen_xorv32qi3 (t1, t1, vt));
21538 /* Swap 128-bit lanes in t3. */
21539 t6 = gen_reg_rtx (V4DImode);
21540 emit_insn (gen_avx2_permv4di_1 (t6, gen_lowpart (V4DImode, t3),
21541 const2_rtx, GEN_INT (3),
21542 const0_rtx, const1_rtx));
21543 /* And or in the lower bits from mask into t1. */
21544 emit_insn (gen_iorv32qi3 (t1, t1, t2));
21545 if (one_operand_shuffle)
21546 {
21547 /* Each of these shuffles will put 0s in places where
21548 element from the other 128-bit lane is needed, otherwise
21549 will shuffle in the requested value. */
21550 emit_insn (gen_avx2_pshufbv32qi3 (t3, op0,
21551 gen_lowpart (V32QImode, t6)));
21552 emit_insn (gen_avx2_pshufbv32qi3 (t1, op0, t1));
21553 /* For t3 the 128-bit lanes are swapped again. */
21554 t7 = gen_reg_rtx (V4DImode);
21555 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t3),
21556 const2_rtx, GEN_INT (3),
21557 const0_rtx, const1_rtx));
21558 /* And oring both together leads to the result. */
21559 emit_insn (gen_iorv32qi3 (target, t1,
21560 gen_lowpart (V32QImode, t7)));
21561 if (target != operands[0])
21562 emit_move_insn (operands[0],
21563 gen_lowpart (GET_MODE (operands[0]), target));
21564 return;
21565 }
21566
21567 t4 = gen_reg_rtx (V32QImode);
21568 /* Similarly to the above one_operand_shuffle code,
21569 just repeated twice, once for each operand. The code at the
21570 merge_two: label will merge the two results together. */
21571 emit_insn (gen_avx2_pshufbv32qi3 (t4, op0,
21572 gen_lowpart (V32QImode, t6)));
21573 emit_insn (gen_avx2_pshufbv32qi3 (t3, op1,
21574 gen_lowpart (V32QImode, t6)));
21575 emit_insn (gen_avx2_pshufbv32qi3 (t2, op0, t1));
21576 emit_insn (gen_avx2_pshufbv32qi3 (t1, op1, t1));
21577 t7 = gen_reg_rtx (V4DImode);
21578 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t4),
21579 const2_rtx, GEN_INT (3),
21580 const0_rtx, const1_rtx));
21581 t8 = gen_reg_rtx (V4DImode);
21582 emit_insn (gen_avx2_permv4di_1 (t8, gen_lowpart (V4DImode, t3),
21583 const2_rtx, GEN_INT (3),
21584 const0_rtx, const1_rtx));
21585 emit_insn (gen_iorv32qi3 (t4, t2, gen_lowpart (V32QImode, t7)));
21586 emit_insn (gen_iorv32qi3 (t3, t1, gen_lowpart (V32QImode, t8)));
21587 t1 = t4;
21588 t2 = t3;
21589 goto merge_two;
21590
21591 default:
21592 gcc_assert (GET_MODE_SIZE (mode) <= 16);
21593 break;
21594 }
21595 }
21596
21597 if (TARGET_XOP)
21598 {
21599 /* The XOP VPPERM insn supports three inputs. By ignoring the
21600 one_operand_shuffle special case, we avoid creating another
21601 set of constant vectors in memory. */
21602 one_operand_shuffle = false;
21603
21604 /* mask = mask & {2*w-1, ...} */
21605 vt = GEN_INT (2*w - 1);
21606 }
21607 else
21608 {
21609 /* mask = mask & {w-1, ...} */
21610 vt = GEN_INT (w - 1);
21611 }
21612
21613 for (i = 0; i < w; i++)
21614 vec[i] = vt;
21615 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21616 mask = expand_simple_binop (maskmode, AND, mask, vt,
21617 NULL_RTX, 0, OPTAB_DIRECT);
21618
21619 /* For non-QImode operations, convert the word permutation control
21620 into a byte permutation control. */
21621 if (mode != V16QImode)
21622 {
21623 mask = expand_simple_binop (maskmode, ASHIFT, mask,
21624 GEN_INT (exact_log2 (e)),
21625 NULL_RTX, 0, OPTAB_DIRECT);
21626
21627 /* Convert mask to vector of chars. */
21628 mask = force_reg (V16QImode, gen_lowpart (V16QImode, mask));
21629
21630 /* Replicate each of the input bytes into byte positions:
21631 (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
21632 (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
21633 (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}. */
21634 for (i = 0; i < 16; ++i)
21635 vec[i] = GEN_INT (i/e * e);
21636 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
21637 vt = validize_mem (force_const_mem (V16QImode, vt));
21638 if (TARGET_XOP)
21639 emit_insn (gen_xop_pperm (mask, mask, mask, vt));
21640 else
21641 emit_insn (gen_ssse3_pshufbv16qi3 (mask, mask, vt));
21642
21643 /* Convert it into the byte positions by doing
21644 mask = mask + {0,1,..,16/w, 0,1,..,16/w, ...} */
21645 for (i = 0; i < 16; ++i)
21646 vec[i] = GEN_INT (i % e);
21647 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
21648 vt = validize_mem (force_const_mem (V16QImode, vt));
21649 emit_insn (gen_addv16qi3 (mask, mask, vt));
21650 }
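/* Worked example of the conversion above (illustrative only, assuming a
   V4SImode shuffle, so e == 4): a word index of 3 is first shifted left by
   exact_log2 (4) == 2, giving byte index 12; the pshufb/vpperm step then
   replicates that low byte across the element, giving { 12,12,12,12 }, and
   adding { 0,1,2,3 } yields the byte-level control { 12,13,14,15 }, i.e.
   the four bytes of word 3.  */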
21651
21652 /* The actual shuffle operations all operate on V16QImode. */
21653 op0 = gen_lowpart (V16QImode, op0);
21654 op1 = gen_lowpart (V16QImode, op1);
21655
21656 if (TARGET_XOP)
21657 {
21658 if (GET_MODE (target) != V16QImode)
21659 target = gen_reg_rtx (V16QImode);
21660 emit_insn (gen_xop_pperm (target, op0, op1, mask));
21661 if (target != operands[0])
21662 emit_move_insn (operands[0],
21663 gen_lowpart (GET_MODE (operands[0]), target));
21664 }
21665 else if (one_operand_shuffle)
21666 {
21667 if (GET_MODE (target) != V16QImode)
21668 target = gen_reg_rtx (V16QImode);
21669 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, mask));
21670 if (target != operands[0])
21671 emit_move_insn (operands[0],
21672 gen_lowpart (GET_MODE (operands[0]), target));
21673 }
21674 else
21675 {
21676 rtx xops[6];
21677 bool ok;
21678
21679 /* Shuffle the two input vectors independently. */
21680 t1 = gen_reg_rtx (V16QImode);
21681 t2 = gen_reg_rtx (V16QImode);
21682 emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mask));
21683 emit_insn (gen_ssse3_pshufbv16qi3 (t2, op1, mask));
21684
21685 merge_two:
21686 /* Then merge them together. The key is whether any given control
21687 element contained a bit set that indicates the second word. */
21688 mask = operands[3];
21689 vt = GEN_INT (w);
21690 if (maskmode == V2DImode && !TARGET_SSE4_1)
21691 {
21692 /* Without SSE4.1, we don't have V2DImode EQ. Perform one
21693 more shuffle to convert the V2DI input mask into a V4SI
21694 input mask. At that point the masking done by expand_int_vcond
21695 will work as desired. */
21696 rtx t3 = gen_reg_rtx (V4SImode);
21697 emit_insn (gen_sse2_pshufd_1 (t3, gen_lowpart (V4SImode, mask),
21698 const0_rtx, const0_rtx,
21699 const2_rtx, const2_rtx));
21700 mask = t3;
21701 maskmode = V4SImode;
21702 e = w = 4;
21703 }
21704
21705 for (i = 0; i < w; i++)
21706 vec[i] = vt;
21707 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21708 vt = force_reg (maskmode, vt);
21709 mask = expand_simple_binop (maskmode, AND, mask, vt,
21710 NULL_RTX, 0, OPTAB_DIRECT);
21711
21712 if (GET_MODE (target) != mode)
21713 target = gen_reg_rtx (mode);
21714 xops[0] = target;
21715 xops[1] = gen_lowpart (mode, t2);
21716 xops[2] = gen_lowpart (mode, t1);
21717 xops[3] = gen_rtx_EQ (maskmode, mask, vt);
21718 xops[4] = mask;
21719 xops[5] = vt;
21720 ok = ix86_expand_int_vcond (xops);
21721 gcc_assert (ok);
21722 if (target != operands[0])
21723 emit_move_insn (operands[0],
21724 gen_lowpart (GET_MODE (operands[0]), target));
21725 }
21726 }
21727
21728 /* Unpack SRC into DEST, widening to the next wider integer vector type.
21729 UNSIGNED_P is true if we should do zero extension, else sign extension.
21730 HIGH_P is true if we want the N/2 high elements, else the low elements. */
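/* Illustrative example (not from the original source): with SSE4.1, an
   unsigned low-half unpack of a V16QImode SRC simply emits
   gen_sse4_1_zero_extendv8qiv8hi2 (dest, src), i.e. pmovzxbw, widening the
   low 8 bytes to 8 half-words; with HIGH_P the input is first shifted
   right by 64 bits via gen_sse2_lshrv1ti3 so the same instruction sees the
   high 8 bytes instead.  */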
21731
21732 void
21733 ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p)
21734 {
21735 enum machine_mode imode = GET_MODE (src);
21736 rtx tmp;
21737
21738 if (TARGET_SSE4_1)
21739 {
21740 rtx (*unpack)(rtx, rtx);
21741 rtx (*extract)(rtx, rtx) = NULL;
21742 enum machine_mode halfmode = BLKmode;
21743
21744 switch (imode)
21745 {
21746 case V32QImode:
21747 if (unsigned_p)
21748 unpack = gen_avx2_zero_extendv16qiv16hi2;
21749 else
21750 unpack = gen_avx2_sign_extendv16qiv16hi2;
21751 halfmode = V16QImode;
21752 extract
21753 = high_p ? gen_vec_extract_hi_v32qi : gen_vec_extract_lo_v32qi;
21754 break;
21755 case V32HImode:
21756 if (unsigned_p)
21757 unpack = gen_avx512f_zero_extendv16hiv16si2;
21758 else
21759 unpack = gen_avx512f_sign_extendv16hiv16si2;
21760 halfmode = V16HImode;
21761 extract
21762 = high_p ? gen_vec_extract_hi_v32hi : gen_vec_extract_lo_v32hi;
21763 break;
21764 case V16HImode:
21765 if (unsigned_p)
21766 unpack = gen_avx2_zero_extendv8hiv8si2;
21767 else
21768 unpack = gen_avx2_sign_extendv8hiv8si2;
21769 halfmode = V8HImode;
21770 extract
21771 = high_p ? gen_vec_extract_hi_v16hi : gen_vec_extract_lo_v16hi;
21772 break;
21773 case V16SImode:
21774 if (unsigned_p)
21775 unpack = gen_avx512f_zero_extendv8siv8di2;
21776 else
21777 unpack = gen_avx512f_sign_extendv8siv8di2;
21778 halfmode = V8SImode;
21779 extract
21780 = high_p ? gen_vec_extract_hi_v16si : gen_vec_extract_lo_v16si;
21781 break;
21782 case V8SImode:
21783 if (unsigned_p)
21784 unpack = gen_avx2_zero_extendv4siv4di2;
21785 else
21786 unpack = gen_avx2_sign_extendv4siv4di2;
21787 halfmode = V4SImode;
21788 extract
21789 = high_p ? gen_vec_extract_hi_v8si : gen_vec_extract_lo_v8si;
21790 break;
21791 case V16QImode:
21792 if (unsigned_p)
21793 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
21794 else
21795 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
21796 break;
21797 case V8HImode:
21798 if (unsigned_p)
21799 unpack = gen_sse4_1_zero_extendv4hiv4si2;
21800 else
21801 unpack = gen_sse4_1_sign_extendv4hiv4si2;
21802 break;
21803 case V4SImode:
21804 if (unsigned_p)
21805 unpack = gen_sse4_1_zero_extendv2siv2di2;
21806 else
21807 unpack = gen_sse4_1_sign_extendv2siv2di2;
21808 break;
21809 default:
21810 gcc_unreachable ();
21811 }
21812
21813 if (GET_MODE_SIZE (imode) >= 32)
21814 {
21815 tmp = gen_reg_rtx (halfmode);
21816 emit_insn (extract (tmp, src));
21817 }
21818 else if (high_p)
21819 {
21820 /* Shift higher 8 bytes to lower 8 bytes. */
21821 tmp = gen_reg_rtx (V1TImode);
21822 emit_insn (gen_sse2_lshrv1ti3 (tmp, gen_lowpart (V1TImode, src),
21823 GEN_INT (64)));
21824 tmp = gen_lowpart (imode, tmp);
21825 }
21826 else
21827 tmp = src;
21828
21829 emit_insn (unpack (dest, tmp));
21830 }
21831 else
21832 {
21833 rtx (*unpack)(rtx, rtx, rtx);
21834
21835 switch (imode)
21836 {
21837 case V16QImode:
21838 if (high_p)
21839 unpack = gen_vec_interleave_highv16qi;
21840 else
21841 unpack = gen_vec_interleave_lowv16qi;
21842 break;
21843 case V8HImode:
21844 if (high_p)
21845 unpack = gen_vec_interleave_highv8hi;
21846 else
21847 unpack = gen_vec_interleave_lowv8hi;
21848 break;
21849 case V4SImode:
21850 if (high_p)
21851 unpack = gen_vec_interleave_highv4si;
21852 else
21853 unpack = gen_vec_interleave_lowv4si;
21854 break;
21855 default:
21856 gcc_unreachable ();
21857 }
21858
21859 if (unsigned_p)
21860 tmp = force_reg (imode, CONST0_RTX (imode));
21861 else
21862 tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
21863 src, pc_rtx, pc_rtx);
21864
21865 rtx tmp2 = gen_reg_rtx (imode);
21866 emit_insn (unpack (tmp2, src, tmp));
21867 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), tmp2));
21868 }
21869 }
21870
21871 /* Expand conditional increment or decrement using adc/sbb instructions.
21872 The default case using setcc followed by the conditional move can be
21873 done by generic code. */
21874 bool
21875 ix86_expand_int_addcc (rtx operands[])
21876 {
21877 enum rtx_code code = GET_CODE (operands[1]);
21878 rtx flags;
21879 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
21880 rtx compare_op;
21881 rtx val = const0_rtx;
21882 bool fpcmp = false;
21883 enum machine_mode mode;
21884 rtx op0 = XEXP (operands[1], 0);
21885 rtx op1 = XEXP (operands[1], 1);
21886
21887 if (operands[3] != const1_rtx
21888 && operands[3] != constm1_rtx)
21889 return false;
21890 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
21891 return false;
21892 code = GET_CODE (compare_op);
21893
21894 flags = XEXP (compare_op, 0);
21895
21896 if (GET_MODE (flags) == CCFPmode
21897 || GET_MODE (flags) == CCFPUmode)
21898 {
21899 fpcmp = true;
21900 code = ix86_fp_compare_code_to_integer (code);
21901 }
21902
21903 if (code != LTU)
21904 {
21905 val = constm1_rtx;
21906 if (fpcmp)
21907 PUT_CODE (compare_op,
21908 reverse_condition_maybe_unordered
21909 (GET_CODE (compare_op)));
21910 else
21911 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
21912 }
21913
21914 mode = GET_MODE (operands[0]);
21915
21916 /* Construct either adc or sbb insn. */
21917 if ((code == LTU) == (operands[3] == constm1_rtx))
21918 {
21919 switch (mode)
21920 {
21921 case QImode:
21922 insn = gen_subqi3_carry;
21923 break;
21924 case HImode:
21925 insn = gen_subhi3_carry;
21926 break;
21927 case SImode:
21928 insn = gen_subsi3_carry;
21929 break;
21930 case DImode:
21931 insn = gen_subdi3_carry;
21932 break;
21933 default:
21934 gcc_unreachable ();
21935 }
21936 }
21937 else
21938 {
21939 switch (mode)
21940 {
21941 case QImode:
21942 insn = gen_addqi3_carry;
21943 break;
21944 case HImode:
21945 insn = gen_addhi3_carry;
21946 break;
21947 case SImode:
21948 insn = gen_addsi3_carry;
21949 break;
21950 case DImode:
21951 insn = gen_adddi3_carry;
21952 break;
21953 default:
21954 gcc_unreachable ();
21955 }
21956 }
21957 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
21958
21959 return true;
21960 }
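/* Illustrative sketch of the idiom generated above (not part of the
   original source): for unsigned operands, a conditional increment such as

       unsigned int f (unsigned int x, unsigned int a, unsigned int b)
       {
         return x + (a < b);
       }

   can be expanded to a compare that leaves the condition in the carry
   flag followed by an add-with-carry of zero (Intel syntax, schematic):

       cmp   a, b        ; CF = (a < b)
       adc   x, 0        ; x += CF

   and the conditional decrement case uses sbb the same way.  */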
21961
21962
21963 /* Split operands 0 and 1 into half-mode parts. Similar to split_double_mode,
21964 but works for floating point parameters and non-offsettable memories.
21965 For pushes, it returns just stack offsets; the values will be saved
21966 in the right order. Maximally four parts are generated. */
21967
21968 static int
21969 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
21970 {
21971 int size;
21972
21973 if (!TARGET_64BIT)
21974 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
21975 else
21976 size = (GET_MODE_SIZE (mode) + 4) / 8;
21977
21978 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
21979 gcc_assert (size >= 2 && size <= 4);
21980
21981 /* Optimize constant pool reference to immediates. This is used by fp
21982 moves, that force all constants to memory to allow combining. */
21983 if (MEM_P (operand) && MEM_READONLY_P (operand))
21984 {
21985 rtx tmp = maybe_get_pool_constant (operand);
21986 if (tmp)
21987 operand = tmp;
21988 }
21989
21990 if (MEM_P (operand) && !offsettable_memref_p (operand))
21991 {
21992 /* The only non-offsettable memories we handle are pushes. */
21993 int ok = push_operand (operand, VOIDmode);
21994
21995 gcc_assert (ok);
21996
21997 operand = copy_rtx (operand);
21998 PUT_MODE (operand, word_mode);
21999 parts[0] = parts[1] = parts[2] = parts[3] = operand;
22000 return size;
22001 }
22002
22003 if (GET_CODE (operand) == CONST_VECTOR)
22004 {
22005 enum machine_mode imode = int_mode_for_mode (mode);
22006 /* Caution: if we looked through a constant pool memory above,
22007 the operand may actually have a different mode now. That's
22008 ok, since we want to pun this all the way back to an integer. */
22009 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
22010 gcc_assert (operand != NULL);
22011 mode = imode;
22012 }
22013
22014 if (!TARGET_64BIT)
22015 {
22016 if (mode == DImode)
22017 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22018 else
22019 {
22020 int i;
22021
22022 if (REG_P (operand))
22023 {
22024 gcc_assert (reload_completed);
22025 for (i = 0; i < size; i++)
22026 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
22027 }
22028 else if (offsettable_memref_p (operand))
22029 {
22030 operand = adjust_address (operand, SImode, 0);
22031 parts[0] = operand;
22032 for (i = 1; i < size; i++)
22033 parts[i] = adjust_address (operand, SImode, 4 * i);
22034 }
22035 else if (GET_CODE (operand) == CONST_DOUBLE)
22036 {
22037 REAL_VALUE_TYPE r;
22038 long l[4];
22039
22040 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
22041 switch (mode)
22042 {
22043 case TFmode:
22044 real_to_target (l, &r, mode);
22045 parts[3] = gen_int_mode (l[3], SImode);
22046 parts[2] = gen_int_mode (l[2], SImode);
22047 break;
22048 case XFmode:
22049 /* We can't use REAL_VALUE_TO_TARGET_LONG_DOUBLE since
22050 long double may not be 80-bit. */
22051 real_to_target (l, &r, mode);
22052 parts[2] = gen_int_mode (l[2], SImode);
22053 break;
22054 case DFmode:
22055 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
22056 break;
22057 default:
22058 gcc_unreachable ();
22059 }
22060 parts[1] = gen_int_mode (l[1], SImode);
22061 parts[0] = gen_int_mode (l[0], SImode);
22062 }
22063 else
22064 gcc_unreachable ();
22065 }
22066 }
22067 else
22068 {
22069 if (mode == TImode)
22070 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22071 if (mode == XFmode || mode == TFmode)
22072 {
22073 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
22074 if (REG_P (operand))
22075 {
22076 gcc_assert (reload_completed);
22077 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
22078 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
22079 }
22080 else if (offsettable_memref_p (operand))
22081 {
22082 operand = adjust_address (operand, DImode, 0);
22083 parts[0] = operand;
22084 parts[1] = adjust_address (operand, upper_mode, 8);
22085 }
22086 else if (GET_CODE (operand) == CONST_DOUBLE)
22087 {
22088 REAL_VALUE_TYPE r;
22089 long l[4];
22090
22091 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
22092 real_to_target (l, &r, mode);
22093
22094 /* Do not use shift by 32 to avoid warning on 32bit systems. */
22095 if (HOST_BITS_PER_WIDE_INT >= 64)
22096 parts[0]
22097 = gen_int_mode
22098 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
22099 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
22100 DImode);
22101 else
22102 parts[0] = immed_double_const (l[0], l[1], DImode);
22103
22104 if (upper_mode == SImode)
22105 parts[1] = gen_int_mode (l[2], SImode);
22106 else if (HOST_BITS_PER_WIDE_INT >= 64)
22107 parts[1]
22108 = gen_int_mode
22109 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
22110 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
22111 DImode);
22112 else
22113 parts[1] = immed_double_const (l[2], l[3], DImode);
22114 }
22115 else
22116 gcc_unreachable ();
22117 }
22118 }
22119
22120 return size;
22121 }
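/* Illustrative example (not part of the original source): on a 32-bit
   target a DFmode value held in a register pair is split into two SImode
   parts, parts[0] = (reg:SI REGNO) and parts[1] = (reg:SI REGNO+1), while
   an XFmode value yields three SImode parts and TFmode four; for a push
   the same operand, retyped to word_mode, is returned for every part.  */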
22122
22123 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
22124 The operands are split into half-mode parts with ix86_split_to_parts
22125 and the part moves are emitted here in an order that avoids overwriting
22126 a source part before it has been copied. */
22127
22128 void
22129 ix86_split_long_move (rtx operands[])
22130 {
22131 rtx part[2][4];
22132 int nparts, i, j;
22133 int push = 0;
22134 int collisions = 0;
22135 enum machine_mode mode = GET_MODE (operands[0]);
22136 bool collisionparts[4];
22137
22138 /* The DFmode expanders may ask us to move a double.
22139 For a 64-bit target this is a single move. By hiding the fact
22140 here we simplify the i386.md splitters. */
22141 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
22142 {
22143 /* Optimize constant pool reference to immediates. This is used by
22144 fp moves, that force all constants to memory to allow combining. */
22145
22146 if (MEM_P (operands[1])
22147 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
22148 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
22149 operands[1] = get_pool_constant (XEXP (operands[1], 0));
22150 if (push_operand (operands[0], VOIDmode))
22151 {
22152 operands[0] = copy_rtx (operands[0]);
22153 PUT_MODE (operands[0], word_mode);
22154 }
22155 else
22156 operands[0] = gen_lowpart (DImode, operands[0]);
22157 operands[1] = gen_lowpart (DImode, operands[1]);
22158 emit_move_insn (operands[0], operands[1]);
22159 return;
22160 }
22161
22162 /* The only non-offsettable memory we handle is push. */
22163 if (push_operand (operands[0], VOIDmode))
22164 push = 1;
22165 else
22166 gcc_assert (!MEM_P (operands[0])
22167 || offsettable_memref_p (operands[0]));
22168
22169 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
22170 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
22171
22172 /* When emitting push, take care for source operands on the stack. */
22173 if (push && MEM_P (operands[1])
22174 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
22175 {
22176 rtx src_base = XEXP (part[1][nparts - 1], 0);
22177
22178 /* Compensate for the stack decrement by 4. */
22179 if (!TARGET_64BIT && nparts == 3
22180 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
22181 src_base = plus_constant (Pmode, src_base, 4);
22182
22183 /* src_base refers to the stack pointer and is
22184 automatically decreased by emitted push. */
22185 for (i = 0; i < nparts; i++)
22186 part[1][i] = change_address (part[1][i],
22187 GET_MODE (part[1][i]), src_base);
22188 }
22189
22190 /* We need to do copy in the right order in case an address register
22191 of the source overlaps the destination. */
22192 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
22193 {
22194 rtx tmp;
22195
22196 for (i = 0; i < nparts; i++)
22197 {
22198 collisionparts[i]
22199 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
22200 if (collisionparts[i])
22201 collisions++;
22202 }
22203
22204 /* Collision in the middle part can be handled by reordering. */
22205 if (collisions == 1 && nparts == 3 && collisionparts [1])
22206 {
22207 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
22208 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
22209 }
22210 else if (collisions == 1
22211 && nparts == 4
22212 && (collisionparts [1] || collisionparts [2]))
22213 {
22214 if (collisionparts [1])
22215 {
22216 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
22217 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
22218 }
22219 else
22220 {
22221 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
22222 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
22223 }
22224 }
22225
22226 /* If there are more collisions, we can't handle it by reordering.
22227 Do an lea to the last part and use only one colliding move. */
22228 else if (collisions > 1)
22229 {
22230 rtx base;
22231
22232 collisions = 1;
22233
22234 base = part[0][nparts - 1];
22235
22236 /* Handle the case when the last part isn't valid for lea.
22237 Happens in 64-bit mode storing the 12-byte XFmode. */
22238 if (GET_MODE (base) != Pmode)
22239 base = gen_rtx_REG (Pmode, REGNO (base));
22240
22241 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
22242 part[1][0] = replace_equiv_address (part[1][0], base);
22243 for (i = 1; i < nparts; i++)
22244 {
22245 tmp = plus_constant (Pmode, base, UNITS_PER_WORD * i);
22246 part[1][i] = replace_equiv_address (part[1][i], tmp);
22247 }
22248 }
22249 }
22250
22251 if (push)
22252 {
22253 if (!TARGET_64BIT)
22254 {
22255 if (nparts == 3)
22256 {
22257 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
22258 emit_insn (ix86_gen_add3 (stack_pointer_rtx,
22259 stack_pointer_rtx, GEN_INT (-4)));
22260 emit_move_insn (part[0][2], part[1][2]);
22261 }
22262 else if (nparts == 4)
22263 {
22264 emit_move_insn (part[0][3], part[1][3]);
22265 emit_move_insn (part[0][2], part[1][2]);
22266 }
22267 }
22268 else
22269 {
22270 /* In 64-bit mode we don't have a 32-bit push available. In case this is
22271 a register, it is OK - we will just use the larger counterpart. We also
22272 retype memory - this comes from an attempt to avoid the REX prefix when
22273 moving the second half of a TFmode value. */
22274 if (GET_MODE (part[1][1]) == SImode)
22275 {
22276 switch (GET_CODE (part[1][1]))
22277 {
22278 case MEM:
22279 part[1][1] = adjust_address (part[1][1], DImode, 0);
22280 break;
22281
22282 case REG:
22283 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
22284 break;
22285
22286 default:
22287 gcc_unreachable ();
22288 }
22289
22290 if (GET_MODE (part[1][0]) == SImode)
22291 part[1][0] = part[1][1];
22292 }
22293 }
22294 emit_move_insn (part[0][1], part[1][1]);
22295 emit_move_insn (part[0][0], part[1][0]);
22296 return;
22297 }
22298
22299 /* Choose correct order to not overwrite the source before it is copied. */
22300 if ((REG_P (part[0][0])
22301 && REG_P (part[1][1])
22302 && (REGNO (part[0][0]) == REGNO (part[1][1])
22303 || (nparts == 3
22304 && REGNO (part[0][0]) == REGNO (part[1][2]))
22305 || (nparts == 4
22306 && REGNO (part[0][0]) == REGNO (part[1][3]))))
22307 || (collisions > 0
22308 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
22309 {
22310 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
22311 {
22312 operands[2 + i] = part[0][j];
22313 operands[6 + i] = part[1][j];
22314 }
22315 }
22316 else
22317 {
22318 for (i = 0; i < nparts; i++)
22319 {
22320 operands[2 + i] = part[0][i];
22321 operands[6 + i] = part[1][i];
22322 }
22323 }
22324
22325 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
22326 if (optimize_insn_for_size_p ())
22327 {
22328 for (j = 0; j < nparts - 1; j++)
22329 if (CONST_INT_P (operands[6 + j])
22330 && operands[6 + j] != const0_rtx
22331 && REG_P (operands[2 + j]))
22332 for (i = j; i < nparts - 1; i++)
22333 if (CONST_INT_P (operands[7 + i])
22334 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
22335 operands[7 + i] = operands[2 + j];
22336 }
22337
22338 for (i = 0; i < nparts; i++)
22339 emit_move_insn (operands[2 + i], operands[6 + i]);
22340
22341 return;
22342 }
22343
22344 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
22345 left shift by a constant, either using a single shift or
22346 a sequence of add instructions. */
22347
22348 static void
22349 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
22350 {
22351 rtx (*insn)(rtx, rtx, rtx);
22352
22353 if (count == 1
22354 || (count * ix86_cost->add <= ix86_cost->shift_const
22355 && !optimize_insn_for_size_p ()))
22356 {
22357 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
22358 while (count-- > 0)
22359 emit_insn (insn (operand, operand, operand));
22360 }
22361 else
22362 {
22363 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
22364 emit_insn (insn (operand, operand, GEN_INT (count)));
22365 }
22366 }
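/* Illustrative example (not part of the original source): a left shift by
   one is emitted as a single self-addition of the half-register, e.g.

       add   eax, eax       ; eax <<= 1

   and small constant shifts become a short run of such adds whenever
   count * add-cost does not exceed the cost of one shift-by-constant and
   we are not optimizing for size.  */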
22367
22368 void
22369 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
22370 {
22371 rtx (*gen_ashl3)(rtx, rtx, rtx);
22372 rtx (*gen_shld)(rtx, rtx, rtx);
22373 int half_width = GET_MODE_BITSIZE (mode) >> 1;
22374
22375 rtx low[2], high[2];
22376 int count;
22377
22378 if (CONST_INT_P (operands[2]))
22379 {
22380 split_double_mode (mode, operands, 2, low, high);
22381 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
22382
22383 if (count >= half_width)
22384 {
22385 emit_move_insn (high[0], low[1]);
22386 emit_move_insn (low[0], const0_rtx);
22387
22388 if (count > half_width)
22389 ix86_expand_ashl_const (high[0], count - half_width, mode);
22390 }
22391 else
22392 {
22393 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
22394
22395 if (!rtx_equal_p (operands[0], operands[1]))
22396 emit_move_insn (operands[0], operands[1]);
22397
22398 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
22399 ix86_expand_ashl_const (low[0], count, mode);
22400 }
22401 return;
22402 }
22403
22404 split_double_mode (mode, operands, 1, low, high);
22405
22406 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
22407
22408 if (operands[1] == const1_rtx)
22409 {
22410 /* Assuming we've chosen QImode-capable registers, then 1 << N
22411 can be done with two 32/64-bit shifts, no branches, no cmoves. */
22412 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
22413 {
22414 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
22415
22416 ix86_expand_clear (low[0]);
22417 ix86_expand_clear (high[0]);
22418 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
22419
22420 d = gen_lowpart (QImode, low[0]);
22421 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
22422 s = gen_rtx_EQ (QImode, flags, const0_rtx);
22423 emit_insn (gen_rtx_SET (VOIDmode, d, s));
22424
22425 d = gen_lowpart (QImode, high[0]);
22426 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
22427 s = gen_rtx_NE (QImode, flags, const0_rtx);
22428 emit_insn (gen_rtx_SET (VOIDmode, d, s));
22429 }
22430
22431 /* Otherwise, we can get the same results by manually performing
22432 a bit extract operation on bit 5/6, and then performing the two
22433 shifts. The two methods of getting 0/1 into low/high are exactly
22434 the same size. Avoiding the shift in the bit extract case helps
22435 pentium4 a bit; no one else seems to care much either way. */
22436 else
22437 {
22438 enum machine_mode half_mode;
22439 rtx (*gen_lshr3)(rtx, rtx, rtx);
22440 rtx (*gen_and3)(rtx, rtx, rtx);
22441 rtx (*gen_xor3)(rtx, rtx, rtx);
22442 HOST_WIDE_INT bits;
22443 rtx x;
22444
22445 if (mode == DImode)
22446 {
22447 half_mode = SImode;
22448 gen_lshr3 = gen_lshrsi3;
22449 gen_and3 = gen_andsi3;
22450 gen_xor3 = gen_xorsi3;
22451 bits = 5;
22452 }
22453 else
22454 {
22455 half_mode = DImode;
22456 gen_lshr3 = gen_lshrdi3;
22457 gen_and3 = gen_anddi3;
22458 gen_xor3 = gen_xordi3;
22459 bits = 6;
22460 }
22461
22462 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
22463 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
22464 else
22465 x = gen_lowpart (half_mode, operands[2]);
22466 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
22467
22468 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
22469 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
22470 emit_move_insn (low[0], high[0]);
22471 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
22472 }
22473
22474 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
22475 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
22476 return;
22477 }
22478
22479 if (operands[1] == constm1_rtx)
22480 {
22481 /* For -1 << N, we can avoid the shld instruction, because we
22482 know that we're shifting 0...31/63 ones into a -1. */
22483 emit_move_insn (low[0], constm1_rtx);
22484 if (optimize_insn_for_size_p ())
22485 emit_move_insn (high[0], low[0]);
22486 else
22487 emit_move_insn (high[0], constm1_rtx);
22488 }
22489 else
22490 {
22491 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
22492
22493 if (!rtx_equal_p (operands[0], operands[1]))
22494 emit_move_insn (operands[0], operands[1]);
22495
22496 split_double_mode (mode, operands, 1, low, high);
22497 emit_insn (gen_shld (high[0], low[0], operands[2]));
22498 }
22499
22500 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
22501
22502 if (TARGET_CMOVE && scratch)
22503 {
22504 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
22505 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
22506
22507 ix86_expand_clear (scratch);
22508 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
22509 }
22510 else
22511 {
22512 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
22513 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
22514
22515 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
22516 }
22517 }
22518
22519 void
22520 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
22521 {
22522 rtx (*gen_ashr3)(rtx, rtx, rtx)
22523 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
22524 rtx (*gen_shrd)(rtx, rtx, rtx);
22525 int half_width = GET_MODE_BITSIZE (mode) >> 1;
22526
22527 rtx low[2], high[2];
22528 int count;
22529
22530 if (CONST_INT_P (operands[2]))
22531 {
22532 split_double_mode (mode, operands, 2, low, high);
22533 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
22534
22535 if (count == GET_MODE_BITSIZE (mode) - 1)
22536 {
22537 emit_move_insn (high[0], high[1]);
22538 emit_insn (gen_ashr3 (high[0], high[0],
22539 GEN_INT (half_width - 1)));
22540 emit_move_insn (low[0], high[0]);
22541
22542 }
22543 else if (count >= half_width)
22544 {
22545 emit_move_insn (low[0], high[1]);
22546 emit_move_insn (high[0], low[0]);
22547 emit_insn (gen_ashr3 (high[0], high[0],
22548 GEN_INT (half_width - 1)));
22549
22550 if (count > half_width)
22551 emit_insn (gen_ashr3 (low[0], low[0],
22552 GEN_INT (count - half_width)));
22553 }
22554 else
22555 {
22556 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22557
22558 if (!rtx_equal_p (operands[0], operands[1]))
22559 emit_move_insn (operands[0], operands[1]);
22560
22561 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
22562 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
22563 }
22564 }
22565 else
22566 {
22567 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22568
22569 if (!rtx_equal_p (operands[0], operands[1]))
22570 emit_move_insn (operands[0], operands[1]);
22571
22572 split_double_mode (mode, operands, 1, low, high);
22573
22574 emit_insn (gen_shrd (low[0], high[0], operands[2]));
22575 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
22576
22577 if (TARGET_CMOVE && scratch)
22578 {
22579 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
22580 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
22581
22582 emit_move_insn (scratch, high[0]);
22583 emit_insn (gen_ashr3 (scratch, scratch,
22584 GEN_INT (half_width - 1)));
22585 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
22586 scratch));
22587 }
22588 else
22589 {
22590 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
22591 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
22592
22593 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
22594 }
22595 }
22596 }
22597
22598 void
22599 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
22600 {
22601 rtx (*gen_lshr3)(rtx, rtx, rtx)
22602 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
22603 rtx (*gen_shrd)(rtx, rtx, rtx);
22604 int half_width = GET_MODE_BITSIZE (mode) >> 1;
22605
22606 rtx low[2], high[2];
22607 int count;
22608
22609 if (CONST_INT_P (operands[2]))
22610 {
22611 split_double_mode (mode, operands, 2, low, high);
22612 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
22613
22614 if (count >= half_width)
22615 {
22616 emit_move_insn (low[0], high[1]);
22617 ix86_expand_clear (high[0]);
22618
22619 if (count > half_width)
22620 emit_insn (gen_lshr3 (low[0], low[0],
22621 GEN_INT (count - half_width)));
22622 }
22623 else
22624 {
22625 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22626
22627 if (!rtx_equal_p (operands[0], operands[1]))
22628 emit_move_insn (operands[0], operands[1]);
22629
22630 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
22631 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
22632 }
22633 }
22634 else
22635 {
22636 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22637
22638 if (!rtx_equal_p (operands[0], operands[1]))
22639 emit_move_insn (operands[0], operands[1]);
22640
22641 split_double_mode (mode, operands, 1, low, high);
22642
22643 emit_insn (gen_shrd (low[0], high[0], operands[2]));
22644 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
22645
22646 if (TARGET_CMOVE && scratch)
22647 {
22648 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
22649 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
22650
22651 ix86_expand_clear (scratch);
22652 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
22653 scratch));
22654 }
22655 else
22656 {
22657 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
22658 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
22659
22660 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
22661 }
22662 }
22663 }
22664
22665 /* Predict just emitted jump instruction to be taken with probability PROB. */
22666 static void
22667 predict_jump (int prob)
22668 {
22669 rtx insn = get_last_insn ();
22670 gcc_assert (JUMP_P (insn));
22671 add_int_reg_note (insn, REG_BR_PROB, prob);
22672 }
22673
22674 /* Helper function for the string operations below. Test VARIABLE whether
22675 it is aligned to VALUE bytes. If true, jump to the label. */
22676 static rtx
22677 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
22678 {
22679 rtx label = gen_label_rtx ();
22680 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
22681 if (GET_MODE (variable) == DImode)
22682 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
22683 else
22684 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
22685 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
22686 1, label);
22687 if (epilogue)
22688 predict_jump (REG_BR_PROB_BASE * 50 / 100);
22689 else
22690 predict_jump (REG_BR_PROB_BASE * 90 / 100);
22691 return label;
22692 }
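/* Illustrative sketch of what ix86_expand_aligntest produces (not part of
   the original source), here for VALUE == 4: the RTL ands VARIABLE with 4
   into a scratch register, compares the result with zero and branches to
   the returned label when the bit is clear; later passes typically fold
   this into something like

       test  variable, 4
       je    .Llabel

   so the caller's 4-byte chunk between the test and .Llabel only runs
   when that bit is set.  */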
22693
22694 /* Decrease COUNTREG by VALUE. */
22695 static void
22696 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
22697 {
22698 rtx (*gen_add)(rtx, rtx, rtx)
22699 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
22700
22701 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
22702 }
22703
22704 /* Zero extend EXP, which is possibly SImode, to a Pmode register. */
22705 rtx
22706 ix86_zero_extend_to_Pmode (rtx exp)
22707 {
22708 return force_reg (Pmode, convert_to_mode (Pmode, exp, 1));
22709 }
22710
22711 /* Divide COUNTREG by SCALE. */
22712 static rtx
22713 scale_counter (rtx countreg, int scale)
22714 {
22715 rtx sc;
22716
22717 if (scale == 1)
22718 return countreg;
22719 if (CONST_INT_P (countreg))
22720 return GEN_INT (INTVAL (countreg) / scale);
22721 gcc_assert (REG_P (countreg));
22722
22723 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
22724 GEN_INT (exact_log2 (scale)),
22725 NULL, 1, OPTAB_DIRECT);
22726 return sc;
22727 }
22728
22729 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
22730 DImode for constant loop counts. */
22731
22732 static enum machine_mode
22733 counter_mode (rtx count_exp)
22734 {
22735 if (GET_MODE (count_exp) != VOIDmode)
22736 return GET_MODE (count_exp);
22737 if (!CONST_INT_P (count_exp))
22738 return Pmode;
22739 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
22740 return DImode;
22741 return SImode;
22742 }
22743
22744 /* Copy the address to a Pmode register. This is used for x32 to
22745 truncate DImode TLS address to a SImode register. */
22746
22747 static rtx
22748 ix86_copy_addr_to_reg (rtx addr)
22749 {
22750 if (GET_MODE (addr) == Pmode || GET_MODE (addr) == VOIDmode)
22751 return copy_addr_to_reg (addr);
22752 else
22753 {
22754 gcc_assert (GET_MODE (addr) == DImode && Pmode == SImode);
22755 return gen_rtx_SUBREG (SImode, copy_to_mode_reg (DImode, addr), 0);
22756 }
22757 }
22758
22759 /* When ISSETMEM is FALSE, output a simple loop to move memory pointed to by
22760 SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times; the overall size
22761 is COUNT, specified in bytes. When ISSETMEM is TRUE, output the equivalent
22762 loop to set memory to VALUE (supposed to be in MODE).
22763
22764 The size is rounded down to a whole number of the chunk size moved at once.
22765 SRCMEM and DESTMEM provide the MEM rtx to feed proper aliasing info. */
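/* Rough shape of the loop emitted below (an illustrative sketch, not part
   of the original source), for the movmem case with chunks of SIZE bytes
   and UNROLL chunks per iteration:

       size = count & ~(SIZE * UNROLL - 1);
       iter = 0;
     top:
       copy UNROLL chunks of SIZE bytes at dest+iter / src+iter;
       iter += SIZE * UNROLL;
       if (iter < size) goto top;
       dest += iter;  src += iter;
     out:

   note that the pointer registers themselves are only advanced once,
   after the loop exits.  */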
22766
22767
22768 static void
22769 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
22770 rtx destptr, rtx srcptr, rtx value,
22771 rtx count, enum machine_mode mode, int unroll,
22772 int expected_size, bool issetmem)
22773 {
22774 rtx out_label, top_label, iter, tmp;
22775 enum machine_mode iter_mode = counter_mode (count);
22776 int piece_size_n = GET_MODE_SIZE (mode) * unroll;
22777 rtx piece_size = GEN_INT (piece_size_n);
22778 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
22779 rtx size;
22780 int i;
22781
22782 top_label = gen_label_rtx ();
22783 out_label = gen_label_rtx ();
22784 iter = gen_reg_rtx (iter_mode);
22785
22786 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
22787 NULL, 1, OPTAB_DIRECT);
22788 /* Those two should combine. */
22789 if (piece_size == const1_rtx)
22790 {
22791 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
22792 true, out_label);
22793 predict_jump (REG_BR_PROB_BASE * 10 / 100);
22794 }
22795 emit_move_insn (iter, const0_rtx);
22796
22797 emit_label (top_label);
22798
22799 tmp = convert_modes (Pmode, iter_mode, iter, true);
22800
22801 /* This assert could be relaxed - in that case we'll need to compute
22802 the smallest power of two containing PIECE_SIZE_N and pass it to
22803 offset_address. */
22804 gcc_assert ((piece_size_n & (piece_size_n - 1)) == 0);
22805 destmem = offset_address (destmem, tmp, piece_size_n);
22806 destmem = adjust_address (destmem, mode, 0);
22807
22808 if (!issetmem)
22809 {
22810 srcmem = offset_address (srcmem, copy_rtx (tmp), piece_size_n);
22811 srcmem = adjust_address (srcmem, mode, 0);
22812
22813 /* When unrolling for chips that reorder memory reads and writes,
22814 we can save registers by using a single temporary.
22815 Also, using 4 temporaries is overkill in 32-bit mode. */
22816 if (!TARGET_64BIT && 0)
22817 {
22818 for (i = 0; i < unroll; i++)
22819 {
22820 if (i)
22821 {
22822 destmem =
22823 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
22824 srcmem =
22825 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
22826 }
22827 emit_move_insn (destmem, srcmem);
22828 }
22829 }
22830 else
22831 {
22832 rtx tmpreg[4];
22833 gcc_assert (unroll <= 4);
22834 for (i = 0; i < unroll; i++)
22835 {
22836 tmpreg[i] = gen_reg_rtx (mode);
22837 if (i)
22838 {
22839 srcmem =
22840 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
22841 }
22842 emit_move_insn (tmpreg[i], srcmem);
22843 }
22844 for (i = 0; i < unroll; i++)
22845 {
22846 if (i)
22847 {
22848 destmem =
22849 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
22850 }
22851 emit_move_insn (destmem, tmpreg[i]);
22852 }
22853 }
22854 }
22855 else
22856 for (i = 0; i < unroll; i++)
22857 {
22858 if (i)
22859 destmem =
22860 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
22861 emit_move_insn (destmem, value);
22862 }
22863
22864 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
22865 true, OPTAB_LIB_WIDEN);
22866 if (tmp != iter)
22867 emit_move_insn (iter, tmp);
22868
22869 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
22870 true, top_label);
22871 if (expected_size != -1)
22872 {
22873 expected_size /= GET_MODE_SIZE (mode) * unroll;
22874 if (expected_size == 0)
22875 predict_jump (0);
22876 else if (expected_size > REG_BR_PROB_BASE)
22877 predict_jump (REG_BR_PROB_BASE - 1);
22878 else
22879 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
22880 }
22881 else
22882 predict_jump (REG_BR_PROB_BASE * 80 / 100);
22883 iter = ix86_zero_extend_to_Pmode (iter);
22884 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
22885 true, OPTAB_LIB_WIDEN);
22886 if (tmp != destptr)
22887 emit_move_insn (destptr, tmp);
22888 if (!issetmem)
22889 {
22890 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
22891 true, OPTAB_LIB_WIDEN);
22892 if (tmp != srcptr)
22893 emit_move_insn (srcptr, tmp);
22894 }
22895 emit_label (out_label);
22896 }
22897
22898 /* Output "rep; mov" or "rep; stos" instruction depending on ISSETMEM argument.
22899 When ISSETMEM is true, arguments SRCMEM and SRCPTR are ignored.
22900 When ISSETMEM is false, arguments VALUE and ORIG_VALUE are ignored.
22901 For the setmem case, VALUE is ORIG_VALUE promoted to a wider size.
22902 ORIG_VALUE is the original value passed to memset to fill the memory with.
22903 Other arguments have the same meaning as for the previous function. */
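/* Illustrative example (not part of the original source): copying a
   constant 64-byte block with MODE == SImode scales the count register by
   the chunk size, 64 / 4 == 16, and emits the equivalent of

       mov   ecx, 16
       rep movsd

   while the setmem path instead forces VALUE into the accumulator and
   uses rep stos.  */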
22904
22905 static void
22906 expand_set_or_movmem_via_rep (rtx destmem, rtx srcmem,
22907 rtx destptr, rtx srcptr, rtx value, rtx orig_value,
22908 rtx count,
22909 enum machine_mode mode, bool issetmem)
22910 {
22911 rtx destexp;
22912 rtx srcexp;
22913 rtx countreg;
22914 HOST_WIDE_INT rounded_count;
22915
22916 /* If possible, it is shorter to use rep movs.
22917 TODO: Maybe it is better to move this logic to decide_alg. */
22918 if (mode == QImode && CONST_INT_P (count) && !(INTVAL (count) & 3)
22919 && (!issetmem || orig_value == const0_rtx))
22920 mode = SImode;
22921
22922 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
22923 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
22924
22925 countreg = ix86_zero_extend_to_Pmode (scale_counter (count,
22926 GET_MODE_SIZE (mode)));
22927 if (mode != QImode)
22928 {
22929 destexp = gen_rtx_ASHIFT (Pmode, countreg,
22930 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
22931 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
22932 }
22933 else
22934 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
22935 if ((!issetmem || orig_value == const0_rtx) && CONST_INT_P (count))
22936 {
22937 rounded_count = (INTVAL (count)
22938 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
22939 destmem = shallow_copy_rtx (destmem);
22940 set_mem_size (destmem, rounded_count);
22941 }
22942 else if (MEM_SIZE_KNOWN_P (destmem))
22943 clear_mem_size (destmem);
22944
22945 if (issetmem)
22946 {
22947 value = force_reg (mode, gen_lowpart (mode, value));
22948 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
22949 }
22950 else
22951 {
22952 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
22953 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
22954 if (mode != QImode)
22955 {
22956 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
22957 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
22958 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
22959 }
22960 else
22961 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
22962 if (CONST_INT_P (count))
22963 {
22964 rounded_count = (INTVAL (count)
22965 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
22966 srcmem = shallow_copy_rtx (srcmem);
22967 set_mem_size (srcmem, rounded_count);
22968 }
22969 else
22970 {
22971 if (MEM_SIZE_KNOWN_P (srcmem))
22972 clear_mem_size (srcmem);
22973 }
22974 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
22975 destexp, srcexp));
22976 }
22977 }
22978
22979 /* This function emits moves to copy SIZE_TO_MOVE bytes from SRCMEM to
22980 DESTMEM.
22981 SRCMEM is passed by pointer so it can be updated on return.
22982 The return value is the updated DESTMEM. */
22983 static rtx
22984 emit_memmov (rtx destmem, rtx *srcmem, rtx destptr, rtx srcptr,
22985 HOST_WIDE_INT size_to_move)
22986 {
22987 rtx dst = destmem, src = *srcmem, adjust, tempreg;
22988 enum insn_code code;
22989 enum machine_mode move_mode;
22990 int piece_size, i;
22991
22992 /* Find the widest mode in which we could perform moves.
22993 Start with the biggest power of 2 less than SIZE_TO_MOVE and half
22994 it until move of such size is supported. */
22995 piece_size = 1 << floor_log2 (size_to_move);
22996 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
22997 code = optab_handler (mov_optab, move_mode);
22998 while (code == CODE_FOR_nothing && piece_size > 1)
22999 {
23000 piece_size >>= 1;
23001 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
23002 code = optab_handler (mov_optab, move_mode);
23003 }
23004
23005 /* Find the corresponding vector mode with the same size as MOVE_MODE.
23006 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
23007 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
23008 {
23009 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
23010 move_mode = mode_for_vector (word_mode, nunits);
23011 code = optab_handler (mov_optab, move_mode);
23012 if (code == CODE_FOR_nothing)
23013 {
23014 move_mode = word_mode;
23015 piece_size = GET_MODE_SIZE (move_mode);
23016 code = optab_handler (mov_optab, move_mode);
23017 }
23018 }
23019 gcc_assert (code != CODE_FOR_nothing);
23020
23021 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
23022 src = adjust_automodify_address_nv (src, move_mode, srcptr, 0);
23023
23024 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZE moves. */
23025 gcc_assert (size_to_move % piece_size == 0);
23026 adjust = GEN_INT (piece_size);
23027 for (i = 0; i < size_to_move; i += piece_size)
23028 {
23029 /* We move from memory to memory, so we'll need to do it via
23030 a temporary register. */
23031 tempreg = gen_reg_rtx (move_mode);
23032 emit_insn (GEN_FCN (code) (tempreg, src));
23033 emit_insn (GEN_FCN (code) (dst, tempreg));
23034
23035 emit_move_insn (destptr,
23036 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
23037 emit_move_insn (srcptr,
23038 gen_rtx_PLUS (Pmode, copy_rtx (srcptr), adjust));
23039
23040 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23041 piece_size);
23042 src = adjust_automodify_address_nv (src, move_mode, srcptr,
23043 piece_size);
23044 }
23045
23046 /* Update DST and SRC rtx. */
23047 *srcmem = src;
23048 return dst;
23049 }
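/* Illustrative example (not part of the original source): with
   SIZE_TO_MOVE == 8 on a 64-bit target the widest usable mode is DImode,
   so a single iteration emits

       (set (reg:DI tmp) (mem:DI src))
       (set (mem:DI dst) (reg:DI tmp))

   and then advances both DESTPTR and SRCPTR by 8; larger power-of-two
   sizes switch to a vector mode of word-sized units when such a move is
   supported.  */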
23050
23051 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
23052 static void
23053 expand_movmem_epilogue (rtx destmem, rtx srcmem,
23054 rtx destptr, rtx srcptr, rtx count, int max_size)
23055 {
23056 rtx src, dest;
23057 if (CONST_INT_P (count))
23058 {
23059 HOST_WIDE_INT countval = INTVAL (count);
23060 HOST_WIDE_INT epilogue_size = countval % max_size;
23061 int i;
23062
23063 /* For now MAX_SIZE should be a power of 2. This assert could be
23064 relaxed, but it'll require a bit more complicated epilogue
23065 expanding. */
23066 gcc_assert ((max_size & (max_size - 1)) == 0);
23067 for (i = max_size; i >= 1; i >>= 1)
23068 {
23069 if (epilogue_size & i)
23070 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
23071 }
23072 return;
23073 }
23074 if (max_size > 8)
23075 {
23076 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
23077 count, 1, OPTAB_DIRECT);
23078 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
23079 count, QImode, 1, 4, false);
23080 return;
23081 }
23082
23083 /* When there are stringops, we can cheaply increase dest and src pointers.
23084 Otherwise we save code size by maintaining offset (zero is readily
23085 available from preceding rep operation) and using x86 addressing modes.
23086 */
23087 if (TARGET_SINGLE_STRINGOP)
23088 {
23089 if (max_size > 4)
23090 {
23091 rtx label = ix86_expand_aligntest (count, 4, true);
23092 src = change_address (srcmem, SImode, srcptr);
23093 dest = change_address (destmem, SImode, destptr);
23094 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23095 emit_label (label);
23096 LABEL_NUSES (label) = 1;
23097 }
23098 if (max_size > 2)
23099 {
23100 rtx label = ix86_expand_aligntest (count, 2, true);
23101 src = change_address (srcmem, HImode, srcptr);
23102 dest = change_address (destmem, HImode, destptr);
23103 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23104 emit_label (label);
23105 LABEL_NUSES (label) = 1;
23106 }
23107 if (max_size > 1)
23108 {
23109 rtx label = ix86_expand_aligntest (count, 1, true);
23110 src = change_address (srcmem, QImode, srcptr);
23111 dest = change_address (destmem, QImode, destptr);
23112 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23113 emit_label (label);
23114 LABEL_NUSES (label) = 1;
23115 }
23116 }
23117 else
23118 {
23119 rtx offset = force_reg (Pmode, const0_rtx);
23120 rtx tmp;
23121
23122 if (max_size > 4)
23123 {
23124 rtx label = ix86_expand_aligntest (count, 4, true);
23125 src = change_address (srcmem, SImode, srcptr);
23126 dest = change_address (destmem, SImode, destptr);
23127 emit_move_insn (dest, src);
23128 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
23129 true, OPTAB_LIB_WIDEN);
23130 if (tmp != offset)
23131 emit_move_insn (offset, tmp);
23132 emit_label (label);
23133 LABEL_NUSES (label) = 1;
23134 }
23135 if (max_size > 2)
23136 {
23137 rtx label = ix86_expand_aligntest (count, 2, true);
23138 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
23139 src = change_address (srcmem, HImode, tmp);
23140 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
23141 dest = change_address (destmem, HImode, tmp);
23142 emit_move_insn (dest, src);
23143 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
23144 true, OPTAB_LIB_WIDEN);
23145 if (tmp != offset)
23146 emit_move_insn (offset, tmp);
23147 emit_label (label);
23148 LABEL_NUSES (label) = 1;
23149 }
23150 if (max_size > 1)
23151 {
23152 rtx label = ix86_expand_aligntest (count, 1, true);
23153 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
23154 src = change_address (srcmem, QImode, tmp);
23155 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
23156 dest = change_address (destmem, QImode, tmp);
23157 emit_move_insn (dest, src);
23158 emit_label (label);
23159 LABEL_NUSES (label) = 1;
23160 }
23161 }
23162 }
23163
23164 /* This function emits moves to fill SIZE_TO_MOVE bytes starting from DESTMEM
23165 with value PROMOTED_VAL.
23166 DESTPTR is advanced as a side effect.
23167 The return value is the updated DESTMEM. */
23168 static rtx
23169 emit_memset (rtx destmem, rtx destptr, rtx promoted_val,
23170 HOST_WIDE_INT size_to_move)
23171 {
23172 rtx dst = destmem, adjust;
23173 enum insn_code code;
23174 enum machine_mode move_mode;
23175 int piece_size, i;
23176
23177 /* Find the widest mode in which we could perform moves.
23178 Start with the biggest power of 2 less than SIZE_TO_MOVE and half
23179 it until move of such size is supported. */
23180 move_mode = GET_MODE (promoted_val);
23181 if (move_mode == VOIDmode)
23182 move_mode = QImode;
23183 if (size_to_move < GET_MODE_SIZE (move_mode))
23184 {
23185 move_mode = mode_for_size (size_to_move * BITS_PER_UNIT, MODE_INT, 0);
23186 promoted_val = gen_lowpart (move_mode, promoted_val);
23187 }
23188 piece_size = GET_MODE_SIZE (move_mode);
23189 code = optab_handler (mov_optab, move_mode);
23190 gcc_assert (code != CODE_FOR_nothing && promoted_val != NULL_RTX);
23191
23192 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
23193
23194 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZE moves. */
23195 gcc_assert (size_to_move % piece_size == 0);
23196 adjust = GEN_INT (piece_size);
23197 for (i = 0; i < size_to_move; i += piece_size)
23198 {
23199 if (piece_size <= GET_MODE_SIZE (word_mode))
23200 {
23201 emit_insn (gen_strset (destptr, dst, promoted_val));
23202 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23203 piece_size);
23204 continue;
23205 }
23206
23207 emit_insn (GEN_FCN (code) (dst, promoted_val));
23208
23209 emit_move_insn (destptr,
23210 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
23211
23212 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23213 piece_size);
23214 }
23215
23216 /* Update DST rtx. */
23217 return dst;
23218 }
23219 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
23220 static void
23221 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
23222 rtx count, int max_size)
23223 {
23224 count =
23225 expand_simple_binop (counter_mode (count), AND, count,
23226 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
23227 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
23228 gen_lowpart (QImode, value), count, QImode,
23229 1, max_size / 2, true);
23230 }
23231
23232 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
23233 static void
23234 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx vec_value,
23235 rtx count, int max_size)
23236 {
23237 rtx dest;
23238
23239 if (CONST_INT_P (count))
23240 {
23241 HOST_WIDE_INT countval = INTVAL (count);
23242 HOST_WIDE_INT epilogue_size = countval % max_size;
23243 int i;
23244
23245 /* For now MAX_SIZE should be a power of 2. This assert could be
23246 relaxed, but it'll require a bit more complicated epilogue
23247 expanding. */
23248 gcc_assert ((max_size & (max_size - 1)) == 0);
23249 for (i = max_size; i >= 1; i >>= 1)
23250 {
23251 if (epilogue_size & i)
23252 {
23253 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
23254 destmem = emit_memset (destmem, destptr, vec_value, i);
23255 else
23256 destmem = emit_memset (destmem, destptr, value, i);
23257 }
23258 }
23259 return;
23260 }
23261 if (max_size > 32)
23262 {
23263 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
23264 return;
23265 }
23266 if (max_size > 16)
23267 {
23268 rtx label = ix86_expand_aligntest (count, 16, true);
23269 if (TARGET_64BIT)
23270 {
23271 dest = change_address (destmem, DImode, destptr);
23272 emit_insn (gen_strset (destptr, dest, value));
23273 dest = adjust_automodify_address_nv (dest, DImode, destptr, 8);
23274 emit_insn (gen_strset (destptr, dest, value));
23275 }
23276 else
23277 {
23278 dest = change_address (destmem, SImode, destptr);
23279 emit_insn (gen_strset (destptr, dest, value));
23280 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
23281 emit_insn (gen_strset (destptr, dest, value));
23282 dest = adjust_automodify_address_nv (dest, SImode, destptr, 8);
23283 emit_insn (gen_strset (destptr, dest, value));
23284 dest = adjust_automodify_address_nv (dest, SImode, destptr, 12);
23285 emit_insn (gen_strset (destptr, dest, value));
23286 }
23287 emit_label (label);
23288 LABEL_NUSES (label) = 1;
23289 }
23290 if (max_size > 8)
23291 {
23292 rtx label = ix86_expand_aligntest (count, 8, true);
23293 if (TARGET_64BIT)
23294 {
23295 dest = change_address (destmem, DImode, destptr);
23296 emit_insn (gen_strset (destptr, dest, value));
23297 }
23298 else
23299 {
23300 dest = change_address (destmem, SImode, destptr);
23301 emit_insn (gen_strset (destptr, dest, value));
23302 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
23303 emit_insn (gen_strset (destptr, dest, value));
23304 }
23305 emit_label (label);
23306 LABEL_NUSES (label) = 1;
23307 }
23308 if (max_size > 4)
23309 {
23310 rtx label = ix86_expand_aligntest (count, 4, true);
23311 dest = change_address (destmem, SImode, destptr);
23312 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
23313 emit_label (label);
23314 LABEL_NUSES (label) = 1;
23315 }
23316 if (max_size > 2)
23317 {
23318 rtx label = ix86_expand_aligntest (count, 2, true);
23319 dest = change_address (destmem, HImode, destptr);
23320 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
23321 emit_label (label);
23322 LABEL_NUSES (label) = 1;
23323 }
23324 if (max_size > 1)
23325 {
23326 rtx label = ix86_expand_aligntest (count, 1, true);
23327 dest = change_address (destmem, QImode, destptr);
23328 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
23329 emit_label (label);
23330 LABEL_NUSES (label) = 1;
23331 }
23332 }
23333
23334 /* Depending on ISSETMEM, copy enough bytes from SRCMEM to DESTMEM, or store enough
23335 bytes into DESTMEM, to align it to DESIRED_ALIGNMENT. Original alignment is ALIGN.
23336 Depending on ISSETMEM, either arguments SRCMEM/SRCPTR or VALUE/VEC_VALUE are
23337 ignored.
23338 Return value is updated DESTMEM. */
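/* Illustrative sketch (assumes ALIGN == 1 and DESIRED_ALIGNMENT == 8;
   not the literal expansion): the loop below behaves roughly like

     if (destptr & 1) { set/copy 1 byte; count -= 1; }
     if (destptr & 2) { set/copy 2 bytes; count -= 2; }
     if (destptr & 4) { set/copy 4 bytes; count -= 4; }

   after which DESTPTR is aligned to DESIRED_ALIGNMENT and the known
   alignment recorded on DESTMEM is bumped accordingly.  */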
23339 static rtx
23340 expand_set_or_movmem_prologue (rtx destmem, rtx srcmem,
23341 rtx destptr, rtx srcptr, rtx value,
23342 rtx vec_value, rtx count, int align,
23343 int desired_alignment, bool issetmem)
23344 {
23345 int i;
23346 for (i = 1; i < desired_alignment; i <<= 1)
23347 {
23348 if (align <= i)
23349 {
23350 rtx label = ix86_expand_aligntest (destptr, i, false);
23351 if (issetmem)
23352 {
23353 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
23354 destmem = emit_memset (destmem, destptr, vec_value, i);
23355 else
23356 destmem = emit_memset (destmem, destptr, value, i);
23357 }
23358 else
23359 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
23360 ix86_adjust_counter (count, i);
23361 emit_label (label);
23362 LABEL_NUSES (label) = 1;
23363 set_mem_align (destmem, i * 2 * BITS_PER_UNIT);
23364 }
23365 }
23366 return destmem;
23367 }
23368
23369 /* Test if COUNT&SIZE is nonzero and if so, expand a movmem
23370 or setmem sequence that is valid for SIZE..2*SIZE-1 bytes
23371 and jump to DONE_LABEL. */
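/* Worked example (illustration only): with SIZE == 4 and COUNT == 6
   the code below copies (or stores) bytes 0..3 and then bytes
   COUNT-4..COUNT-1, i.e. 2..5; the two moves overlap, but together
   they cover any block of SIZE..2*SIZE-1 bytes without a loop.  */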
23372 static void
23373 expand_small_movmem_or_setmem (rtx destmem, rtx srcmem,
23374 rtx destptr, rtx srcptr,
23375 rtx value, rtx vec_value,
23376 rtx count, int size,
23377 rtx done_label, bool issetmem)
23378 {
23379 rtx label = ix86_expand_aligntest (count, size, false);
23380 enum machine_mode mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 1);
23381 rtx modesize;
23382 int n;
23383
23384 /* If we do not have a vector value to copy, we must reduce the size. */
23385 if (issetmem)
23386 {
23387 if (!vec_value)
23388 {
23389 if (GET_MODE (value) == VOIDmode && size > 8)
23390 mode = Pmode;
23391 else if (GET_MODE_SIZE (mode) > GET_MODE_SIZE (GET_MODE (value)))
23392 mode = GET_MODE (value);
23393 }
23394 else
23395 mode = GET_MODE (vec_value), value = vec_value;
23396 }
23397 else
23398 {
23399 /* Choose appropriate vector mode. */
23400 if (size >= 32)
23401 mode = TARGET_AVX ? V32QImode : TARGET_SSE ? V16QImode : DImode;
23402 else if (size >= 16)
23403 mode = TARGET_SSE ? V16QImode : DImode;
23404 srcmem = change_address (srcmem, mode, srcptr);
23405 }
23406 destmem = change_address (destmem, mode, destptr);
23407 modesize = GEN_INT (GET_MODE_SIZE (mode));
23408 gcc_assert (GET_MODE_SIZE (mode) <= size);
23409 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
23410 {
23411 if (issetmem)
23412 emit_move_insn (destmem, gen_lowpart (mode, value));
23413 else
23414 {
23415 emit_move_insn (destmem, srcmem);
23416 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
23417 }
23418 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
23419 }
23420
23421 destmem = offset_address (destmem, count, 1);
23422 destmem = offset_address (destmem, GEN_INT (-2 * size),
23423 GET_MODE_SIZE (mode));
23424 if (!issetmem)
23425 {
23426 srcmem = offset_address (srcmem, count, 1);
23427 srcmem = offset_address (srcmem, GEN_INT (-2 * size),
23428 GET_MODE_SIZE (mode));
23429 }
23430 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
23431 {
23432 if (issetmem)
23433 emit_move_insn (destmem, gen_lowpart (mode, value));
23434 else
23435 {
23436 emit_move_insn (destmem, srcmem);
23437 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
23438 }
23439 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
23440 }
23441 emit_jump_insn (gen_jump (done_label));
23442 emit_barrier ();
23443
23444 emit_label (label);
23445 LABEL_NUSES (label) = 1;
23446 }
23447
23448 /* Handle a small memcpy (up to SIZE, which is supposed to be a small power of 2)
23449 and get ready for the main memcpy loop by copying the initial DESIRED_ALIGN-ALIGN
23450 bytes and the last SIZE bytes, adjusting DESTPTR/SRCPTR/COUNT so that we can
23451 proceed with a loop copying SIZE bytes at once. Do moves in MODE.
23452 DONE_LABEL is a label after the whole copying sequence. The label is created
23453 on demand if *DONE_LABEL is NULL.
23454 MIN_SIZE is minimal size of block copied. This value gets adjusted for new
23455 bounds after the initial copies.
23456
23457 DESTMEM/SRCMEM are memory expressions pointing to the copied block,
23458 DESTPTR/SRCPTR are pointers to the block. DYNAMIC_CHECK indicates whether
23459 we will dispatch to a library call for large blocks.
23460
23461 In pseudocode we do:
23462
23463 if (COUNT < SIZE)
23464 {
23465 Assume that SIZE is 4. Bigger sizes are handled analogously
23466 if (COUNT & 4)
23467 {
23468 copy 4 bytes from SRCPTR to DESTPTR
23469 copy 4 bytes from SRCPTR + COUNT - 4 to DESTPTR + COUNT - 4
23470 goto done_label
23471 }
23472 if (!COUNT)
23473 goto done_label;
23474 copy 1 byte from SRCPTR to DESTPTR
23475 if (COUNT & 2)
23476 {
23477 copy 2 bytes from SRCPTR to DESTPTR
23478 copy 2 bytes from SRCPTR + COUNT - 2 to DESTPTR + COUNT - 2
23479 }
23480 }
23481 else
23482 {
23483 copy at least DESIRED_ALIGN-ALIGN bytes from SRCPTR to DESTPTR
23484 copy SIZE bytes from SRCPTR + COUNT - SIZE to DESTPTR + COUNT -SIZE
23485
23486 OLD_DESTPTR = DESTPTR;
23487 Align DESTPTR up to DESIRED_ALIGN
23488 SRCPTR += DESTPTR - OLD_DESTPTR
23489 COUNT -= DESTPTR - OLD_DESTPTR
23490 if (DYNAMIC_CHECK)
23491 Round COUNT down to multiple of SIZE
23492 << optional caller supplied zero size guard is here >>
23493 << optional caller supplied dynamic check is here >>
23494 << caller supplied main copy loop is here >>
23495 }
23496 done_label:
23497 */
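/* Worked example for the alignment step above (illustration only):
   if DESTPTR is 0x1003 and DESIRED_ALIGN is 16, the possibly
   misaligned first move has already covered at least the first 13
   bytes, so DESTPTR is rounded up to 0x1010, SRCPTR is advanced by
   the same 13 bytes and COUNT is reduced by 13 before the main
   SIZE-byte loop starts.  */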
23498 static void
23499 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves (rtx destmem, rtx srcmem,
23500 rtx *destptr, rtx *srcptr,
23501 enum machine_mode mode,
23502 rtx value, rtx vec_value,
23503 rtx *count,
23504 rtx *done_label,
23505 int size,
23506 int desired_align,
23507 int align,
23508 unsigned HOST_WIDE_INT *min_size,
23509 bool dynamic_check,
23510 bool issetmem)
23511 {
23512 rtx loop_label = NULL, label;
23513 int n;
23514 rtx modesize;
23515 int prolog_size = 0;
23516 rtx mode_value;
23517
23518 /* Choose the proper value to copy. */
23519 if (issetmem && VECTOR_MODE_P (mode))
23520 mode_value = vec_value;
23521 else
23522 mode_value = value;
23523 gcc_assert (GET_MODE_SIZE (mode) <= size);
23524
23525 /* See if block is big or small, handle small blocks. */
23526 if (!CONST_INT_P (*count) && *min_size < (unsigned HOST_WIDE_INT)size)
23527 {
23528 int size2 = size;
23529 loop_label = gen_label_rtx ();
23530
23531 if (!*done_label)
23532 *done_label = gen_label_rtx ();
23533
23534 emit_cmp_and_jump_insns (*count, GEN_INT (size2), GE, 0, GET_MODE (*count),
23535 1, loop_label);
23536 size2 >>= 1;
23537
23538 /* Handle sizes > 3. */
23539 for (;size2 > 2; size2 >>= 1)
23540 expand_small_movmem_or_setmem (destmem, srcmem,
23541 *destptr, *srcptr,
23542 value, vec_value,
23543 *count,
23544 size2, *done_label, issetmem);
23545 /* Nothing to copy? Jump to DONE_LABEL if so. */
23546 emit_cmp_and_jump_insns (*count, const0_rtx, EQ, 0, GET_MODE (*count),
23547 1, *done_label);
23548
23549 /* Do a byte copy. */
23550 destmem = change_address (destmem, QImode, *destptr);
23551 if (issetmem)
23552 emit_move_insn (destmem, gen_lowpart (QImode, value));
23553 else
23554 {
23555 srcmem = change_address (srcmem, QImode, *srcptr);
23556 emit_move_insn (destmem, srcmem);
23557 }
23558
23559 /* Handle sizes 2 and 3. */
23560 label = ix86_expand_aligntest (*count, 2, false);
23561 destmem = change_address (destmem, HImode, *destptr);
23562 destmem = offset_address (destmem, *count, 1);
23563 destmem = offset_address (destmem, GEN_INT (-2), 2);
23564 if (issetmem)
23565 emit_move_insn (destmem, gen_lowpart (HImode, value));
23566 else
23567 {
23568 srcmem = change_address (srcmem, HImode, *srcptr);
23569 srcmem = offset_address (srcmem, *count, 1);
23570 srcmem = offset_address (srcmem, GEN_INT (-2), 2);
23571 emit_move_insn (destmem, srcmem);
23572 }
23573
23574 emit_label (label);
23575 LABEL_NUSES (label) = 1;
23576 emit_jump_insn (gen_jump (*done_label));
23577 emit_barrier ();
23578 }
23579 else
23580 gcc_assert (*min_size >= (unsigned HOST_WIDE_INT)size
23581 || UINTVAL (*count) >= (unsigned HOST_WIDE_INT)size);
23582
23583 /* Start memcpy for COUNT >= SIZE. */
23584 if (loop_label)
23585 {
23586 emit_label (loop_label);
23587 LABEL_NUSES (loop_label) = 1;
23588 }
23589
23590 /* Copy first desired_align bytes. */
23591 if (!issetmem)
23592 srcmem = change_address (srcmem, mode, *srcptr);
23593 destmem = change_address (destmem, mode, *destptr);
23594 modesize = GEN_INT (GET_MODE_SIZE (mode));
23595 for (n = 0; prolog_size < desired_align - align; n++)
23596 {
23597 if (issetmem)
23598 emit_move_insn (destmem, mode_value);
23599 else
23600 {
23601 emit_move_insn (destmem, srcmem);
23602 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
23603 }
23604 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
23605 prolog_size += GET_MODE_SIZE (mode);
23606 }
23607
23608
23609 /* Copy last SIZE bytes. */
23610 destmem = offset_address (destmem, *count, 1);
23611 destmem = offset_address (destmem,
23612 GEN_INT (-size - prolog_size),
23613 1);
23614 if (issetmem)
23615 emit_move_insn (destmem, mode_value);
23616 else
23617 {
23618 srcmem = offset_address (srcmem, *count, 1);
23619 srcmem = offset_address (srcmem,
23620 GEN_INT (-size - prolog_size),
23621 1);
23622 emit_move_insn (destmem, srcmem);
23623 }
23624 for (n = 1; n * GET_MODE_SIZE (mode) < size; n++)
23625 {
23626 destmem = offset_address (destmem, modesize, 1);
23627 if (issetmem)
23628 emit_move_insn (destmem, mode_value);
23629 else
23630 {
23631 srcmem = offset_address (srcmem, modesize, 1);
23632 emit_move_insn (destmem, srcmem);
23633 }
23634 }
23635
23636 /* Align destination. */
23637 if (desired_align > 1 && desired_align > align)
23638 {
23639 rtx saveddest = *destptr;
23640
23641 gcc_assert (desired_align <= size);
23642 /* Align destptr up, placing it in a new register. */
23643 *destptr = expand_simple_binop (GET_MODE (*destptr), PLUS, *destptr,
23644 GEN_INT (prolog_size),
23645 NULL_RTX, 1, OPTAB_DIRECT);
23646 *destptr = expand_simple_binop (GET_MODE (*destptr), AND, *destptr,
23647 GEN_INT (-desired_align),
23648 *destptr, 1, OPTAB_DIRECT);
23649 /* See how many bytes we skipped. */
23650 saveddest = expand_simple_binop (GET_MODE (*destptr), MINUS, saveddest,
23651 *destptr,
23652 saveddest, 1, OPTAB_DIRECT);
23653 /* Adjust srcptr and count. */
23654 if (!issetmem)
23655 *srcptr = expand_simple_binop (GET_MODE (*srcptr), MINUS, *srcptr, saveddest,
23656 *srcptr, 1, OPTAB_DIRECT);
23657 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
23658 saveddest, *count, 1, OPTAB_DIRECT);
23659 /* We copied at most size + prolog_size. */
23660 if (*min_size > (unsigned HOST_WIDE_INT)(size + prolog_size))
23661 *min_size = (*min_size - size) & ~(unsigned HOST_WIDE_INT)(size - 1);
23662 else
23663 *min_size = 0;
23664
23665 /* Our loops always round down the block size, but for dispatch to the library
23666 we need the precise value. */
23667 if (dynamic_check)
23668 *count = expand_simple_binop (GET_MODE (*count), AND, *count,
23669 GEN_INT (-size), *count, 1, OPTAB_DIRECT);
23670 }
23671 else
23672 {
23673 gcc_assert (prolog_size == 0);
23674 /* Decrease count, so we won't end up copying the last word twice. */
23675 if (!CONST_INT_P (*count))
23676 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
23677 constm1_rtx, *count, 1, OPTAB_DIRECT);
23678 else
23679 *count = GEN_INT ((UINTVAL (*count) - 1) & ~(unsigned HOST_WIDE_INT)(size - 1));
23680 if (*min_size)
23681 *min_size = (*min_size - 1) & ~(unsigned HOST_WIDE_INT)(size - 1);
23682 }
23683 }
23684
23685
23686 /* This function is like the previous one, except here we know how many bytes
23687 need to be copied. That allows us to update alignment not only of DST, which
23688 is returned, but also of SRC, which is passed as a pointer for that
23689 reason. */
23690 static rtx
23691 expand_set_or_movmem_constant_prologue (rtx dst, rtx *srcp, rtx destreg,
23692 rtx srcreg, rtx value, rtx vec_value,
23693 int desired_align, int align_bytes,
23694 bool issetmem)
23695 {
23696 rtx src = NULL;
23697 rtx orig_dst = dst;
23698 rtx orig_src = NULL;
23699 int piece_size = 1;
23700 int copied_bytes = 0;
23701
23702 if (!issetmem)
23703 {
23704 gcc_assert (srcp != NULL);
23705 src = *srcp;
23706 orig_src = src;
23707 }
23708
23709 for (piece_size = 1;
23710 piece_size <= desired_align && copied_bytes < align_bytes;
23711 piece_size <<= 1)
23712 {
23713 if (align_bytes & piece_size)
23714 {
23715 if (issetmem)
23716 {
23717 if (vec_value && piece_size > GET_MODE_SIZE (GET_MODE (value)))
23718 dst = emit_memset (dst, destreg, vec_value, piece_size);
23719 else
23720 dst = emit_memset (dst, destreg, value, piece_size);
23721 }
23722 else
23723 dst = emit_memmov (dst, &src, destreg, srcreg, piece_size);
23724 copied_bytes += piece_size;
23725 }
23726 }
23727 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
23728 set_mem_align (dst, desired_align * BITS_PER_UNIT);
23729 if (MEM_SIZE_KNOWN_P (orig_dst))
23730 set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
23731
23732 if (!issetmem)
23733 {
23734 int src_align_bytes = get_mem_align_offset (src, desired_align
23735 * BITS_PER_UNIT);
23736 if (src_align_bytes >= 0)
23737 src_align_bytes = desired_align - src_align_bytes;
23738 if (src_align_bytes >= 0)
23739 {
23740 unsigned int src_align;
23741 for (src_align = desired_align; src_align >= 2; src_align >>= 1)
23742 {
23743 if ((src_align_bytes & (src_align - 1))
23744 == (align_bytes & (src_align - 1)))
23745 break;
23746 }
23747 if (src_align > (unsigned int) desired_align)
23748 src_align = desired_align;
23749 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
23750 set_mem_align (src, src_align * BITS_PER_UNIT);
23751 }
23752 if (MEM_SIZE_KNOWN_P (orig_src))
23753 set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
23754 *srcp = src;
23755 }
23756
23757 return dst;
23758 }
23759
23760 /* Return true if ALG can be used in the current context.
23761 Assume we expand memset if MEMSET is true. */
23762 static bool
23763 alg_usable_p (enum stringop_alg alg, bool memset)
23764 {
23765 if (alg == no_stringop)
23766 return false;
23767 if (alg == vector_loop)
23768 return TARGET_SSE || TARGET_AVX;
23769 /* Algorithms using the rep prefix want at least edi and ecx;
23770 additionally, memset wants eax and memcpy wants esi. Don't
23771 consider such algorithms if the user has appropriated those
23772 registers for their own purposes. */
23773 if (alg == rep_prefix_1_byte
23774 || alg == rep_prefix_4_byte
23775 || alg == rep_prefix_8_byte)
23776 return !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
23777 || (memset ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
23778 return true;
23779 }
23780
23781 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
23782 static enum stringop_alg
23783 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
23784 unsigned HOST_WIDE_INT min_size, unsigned HOST_WIDE_INT max_size,
23785 bool memset, bool zero_memset, int *dynamic_check, bool *noalign)
23786 {
23787 const struct stringop_algs * algs;
23788 bool optimize_for_speed;
23789 int max = -1;
23790 const struct processor_costs *cost;
23791 int i;
23792 bool any_alg_usable_p = false;
23793
23794 *noalign = false;
23795 *dynamic_check = -1;
23796
23797 /* Even if the string operation call is cold, we still might spend a lot
23798 of time processing large blocks. */
23799 if (optimize_function_for_size_p (cfun)
23800 || (optimize_insn_for_size_p ()
23801 && (max_size < 256
23802 || (expected_size != -1 && expected_size < 256))))
23803 optimize_for_speed = false;
23804 else
23805 optimize_for_speed = true;
23806
23807 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
23808 if (memset)
23809 algs = &cost->memset[TARGET_64BIT != 0];
23810 else
23811 algs = &cost->memcpy[TARGET_64BIT != 0];
23812
23813 /* See the maximal size for a user-defined algorithm. */
23814 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
23815 {
23816 enum stringop_alg candidate = algs->size[i].alg;
23817 bool usable = alg_usable_p (candidate, memset);
23818 any_alg_usable_p |= usable;
23819
23820 if (candidate != libcall && candidate && usable)
23821 max = algs->size[i].max;
23822 }
23823
23824 /* If the expected size is not known but the max size is small enough
23825 that the inline version is a win, set the expected size into
23826 the range. */
23827 if (max > 1 && (unsigned HOST_WIDE_INT) max >= max_size
23828 && expected_size == -1)
23829 expected_size = min_size / 2 + max_size / 2;
23830
23831 /* If the user specified the algorithm, honor it if possible. */
23832 if (ix86_stringop_alg != no_stringop
23833 && alg_usable_p (ix86_stringop_alg, memset))
23834 return ix86_stringop_alg;
23835 /* rep; movq or rep; movl is the smallest variant. */
23836 else if (!optimize_for_speed)
23837 {
23838 *noalign = true;
23839 if (!count || (count & 3) || (memset && !zero_memset))
23840 return alg_usable_p (rep_prefix_1_byte, memset)
23841 ? rep_prefix_1_byte : loop_1_byte;
23842 else
23843 return alg_usable_p (rep_prefix_4_byte, memset)
23844 ? rep_prefix_4_byte : loop;
23845 }
23846 /* Very tiny blocks are best handled via the loop; REP is expensive to
23847 set up. */
23848 else if (expected_size != -1 && expected_size < 4)
23849 return loop_1_byte;
23850 else if (expected_size != -1)
23851 {
23852 enum stringop_alg alg = libcall;
23853 bool alg_noalign = false;
23854 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
23855 {
23856 /* We get here if the algorithms that were not libcall-based
23857 were rep-prefix based and we are unable to use rep prefixes
23858 based on global register usage. Break out of the loop and
23859 use the heuristic below. */
23860 if (algs->size[i].max == 0)
23861 break;
23862 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
23863 {
23864 enum stringop_alg candidate = algs->size[i].alg;
23865
23866 if (candidate != libcall && alg_usable_p (candidate, memset))
23867 {
23868 alg = candidate;
23869 alg_noalign = algs->size[i].noalign;
23870 }
23871 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
23872 last non-libcall inline algorithm. */
23873 if (TARGET_INLINE_ALL_STRINGOPS)
23874 {
23875 /* When the current size is best to be copied by a libcall,
23876 but we are still forced to inline, run the heuristic below
23877 that will pick code for medium sized blocks. */
23878 if (alg != libcall)
23879 {
23880 *noalign = alg_noalign;
23881 return alg;
23882 }
23883 break;
23884 }
23885 else if (alg_usable_p (candidate, memset))
23886 {
23887 *noalign = algs->size[i].noalign;
23888 return candidate;
23889 }
23890 }
23891 }
23892 }
23893 /* When asked to inline the call anyway, try to pick a meaningful choice.
23894 We look for the maximal size of block that is faster to copy by hand and
23895 take blocks of at most that size, guessing that the average size will
23896 be roughly half of the block.
23897
23898 If this turns out to be bad, we might simply specify the preferred
23899 choice in ix86_costs. */
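/* Illustrative example (numbers are hypothetical): with
   TARGET_INLINE_STRINGOPS_DYNAMICALLY, an unknown block size and no
   usable table entry, the code below recurses with an expected size
   of max/2 (e.g. 2048 when max defaults to 4096) to pick an inline
   algorithm for medium blocks, and sets *dynamic_check to max so the
   expander emits a runtime size test dispatching larger blocks to
   the library call.  */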
23900 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
23901 && (algs->unknown_size == libcall
23902 || !alg_usable_p (algs->unknown_size, memset)))
23903 {
23904 enum stringop_alg alg;
23905
23906 /* If there aren't any usable algorithms, then recursing on
23907 smaller sizes isn't going to find anything. Just return the
23908 simple byte-at-a-time copy loop. */
23909 if (!any_alg_usable_p)
23910 {
23911 /* Pick something reasonable. */
23912 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
23913 *dynamic_check = 128;
23914 return loop_1_byte;
23915 }
23916 if (max == -1)
23917 max = 4096;
23918 alg = decide_alg (count, max / 2, min_size, max_size, memset,
23919 zero_memset, dynamic_check, noalign);
23920 gcc_assert (*dynamic_check == -1);
23921 gcc_assert (alg != libcall);
23922 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
23923 *dynamic_check = max;
23924 return alg;
23925 }
23926 return (alg_usable_p (algs->unknown_size, memset)
23927 ? algs->unknown_size : libcall);
23928 }
23929
23930 /* Decide on alignment. We know that the operand is already aligned to ALIGN
23931 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
23932 static int
23933 decide_alignment (int align,
23934 enum stringop_alg alg,
23935 int expected_size,
23936 enum machine_mode move_mode)
23937 {
23938 int desired_align = 0;
23939
23940 gcc_assert (alg != no_stringop);
23941
23942 if (alg == libcall)
23943 return 0;
23944 if (move_mode == VOIDmode)
23945 return 0;
23946
23947 desired_align = GET_MODE_SIZE (move_mode);
23948 /* PentiumPro has special logic triggering for 8-byte aligned blocks,
23949 copying a whole cacheline at once. */
23950 if (TARGET_PENTIUMPRO
23951 && (alg == rep_prefix_4_byte || alg == rep_prefix_1_byte))
23952 desired_align = 8;
23953
23954 if (optimize_size)
23955 desired_align = 1;
23956 if (desired_align < align)
23957 desired_align = align;
23958 if (expected_size != -1 && expected_size < 4)
23959 desired_align = align;
23960
23961 return desired_align;
23962 }
23963
23964
23965 /* Helper function for memset. For the QImode value 0xXY produce
23966 0xXYXYXYXY of the width specified by MODE. This is essentially
23967 a * 0x01010101, but we can do slightly better than
23968 synth_mult by unwinding the sequence by hand on CPUs with
23969 slow multiply. */
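/* Illustration only (host arithmetic, not the emitted RTL): promoting
   the QImode value 0xAB to SImode yields 0xABABABAB.  The constant
   path below computes it directly as

     v  = 0xAB;
     v |= v << 8;     -> 0x0000ABAB
     v |= v << 16;    -> 0xABABABAB

   while the non-constant path builds the same value with an insv or
   shift-and-IOR sequence (plus a shift-by-32 IOR for DImode), or
   multiplies by 0x01010101 when the cost tables say that is cheaper.  */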
23970 static rtx
23971 promote_duplicated_reg (enum machine_mode mode, rtx val)
23972 {
23973 enum machine_mode valmode = GET_MODE (val);
23974 rtx tmp;
23975 int nops = mode == DImode ? 3 : 2;
23976
23977 gcc_assert (mode == SImode || mode == DImode || val == const0_rtx);
23978 if (val == const0_rtx)
23979 return copy_to_mode_reg (mode, CONST0_RTX (mode));
23980 if (CONST_INT_P (val))
23981 {
23982 HOST_WIDE_INT v = INTVAL (val) & 255;
23983
23984 v |= v << 8;
23985 v |= v << 16;
23986 if (mode == DImode)
23987 v |= (v << 16) << 16;
23988 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
23989 }
23990
23991 if (valmode == VOIDmode)
23992 valmode = QImode;
23993 if (valmode != QImode)
23994 val = gen_lowpart (QImode, val);
23995 if (mode == QImode)
23996 return val;
23997 if (!TARGET_PARTIAL_REG_STALL)
23998 nops--;
23999 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
24000 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
24001 <= (ix86_cost->shift_const + ix86_cost->add) * nops
24002 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
24003 {
24004 rtx reg = convert_modes (mode, QImode, val, true);
24005 tmp = promote_duplicated_reg (mode, const1_rtx);
24006 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
24007 OPTAB_DIRECT);
24008 }
24009 else
24010 {
24011 rtx reg = convert_modes (mode, QImode, val, true);
24012
24013 if (!TARGET_PARTIAL_REG_STALL)
24014 if (mode == SImode)
24015 emit_insn (gen_movsi_insv_1 (reg, reg));
24016 else
24017 emit_insn (gen_movdi_insv_1 (reg, reg));
24018 else
24019 {
24020 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
24021 NULL, 1, OPTAB_DIRECT);
24022 reg =
24023 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24024 }
24025 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
24026 NULL, 1, OPTAB_DIRECT);
24027 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24028 if (mode == SImode)
24029 return reg;
24030 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
24031 NULL, 1, OPTAB_DIRECT);
24032 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24033 return reg;
24034 }
24035 }
24036
24037 /* Duplicate value VAL using promote_duplicated_reg into the maximal size that will
24038 be needed by the main loop copying SIZE_NEEDED chunks and by the prologue getting
24039 alignment from ALIGN to DESIRED_ALIGN. */
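/* Illustrative example: on a 64-bit target with SIZE_NEEDED == 8 the
   value is promoted to a full DImode pattern; with SIZE_NEEDED == 2
   and no extra alignment work only an HImode duplicate is built; and
   the value is returned unchanged when a single byte suffices.  */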
24040 static rtx
24041 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align,
24042 int align)
24043 {
24044 rtx promoted_val;
24045
24046 if (TARGET_64BIT
24047 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
24048 promoted_val = promote_duplicated_reg (DImode, val);
24049 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
24050 promoted_val = promote_duplicated_reg (SImode, val);
24051 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
24052 promoted_val = promote_duplicated_reg (HImode, val);
24053 else
24054 promoted_val = val;
24055
24056 return promoted_val;
24057 }
24058
24059 /* Expand string move (memcpy) or store (memset) operation. Use i386 string
24060 operations when profitable. The code depends upon architecture, block size
24061 and alignment, but always has one of the following overall structures:
24062
24063 Aligned move sequence:
24064
24065 1) Prologue guard: Conditional that jumps up to epilogues for small
24066 blocks that can be handled by the epilogue alone. This is faster
24067 but also needed for correctness, since the prologue assumes the block
24068 is larger than the desired alignment.
24069
24070 Optional dynamic check for size and libcall for large
24071 blocks is emitted here too, with -minline-stringops-dynamically.
24072
24073 2) Prologue: copy first few bytes in order to get destination
24074 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less
24075 than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
24076 copied. We emit either a jump tree on power of two sized
24077 blocks, or a byte loop.
24078
24079 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24080 with specified algorithm.
24081
24082 4) Epilogue: code copying tail of the block that is too small to be
24083 handled by main body (or up to size guarded by prologue guard).
24084
24085 Misaligned move sequence
24086
24087 1) Misaligned move prologue/epilogue containing:
24088 a) Prologue handling small memory blocks and jumping to done_label
24089 (skipped if blocks are known to be large enough)
24090 b) Single possibly misaligned move copying the first DESIRED_ALIGN-ALIGN
24091 bytes, if alignment is needed
24092 (skipped if alignment is not needed)
24093 c) Copy of last SIZE_NEEDED bytes by possibly misaligned moves
24094
24095 2) Zero size guard dispatching to done_label, if needed
24096
24097 3) Dispatch to a library call, if needed.
24098
24099 4) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24100 with the specified algorithm. */
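/* Illustrative example (one possible outcome, not a fixed rule): for

     memset (p, 0, n);    with n unknown at compile time

   on a 64-bit SSE target decide_alg may pick vector_loop, giving
   move_mode == V2DImode, unroll_factor == 4 and therefore
   SIZE_NEEDED == 64; the expander then emits the misaligned
   prologue/epilogue described above, a zero-size guard if needed,
   and a main loop storing 64 bytes per iteration.  */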
24101 bool
24102 ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
24103 rtx align_exp, rtx expected_align_exp,
24104 rtx expected_size_exp, rtx min_size_exp,
24105 rtx max_size_exp, rtx probable_max_size_exp,
24106 bool issetmem)
24107 {
24108 rtx destreg;
24109 rtx srcreg = NULL;
24110 rtx label = NULL;
24111 rtx tmp;
24112 rtx jump_around_label = NULL;
24113 HOST_WIDE_INT align = 1;
24114 unsigned HOST_WIDE_INT count = 0;
24115 HOST_WIDE_INT expected_size = -1;
24116 int size_needed = 0, epilogue_size_needed;
24117 int desired_align = 0, align_bytes = 0;
24118 enum stringop_alg alg;
24119 rtx promoted_val = NULL;
24120 rtx vec_promoted_val = NULL;
24121 bool force_loopy_epilogue = false;
24122 int dynamic_check;
24123 bool need_zero_guard = false;
24124 bool noalign;
24125 enum machine_mode move_mode = VOIDmode;
24126 int unroll_factor = 1;
24127 /* TODO: Once value ranges are available, fill in proper data. */
24128 unsigned HOST_WIDE_INT min_size = 0;
24129 unsigned HOST_WIDE_INT max_size = -1;
24130 unsigned HOST_WIDE_INT probable_max_size = -1;
24131 bool misaligned_prologue_used = false;
24132
24133 if (CONST_INT_P (align_exp))
24134 align = INTVAL (align_exp);
24135 /* i386 can do misaligned access at a reasonable increase in cost. */
24136 if (CONST_INT_P (expected_align_exp)
24137 && INTVAL (expected_align_exp) > align)
24138 align = INTVAL (expected_align_exp);
24139 /* ALIGN is the minimum of destination and source alignment, but we care here
24140 just about destination alignment. */
24141 else if (!issetmem
24142 && MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
24143 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
24144
24145 if (CONST_INT_P (count_exp))
24146 min_size = max_size = probable_max_size = count = expected_size
24147 = INTVAL (count_exp);
24148 else
24149 {
24150 if (min_size_exp)
24151 min_size = INTVAL (min_size_exp);
24152 if (max_size_exp)
24153 max_size = INTVAL (max_size_exp);
24154 if (probable_max_size_exp)
24155 probable_max_size = INTVAL (probable_max_size_exp);
24156 if (CONST_INT_P (expected_size_exp) && count == 0)
24157 expected_size = INTVAL (expected_size_exp);
24158 }
24159
24160 /* Make sure we don't need to care about overflow later on. */
24161 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
24162 return false;
24163
24164 /* Step 0: Decide on preferred algorithm, desired alignment and
24165 size of chunks to be copied by main loop. */
24166 alg = decide_alg (count, expected_size, min_size, probable_max_size,
24167 issetmem,
24168 issetmem && val_exp == const0_rtx,
24169 &dynamic_check, &noalign);
24170 if (alg == libcall)
24171 return false;
24172 gcc_assert (alg != no_stringop);
24173
24174 /* For now the vector version of memset is generated only for memory zeroing, as
24175 creating the promoted vector value is very cheap in this case. */
24176 if (issetmem && alg == vector_loop && val_exp != const0_rtx)
24177 alg = unrolled_loop;
24178
24179 if (!count)
24180 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
24181 destreg = ix86_copy_addr_to_reg (XEXP (dst, 0));
24182 if (!issetmem)
24183 srcreg = ix86_copy_addr_to_reg (XEXP (src, 0));
24184
24185 unroll_factor = 1;
24186 move_mode = word_mode;
24187 switch (alg)
24188 {
24189 case libcall:
24190 case no_stringop:
24191 case last_alg:
24192 gcc_unreachable ();
24193 case loop_1_byte:
24194 need_zero_guard = true;
24195 move_mode = QImode;
24196 break;
24197 case loop:
24198 need_zero_guard = true;
24199 break;
24200 case unrolled_loop:
24201 need_zero_guard = true;
24202 unroll_factor = (TARGET_64BIT ? 4 : 2);
24203 break;
24204 case vector_loop:
24205 need_zero_guard = true;
24206 unroll_factor = 4;
24207 /* Find the widest supported mode. */
24208 move_mode = word_mode;
24209 while (optab_handler (mov_optab, GET_MODE_WIDER_MODE (move_mode))
24210 != CODE_FOR_nothing)
24211 move_mode = GET_MODE_WIDER_MODE (move_mode);
24212
24213 /* Find the corresponding vector mode with the same size as MOVE_MODE.
24214 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
24215 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
24216 {
24217 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
24218 move_mode = mode_for_vector (word_mode, nunits);
24219 if (optab_handler (mov_optab, move_mode) == CODE_FOR_nothing)
24220 move_mode = word_mode;
24221 }
24222 gcc_assert (optab_handler (mov_optab, move_mode) != CODE_FOR_nothing);
24223 break;
24224 case rep_prefix_8_byte:
24225 move_mode = DImode;
24226 break;
24227 case rep_prefix_4_byte:
24228 move_mode = SImode;
24229 break;
24230 case rep_prefix_1_byte:
24231 move_mode = QImode;
24232 break;
24233 }
24234 size_needed = GET_MODE_SIZE (move_mode) * unroll_factor;
24235 epilogue_size_needed = size_needed;
24236
24237 desired_align = decide_alignment (align, alg, expected_size, move_mode);
24238 if (!TARGET_ALIGN_STRINGOPS || noalign)
24239 align = desired_align;
24240
24241 /* Step 1: Prologue guard. */
24242
24243 /* Alignment code needs count to be in register. */
24244 if (CONST_INT_P (count_exp) && desired_align > align)
24245 {
24246 if (INTVAL (count_exp) > desired_align
24247 && INTVAL (count_exp) > size_needed)
24248 {
24249 align_bytes
24250 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
24251 if (align_bytes <= 0)
24252 align_bytes = 0;
24253 else
24254 align_bytes = desired_align - align_bytes;
24255 }
24256 if (align_bytes == 0)
24257 count_exp = force_reg (counter_mode (count_exp), count_exp);
24258 }
24259 gcc_assert (desired_align >= 1 && align >= 1);
24260
24261 /* Misaligned move sequences handle both prologue and epilogue at once.
24262 Default code generation results in smaller code for large alignments
24263 and also avoids redundant work when sizes are known precisely. */
24264 misaligned_prologue_used
24265 = (TARGET_MISALIGNED_MOVE_STRING_PRO_EPILOGUES
24266 && MAX (desired_align, epilogue_size_needed) <= 32
24267 && desired_align <= epilogue_size_needed
24268 && ((desired_align > align && !align_bytes)
24269 || (!count && epilogue_size_needed > 1)));
24270
24271 /* Do the cheap promotion to allow better CSE across the
24272 main loop and epilogue (i.e. one load of the big constant in
24273 front of all the code).
24274 For now the misaligned move sequences do not have a fast path
24275 without broadcasting. */
24276 if (issetmem && ((CONST_INT_P (val_exp) || misaligned_prologue_used)))
24277 {
24278 if (alg == vector_loop)
24279 {
24280 gcc_assert (val_exp == const0_rtx);
24281 vec_promoted_val = promote_duplicated_reg (move_mode, val_exp);
24282 promoted_val = promote_duplicated_reg_to_size (val_exp,
24283 GET_MODE_SIZE (word_mode),
24284 desired_align, align);
24285 }
24286 else
24287 {
24288 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
24289 desired_align, align);
24290 }
24291 }
24292 /* Misaligned move sequences handle both prologues and epilogues at once.
24293 Default code generation results in smaller code for large alignments and
24294 also avoids redundant work when sizes are known precisely. */
24295 if (misaligned_prologue_used)
24296 {
24297 /* The misaligned move prologue handles small blocks by itself. */
24298 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves
24299 (dst, src, &destreg, &srcreg,
24300 move_mode, promoted_val, vec_promoted_val,
24301 &count_exp,
24302 &jump_around_label,
24303 desired_align < align
24304 ? MAX (desired_align, epilogue_size_needed) : epilogue_size_needed,
24305 desired_align, align, &min_size, dynamic_check, issetmem);
24306 if (!issetmem)
24307 src = change_address (src, BLKmode, srcreg);
24308 dst = change_address (dst, BLKmode, destreg);
24309 set_mem_align (dst, desired_align * BITS_PER_UNIT);
24310 epilogue_size_needed = 0;
24311 if (need_zero_guard && !min_size)
24312 {
24313 /* It is possible that we copied enough so the main loop will not
24314 execute. */
24315 gcc_assert (size_needed > 1);
24316 if (jump_around_label == NULL_RTX)
24317 jump_around_label = gen_label_rtx ();
24318 emit_cmp_and_jump_insns (count_exp,
24319 GEN_INT (size_needed),
24320 LTU, 0, counter_mode (count_exp), 1, jump_around_label);
24321 if (expected_size == -1
24322 || expected_size < (desired_align - align) / 2 + size_needed)
24323 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24324 else
24325 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24326 }
24327 }
24328 /* Ensure that alignment prologue won't copy past end of block. */
24329 else if (size_needed > 1 || (desired_align > 1 && desired_align > align))
24330 {
24331 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
24332 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
24333 Make sure it is power of 2. */
24334 epilogue_size_needed = 1 << (floor_log2 (epilogue_size_needed) + 1);
24335
24336 /* To improve performance of small blocks, we jump around the VAL
24337 promoting code. This means that if the promoted VAL is not constant,
24338 we might not use it in the epilogue and have to use the byte
24339 loop variant. */
24340 if (issetmem && epilogue_size_needed > 2 && !promoted_val)
24341 force_loopy_epilogue = true;
24342 if ((count && count < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24343 || max_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24344 {
24345 /* If main algorithm works on QImode, no epilogue is needed.
24346 For small sizes just don't align anything. */
24347 if (size_needed == 1)
24348 desired_align = align;
24349 else
24350 goto epilogue;
24351 }
24352 else if (!count
24353 && min_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24354 {
24355 label = gen_label_rtx ();
24356 emit_cmp_and_jump_insns (count_exp,
24357 GEN_INT (epilogue_size_needed),
24358 LTU, 0, counter_mode (count_exp), 1, label);
24359 if (expected_size == -1 || expected_size < epilogue_size_needed)
24360 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24361 else
24362 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24363 }
24364 }
24365
24366 /* Emit code to decide on runtime whether library call or inline should be
24367 used. */
24368 if (dynamic_check != -1)
24369 {
24370 if (!issetmem && CONST_INT_P (count_exp))
24371 {
24372 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
24373 {
24374 emit_block_move_via_libcall (dst, src, count_exp, false);
24375 count_exp = const0_rtx;
24376 goto epilogue;
24377 }
24378 }
24379 else
24380 {
24381 rtx hot_label = gen_label_rtx ();
24382 if (jump_around_label == NULL_RTX)
24383 jump_around_label = gen_label_rtx ();
24384 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
24385 LEU, 0, GET_MODE (count_exp), 1, hot_label);
24386 predict_jump (REG_BR_PROB_BASE * 90 / 100);
24387 if (issetmem)
24388 set_storage_via_libcall (dst, count_exp, val_exp, false);
24389 else
24390 emit_block_move_via_libcall (dst, src, count_exp, false);
24391 emit_jump (jump_around_label);
24392 emit_label (hot_label);
24393 }
24394 }
24395
24396 /* Step 2: Alignment prologue. */
24397 /* Do the expensive promotion once we branched off the small blocks. */
24398 if (issetmem && !promoted_val)
24399 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
24400 desired_align, align);
24401
24402 if (desired_align > align && !misaligned_prologue_used)
24403 {
24404 if (align_bytes == 0)
24405 {
24406 /* Except for the first move in the prologue, we no longer know
24407 the constant offset in the aliasing info. It doesn't seem worth
24408 the pain to maintain it for the first move, so throw away
24409 the info early. */
24410 dst = change_address (dst, BLKmode, destreg);
24411 if (!issetmem)
24412 src = change_address (src, BLKmode, srcreg);
24413 dst = expand_set_or_movmem_prologue (dst, src, destreg, srcreg,
24414 promoted_val, vec_promoted_val,
24415 count_exp, align, desired_align,
24416 issetmem);
24417 /* At most desired_align - align bytes are copied. */
24418 if (min_size < (unsigned)(desired_align - align))
24419 min_size = 0;
24420 else
24421 min_size -= desired_align - align;
24422 }
24423 else
24424 {
24425 /* If we know how many bytes need to be stored before dst is
24426 sufficiently aligned, maintain aliasing info accurately. */
24427 dst = expand_set_or_movmem_constant_prologue (dst, &src, destreg,
24428 srcreg,
24429 promoted_val,
24430 vec_promoted_val,
24431 desired_align,
24432 align_bytes,
24433 issetmem);
24434
24435 count_exp = plus_constant (counter_mode (count_exp),
24436 count_exp, -align_bytes);
24437 count -= align_bytes;
24438 min_size -= align_bytes;
24439 max_size -= align_bytes;
24440 }
24441 if (need_zero_guard
24442 && !min_size
24443 && (count < (unsigned HOST_WIDE_INT) size_needed
24444 || (align_bytes == 0
24445 && count < ((unsigned HOST_WIDE_INT) size_needed
24446 + desired_align - align))))
24447 {
24448 /* It is possible that we copied enough so the main loop will not
24449 execute. */
24450 gcc_assert (size_needed > 1);
24451 if (label == NULL_RTX)
24452 label = gen_label_rtx ();
24453 emit_cmp_and_jump_insns (count_exp,
24454 GEN_INT (size_needed),
24455 LTU, 0, counter_mode (count_exp), 1, label);
24456 if (expected_size == -1
24457 || expected_size < (desired_align - align) / 2 + size_needed)
24458 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24459 else
24460 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24461 }
24462 }
24463 if (label && size_needed == 1)
24464 {
24465 emit_label (label);
24466 LABEL_NUSES (label) = 1;
24467 label = NULL;
24468 epilogue_size_needed = 1;
24469 if (issetmem)
24470 promoted_val = val_exp;
24471 }
24472 else if (label == NULL_RTX && !misaligned_prologue_used)
24473 epilogue_size_needed = size_needed;
24474
24475 /* Step 3: Main loop. */
24476
24477 switch (alg)
24478 {
24479 case libcall:
24480 case no_stringop:
24481 case last_alg:
24482 gcc_unreachable ();
24483 case loop_1_byte:
24484 case loop:
24485 case unrolled_loop:
24486 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, promoted_val,
24487 count_exp, move_mode, unroll_factor,
24488 expected_size, issetmem);
24489 break;
24490 case vector_loop:
24491 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg,
24492 vec_promoted_val, count_exp, move_mode,
24493 unroll_factor, expected_size, issetmem);
24494 break;
24495 case rep_prefix_8_byte:
24496 case rep_prefix_4_byte:
24497 case rep_prefix_1_byte:
24498 expand_set_or_movmem_via_rep (dst, src, destreg, srcreg, promoted_val,
24499 val_exp, count_exp, move_mode, issetmem);
24500 break;
24501 }
24502 /* Properly adjust the offset of the src and dest memory for aliasing. */
24503 if (CONST_INT_P (count_exp))
24504 {
24505 if (!issetmem)
24506 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
24507 (count / size_needed) * size_needed);
24508 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
24509 (count / size_needed) * size_needed);
24510 }
24511 else
24512 {
24513 if (!issetmem)
24514 src = change_address (src, BLKmode, srcreg);
24515 dst = change_address (dst, BLKmode, destreg);
24516 }
24517
24518 /* Step 4: Epilogue to copy the remaining bytes. */
24519 epilogue:
24520 if (label)
24521 {
24522 /* When the main loop is done, COUNT_EXP might hold original count,
24523 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
24524 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
24525 bytes. Compensate if needed. */
24526
24527 if (size_needed < epilogue_size_needed)
24528 {
24529 tmp =
24530 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
24531 GEN_INT (size_needed - 1), count_exp, 1,
24532 OPTAB_DIRECT);
24533 if (tmp != count_exp)
24534 emit_move_insn (count_exp, tmp);
24535 }
24536 emit_label (label);
24537 LABEL_NUSES (label) = 1;
24538 }
24539
24540 if (count_exp != const0_rtx && epilogue_size_needed > 1)
24541 {
24542 if (force_loopy_epilogue)
24543 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
24544 epilogue_size_needed);
24545 else
24546 {
24547 if (issetmem)
24548 expand_setmem_epilogue (dst, destreg, promoted_val,
24549 vec_promoted_val, count_exp,
24550 epilogue_size_needed);
24551 else
24552 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
24553 epilogue_size_needed);
24554 }
24555 }
24556 if (jump_around_label)
24557 emit_label (jump_around_label);
24558 return true;
24559 }
24560
24561
24562 /* Expand the appropriate insns for doing strlen if not just doing
24563 repnz; scasb
24564
24565 out = result, initialized with the start address
24566 align_rtx = alignment of the address.
24567 scratch = scratch register, initialized with the start address when
24568 not aligned, otherwise undefined
24569
24570 This is just the body. It needs the initializations mentioned above and
24571 some address computation at the end. These things are done in i386.md. */
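/* Illustrative C-level sketch of the code emitted below (assumes the
   pointer starts in OUT and the alignment is unknown; the helper name
   has_zero_byte is made up):

     while ((uintptr_t) out & 3)          // at most 3 byte probes
       if (*out == 0) goto done; else out++;
     do {                                 // 4 bytes per iteration
       word = *(unsigned int *) out;
       out += 4;
     } while (!has_zero_byte (word));
     // out is now 4 bytes past the word containing the NUL; a
     // cmov/jump adds 2 when the NUL is in the upper half, and a
     // carry trick subtracts the remaining 3 or 4, leaving out
     // pointing at the terminating NUL itself
   done: ;
*/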
24572
24573 static void
24574 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
24575 {
24576 int align;
24577 rtx tmp;
24578 rtx align_2_label = NULL_RTX;
24579 rtx align_3_label = NULL_RTX;
24580 rtx align_4_label = gen_label_rtx ();
24581 rtx end_0_label = gen_label_rtx ();
24582 rtx mem;
24583 rtx tmpreg = gen_reg_rtx (SImode);
24584 rtx scratch = gen_reg_rtx (SImode);
24585 rtx cmp;
24586
24587 align = 0;
24588 if (CONST_INT_P (align_rtx))
24589 align = INTVAL (align_rtx);
24590
24591 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
24592
24593 /* Is there a known alignment and is it less than 4? */
24594 if (align < 4)
24595 {
24596 rtx scratch1 = gen_reg_rtx (Pmode);
24597 emit_move_insn (scratch1, out);
24598 /* Is there a known alignment and is it not 2? */
24599 if (align != 2)
24600 {
24601 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
24602 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
24603
24604 /* Leave just the 3 lower bits. */
24605 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
24606 NULL_RTX, 0, OPTAB_WIDEN);
24607
24608 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
24609 Pmode, 1, align_4_label);
24610 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
24611 Pmode, 1, align_2_label);
24612 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
24613 Pmode, 1, align_3_label);
24614 }
24615 else
24616 {
24617 /* Since the alignment is 2, we have to check 2 or 0 bytes;
24618 check whether it is aligned to a 4-byte boundary. */
24619
24620 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
24621 NULL_RTX, 0, OPTAB_WIDEN);
24622
24623 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
24624 Pmode, 1, align_4_label);
24625 }
24626
24627 mem = change_address (src, QImode, out);
24628
24629 /* Now compare the bytes. */
24630
24631 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
24632 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
24633 QImode, 1, end_0_label);
24634
24635 /* Increment the address. */
24636 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
24637
24638 /* Not needed with an alignment of 2. */
24639 if (align != 2)
24640 {
24641 emit_label (align_2_label);
24642
24643 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
24644 end_0_label);
24645
24646 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
24647
24648 emit_label (align_3_label);
24649 }
24650
24651 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
24652 end_0_label);
24653
24654 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
24655 }
24656
24657 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
24658 align this loop; it only makes the program bigger and does not
24659 speed it up. */
24660 emit_label (align_4_label);
24661
24662 mem = change_address (src, SImode, out);
24663 emit_move_insn (scratch, mem);
24664 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
24665
24666 /* This formula yields a nonzero result iff one of the bytes is zero.
24667 This saves three branches inside the loop and many cycles. */
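/* Equivalent C for the four insns emitted below (illustration only):

     tmpreg = (scratch - 0x01010101U) & ~scratch & 0x80808080U;

   which is nonzero exactly when some byte of the loaded word is
   zero: subtracting 1 from a 0x00 byte wraps it to 0xff, setting its
   top bit, while the "& ~scratch" term rejects bytes whose top bit
   was already set to begin with.  */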
24668
24669 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
24670 emit_insn (gen_one_cmplsi2 (scratch, scratch));
24671 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
24672 emit_insn (gen_andsi3 (tmpreg, tmpreg,
24673 gen_int_mode (0x80808080, SImode)));
24674 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
24675 align_4_label);
24676
24677 if (TARGET_CMOVE)
24678 {
24679 rtx reg = gen_reg_rtx (SImode);
24680 rtx reg2 = gen_reg_rtx (Pmode);
24681 emit_move_insn (reg, tmpreg);
24682 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
24683
24684 /* If zero is not in the first two bytes, move two bytes forward. */
24685 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
24686 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
24687 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
24688 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
24689 gen_rtx_IF_THEN_ELSE (SImode, tmp,
24690 reg,
24691 tmpreg)));
24692 /* Emit lea manually to avoid clobbering of flags. */
24693 emit_insn (gen_rtx_SET (SImode, reg2,
24694 gen_rtx_PLUS (Pmode, out, const2_rtx)));
24695
24696 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
24697 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
24698 emit_insn (gen_rtx_SET (VOIDmode, out,
24699 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
24700 reg2,
24701 out)));
24702 }
24703 else
24704 {
24705 rtx end_2_label = gen_label_rtx ();
24706 /* Is zero in the first two bytes? */
24707
24708 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
24709 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
24710 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
24711 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
24712 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
24713 pc_rtx);
24714 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
24715 JUMP_LABEL (tmp) = end_2_label;
24716
24717 /* Not in the first two. Move two bytes forward. */
24718 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
24719 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
24720
24721 emit_label (end_2_label);
24722
24723 }
24724
24725 /* Avoid branch in fixing the byte. */
24726 tmpreg = gen_lowpart (QImode, tmpreg);
24727 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
24728 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
24729 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
24730 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
24731
24732 emit_label (end_0_label);
24733 }
24734
24735 /* Expand strlen. */
24736
24737 bool
24738 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
24739 {
24740 rtx addr, scratch1, scratch2, scratch3, scratch4;
24741
24742 /* The generic case of the strlen expander is long. Avoid expanding
24743 it unless TARGET_INLINE_ALL_STRINGOPS. */
24744
24745 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
24746 && !TARGET_INLINE_ALL_STRINGOPS
24747 && !optimize_insn_for_size_p ()
24748 && (!CONST_INT_P (align) || INTVAL (align) < 4))
24749 return false;
24750
24751 addr = force_reg (Pmode, XEXP (src, 0));
24752 scratch1 = gen_reg_rtx (Pmode);
24753
24754 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
24755 && !optimize_insn_for_size_p ())
24756 {
24757 /* Well it seems that some optimizer does not combine a call like
24758 foo(strlen(bar), strlen(bar));
24759 when the move and the subtraction are done here. It does calculate
24760 the length just once when these instructions are done inside of
24761 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
24762 often used and I use one fewer register for the lifetime of
24763 output_strlen_unroll() this is better. */
24764
24765 emit_move_insn (out, addr);
24766
24767 ix86_expand_strlensi_unroll_1 (out, src, align);
24768
24769 /* strlensi_unroll_1 returns the address of the zero at the end of
24770 the string, like memchr(), so compute the length by subtracting
24771 the start address. */
24772 emit_insn (ix86_gen_sub3 (out, out, addr));
24773 }
24774 else
24775 {
24776 rtx unspec;
24777
24778 /* Can't use this if the user has appropriated eax, ecx, or edi. */
24779 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
24780 return false;
24781
24782 scratch2 = gen_reg_rtx (Pmode);
24783 scratch3 = gen_reg_rtx (Pmode);
24784 scratch4 = force_reg (Pmode, constm1_rtx);
24785
24786 emit_move_insn (scratch3, addr);
24787 eoschar = force_reg (QImode, eoschar);
24788
24789 src = replace_equiv_address_nv (src, scratch3);
24790
24791 /* If .md starts supporting :P, this can be done in .md. */
24792 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
24793 scratch4), UNSPEC_SCAS);
24794 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
24795 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
24796 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
24797 }
24798 return true;
24799 }
24800
24801 /* For a given symbol (function), construct code to compute the address of its PLT
24802 entry in the large x86-64 PIC model. */
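/* Illustration only: the sequence built below is conceptually

     movabs  $symbol@PLTOFF, tmp
     add     pic_offset_table, tmp

   i.e. the PLT entry address is the GOT base plus the symbol's
   PLTOFF constant; this is only valid for -mcmodel=large with PIC.  */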
24803 static rtx
24804 construct_plt_address (rtx symbol)
24805 {
24806 rtx tmp, unspec;
24807
24808 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
24809 gcc_assert (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF);
24810 gcc_assert (Pmode == DImode);
24811
24812 tmp = gen_reg_rtx (Pmode);
24813 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
24814
24815 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
24816 emit_insn (ix86_gen_add3 (tmp, tmp, pic_offset_table_rtx));
24817 return tmp;
24818 }
24819
24820 rtx
24821 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
24822 rtx callarg2,
24823 rtx pop, bool sibcall)
24824 {
24825 unsigned int const cregs_size
24826 = ARRAY_SIZE (x86_64_ms_sysv_extra_clobbered_registers);
24827 rtx vec[3 + cregs_size];
24828 rtx use = NULL, call;
24829 unsigned int vec_len = 0;
24830
24831 if (pop == const0_rtx)
24832 pop = NULL;
24833 gcc_assert (!TARGET_64BIT || !pop);
24834
24835 if (TARGET_MACHO && !TARGET_64BIT)
24836 {
24837 #if TARGET_MACHO
24838 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
24839 fnaddr = machopic_indirect_call_target (fnaddr);
24840 #endif
24841 }
24842 else
24843 {
24844 /* Static functions and indirect calls don't need the pic register. */
24845 if (flag_pic
24846 && (!TARGET_64BIT
24847 || (ix86_cmodel == CM_LARGE_PIC
24848 && DEFAULT_ABI != MS_ABI))
24849 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
24850 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
24851 use_reg (&use, pic_offset_table_rtx);
24852 }
24853
24854 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
24855 {
24856 rtx al = gen_rtx_REG (QImode, AX_REG);
24857 emit_move_insn (al, callarg2);
24858 use_reg (&use, al);
24859 }
24860
24861 if (ix86_cmodel == CM_LARGE_PIC
24862 && !TARGET_PECOFF
24863 && MEM_P (fnaddr)
24864 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
24865 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
24866 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
24867 else if (sibcall
24868 ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode)
24869 : !call_insn_operand (XEXP (fnaddr, 0), word_mode))
24870 {
24871 fnaddr = convert_to_mode (word_mode, XEXP (fnaddr, 0), 1);
24872 fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (word_mode, fnaddr));
24873 }
24874
24875 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
24876 if (retval)
24877 call = gen_rtx_SET (VOIDmode, retval, call);
24878 vec[vec_len++] = call;
24879
24880 if (pop)
24881 {
24882 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
24883 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
24884 vec[vec_len++] = pop;
24885 }
24886
24887 if (TARGET_64BIT_MS_ABI
24888 && (!callarg2 || INTVAL (callarg2) != -2))
24889 {
24890 unsigned i;
24891
24892 vec[vec_len++] = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
24893 UNSPEC_MS_TO_SYSV_CALL);
24894
24895 for (i = 0; i < cregs_size; i++)
24896 {
24897 int regno = x86_64_ms_sysv_extra_clobbered_registers[i];
24898 enum machine_mode mode = SSE_REGNO_P (regno) ? TImode : DImode;
24899
24900 vec[vec_len++]
24901 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (mode, regno));
24902 }
24903 }
24904
24905 if (vec_len > 1)
24906 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec));
24907 call = emit_call_insn (call);
24908 if (use)
24909 CALL_INSN_FUNCTION_USAGE (call) = use;
24910
24911 return call;
24912 }
24913
24914 /* Output the assembly for a call instruction. */
24915
24916 const char *
24917 ix86_output_call_insn (rtx insn, rtx call_op)
24918 {
24919 bool direct_p = constant_call_address_operand (call_op, VOIDmode);
24920 bool seh_nop_p = false;
24921 const char *xasm;
24922
24923 if (SIBLING_CALL_P (insn))
24924 {
24925 if (direct_p)
24926 xasm = "jmp\t%P0";
24927 /* SEH epilogue detection requires the indirect branch case
24928 to include REX.W. */
24929 else if (TARGET_SEH)
24930 xasm = "rex.W jmp %A0";
24931 else
24932 xasm = "jmp\t%A0";
24933
24934 output_asm_insn (xasm, &call_op);
24935 return "";
24936 }
24937
24938 /* SEH unwinding can require an extra nop to be emitted in several
24939 circumstances. Determine if we have one of those. */
24940 if (TARGET_SEH)
24941 {
24942 rtx i;
24943
24944 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
24945 {
24946 /* If we get to another real insn, we don't need the nop. */
24947 if (INSN_P (i))
24948 break;
24949
24950 /* If we get to the epilogue note, prevent a catch region from
24951 being adjacent to the standard epilogue sequence. With non-call
24952 exceptions, we'll have done this during epilogue emission. */
24953 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
24954 && !flag_non_call_exceptions
24955 && !can_throw_internal (insn))
24956 {
24957 seh_nop_p = true;
24958 break;
24959 }
24960 }
24961
24962 /* If we didn't find a real insn following the call, prevent the
24963 unwinder from looking into the next function. */
24964 if (i == NULL)
24965 seh_nop_p = true;
24966 }
24967
24968 if (direct_p)
24969 xasm = "call\t%P0";
24970 else
24971 xasm = "call\t%A0";
24972
24973 output_asm_insn (xasm, &call_op);
24974
24975 if (seh_nop_p)
24976 return "nop";
24977
24978 return "";
24979 }
24980 \f
24981 /* Clear stack slot assignments remembered from previous functions.
24982 This is called from INIT_EXPANDERS once before RTL is emitted for each
24983 function. */
24984
24985 static struct machine_function *
24986 ix86_init_machine_status (void)
24987 {
24988 struct machine_function *f;
24989
24990 f = ggc_alloc_cleared_machine_function ();
24991 f->use_fast_prologue_epilogue_nregs = -1;
24992 f->call_abi = ix86_abi;
24993
24994 return f;
24995 }
24996
24997 /* Return a MEM corresponding to a stack slot with mode MODE.
24998 Allocate a new slot if necessary.
24999
25000 The RTL for a function can have several slots available: N is
25001 which slot to use. */
25002
25003 rtx
25004 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
25005 {
25006 struct stack_local_entry *s;
25007
25008 gcc_assert (n < MAX_386_STACK_LOCALS);
25009
25010 for (s = ix86_stack_locals; s; s = s->next)
25011 if (s->mode == mode && s->n == n)
25012 return validize_mem (copy_rtx (s->rtl));
25013
25014 s = ggc_alloc_stack_local_entry ();
25015 s->n = n;
25016 s->mode = mode;
25017 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
25018
25019 s->next = ix86_stack_locals;
25020 ix86_stack_locals = s;
25021 return validize_mem (s->rtl);
25022 }
25023
25024 static void
25025 ix86_instantiate_decls (void)
25026 {
25027 struct stack_local_entry *s;
25028
25029 for (s = ix86_stack_locals; s; s = s->next)
25030 if (s->rtl != NULL_RTX)
25031 instantiate_decl_rtl (s->rtl);
25032 }
25033 \f
25034 /* Check whether x86 address PARTS is a pc-relative address. */
25035
25036 static bool
25037 rip_relative_addr_p (struct ix86_address *parts)
25038 {
25039 rtx base, index, disp;
25040
25041 base = parts->base;
25042 index = parts->index;
25043 disp = parts->disp;
25044
25045 if (disp && !base && !index)
25046 {
25047 if (TARGET_64BIT)
25048 {
25049 rtx symbol = disp;
25050
25051 if (GET_CODE (disp) == CONST)
25052 symbol = XEXP (disp, 0);
25053 if (GET_CODE (symbol) == PLUS
25054 && CONST_INT_P (XEXP (symbol, 1)))
25055 symbol = XEXP (symbol, 0);
25056
25057 if (GET_CODE (symbol) == LABEL_REF
25058 || (GET_CODE (symbol) == SYMBOL_REF
25059 && SYMBOL_REF_TLS_MODEL (symbol) == 0)
25060 || (GET_CODE (symbol) == UNSPEC
25061 && (XINT (symbol, 1) == UNSPEC_GOTPCREL
25062 || XINT (symbol, 1) == UNSPEC_PCREL
25063 || XINT (symbol, 1) == UNSPEC_GOTNTPOFF)))
25064 return true;
25065 }
25066 }
25067 return false;
25068 }
25069
25070 /* Calculate the length of the memory address in the instruction encoding.
25071 Includes addr32 prefix, does not include the one-byte modrm, opcode,
25072 or other prefixes. We never generate addr32 prefix for LEA insn. */
25073
25074 int
25075 memory_address_length (rtx addr, bool lea)
25076 {
25077 struct ix86_address parts;
25078 rtx base, index, disp;
25079 int len;
25080 int ok;
25081
25082 if (GET_CODE (addr) == PRE_DEC
25083 || GET_CODE (addr) == POST_INC
25084 || GET_CODE (addr) == PRE_MODIFY
25085 || GET_CODE (addr) == POST_MODIFY)
25086 return 0;
25087
25088 ok = ix86_decompose_address (addr, &parts);
25089 gcc_assert (ok);
25090
25091 len = (parts.seg == SEG_DEFAULT) ? 0 : 1;
25092
25093 /* If this is not an LEA instruction, add the length of the addr32 prefix. */
25094 if (TARGET_64BIT && !lea
25095 && (SImode_address_operand (addr, VOIDmode)
25096 || (parts.base && GET_MODE (parts.base) == SImode)
25097 || (parts.index && GET_MODE (parts.index) == SImode)))
25098 len++;
25099
25100 base = parts.base;
25101 index = parts.index;
25102 disp = parts.disp;
25103
25104 if (base && GET_CODE (base) == SUBREG)
25105 base = SUBREG_REG (base);
25106 if (index && GET_CODE (index) == SUBREG)
25107 index = SUBREG_REG (index);
25108
25109 gcc_assert (base == NULL_RTX || REG_P (base));
25110 gcc_assert (index == NULL_RTX || REG_P (index));
25111
25112 /* Rule of thumb:
25113 - esp as the base always wants an index,
25114 - ebp as the base always wants a displacement,
25115 - r12 as the base always wants an index,
25116 - r13 as the base always wants a displacement. */
25117
25118 /* Register Indirect. */
25119 if (base && !index && !disp)
25120 {
25121 /* esp (for its index) and ebp (for its displacement) need
25122 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
25123 code. */
25124 if (base == arg_pointer_rtx
25125 || base == frame_pointer_rtx
25126 || REGNO (base) == SP_REG
25127 || REGNO (base) == BP_REG
25128 || REGNO (base) == R12_REG
25129 || REGNO (base) == R13_REG)
25130 len++;
25131 }
25132
25133 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
25134 is not disp32, but disp32(%rip), so for disp32
25135 SIB byte is needed, unless print_operand_address
25136 optimizes it into disp32(%rip) or (%rip) is implied
25137 by UNSPEC. */
25138 else if (disp && !base && !index)
25139 {
25140 len += 4;
25141 if (rip_relative_addr_p (&parts))
25142 len++;
25143 }
25144 else
25145 {
25146 /* Find the length of the displacement constant. */
25147 if (disp)
25148 {
25149 if (base && satisfies_constraint_K (disp))
25150 len += 1;
25151 else
25152 len += 4;
25153 }
25154 /* ebp always wants a displacement. Similarly r13. */
25155 else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
25156 len++;
25157
25158 /* An index requires the two-byte modrm form.... */
25159 if (index
25160 /* ...like esp (or r12), which always wants an index. */
25161 || base == arg_pointer_rtx
25162 || base == frame_pointer_rtx
25163 || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
25164 len++;
25165 }
25166
25167 return len;
25168 }
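/* For illustration, two examples of the length computed above (AT&T syntax;
   the counts exclude the opcode and the modrm byte):

     4(%esp)       -> 2   SIB byte (forced by the esp base) + disp8
     foo(,%ebx,4)  -> 5   SIB byte (for the index) + disp32  */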
25169
25170 /* Compute default value for "length_immediate" attribute. When SHORTFORM
25171 is set, expect that the insn has an 8-bit immediate alternative. */
25172 int
25173 ix86_attr_length_immediate_default (rtx insn, bool shortform)
25174 {
25175 int len = 0;
25176 int i;
25177 extract_insn_cached (insn);
25178 for (i = recog_data.n_operands - 1; i >= 0; --i)
25179 if (CONSTANT_P (recog_data.operand[i]))
25180 {
25181 enum attr_mode mode = get_attr_mode (insn);
25182
25183 gcc_assert (!len);
25184 if (shortform && CONST_INT_P (recog_data.operand[i]))
25185 {
25186 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
25187 switch (mode)
25188 {
25189 case MODE_QI:
25190 len = 1;
25191 continue;
25192 case MODE_HI:
25193 ival = trunc_int_for_mode (ival, HImode);
25194 break;
25195 case MODE_SI:
25196 ival = trunc_int_for_mode (ival, SImode);
25197 break;
25198 default:
25199 break;
25200 }
25201 if (IN_RANGE (ival, -128, 127))
25202 {
25203 len = 1;
25204 continue;
25205 }
25206 }
25207 switch (mode)
25208 {
25209 case MODE_QI:
25210 len = 1;
25211 break;
25212 case MODE_HI:
25213 len = 2;
25214 break;
25215 case MODE_SI:
25216 len = 4;
25217 break;
25218 /* Immediates for DImode instructions are encoded
25219 as 32bit sign extended values. */
25220 case MODE_DI:
25221 len = 4;
25222 break;
25223 default:
25224 fatal_insn ("unknown insn mode", insn);
25225 }
25226 }
25227 return len;
25228 }
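/* For illustration: with SHORTFORM set, "add $42, %eax" counts 1 byte for
   the immediate since it fits the sign-extended imm8 form, while
   "add $1000, %eax" counts 4.  DImode immediates also count 4, as they are
   encoded as 32-bit sign-extended values.  */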
25229
25230 /* Compute default value for "length_address" attribute. */
25231 int
25232 ix86_attr_length_address_default (rtx insn)
25233 {
25234 int i;
25235
25236 if (get_attr_type (insn) == TYPE_LEA)
25237 {
25238 rtx set = PATTERN (insn), addr;
25239
25240 if (GET_CODE (set) == PARALLEL)
25241 set = XVECEXP (set, 0, 0);
25242
25243 gcc_assert (GET_CODE (set) == SET);
25244
25245 addr = SET_SRC (set);
25246
25247 return memory_address_length (addr, true);
25248 }
25249
25250 extract_insn_cached (insn);
25251 for (i = recog_data.n_operands - 1; i >= 0; --i)
25252 if (MEM_P (recog_data.operand[i]))
25253 {
25254 constrain_operands_cached (reload_completed);
25255 if (which_alternative != -1)
25256 {
25257 const char *constraints = recog_data.constraints[i];
25258 int alt = which_alternative;
25259
25260 while (*constraints == '=' || *constraints == '+')
25261 constraints++;
25262 while (alt-- > 0)
25263 while (*constraints++ != ',')
25264 ;
25265 /* Skip ignored operands. */
25266 if (*constraints == 'X')
25267 continue;
25268 }
25269 return memory_address_length (XEXP (recog_data.operand[i], 0), false);
25270 }
25271 return 0;
25272 }
25273
25274 /* Compute default value for "length_vex" attribute. It includes
25275 2 or 3 byte VEX prefix and 1 opcode byte. */
25276
25277 int
25278 ix86_attr_length_vex_default (rtx insn, bool has_0f_opcode, bool has_vex_w)
25279 {
25280 int i;
25281
25282 /* Only the 0f opcode can use the 2-byte VEX prefix; the VEX W bit requires
25283 the 3-byte VEX prefix. */
25284 if (!has_0f_opcode || has_vex_w)
25285 return 3 + 1;
25286
25287 /* We can always use 2 byte VEX prefix in 32bit. */
25288 if (!TARGET_64BIT)
25289 return 2 + 1;
25290
25291 extract_insn_cached (insn);
25292
25293 for (i = recog_data.n_operands - 1; i >= 0; --i)
25294 if (REG_P (recog_data.operand[i]))
25295 {
25296 /* REX.W bit uses 3 byte VEX prefix. */
25297 if (GET_MODE (recog_data.operand[i]) == DImode
25298 && GENERAL_REG_P (recog_data.operand[i]))
25299 return 3 + 1;
25300 }
25301 else
25302 {
25303 /* REX.X or REX.B bits use 3 byte VEX prefix. */
25304 if (MEM_P (recog_data.operand[i])
25305 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
25306 return 3 + 1;
25307 }
25308
25309 return 2 + 1;
25310 }
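/* For illustration: "vandps %xmm1, %xmm2, %xmm3" is counted as 2 + 1 bytes
   (2-byte VEX prefix plus the opcode byte).  An insn with a DImode general
   register operand, or with %r8-%r15 mentioned in a memory operand, is
   counted as 3 + 1 bytes because it needs the 3-byte VEX prefix.  */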
25311 \f
25312 /* Return the maximum number of instructions a cpu can issue. */
25313
25314 static int
25315 ix86_issue_rate (void)
25316 {
25317 switch (ix86_tune)
25318 {
25319 case PROCESSOR_PENTIUM:
25320 case PROCESSOR_BONNELL:
25321 case PROCESSOR_SILVERMONT:
25322 case PROCESSOR_INTEL:
25323 case PROCESSOR_K6:
25324 case PROCESSOR_BTVER2:
25325 case PROCESSOR_PENTIUM4:
25326 case PROCESSOR_NOCONA:
25327 return 2;
25328
25329 case PROCESSOR_PENTIUMPRO:
25330 case PROCESSOR_ATHLON:
25331 case PROCESSOR_K8:
25332 case PROCESSOR_AMDFAM10:
25333 case PROCESSOR_GENERIC:
25334 case PROCESSOR_BTVER1:
25335 return 3;
25336
25337 case PROCESSOR_BDVER1:
25338 case PROCESSOR_BDVER2:
25339 case PROCESSOR_BDVER3:
25340 case PROCESSOR_BDVER4:
25341 case PROCESSOR_CORE2:
25342 case PROCESSOR_NEHALEM:
25343 case PROCESSOR_SANDYBRIDGE:
25344 case PROCESSOR_HASWELL:
25345 return 4;
25346
25347 default:
25348 return 1;
25349 }
25350 }
25351
25352 /* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads the flags set
25353 by DEP_INSN and nothing else that DEP_INSN sets. */
25354
25355 static bool
25356 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
25357 {
25358 rtx set, set2;
25359
25360 /* Simplify the test for uninteresting insns. */
25361 if (insn_type != TYPE_SETCC
25362 && insn_type != TYPE_ICMOV
25363 && insn_type != TYPE_FCMOV
25364 && insn_type != TYPE_IBR)
25365 return false;
25366
25367 if ((set = single_set (dep_insn)) != 0)
25368 {
25369 set = SET_DEST (set);
25370 set2 = NULL_RTX;
25371 }
25372 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
25373 && XVECLEN (PATTERN (dep_insn), 0) == 2
25374 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
25375 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
25376 {
25377 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
25378 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
25379 }
25380 else
25381 return false;
25382
25383 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
25384 return false;
25385
25386 /* This test is true if the dependent insn reads the flags but
25387 not any other potentially set register. */
25388 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
25389 return false;
25390
25391 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
25392 return false;
25393
25394 return true;
25395 }
25396
25397 /* Return true iff USE_INSN has a memory address with operands set by
25398 SET_INSN. */
25399
25400 bool
25401 ix86_agi_dependent (rtx set_insn, rtx use_insn)
25402 {
25403 int i;
25404 extract_insn_cached (use_insn);
25405 for (i = recog_data.n_operands - 1; i >= 0; --i)
25406 if (MEM_P (recog_data.operand[i]))
25407 {
25408 rtx addr = XEXP (recog_data.operand[i], 0);
25409 return modified_in_p (addr, set_insn) != 0;
25410 }
25411 return false;
25412 }
25413
25414 /* Helper function for exact_store_load_dependency.
25415 Return true if addr is found in insn. */
25416 static bool
25417 exact_dependency_1 (rtx addr, rtx insn)
25418 {
25419 enum rtx_code code;
25420 const char *format_ptr;
25421 int i, j;
25422
25423 code = GET_CODE (insn);
25424 switch (code)
25425 {
25426 case MEM:
25427 if (rtx_equal_p (addr, insn))
25428 return true;
25429 break;
25430 case REG:
25431 CASE_CONST_ANY:
25432 case SYMBOL_REF:
25433 case CODE_LABEL:
25434 case PC:
25435 case CC0:
25436 case EXPR_LIST:
25437 return false;
25438 default:
25439 break;
25440 }
25441
25442 format_ptr = GET_RTX_FORMAT (code);
25443 for (i = 0; i < GET_RTX_LENGTH (code); i++)
25444 {
25445 switch (*format_ptr++)
25446 {
25447 case 'e':
25448 if (exact_dependency_1 (addr, XEXP (insn, i)))
25449 return true;
25450 break;
25451 case 'E':
25452 for (j = 0; j < XVECLEN (insn, i); j++)
25453 if (exact_dependency_1 (addr, XVECEXP (insn, i, j)))
25454 return true;
25455 break;
25456 }
25457 }
25458 return false;
25459 }
25460
25461 /* Return true if there exists exact dependency for store & load, i.e.
25462 the same memory address is used in them. */
25463 static bool
25464 exact_store_load_dependency (rtx store, rtx load)
25465 {
25466 rtx set1, set2;
25467
25468 set1 = single_set (store);
25469 if (!set1)
25470 return false;
25471 if (!MEM_P (SET_DEST (set1)))
25472 return false;
25473 set2 = single_set (load);
25474 if (!set2)
25475 return false;
25476 if (exact_dependency_1 (SET_DEST (set1), SET_SRC (set2)))
25477 return true;
25478 return false;
25479 }
25480
25481 static int
25482 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
25483 {
25484 enum attr_type insn_type, dep_insn_type;
25485 enum attr_memory memory;
25486 rtx set, set2;
25487 int dep_insn_code_number;
25488
25489 /* Anti and output dependencies have zero cost on all CPUs. */
25490 if (REG_NOTE_KIND (link) != 0)
25491 return 0;
25492
25493 dep_insn_code_number = recog_memoized (dep_insn);
25494
25495 /* If we can't recognize the insns, we can't really do anything. */
25496 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
25497 return cost;
25498
25499 insn_type = get_attr_type (insn);
25500 dep_insn_type = get_attr_type (dep_insn);
25501
25502 switch (ix86_tune)
25503 {
25504 case PROCESSOR_PENTIUM:
25505 /* Address Generation Interlock adds a cycle of latency. */
25506 if (insn_type == TYPE_LEA)
25507 {
25508 rtx addr = PATTERN (insn);
25509
25510 if (GET_CODE (addr) == PARALLEL)
25511 addr = XVECEXP (addr, 0, 0);
25512
25513 gcc_assert (GET_CODE (addr) == SET);
25514
25515 addr = SET_SRC (addr);
25516 if (modified_in_p (addr, dep_insn))
25517 cost += 1;
25518 }
25519 else if (ix86_agi_dependent (dep_insn, insn))
25520 cost += 1;
25521
25522 /* ??? Compares pair with jump/setcc. */
25523 if (ix86_flags_dependent (insn, dep_insn, insn_type))
25524 cost = 0;
25525
25526 /* Floating point stores require value to be ready one cycle earlier. */
25527 if (insn_type == TYPE_FMOV
25528 && get_attr_memory (insn) == MEMORY_STORE
25529 && !ix86_agi_dependent (dep_insn, insn))
25530 cost += 1;
25531 break;
25532
25533 case PROCESSOR_PENTIUMPRO:
25534 /* INT->FP conversion is expensive. */
25535 if (get_attr_fp_int_src (dep_insn))
25536 cost += 5;
25537
25538 /* There is one cycle extra latency between an FP op and a store. */
25539 if (insn_type == TYPE_FMOV
25540 && (set = single_set (dep_insn)) != NULL_RTX
25541 && (set2 = single_set (insn)) != NULL_RTX
25542 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
25543 && MEM_P (SET_DEST (set2)))
25544 cost += 1;
25545
25546 memory = get_attr_memory (insn);
25547
25548 /* Model the ability of the reorder buffer to hide the latency of a load
25549 by executing it in parallel with the previous instruction, when the
25550 previous instruction is not needed to compute the address. */
25551 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25552 && !ix86_agi_dependent (dep_insn, insn))
25553 {
25554 /* Claim moves take one cycle, as the core can issue one load at a
25555 time and the next load can start a cycle later. */
25556 if (dep_insn_type == TYPE_IMOV
25557 || dep_insn_type == TYPE_FMOV)
25558 cost = 1;
25559 else if (cost > 1)
25560 cost--;
25561 }
25562 break;
25563
25564 case PROCESSOR_K6:
25565 /* The esp dependency is resolved before
25566 the instruction is really finished. */
25567 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
25568 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
25569 return 1;
25570
25571 /* INT->FP conversion is expensive. */
25572 if (get_attr_fp_int_src (dep_insn))
25573 cost += 5;
25574
25575 memory = get_attr_memory (insn);
25576
25577 /* Model the ability of the reorder buffer to hide the latency of a load
25578 by executing it in parallel with the previous instruction, when the
25579 previous instruction is not needed to compute the address. */
25580 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25581 && !ix86_agi_dependent (dep_insn, insn))
25582 {
25583 /* Claim moves take one cycle, as the core can issue one load at a
25584 time and the next load can start a cycle later. */
25585 if (dep_insn_type == TYPE_IMOV
25586 || dep_insn_type == TYPE_FMOV)
25587 cost = 1;
25588 else if (cost > 2)
25589 cost -= 2;
25590 else
25591 cost = 1;
25592 }
25593 break;
25594
25595 case PROCESSOR_AMDFAM10:
25596 case PROCESSOR_BDVER1:
25597 case PROCESSOR_BDVER2:
25598 case PROCESSOR_BDVER3:
25599 case PROCESSOR_BDVER4:
25600 case PROCESSOR_BTVER1:
25601 case PROCESSOR_BTVER2:
25602 case PROCESSOR_GENERIC:
25603 /* The stack engine allows push and pop instructions to execute in parallel. */
25604 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
25605 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
25606 return 0;
25607 /* FALLTHRU */
25608
25609 case PROCESSOR_ATHLON:
25610 case PROCESSOR_K8:
25611 memory = get_attr_memory (insn);
25612
25613 /* Model the ability of the reorder buffer to hide the latency of a load
25614 by executing it in parallel with the previous instruction, when the
25615 previous instruction is not needed to compute the address. */
25616 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25617 && !ix86_agi_dependent (dep_insn, insn))
25618 {
25619 enum attr_unit unit = get_attr_unit (insn);
25620 int loadcost = 3;
25621
25622 /* Because of the difference between the length of integer and
25623 floating unit pipeline preparation stages, the memory operands
25624 for floating point are cheaper.
25625
25626 ??? For Athlon the difference is most probably 2. */
25627 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
25628 loadcost = 3;
25629 else
25630 loadcost = TARGET_ATHLON ? 2 : 0;
25631
25632 if (cost >= loadcost)
25633 cost -= loadcost;
25634 else
25635 cost = 0;
25636 }
25637 break;
25638
25639 case PROCESSOR_CORE2:
25640 case PROCESSOR_NEHALEM:
25641 case PROCESSOR_SANDYBRIDGE:
25642 case PROCESSOR_HASWELL:
25643 /* The stack engine allows push and pop instructions to execute in parallel. */
25644 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
25645 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
25646 return 0;
25647
25648 memory = get_attr_memory (insn);
25649
25650 /* Model the ability of the reorder buffer to hide the latency of a load
25651 by executing it in parallel with the previous instruction, when the
25652 previous instruction is not needed to compute the address. */
25653 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25654 && !ix86_agi_dependent (dep_insn, insn))
25655 {
25656 if (cost >= 4)
25657 cost -= 4;
25658 else
25659 cost = 0;
25660 }
25661 break;
25662
25663 case PROCESSOR_SILVERMONT:
25664 case PROCESSOR_INTEL:
25665 if (!reload_completed)
25666 return cost;
25667
25668 /* Increase cost of integer loads. */
25669 memory = get_attr_memory (dep_insn);
25670 if (memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25671 {
25672 enum attr_unit unit = get_attr_unit (dep_insn);
25673 if (unit == UNIT_INTEGER && cost == 1)
25674 {
25675 if (memory == MEMORY_LOAD)
25676 cost = 3;
25677 else
25678 {
25679 /* Increase cost of ld/st for short int types only
25680 because of store forwarding issue. */
25681 rtx set = single_set (dep_insn);
25682 if (set && (GET_MODE (SET_DEST (set)) == QImode
25683 || GET_MODE (SET_DEST (set)) == HImode))
25684 {
25685 /* Increase cost of store/load insn if exact
25686 dependence exists and it is load insn. */
25687 enum attr_memory insn_memory = get_attr_memory (insn);
25688 if (insn_memory == MEMORY_LOAD
25689 && exact_store_load_dependency (dep_insn, insn))
25690 cost = 3;
25691 }
25692 }
25693 }
25694 }
25695
25696 default:
25697 break;
25698 }
25699
25700 return cost;
25701 }
25702
25703 /* How many alternative schedules to try. This should be as wide as the
25704 scheduling freedom in the DFA, but no wider. Making this value too
25705 large results in extra work for the scheduler. */
25706
25707 static int
25708 ia32_multipass_dfa_lookahead (void)
25709 {
25710 switch (ix86_tune)
25711 {
25712 case PROCESSOR_PENTIUM:
25713 return 2;
25714
25715 case PROCESSOR_PENTIUMPRO:
25716 case PROCESSOR_K6:
25717 return 1;
25718
25719 case PROCESSOR_BDVER1:
25720 case PROCESSOR_BDVER2:
25721 case PROCESSOR_BDVER3:
25722 case PROCESSOR_BDVER4:
25723 /* We use lookahead value 4 for BD both before and after reload
25724 schedules. Plan is to have value 8 included for O3. */
25725 return 4;
25726
25727 case PROCESSOR_CORE2:
25728 case PROCESSOR_NEHALEM:
25729 case PROCESSOR_SANDYBRIDGE:
25730 case PROCESSOR_HASWELL:
25731 case PROCESSOR_BONNELL:
25732 case PROCESSOR_SILVERMONT:
25733 case PROCESSOR_INTEL:
25734 /* Generally, we want haifa-sched:max_issue() to look ahead as far
25735 as the number of instructions that can be issued per cycle, i.e.,
25736 issue_rate. I wonder why tuning for many CPUs does not do this. */
25737 if (reload_completed)
25738 return ix86_issue_rate ();
25739 /* Don't use lookahead for pre-reload schedule to save compile time. */
25740 return 0;
25741
25742 default:
25743 return 0;
25744 }
25745 }
25746
25747 /* Return true if target platform supports macro-fusion. */
25748
25749 static bool
25750 ix86_macro_fusion_p ()
25751 {
25752 return TARGET_FUSE_CMP_AND_BRANCH;
25753 }
25754
25755 /* Check whether the current microarchitecture supports macro fusion
25756 for insn pair "CONDGEN + CONDJMP". Refer to
25757 "Intel Architectures Optimization Reference Manual". */
25758
25759 static bool
25760 ix86_macro_fusion_pair_p (rtx condgen, rtx condjmp)
25761 {
25762 rtx src, dest;
25763 rtx single_set = single_set (condgen);
25764 enum rtx_code ccode;
25765 rtx compare_set = NULL_RTX, test_if, cond;
25766 rtx alu_set = NULL_RTX, addr = NULL_RTX;
25767
25768 if (get_attr_type (condgen) != TYPE_TEST
25769 && get_attr_type (condgen) != TYPE_ICMP
25770 && get_attr_type (condgen) != TYPE_INCDEC
25771 && get_attr_type (condgen) != TYPE_ALU)
25772 return false;
25773
25774 if (single_set == NULL_RTX
25775 && !TARGET_FUSE_ALU_AND_BRANCH)
25776 return false;
25777
25778 if (single_set != NULL_RTX)
25779 compare_set = single_set;
25780 else
25781 {
25782 int i;
25783 rtx pat = PATTERN (condgen);
25784 for (i = 0; i < XVECLEN (pat, 0); i++)
25785 if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
25786 {
25787 rtx set_src = SET_SRC (XVECEXP (pat, 0, i));
25788 if (GET_CODE (set_src) == COMPARE)
25789 compare_set = XVECEXP (pat, 0, i);
25790 else
25791 alu_set = XVECEXP (pat, 0, i);
25792 }
25793 }
25794 if (compare_set == NULL_RTX)
25795 return false;
25796 src = SET_SRC (compare_set);
25797 if (GET_CODE (src) != COMPARE)
25798 return false;
25799
25800 /* Macro-fusion for cmp/test MEM-IMM + conditional jmp is not
25801 supported. */
25802 if ((MEM_P (XEXP (src, 0))
25803 && CONST_INT_P (XEXP (src, 1)))
25804 || (MEM_P (XEXP (src, 1))
25805 && CONST_INT_P (XEXP (src, 0))))
25806 return false;
25807
25808 /* No fusion for RIP-relative address. */
25809 if (MEM_P (XEXP (src, 0)))
25810 addr = XEXP (XEXP (src, 0), 0);
25811 else if (MEM_P (XEXP (src, 1)))
25812 addr = XEXP (XEXP (src, 1), 0);
25813
25814 if (addr) {
25815 ix86_address parts;
25816 int ok = ix86_decompose_address (addr, &parts);
25817 gcc_assert (ok);
25818
25819 if (rip_relative_addr_p (&parts))
25820 return false;
25821 }
25822
25823 test_if = SET_SRC (pc_set (condjmp));
25824 cond = XEXP (test_if, 0);
25825 ccode = GET_CODE (cond);
25826 /* Check whether the conditional jump uses the Sign or Overflow flags. */
25827 if (!TARGET_FUSE_CMP_AND_BRANCH_SOFLAGS
25828 && (ccode == GE
25829 || ccode == GT
25830 || ccode == LE
25831 || ccode == LT))
25832 return false;
25833
25834 /* Return true for TYPE_TEST and TYPE_ICMP. */
25835 if (get_attr_type (condgen) == TYPE_TEST
25836 || get_attr_type (condgen) == TYPE_ICMP)
25837 return true;
25838
25839 /* The following handles the case of macro-fusion for alu + jmp. */
25840 if (!TARGET_FUSE_ALU_AND_BRANCH || !alu_set)
25841 return false;
25842
25843 /* No fusion for alu op with memory destination operand. */
25844 dest = SET_DEST (alu_set);
25845 if (MEM_P (dest))
25846 return false;
25847
25848 /* Macro-fusion for inc/dec + unsigned conditional jump is not
25849 supported. */
25850 if (get_attr_type (condgen) == TYPE_INCDEC
25851 && (ccode == GEU
25852 || ccode == GTU
25853 || ccode == LEU
25854 || ccode == LTU))
25855 return false;
25856
25857 return true;
25858 }
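/* For illustration: "cmp $0, %eax; je .L1" is a fusable pair, while
   "cmpl $1, (%rsp); je .L1" (immediate compared against memory) and
   "dec %eax; ja .L1" (inc/dec paired with an unsigned jump) are rejected
   by the checks above.  */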
25859
25860 /* Try to reorder ready list to take advantage of Atom pipelined IMUL
25861 execution. It is applied if
25862 (1) IMUL instruction is on the top of list;
25863 (2) There is exactly one producer of an independent IMUL instruction in
25864 the ready list.
25865 Return index of IMUL producer if it was found and -1 otherwise. */
25866 static int
25867 do_reorder_for_imul (rtx *ready, int n_ready)
25868 {
25869 rtx insn, set, insn1, insn2;
25870 sd_iterator_def sd_it;
25871 dep_t dep;
25872 int index = -1;
25873 int i;
25874
25875 if (!TARGET_BONNELL)
25876 return index;
25877
25878 /* Check that IMUL instruction is on the top of ready list. */
25879 insn = ready[n_ready - 1];
25880 set = single_set (insn);
25881 if (!set)
25882 return index;
25883 if (!(GET_CODE (SET_SRC (set)) == MULT
25884 && GET_MODE (SET_SRC (set)) == SImode))
25885 return index;
25886
25887 /* Search for producer of independent IMUL instruction. */
25888 for (i = n_ready - 2; i >= 0; i--)
25889 {
25890 insn = ready[i];
25891 if (!NONDEBUG_INSN_P (insn))
25892 continue;
25893 /* Skip IMUL instruction. */
25894 insn2 = PATTERN (insn);
25895 if (GET_CODE (insn2) == PARALLEL)
25896 insn2 = XVECEXP (insn2, 0, 0);
25897 if (GET_CODE (insn2) == SET
25898 && GET_CODE (SET_SRC (insn2)) == MULT
25899 && GET_MODE (SET_SRC (insn2)) == SImode)
25900 continue;
25901
25902 FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
25903 {
25904 rtx con;
25905 con = DEP_CON (dep);
25906 if (!NONDEBUG_INSN_P (con))
25907 continue;
25908 insn1 = PATTERN (con);
25909 if (GET_CODE (insn1) == PARALLEL)
25910 insn1 = XVECEXP (insn1, 0, 0);
25911
25912 if (GET_CODE (insn1) == SET
25913 && GET_CODE (SET_SRC (insn1)) == MULT
25914 && GET_MODE (SET_SRC (insn1)) == SImode)
25915 {
25916 sd_iterator_def sd_it1;
25917 dep_t dep1;
25918 /* Check that INSN is the only producer of this IMUL. */
25919 index = i;
25920 FOR_EACH_DEP (con, SD_LIST_BACK, sd_it1, dep1)
25921 {
25922 rtx pro;
25923 pro = DEP_PRO (dep1);
25924 if (!NONDEBUG_INSN_P (pro))
25925 continue;
25926 if (pro != insn)
25927 index = -1;
25928 }
25929 if (index >= 0)
25930 break;
25931 }
25932 }
25933 if (index >= 0)
25934 break;
25935 }
25936 return index;
25937 }
25938
25939 /* Try to find the best candidate at the top of the ready list if two insns
25940 have the same priority - the candidate is best if the insns it depends on
25941 were scheduled earlier. Applied for Silvermont only.
25942 Return true if the top 2 insns must be interchanged. */
25943 static bool
25944 swap_top_of_ready_list (rtx *ready, int n_ready)
25945 {
25946 rtx top = ready[n_ready - 1];
25947 rtx next = ready[n_ready - 2];
25948 rtx set;
25949 sd_iterator_def sd_it;
25950 dep_t dep;
25951 int clock1 = -1;
25952 int clock2 = -1;
25953 #define INSN_TICK(INSN) (HID (INSN)->tick)
25954
25955 if (!TARGET_SILVERMONT && !TARGET_INTEL)
25956 return false;
25957
25958 if (!NONDEBUG_INSN_P (top))
25959 return false;
25960 if (!NONJUMP_INSN_P (top))
25961 return false;
25962 if (!NONDEBUG_INSN_P (next))
25963 return false;
25964 if (!NONJUMP_INSN_P (next))
25965 return false;
25966 set = single_set (top);
25967 if (!set)
25968 return false;
25969 set = single_set (next);
25970 if (!set)
25971 return false;
25972
25973 if (INSN_PRIORITY_KNOWN (top) && INSN_PRIORITY_KNOWN (next))
25974 {
25975 if (INSN_PRIORITY (top) != INSN_PRIORITY (next))
25976 return false;
25977 /* Determine the winner more precisely. */
25978 FOR_EACH_DEP (top, SD_LIST_RES_BACK, sd_it, dep)
25979 {
25980 rtx pro;
25981 pro = DEP_PRO (dep);
25982 if (!NONDEBUG_INSN_P (pro))
25983 continue;
25984 if (INSN_TICK (pro) > clock1)
25985 clock1 = INSN_TICK (pro);
25986 }
25987 FOR_EACH_DEP (next, SD_LIST_RES_BACK, sd_it, dep)
25988 {
25989 rtx pro;
25990 pro = DEP_PRO (dep);
25991 if (!NONDEBUG_INSN_P (pro))
25992 continue;
25993 if (INSN_TICK (pro) > clock2)
25994 clock2 = INSN_TICK (pro);
25995 }
25996
25997 if (clock1 == clock2)
25998 {
25999 /* Determine winner - load must win. */
26000 enum attr_memory memory1, memory2;
26001 memory1 = get_attr_memory (top);
26002 memory2 = get_attr_memory (next);
26003 if (memory2 == MEMORY_LOAD && memory1 != MEMORY_LOAD)
26004 return true;
26005 }
26006 return (bool) (clock2 < clock1);
26007 }
26008 return false;
26009 #undef INSN_TICK
26010 }
26011
26012 /* Perform possible reordering of the ready list for Atom/Silvermont only.
26013 Return issue rate. */
26014 static int
26015 ix86_sched_reorder (FILE *dump, int sched_verbose, rtx *ready, int *pn_ready,
26016 int clock_var)
26017 {
26018 int issue_rate = -1;
26019 int n_ready = *pn_ready;
26020 int i;
26021 rtx insn;
26022 int index = -1;
26023
26024 /* Set up issue rate. */
26025 issue_rate = ix86_issue_rate ();
26026
26027 /* Do reordering for BONNELL/SILVERMONT only. */
26028 if (!TARGET_BONNELL && !TARGET_SILVERMONT && !TARGET_INTEL)
26029 return issue_rate;
26030
26031 /* Nothing to do if ready list contains only 1 instruction. */
26032 if (n_ready <= 1)
26033 return issue_rate;
26034
26035 /* Do reordering for the post-reload scheduler only. */
26036 if (!reload_completed)
26037 return issue_rate;
26038
26039 if ((index = do_reorder_for_imul (ready, n_ready)) >= 0)
26040 {
26041 if (sched_verbose > 1)
26042 fprintf (dump, ";;\tatom sched_reorder: put %d insn on top\n",
26043 INSN_UID (ready[index]));
26044
26045 /* Put IMUL producer (ready[index]) at the top of ready list. */
26046 insn = ready[index];
26047 for (i = index; i < n_ready - 1; i++)
26048 ready[i] = ready[i + 1];
26049 ready[n_ready - 1] = insn;
26050 return issue_rate;
26051 }
26052 if (clock_var != 0 && swap_top_of_ready_list (ready, n_ready))
26053 {
26054 if (sched_verbose > 1)
26055 fprintf (dump, ";;\tslm sched_reorder: swap %d and %d insns\n",
26056 INSN_UID (ready[n_ready - 1]), INSN_UID (ready[n_ready - 2]));
26057 /* Swap 2 top elements of ready list. */
26058 insn = ready[n_ready - 1];
26059 ready[n_ready - 1] = ready[n_ready - 2];
26060 ready[n_ready - 2] = insn;
26061 }
26062 return issue_rate;
26063 }
26064
26065 static bool
26066 ix86_class_likely_spilled_p (reg_class_t);
26067
26068 /* Return true if the lhs of INSN is a HW function argument register and set
26069 *IS_SPILLED to true if it is a likely-spilled HW register. */
26070 static bool
26071 insn_is_function_arg (rtx insn, bool* is_spilled)
26072 {
26073 rtx dst;
26074
26075 if (!NONDEBUG_INSN_P (insn))
26076 return false;
26077 /* Call instructions are not movable; ignore them. */
26078 if (CALL_P (insn))
26079 return false;
26080 insn = PATTERN (insn);
26081 if (GET_CODE (insn) == PARALLEL)
26082 insn = XVECEXP (insn, 0, 0);
26083 if (GET_CODE (insn) != SET)
26084 return false;
26085 dst = SET_DEST (insn);
26086 if (REG_P (dst) && HARD_REGISTER_P (dst)
26087 && ix86_function_arg_regno_p (REGNO (dst)))
26088 {
26089 /* Is it likely spilled HW register? */
26090 if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst))
26091 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))))
26092 *is_spilled = true;
26093 return true;
26094 }
26095 return false;
26096 }
26097
26098 /* Add output dependencies for a chain of adjacent function arguments, but
26099 only if there is a move to a likely-spilled HW register. Return the first
26100 argument if at least one dependence was added, or NULL otherwise. */
26101 static rtx
26102 add_parameter_dependencies (rtx call, rtx head)
26103 {
26104 rtx insn;
26105 rtx last = call;
26106 rtx first_arg = NULL;
26107 bool is_spilled = false;
26108
26109 head = PREV_INSN (head);
26110
26111 /* Find the argument-passing instruction nearest to the call. */
26112 while (true)
26113 {
26114 last = PREV_INSN (last);
26115 if (last == head)
26116 return NULL;
26117 if (!NONDEBUG_INSN_P (last))
26118 continue;
26119 if (insn_is_function_arg (last, &is_spilled))
26120 break;
26121 return NULL;
26122 }
26123
26124 first_arg = last;
26125 while (true)
26126 {
26127 insn = PREV_INSN (last);
26128 if (!INSN_P (insn))
26129 break;
26130 if (insn == head)
26131 break;
26132 if (!NONDEBUG_INSN_P (insn))
26133 {
26134 last = insn;
26135 continue;
26136 }
26137 if (insn_is_function_arg (insn, &is_spilled))
26138 {
26139 /* Add an output dependence between two function arguments if the chain
26140 of output arguments contains likely-spilled HW registers. */
26141 if (is_spilled)
26142 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
26143 first_arg = last = insn;
26144 }
26145 else
26146 break;
26147 }
26148 if (!is_spilled)
26149 return NULL;
26150 return first_arg;
26151 }
26152
26153 /* Add output or anti dependency from insn to first_arg to restrict its code
26154 motion. */
26155 static void
26156 avoid_func_arg_motion (rtx first_arg, rtx insn)
26157 {
26158 rtx set;
26159 rtx tmp;
26160
26161 set = single_set (insn);
26162 if (!set)
26163 return;
26164 tmp = SET_DEST (set);
26165 if (REG_P (tmp))
26166 {
26167 /* Add output dependency to the first function argument. */
26168 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
26169 return;
26170 }
26171 /* Add anti dependency. */
26172 add_dependence (first_arg, insn, REG_DEP_ANTI);
26173 }
26174
26175 /* Avoid cross-block motion of a function argument by adding a dependency
26176 from the first non-jump instruction in BB. */
26177 static void
26178 add_dependee_for_func_arg (rtx arg, basic_block bb)
26179 {
26180 rtx insn = BB_END (bb);
26181
26182 while (insn)
26183 {
26184 if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn))
26185 {
26186 rtx set = single_set (insn);
26187 if (set)
26188 {
26189 avoid_func_arg_motion (arg, insn);
26190 return;
26191 }
26192 }
26193 if (insn == BB_HEAD (bb))
26194 return;
26195 insn = PREV_INSN (insn);
26196 }
26197 }
26198
26199 /* Hook for pre-reload schedule - avoid motion of function arguments
26200 passed in likely spilled HW registers. */
26201 static void
26202 ix86_dependencies_evaluation_hook (rtx head, rtx tail)
26203 {
26204 rtx insn;
26205 rtx first_arg = NULL;
26206 if (reload_completed)
26207 return;
26208 while (head != tail && DEBUG_INSN_P (head))
26209 head = NEXT_INSN (head);
26210 for (insn = tail; insn != head; insn = PREV_INSN (insn))
26211 if (INSN_P (insn) && CALL_P (insn))
26212 {
26213 first_arg = add_parameter_dependencies (insn, head);
26214 if (first_arg)
26215 {
26216 /* Add a dependence on the first argument in predecessor blocks, but
26217 only if the region contains more than one block. */
26218 basic_block bb = BLOCK_FOR_INSN (insn);
26219 int rgn = CONTAINING_RGN (bb->index);
26220 int nr_blks = RGN_NR_BLOCKS (rgn);
26221 /* Skip trivial regions and region head blocks that can have
26222 predecessors outside of region. */
26223 if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0)
26224 {
26225 edge e;
26226 edge_iterator ei;
26227 /* Assume that region is SCC, i.e. all immediate predecessors
26228 of non-head block are in the same region. */
26229 FOR_EACH_EDGE (e, ei, bb->preds)
26230 {
26231 /* Avoid creating loop-carried dependencies by using the
26232 topological ordering of the region. */
26233 if (BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
26234 add_dependee_for_func_arg (first_arg, e->src);
26235 }
26236 }
26237 insn = first_arg;
26238 if (insn == head)
26239 break;
26240 }
26241 }
26242 else if (first_arg)
26243 avoid_func_arg_motion (first_arg, insn);
26244 }
26245
26246 /* Hook for pre-reload schedule - set priority of moves from likely spilled
26247 HW registers to maximum, to schedule them as soon as possible. These are
26248 moves from function argument registers at the top of the function entry
26249 and moves from function return value registers after a call. */
26250 static int
26251 ix86_adjust_priority (rtx insn, int priority)
26252 {
26253 rtx set;
26254
26255 if (reload_completed)
26256 return priority;
26257
26258 if (!NONDEBUG_INSN_P (insn))
26259 return priority;
26260
26261 set = single_set (insn);
26262 if (set)
26263 {
26264 rtx tmp = SET_SRC (set);
26265 if (REG_P (tmp)
26266 && HARD_REGISTER_P (tmp)
26267 && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp))
26268 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp))))
26269 return current_sched_info->sched_max_insns_priority;
26270 }
26271
26272 return priority;
26273 }
26274
26275 /* Model decoder of Core 2/i7.
26276 Below hooks for multipass scheduling (see haifa-sched.c:max_issue)
26277 track the instruction fetch block boundaries and make sure that long
26278 (9+ bytes) instructions are assigned to D0. */
26279
26280 /* Maximum length of an insn that can be handled by
26281 a secondary decoder unit. '8' for Core 2/i7. */
26282 static int core2i7_secondary_decoder_max_insn_size;
26283
26284 /* Ifetch block size, i.e., number of bytes decoder reads per cycle.
26285 '16' for Core 2/i7. */
26286 static int core2i7_ifetch_block_size;
26287
26288 /* Maximum number of instructions decoder can handle per cycle.
26289 '6' for Core 2/i7. */
26290 static int core2i7_ifetch_block_max_insns;
26291
26292 typedef struct ix86_first_cycle_multipass_data_ *
26293 ix86_first_cycle_multipass_data_t;
26294 typedef const struct ix86_first_cycle_multipass_data_ *
26295 const_ix86_first_cycle_multipass_data_t;
26296
26297 /* A variable to store target state across calls to max_issue within
26298 one cycle. */
26299 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
26300 *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
26301
26302 /* Initialize DATA. */
26303 static void
26304 core2i7_first_cycle_multipass_init (void *_data)
26305 {
26306 ix86_first_cycle_multipass_data_t data
26307 = (ix86_first_cycle_multipass_data_t) _data;
26308
26309 data->ifetch_block_len = 0;
26310 data->ifetch_block_n_insns = 0;
26311 data->ready_try_change = NULL;
26312 data->ready_try_change_size = 0;
26313 }
26314
26315 /* Advancing the cycle; reset ifetch block counts. */
26316 static void
26317 core2i7_dfa_post_advance_cycle (void)
26318 {
26319 ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;
26320
26321 gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
26322
26323 data->ifetch_block_len = 0;
26324 data->ifetch_block_n_insns = 0;
26325 }
26326
26327 static int min_insn_size (rtx);
26328
26329 /* Filter out insns from ready_try that the core will not be able to issue
26330 on current cycle due to decoder. */
26331 static void
26332 core2i7_first_cycle_multipass_filter_ready_try
26333 (const_ix86_first_cycle_multipass_data_t data,
26334 char *ready_try, int n_ready, bool first_cycle_insn_p)
26335 {
26336 while (n_ready--)
26337 {
26338 rtx insn;
26339 int insn_size;
26340
26341 if (ready_try[n_ready])
26342 continue;
26343
26344 insn = get_ready_element (n_ready);
26345 insn_size = min_insn_size (insn);
26346
26347 if (/* If this is too long an insn for a secondary decoder ... */
26348 (!first_cycle_insn_p
26349 && insn_size > core2i7_secondary_decoder_max_insn_size)
26350 /* ... or it would not fit into the ifetch block ... */
26351 || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
26352 /* ... or the decoder is full already ... */
26353 || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
26354 /* ... mask the insn out. */
26355 {
26356 ready_try[n_ready] = 1;
26357
26358 if (data->ready_try_change)
26359 bitmap_set_bit (data->ready_try_change, n_ready);
26360 }
26361 }
26362 }
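/* For illustration: with the Core 2/i7 parameters installed below (16-byte
   fetch block, at most 6 insns per block, 8-byte limit for the secondary
   decoders), once a 7-byte and a 6-byte insn have been issued on a cycle
   only insns of at most 3 bytes still fit the fetch block, and a 9+ byte
   insn is masked out unless it is the first insn of the cycle.  */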
26363
26364 /* Prepare for a new round of multipass lookahead scheduling. */
26365 static void
26366 core2i7_first_cycle_multipass_begin (void *_data, char *ready_try, int n_ready,
26367 bool first_cycle_insn_p)
26368 {
26369 ix86_first_cycle_multipass_data_t data
26370 = (ix86_first_cycle_multipass_data_t) _data;
26371 const_ix86_first_cycle_multipass_data_t prev_data
26372 = ix86_first_cycle_multipass_data;
26373
26374 /* Restore the state from the end of the previous round. */
26375 data->ifetch_block_len = prev_data->ifetch_block_len;
26376 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;
26377
26378 /* Filter instructions that cannot be issued on current cycle due to
26379 decoder restrictions. */
26380 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
26381 first_cycle_insn_p);
26382 }
26383
26384 /* INSN is being issued in current solution. Account for its impact on
26385 the decoder model. */
26386 static void
26387 core2i7_first_cycle_multipass_issue (void *_data, char *ready_try, int n_ready,
26388 rtx insn, const void *_prev_data)
26389 {
26390 ix86_first_cycle_multipass_data_t data
26391 = (ix86_first_cycle_multipass_data_t) _data;
26392 const_ix86_first_cycle_multipass_data_t prev_data
26393 = (const_ix86_first_cycle_multipass_data_t) _prev_data;
26394
26395 int insn_size = min_insn_size (insn);
26396
26397 data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
26398 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
26399 gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
26400 && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
26401
26402 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
26403 if (!data->ready_try_change)
26404 {
26405 data->ready_try_change = sbitmap_alloc (n_ready);
26406 data->ready_try_change_size = n_ready;
26407 }
26408 else if (data->ready_try_change_size < n_ready)
26409 {
26410 data->ready_try_change = sbitmap_resize (data->ready_try_change,
26411 n_ready, 0);
26412 data->ready_try_change_size = n_ready;
26413 }
26414 bitmap_clear (data->ready_try_change);
26415
26416 /* Filter out insns from ready_try that the core will not be able to issue
26417 on current cycle due to decoder. */
26418 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
26419 false);
26420 }
26421
26422 /* Revert the effect on ready_try. */
26423 static void
26424 core2i7_first_cycle_multipass_backtrack (const void *_data,
26425 char *ready_try,
26426 int n_ready ATTRIBUTE_UNUSED)
26427 {
26428 const_ix86_first_cycle_multipass_data_t data
26429 = (const_ix86_first_cycle_multipass_data_t) _data;
26430 unsigned int i = 0;
26431 sbitmap_iterator sbi;
26432
26433 gcc_assert (bitmap_last_set_bit (data->ready_try_change) < n_ready);
26434 EXECUTE_IF_SET_IN_BITMAP (data->ready_try_change, 0, i, sbi)
26435 {
26436 ready_try[i] = 0;
26437 }
26438 }
26439
26440 /* Save the result of multipass lookahead scheduling for the next round. */
26441 static void
26442 core2i7_first_cycle_multipass_end (const void *_data)
26443 {
26444 const_ix86_first_cycle_multipass_data_t data
26445 = (const_ix86_first_cycle_multipass_data_t) _data;
26446 ix86_first_cycle_multipass_data_t next_data
26447 = ix86_first_cycle_multipass_data;
26448
26449 if (data != NULL)
26450 {
26451 next_data->ifetch_block_len = data->ifetch_block_len;
26452 next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
26453 }
26454 }
26455
26456 /* Deallocate target data. */
26457 static void
26458 core2i7_first_cycle_multipass_fini (void *_data)
26459 {
26460 ix86_first_cycle_multipass_data_t data
26461 = (ix86_first_cycle_multipass_data_t) _data;
26462
26463 if (data->ready_try_change)
26464 {
26465 sbitmap_free (data->ready_try_change);
26466 data->ready_try_change = NULL;
26467 data->ready_try_change_size = 0;
26468 }
26469 }
26470
26471 /* Prepare for scheduling pass. */
26472 static void
26473 ix86_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
26474 int verbose ATTRIBUTE_UNUSED,
26475 int max_uid ATTRIBUTE_UNUSED)
26476 {
26477 /* Install scheduling hooks for current CPU. Some of these hooks are used
26478 in time-critical parts of the scheduler, so we only set them up when
26479 they are actually used. */
26480 switch (ix86_tune)
26481 {
26482 case PROCESSOR_CORE2:
26483 case PROCESSOR_NEHALEM:
26484 case PROCESSOR_SANDYBRIDGE:
26485 case PROCESSOR_HASWELL:
26486 /* Do not perform multipass scheduling for pre-reload schedule
26487 to save compile time. */
26488 if (reload_completed)
26489 {
26490 targetm.sched.dfa_post_advance_cycle
26491 = core2i7_dfa_post_advance_cycle;
26492 targetm.sched.first_cycle_multipass_init
26493 = core2i7_first_cycle_multipass_init;
26494 targetm.sched.first_cycle_multipass_begin
26495 = core2i7_first_cycle_multipass_begin;
26496 targetm.sched.first_cycle_multipass_issue
26497 = core2i7_first_cycle_multipass_issue;
26498 targetm.sched.first_cycle_multipass_backtrack
26499 = core2i7_first_cycle_multipass_backtrack;
26500 targetm.sched.first_cycle_multipass_end
26501 = core2i7_first_cycle_multipass_end;
26502 targetm.sched.first_cycle_multipass_fini
26503 = core2i7_first_cycle_multipass_fini;
26504
26505 /* Set decoder parameters. */
26506 core2i7_secondary_decoder_max_insn_size = 8;
26507 core2i7_ifetch_block_size = 16;
26508 core2i7_ifetch_block_max_insns = 6;
26509 break;
26510 }
26511 /* ... Fall through ... */
26512 default:
26513 targetm.sched.dfa_post_advance_cycle = NULL;
26514 targetm.sched.first_cycle_multipass_init = NULL;
26515 targetm.sched.first_cycle_multipass_begin = NULL;
26516 targetm.sched.first_cycle_multipass_issue = NULL;
26517 targetm.sched.first_cycle_multipass_backtrack = NULL;
26518 targetm.sched.first_cycle_multipass_end = NULL;
26519 targetm.sched.first_cycle_multipass_fini = NULL;
26520 break;
26521 }
26522 }
26523
26524 \f
26525 /* Compute the alignment given to a constant that is being placed in memory.
26526 EXP is the constant and ALIGN is the alignment that the object would
26527 ordinarily have.
26528 The value of this function is used instead of that alignment to align
26529 the object. */
26530
26531 int
26532 ix86_constant_alignment (tree exp, int align)
26533 {
26534 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
26535 || TREE_CODE (exp) == INTEGER_CST)
26536 {
26537 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
26538 return 64;
26539 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
26540 return 128;
26541 }
26542 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
26543 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
26544 return BITS_PER_WORD;
26545
26546 return align;
26547 }
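/* For illustration: a double (DFmode) constant placed in memory is given
   64-bit alignment even when the default would be 32, and a string constant
   of length 31 or more is word-aligned unless optimizing for size.  */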
26548
26549 /* Compute the alignment for a static variable.
26550 TYPE is the data type, and ALIGN is the alignment that
26551 the object would ordinarily have. The value of this function is used
26552 instead of that alignment to align the object. */
26553
26554 int
26555 ix86_data_alignment (tree type, int align, bool opt)
26556 {
26557 /* GCC 4.8 and earlier used to incorrectly assume this alignment even
26558 for symbols from other compilation units or symbols that don't need
26559 to bind locally. In order to preserve some ABI compatibility with
26560 those compilers, ensure we don't decrease alignment from what we
26561 used to assume. */
26562
26563 int max_align_compat
26564 = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
26565
26566 /* A data structure equal to or greater than the size of a cache line
26567 (64 bytes in the Pentium 4 and other recent Intel processors, including
26568 processors based on Intel Core microarchitecture) should be aligned
26569 so that its base address is a multiple of the cache line size. */
26570
26571 int max_align
26572 = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT);
26573
26574 if (max_align < BITS_PER_WORD)
26575 max_align = BITS_PER_WORD;
26576
26577 if (opt
26578 && AGGREGATE_TYPE_P (type)
26579 && TYPE_SIZE (type)
26580 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
26581 {
26582 if ((TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align_compat
26583 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
26584 && align < max_align_compat)
26585 align = max_align_compat;
26586 if ((TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
26587 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
26588 && align < max_align)
26589 align = max_align;
26590 }
26591
26592 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
26593 to 16byte boundary. */
26594 if (TARGET_64BIT)
26595 {
26596 if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE)
26597 && TYPE_SIZE (type)
26598 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
26599 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
26600 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
26601 return 128;
26602 }
26603
26604 if (!opt)
26605 return align;
26606
26607 if (TREE_CODE (type) == ARRAY_TYPE)
26608 {
26609 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
26610 return 64;
26611 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
26612 return 128;
26613 }
26614 else if (TREE_CODE (type) == COMPLEX_TYPE)
26615 {
26616
26617 if (TYPE_MODE (type) == DCmode && align < 64)
26618 return 64;
26619 if ((TYPE_MODE (type) == XCmode
26620 || TYPE_MODE (type) == TCmode) && align < 128)
26621 return 128;
26622 }
26623 else if ((TREE_CODE (type) == RECORD_TYPE
26624 || TREE_CODE (type) == UNION_TYPE
26625 || TREE_CODE (type) == QUAL_UNION_TYPE)
26626 && TYPE_FIELDS (type))
26627 {
26628 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
26629 return 64;
26630 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
26631 return 128;
26632 }
26633 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
26634 || TREE_CODE (type) == INTEGER_TYPE)
26635 {
26636 if (TYPE_MODE (type) == DFmode && align < 64)
26637 return 64;
26638 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
26639 return 128;
26640 }
26641
26642 return align;
26643 }
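/* For illustration: assuming a tuning with a 64-byte prefetch block, a
   static aggregate of 64 bytes or more is aligned to the cache line
   (512 bits) when OPT is set, and in 64-bit mode an array of 16 bytes or
   more gets at least 128-bit alignment as required by the x86-64 ABI.  */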
26644
26645 /* Compute the alignment for a local variable or a stack slot. EXP is
26646 the data type or decl itself, MODE is the widest mode available and
26647 ALIGN is the alignment that the object would ordinarily have. The
26648 value of this macro is used instead of that alignment to align the
26649 object. */
26650
26651 unsigned int
26652 ix86_local_alignment (tree exp, enum machine_mode mode,
26653 unsigned int align)
26654 {
26655 tree type, decl;
26656
26657 if (exp && DECL_P (exp))
26658 {
26659 type = TREE_TYPE (exp);
26660 decl = exp;
26661 }
26662 else
26663 {
26664 type = exp;
26665 decl = NULL;
26666 }
26667
26668 /* Don't do dynamic stack realignment for long long objects with
26669 -mpreferred-stack-boundary=2. */
26670 if (!TARGET_64BIT
26671 && align == 64
26672 && ix86_preferred_stack_boundary < 64
26673 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
26674 && (!type || !TYPE_USER_ALIGN (type))
26675 && (!decl || !DECL_USER_ALIGN (decl)))
26676 align = 32;
26677
26678 /* If TYPE is NULL, we are allocating a stack slot for caller-save
26679 register in MODE. We will return the largest alignment of XF
26680 and DF. */
26681 if (!type)
26682 {
26683 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
26684 align = GET_MODE_ALIGNMENT (DFmode);
26685 return align;
26686 }
26687
26688 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
26689 to 16byte boundary. Exact wording is:
26690
26691 An array uses the same alignment as its elements, except that a local or
26692 global array variable of length at least 16 bytes or
26693 a C99 variable-length array variable always has alignment of at least 16 bytes.
26694
26695 This was added to allow use of aligned SSE instructions on arrays. This
26696 rule is meant for static storage (where the compiler cannot do the analysis
26697 by itself). We follow it for automatic variables only when convenient.
26698 We fully control everything in the function being compiled and functions
26699 from other units cannot rely on the alignment.
26700
26701 Exclude the va_list type. It is the common case of a local array where
26702 we cannot benefit from the alignment.
26703
26704 TODO: Probably one should optimize for size only when var is not escaping. */
26705 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
26706 && TARGET_SSE)
26707 {
26708 if (AGGREGATE_TYPE_P (type)
26709 && (va_list_type_node == NULL_TREE
26710 || (TYPE_MAIN_VARIANT (type)
26711 != TYPE_MAIN_VARIANT (va_list_type_node)))
26712 && TYPE_SIZE (type)
26713 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
26714 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
26715 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
26716 return 128;
26717 }
26718 if (TREE_CODE (type) == ARRAY_TYPE)
26719 {
26720 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
26721 return 64;
26722 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
26723 return 128;
26724 }
26725 else if (TREE_CODE (type) == COMPLEX_TYPE)
26726 {
26727 if (TYPE_MODE (type) == DCmode && align < 64)
26728 return 64;
26729 if ((TYPE_MODE (type) == XCmode
26730 || TYPE_MODE (type) == TCmode) && align < 128)
26731 return 128;
26732 }
26733 else if ((TREE_CODE (type) == RECORD_TYPE
26734 || TREE_CODE (type) == UNION_TYPE
26735 || TREE_CODE (type) == QUAL_UNION_TYPE)
26736 && TYPE_FIELDS (type))
26737 {
26738 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
26739 return 64;
26740 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
26741 return 128;
26742 }
26743 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
26744 || TREE_CODE (type) == INTEGER_TYPE)
26745 {
26746
26747 if (TYPE_MODE (type) == DFmode && align < 64)
26748 return 64;
26749 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
26750 return 128;
26751 }
26752 return align;
26753 }
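/* For illustration: when compiling for speed with SSE in 64-bit mode, a
   local aggregate of 16 bytes or more (other than a va_list) is given
   128-bit stack alignment, which lets aligned SSE loads and stores be used
   on it.  */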
26754
26755 /* Compute the minimum required alignment for dynamic stack realignment
26756 purposes for a local variable, parameter or a stack slot. EXP is
26757 the data type or decl itself, MODE is its mode and ALIGN is the
26758 alignment that the object would ordinarily have. */
26759
26760 unsigned int
26761 ix86_minimum_alignment (tree exp, enum machine_mode mode,
26762 unsigned int align)
26763 {
26764 tree type, decl;
26765
26766 if (exp && DECL_P (exp))
26767 {
26768 type = TREE_TYPE (exp);
26769 decl = exp;
26770 }
26771 else
26772 {
26773 type = exp;
26774 decl = NULL;
26775 }
26776
26777 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
26778 return align;
26779
26780 /* Don't do dynamic stack realignment for long long objects with
26781 -mpreferred-stack-boundary=2. */
26782 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
26783 && (!type || !TYPE_USER_ALIGN (type))
26784 && (!decl || !DECL_USER_ALIGN (decl)))
26785 return 32;
26786
26787 return align;
26788 }
26789 \f
26790 /* Find a location for the static chain incoming to a nested function.
26791 This is a register, unless all free registers are used by arguments. */
26792
26793 static rtx
26794 ix86_static_chain (const_tree fndecl, bool incoming_p)
26795 {
26796 unsigned regno;
26797
26798 if (!DECL_STATIC_CHAIN (fndecl))
26799 return NULL;
26800
26801 if (TARGET_64BIT)
26802 {
26803 /* We always use R10 in 64-bit mode. */
26804 regno = R10_REG;
26805 }
26806 else
26807 {
26808 tree fntype;
26809 unsigned int ccvt;
26810
26811 /* By default in 32-bit mode we use ECX to pass the static chain. */
26812 regno = CX_REG;
26813
26814 fntype = TREE_TYPE (fndecl);
26815 ccvt = ix86_get_callcvt (fntype);
26816 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
26817 {
26818 /* Fastcall functions use ecx/edx for arguments, which leaves
26819 us with EAX for the static chain.
26820 Thiscall functions use ecx for arguments, which also
26821 leaves us with EAX for the static chain. */
26822 regno = AX_REG;
26823 }
26824 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
26825 {
26826 /* Thiscall functions use ecx for arguments, which leaves
26827 us with EAX and EDX for the static chain.
26828 We use EAX for ABI compatibility. */
26829 regno = AX_REG;
26830 }
26831 else if (ix86_function_regparm (fntype, fndecl) == 3)
26832 {
26833 /* For regparm 3, we have no free call-clobbered registers in
26834 which to store the static chain. In order to implement this,
26835 we have the trampoline push the static chain to the stack.
26836 However, we can't push a value below the return address when
26837 we call the nested function directly, so we have to use an
26838 alternate entry point. For this we use ESI, and have the
26839 alternate entry point push ESI, so that things appear the
26840 same once we're executing the nested function. */
26841 if (incoming_p)
26842 {
26843 if (fndecl == current_function_decl)
26844 ix86_static_chain_on_stack = true;
26845 return gen_frame_mem (SImode,
26846 plus_constant (Pmode,
26847 arg_pointer_rtx, -8));
26848 }
26849 regno = SI_REG;
26850 }
26851 }
26852
26853 return gen_rtx_REG (Pmode, regno);
26854 }
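/* Summary of the choices above, for illustration:

      64-bit code                     R10
      32-bit, default conventions     ECX
      32-bit, fastcall or thiscall    EAX
      32-bit, regparm (3)             no free register; the trampoline
                                      pushes the chain on the stack, and
                                      direct calls pass it in ESI through
                                      an alternate entry point that
                                      pushes ESI

   For example, a nested function declared fastcall receives its static
   chain in EAX rather than ECX.  */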
26855
26856 /* Emit RTL insns to initialize the variable parts of a trampoline.
26857 FNDECL is the decl of the target address; M_TRAMP is a MEM for
26858 the trampoline, and CHAIN_VALUE is an RTX for the static chain
26859 to be passed to the target function. */
26860
26861 static void
26862 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
26863 {
26864 rtx mem, fnaddr;
26865 int opcode;
26866 int offset = 0;
26867
26868 fnaddr = XEXP (DECL_RTL (fndecl), 0);
26869
26870 if (TARGET_64BIT)
26871 {
26872 int size;
26873
26874 /* Load the function address into r11. Try to load the address
26875 using the shorter movl instead of movabs. We may want to support
26876 movq for kernel mode, but the kernel does not use trampolines at
26877 the moment. FNADDR is a 32-bit address and may not be in
26878 DImode when ptr_mode == SImode. Always use movl in this
26879 case. */
26880 if (ptr_mode == SImode
26881 || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
26882 {
26883 fnaddr = copy_addr_to_reg (fnaddr);
26884
26885 mem = adjust_address (m_tramp, HImode, offset);
26886 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
26887
26888 mem = adjust_address (m_tramp, SImode, offset + 2);
26889 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
26890 offset += 6;
26891 }
26892 else
26893 {
26894 mem = adjust_address (m_tramp, HImode, offset);
26895 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
26896
26897 mem = adjust_address (m_tramp, DImode, offset + 2);
26898 emit_move_insn (mem, fnaddr);
26899 offset += 10;
26900 }
26901
26902 /* Load the static chain into r10 using movabs. Use the shorter
26903 movl instead of movabs when ptr_mode == SImode. */
26904 if (ptr_mode == SImode)
26905 {
26906 opcode = 0xba41;
26907 size = 6;
26908 }
26909 else
26910 {
26911 opcode = 0xba49;
26912 size = 10;
26913 }
26914
26915 mem = adjust_address (m_tramp, HImode, offset);
26916 emit_move_insn (mem, gen_int_mode (opcode, HImode));
26917
26918 mem = adjust_address (m_tramp, ptr_mode, offset + 2);
26919 emit_move_insn (mem, chain_value);
26920 offset += size;
26921
26922 /* Jump to r11; the last (unused) byte is a nop, only there to
26923 pad the write out to a single 32-bit store. */
26924 mem = adjust_address (m_tramp, SImode, offset);
26925 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
26926 offset += 4;
26927 }
26928 else
26929 {
26930 rtx disp, chain;
26931
26932 /* Depending on the static chain location, either load a register
26933 with a constant, or push the constant to the stack. All of the
26934 instructions are the same size. */
26935 chain = ix86_static_chain (fndecl, true);
26936 if (REG_P (chain))
26937 {
26938 switch (REGNO (chain))
26939 {
26940 case AX_REG:
26941 opcode = 0xb8; break;
26942 case CX_REG:
26943 opcode = 0xb9; break;
26944 default:
26945 gcc_unreachable ();
26946 }
26947 }
26948 else
26949 opcode = 0x68;
26950
26951 mem = adjust_address (m_tramp, QImode, offset);
26952 emit_move_insn (mem, gen_int_mode (opcode, QImode));
26953
26954 mem = adjust_address (m_tramp, SImode, offset + 1);
26955 emit_move_insn (mem, chain_value);
26956 offset += 5;
26957
26958 mem = adjust_address (m_tramp, QImode, offset);
26959 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
26960
26961 mem = adjust_address (m_tramp, SImode, offset + 1);
26962
26963 /* Compute the offset from the end of the jmp to the target function.
26964 When the trampoline stores the static chain on the stack, we need
26965 to skip the target's first insn, which pushes the (call-saved)
26966 static chain register; this push is 1 byte. */
26967 offset += 5;
26968 disp = expand_binop (SImode, sub_optab, fnaddr,
26969 plus_constant (Pmode, XEXP (m_tramp, 0),
26970 offset - (MEM_P (chain) ? 1 : 0)),
26971 NULL_RTX, 1, OPTAB_DIRECT);
26972 emit_move_insn (mem, disp);
26973 }
26974
26975 gcc_assert (offset <= TRAMPOLINE_SIZE);
26976
26977 #ifdef HAVE_ENABLE_EXECUTE_STACK
26978 #ifdef CHECK_EXECUTE_STACK_ENABLED
26979 if (CHECK_EXECUTE_STACK_ENABLED)
26980 #endif
26981 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
26982 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
26983 #endif
26984 }
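/* For reference, one possible encoding produced above: the 64-bit
   trampoline in the full movabs case (ptr_mode == DImode, address not
   representable as a 32-bit zero-extended immediate).  The bytes come
   from the little-endian stores of 0xbb49, 0xba49 and 0x90e3ff49:

       49 bb <imm64 fnaddr>     movabs $fnaddr, %r11
       49 ba <imm64 chain>      movabs $chain,  %r10
       49 ff e3                 jmp    *%r11
       90                       nop (pads the final store to 4 bytes)

   24 bytes in total, which must fit within TRAMPOLINE_SIZE; the movl
   forms emitted above are shorter when 32-bit immediates suffice.  */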
26985 \f
26986 /* The following file contains several enumerations and data structures
26987 built from the definitions in i386-builtin-types.def. */
26988
26989 #include "i386-builtin-types.inc"
26990
26991 /* Table for the ix86 builtin non-function types. */
26992 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
26993
26994 /* Retrieve an element from the above table, building some of
26995 the types lazily. */
26996
26997 static tree
26998 ix86_get_builtin_type (enum ix86_builtin_type tcode)
26999 {
27000 unsigned int index;
27001 tree type, itype;
27002
27003 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
27004
27005 type = ix86_builtin_type_tab[(int) tcode];
27006 if (type != NULL)
27007 return type;
27008
27009 gcc_assert (tcode > IX86_BT_LAST_PRIM);
27010 if (tcode <= IX86_BT_LAST_VECT)
27011 {
27012 enum machine_mode mode;
27013
27014 index = tcode - IX86_BT_LAST_PRIM - 1;
27015 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
27016 mode = ix86_builtin_type_vect_mode[index];
27017
27018 type = build_vector_type_for_mode (itype, mode);
27019 }
27020 else
27021 {
27022 int quals;
27023
27024 index = tcode - IX86_BT_LAST_VECT - 1;
27025 if (tcode <= IX86_BT_LAST_PTR)
27026 quals = TYPE_UNQUALIFIED;
27027 else
27028 quals = TYPE_QUAL_CONST;
27029
27030 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
27031 if (quals != TYPE_UNQUALIFIED)
27032 itype = build_qualified_type (itype, quals);
27033
27034 type = build_pointer_type (itype);
27035 }
27036
27037 ix86_builtin_type_tab[(int) tcode] = type;
27038 return type;
27039 }
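/* For illustration (the concrete type codes below come from
   i386-builtin-types.inc and are only assumed here): a vector code such
   as IX86_BT_V4SF would be materialized lazily as

       build_vector_type_for_mode (float_type_node, V4SFmode)

   while a const-pointer code qualifies its base type with
   TYPE_QUAL_CONST and wraps it in build_pointer_type.  Either way the
   result is memoized in ix86_builtin_type_tab, so each tree is built
   only once.  */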
27040
27041 /* Table for the ix86 builtin function types. */
27042 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
27043
27044 /* Retrieve an element from the above table, building some of
27045 the types lazily. */
27046
27047 static tree
27048 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
27049 {
27050 tree type;
27051
27052 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
27053
27054 type = ix86_builtin_func_type_tab[(int) tcode];
27055 if (type != NULL)
27056 return type;
27057
27058 if (tcode <= IX86_BT_LAST_FUNC)
27059 {
27060 unsigned start = ix86_builtin_func_start[(int) tcode];
27061 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
27062 tree rtype, atype, args = void_list_node;
27063 unsigned i;
27064
27065 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
27066 for (i = after - 1; i > start; --i)
27067 {
27068 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
27069 args = tree_cons (NULL, atype, args);
27070 }
27071
27072 type = build_function_type (rtype, args);
27073 }
27074 else
27075 {
27076 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
27077 enum ix86_builtin_func_type icode;
27078
27079 icode = ix86_builtin_func_alias_base[index];
27080 type = ix86_get_builtin_func_type (icode);
27081 }
27082
27083 ix86_builtin_func_type_tab[(int) tcode] = type;
27084 return type;
27085 }
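/* For illustration, assuming a non-alias code whose slice of
   ix86_builtin_func_args is { V4SF, V4SF, V4SF } (return type first):
   the loop above walks the arguments in reverse, building

       tree_cons (NULL, v4sf, tree_cons (NULL, v4sf, void_list_node))

   which yields a prototype equivalent to v4sf f (v4sf, v4sf).  Alias
   codes simply reuse the function type of their base code.  */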
27086
27087
27088 /* Codes for all the SSE/MMX builtins. */
27089 enum ix86_builtins
27090 {
27091 IX86_BUILTIN_ADDPS,
27092 IX86_BUILTIN_ADDSS,
27093 IX86_BUILTIN_DIVPS,
27094 IX86_BUILTIN_DIVSS,
27095 IX86_BUILTIN_MULPS,
27096 IX86_BUILTIN_MULSS,
27097 IX86_BUILTIN_SUBPS,
27098 IX86_BUILTIN_SUBSS,
27099
27100 IX86_BUILTIN_CMPEQPS,
27101 IX86_BUILTIN_CMPLTPS,
27102 IX86_BUILTIN_CMPLEPS,
27103 IX86_BUILTIN_CMPGTPS,
27104 IX86_BUILTIN_CMPGEPS,
27105 IX86_BUILTIN_CMPNEQPS,
27106 IX86_BUILTIN_CMPNLTPS,
27107 IX86_BUILTIN_CMPNLEPS,
27108 IX86_BUILTIN_CMPNGTPS,
27109 IX86_BUILTIN_CMPNGEPS,
27110 IX86_BUILTIN_CMPORDPS,
27111 IX86_BUILTIN_CMPUNORDPS,
27112 IX86_BUILTIN_CMPEQSS,
27113 IX86_BUILTIN_CMPLTSS,
27114 IX86_BUILTIN_CMPLESS,
27115 IX86_BUILTIN_CMPNEQSS,
27116 IX86_BUILTIN_CMPNLTSS,
27117 IX86_BUILTIN_CMPNLESS,
27118 IX86_BUILTIN_CMPORDSS,
27119 IX86_BUILTIN_CMPUNORDSS,
27120
27121 IX86_BUILTIN_COMIEQSS,
27122 IX86_BUILTIN_COMILTSS,
27123 IX86_BUILTIN_COMILESS,
27124 IX86_BUILTIN_COMIGTSS,
27125 IX86_BUILTIN_COMIGESS,
27126 IX86_BUILTIN_COMINEQSS,
27127 IX86_BUILTIN_UCOMIEQSS,
27128 IX86_BUILTIN_UCOMILTSS,
27129 IX86_BUILTIN_UCOMILESS,
27130 IX86_BUILTIN_UCOMIGTSS,
27131 IX86_BUILTIN_UCOMIGESS,
27132 IX86_BUILTIN_UCOMINEQSS,
27133
27134 IX86_BUILTIN_CVTPI2PS,
27135 IX86_BUILTIN_CVTPS2PI,
27136 IX86_BUILTIN_CVTSI2SS,
27137 IX86_BUILTIN_CVTSI642SS,
27138 IX86_BUILTIN_CVTSS2SI,
27139 IX86_BUILTIN_CVTSS2SI64,
27140 IX86_BUILTIN_CVTTPS2PI,
27141 IX86_BUILTIN_CVTTSS2SI,
27142 IX86_BUILTIN_CVTTSS2SI64,
27143
27144 IX86_BUILTIN_MAXPS,
27145 IX86_BUILTIN_MAXSS,
27146 IX86_BUILTIN_MINPS,
27147 IX86_BUILTIN_MINSS,
27148
27149 IX86_BUILTIN_LOADUPS,
27150 IX86_BUILTIN_STOREUPS,
27151 IX86_BUILTIN_MOVSS,
27152
27153 IX86_BUILTIN_MOVHLPS,
27154 IX86_BUILTIN_MOVLHPS,
27155 IX86_BUILTIN_LOADHPS,
27156 IX86_BUILTIN_LOADLPS,
27157 IX86_BUILTIN_STOREHPS,
27158 IX86_BUILTIN_STORELPS,
27159
27160 IX86_BUILTIN_MASKMOVQ,
27161 IX86_BUILTIN_MOVMSKPS,
27162 IX86_BUILTIN_PMOVMSKB,
27163
27164 IX86_BUILTIN_MOVNTPS,
27165 IX86_BUILTIN_MOVNTQ,
27166
27167 IX86_BUILTIN_LOADDQU,
27168 IX86_BUILTIN_STOREDQU,
27169
27170 IX86_BUILTIN_PACKSSWB,
27171 IX86_BUILTIN_PACKSSDW,
27172 IX86_BUILTIN_PACKUSWB,
27173
27174 IX86_BUILTIN_PADDB,
27175 IX86_BUILTIN_PADDW,
27176 IX86_BUILTIN_PADDD,
27177 IX86_BUILTIN_PADDQ,
27178 IX86_BUILTIN_PADDSB,
27179 IX86_BUILTIN_PADDSW,
27180 IX86_BUILTIN_PADDUSB,
27181 IX86_BUILTIN_PADDUSW,
27182 IX86_BUILTIN_PSUBB,
27183 IX86_BUILTIN_PSUBW,
27184 IX86_BUILTIN_PSUBD,
27185 IX86_BUILTIN_PSUBQ,
27186 IX86_BUILTIN_PSUBSB,
27187 IX86_BUILTIN_PSUBSW,
27188 IX86_BUILTIN_PSUBUSB,
27189 IX86_BUILTIN_PSUBUSW,
27190
27191 IX86_BUILTIN_PAND,
27192 IX86_BUILTIN_PANDN,
27193 IX86_BUILTIN_POR,
27194 IX86_BUILTIN_PXOR,
27195
27196 IX86_BUILTIN_PAVGB,
27197 IX86_BUILTIN_PAVGW,
27198
27199 IX86_BUILTIN_PCMPEQB,
27200 IX86_BUILTIN_PCMPEQW,
27201 IX86_BUILTIN_PCMPEQD,
27202 IX86_BUILTIN_PCMPGTB,
27203 IX86_BUILTIN_PCMPGTW,
27204 IX86_BUILTIN_PCMPGTD,
27205
27206 IX86_BUILTIN_PMADDWD,
27207
27208 IX86_BUILTIN_PMAXSW,
27209 IX86_BUILTIN_PMAXUB,
27210 IX86_BUILTIN_PMINSW,
27211 IX86_BUILTIN_PMINUB,
27212
27213 IX86_BUILTIN_PMULHUW,
27214 IX86_BUILTIN_PMULHW,
27215 IX86_BUILTIN_PMULLW,
27216
27217 IX86_BUILTIN_PSADBW,
27218 IX86_BUILTIN_PSHUFW,
27219
27220 IX86_BUILTIN_PSLLW,
27221 IX86_BUILTIN_PSLLD,
27222 IX86_BUILTIN_PSLLQ,
27223 IX86_BUILTIN_PSRAW,
27224 IX86_BUILTIN_PSRAD,
27225 IX86_BUILTIN_PSRLW,
27226 IX86_BUILTIN_PSRLD,
27227 IX86_BUILTIN_PSRLQ,
27228 IX86_BUILTIN_PSLLWI,
27229 IX86_BUILTIN_PSLLDI,
27230 IX86_BUILTIN_PSLLQI,
27231 IX86_BUILTIN_PSRAWI,
27232 IX86_BUILTIN_PSRADI,
27233 IX86_BUILTIN_PSRLWI,
27234 IX86_BUILTIN_PSRLDI,
27235 IX86_BUILTIN_PSRLQI,
27236
27237 IX86_BUILTIN_PUNPCKHBW,
27238 IX86_BUILTIN_PUNPCKHWD,
27239 IX86_BUILTIN_PUNPCKHDQ,
27240 IX86_BUILTIN_PUNPCKLBW,
27241 IX86_BUILTIN_PUNPCKLWD,
27242 IX86_BUILTIN_PUNPCKLDQ,
27243
27244 IX86_BUILTIN_SHUFPS,
27245
27246 IX86_BUILTIN_RCPPS,
27247 IX86_BUILTIN_RCPSS,
27248 IX86_BUILTIN_RSQRTPS,
27249 IX86_BUILTIN_RSQRTPS_NR,
27250 IX86_BUILTIN_RSQRTSS,
27251 IX86_BUILTIN_RSQRTF,
27252 IX86_BUILTIN_SQRTPS,
27253 IX86_BUILTIN_SQRTPS_NR,
27254 IX86_BUILTIN_SQRTSS,
27255
27256 IX86_BUILTIN_UNPCKHPS,
27257 IX86_BUILTIN_UNPCKLPS,
27258
27259 IX86_BUILTIN_ANDPS,
27260 IX86_BUILTIN_ANDNPS,
27261 IX86_BUILTIN_ORPS,
27262 IX86_BUILTIN_XORPS,
27263
27264 IX86_BUILTIN_EMMS,
27265 IX86_BUILTIN_LDMXCSR,
27266 IX86_BUILTIN_STMXCSR,
27267 IX86_BUILTIN_SFENCE,
27268
27269 IX86_BUILTIN_FXSAVE,
27270 IX86_BUILTIN_FXRSTOR,
27271 IX86_BUILTIN_FXSAVE64,
27272 IX86_BUILTIN_FXRSTOR64,
27273
27274 IX86_BUILTIN_XSAVE,
27275 IX86_BUILTIN_XRSTOR,
27276 IX86_BUILTIN_XSAVE64,
27277 IX86_BUILTIN_XRSTOR64,
27278
27279 IX86_BUILTIN_XSAVEOPT,
27280 IX86_BUILTIN_XSAVEOPT64,
27281
27282 /* 3DNow! Original */
27283 IX86_BUILTIN_FEMMS,
27284 IX86_BUILTIN_PAVGUSB,
27285 IX86_BUILTIN_PF2ID,
27286 IX86_BUILTIN_PFACC,
27287 IX86_BUILTIN_PFADD,
27288 IX86_BUILTIN_PFCMPEQ,
27289 IX86_BUILTIN_PFCMPGE,
27290 IX86_BUILTIN_PFCMPGT,
27291 IX86_BUILTIN_PFMAX,
27292 IX86_BUILTIN_PFMIN,
27293 IX86_BUILTIN_PFMUL,
27294 IX86_BUILTIN_PFRCP,
27295 IX86_BUILTIN_PFRCPIT1,
27296 IX86_BUILTIN_PFRCPIT2,
27297 IX86_BUILTIN_PFRSQIT1,
27298 IX86_BUILTIN_PFRSQRT,
27299 IX86_BUILTIN_PFSUB,
27300 IX86_BUILTIN_PFSUBR,
27301 IX86_BUILTIN_PI2FD,
27302 IX86_BUILTIN_PMULHRW,
27303
27304 /* 3DNow! Athlon Extensions */
27305 IX86_BUILTIN_PF2IW,
27306 IX86_BUILTIN_PFNACC,
27307 IX86_BUILTIN_PFPNACC,
27308 IX86_BUILTIN_PI2FW,
27309 IX86_BUILTIN_PSWAPDSI,
27310 IX86_BUILTIN_PSWAPDSF,
27311
27312 /* SSE2 */
27313 IX86_BUILTIN_ADDPD,
27314 IX86_BUILTIN_ADDSD,
27315 IX86_BUILTIN_DIVPD,
27316 IX86_BUILTIN_DIVSD,
27317 IX86_BUILTIN_MULPD,
27318 IX86_BUILTIN_MULSD,
27319 IX86_BUILTIN_SUBPD,
27320 IX86_BUILTIN_SUBSD,
27321
27322 IX86_BUILTIN_CMPEQPD,
27323 IX86_BUILTIN_CMPLTPD,
27324 IX86_BUILTIN_CMPLEPD,
27325 IX86_BUILTIN_CMPGTPD,
27326 IX86_BUILTIN_CMPGEPD,
27327 IX86_BUILTIN_CMPNEQPD,
27328 IX86_BUILTIN_CMPNLTPD,
27329 IX86_BUILTIN_CMPNLEPD,
27330 IX86_BUILTIN_CMPNGTPD,
27331 IX86_BUILTIN_CMPNGEPD,
27332 IX86_BUILTIN_CMPORDPD,
27333 IX86_BUILTIN_CMPUNORDPD,
27334 IX86_BUILTIN_CMPEQSD,
27335 IX86_BUILTIN_CMPLTSD,
27336 IX86_BUILTIN_CMPLESD,
27337 IX86_BUILTIN_CMPNEQSD,
27338 IX86_BUILTIN_CMPNLTSD,
27339 IX86_BUILTIN_CMPNLESD,
27340 IX86_BUILTIN_CMPORDSD,
27341 IX86_BUILTIN_CMPUNORDSD,
27342
27343 IX86_BUILTIN_COMIEQSD,
27344 IX86_BUILTIN_COMILTSD,
27345 IX86_BUILTIN_COMILESD,
27346 IX86_BUILTIN_COMIGTSD,
27347 IX86_BUILTIN_COMIGESD,
27348 IX86_BUILTIN_COMINEQSD,
27349 IX86_BUILTIN_UCOMIEQSD,
27350 IX86_BUILTIN_UCOMILTSD,
27351 IX86_BUILTIN_UCOMILESD,
27352 IX86_BUILTIN_UCOMIGTSD,
27353 IX86_BUILTIN_UCOMIGESD,
27354 IX86_BUILTIN_UCOMINEQSD,
27355
27356 IX86_BUILTIN_MAXPD,
27357 IX86_BUILTIN_MAXSD,
27358 IX86_BUILTIN_MINPD,
27359 IX86_BUILTIN_MINSD,
27360
27361 IX86_BUILTIN_ANDPD,
27362 IX86_BUILTIN_ANDNPD,
27363 IX86_BUILTIN_ORPD,
27364 IX86_BUILTIN_XORPD,
27365
27366 IX86_BUILTIN_SQRTPD,
27367 IX86_BUILTIN_SQRTSD,
27368
27369 IX86_BUILTIN_UNPCKHPD,
27370 IX86_BUILTIN_UNPCKLPD,
27371
27372 IX86_BUILTIN_SHUFPD,
27373
27374 IX86_BUILTIN_LOADUPD,
27375 IX86_BUILTIN_STOREUPD,
27376 IX86_BUILTIN_MOVSD,
27377
27378 IX86_BUILTIN_LOADHPD,
27379 IX86_BUILTIN_LOADLPD,
27380
27381 IX86_BUILTIN_CVTDQ2PD,
27382 IX86_BUILTIN_CVTDQ2PS,
27383
27384 IX86_BUILTIN_CVTPD2DQ,
27385 IX86_BUILTIN_CVTPD2PI,
27386 IX86_BUILTIN_CVTPD2PS,
27387 IX86_BUILTIN_CVTTPD2DQ,
27388 IX86_BUILTIN_CVTTPD2PI,
27389
27390 IX86_BUILTIN_CVTPI2PD,
27391 IX86_BUILTIN_CVTSI2SD,
27392 IX86_BUILTIN_CVTSI642SD,
27393
27394 IX86_BUILTIN_CVTSD2SI,
27395 IX86_BUILTIN_CVTSD2SI64,
27396 IX86_BUILTIN_CVTSD2SS,
27397 IX86_BUILTIN_CVTSS2SD,
27398 IX86_BUILTIN_CVTTSD2SI,
27399 IX86_BUILTIN_CVTTSD2SI64,
27400
27401 IX86_BUILTIN_CVTPS2DQ,
27402 IX86_BUILTIN_CVTPS2PD,
27403 IX86_BUILTIN_CVTTPS2DQ,
27404
27405 IX86_BUILTIN_MOVNTI,
27406 IX86_BUILTIN_MOVNTI64,
27407 IX86_BUILTIN_MOVNTPD,
27408 IX86_BUILTIN_MOVNTDQ,
27409
27410 IX86_BUILTIN_MOVQ128,
27411
27412 /* SSE2 MMX */
27413 IX86_BUILTIN_MASKMOVDQU,
27414 IX86_BUILTIN_MOVMSKPD,
27415 IX86_BUILTIN_PMOVMSKB128,
27416
27417 IX86_BUILTIN_PACKSSWB128,
27418 IX86_BUILTIN_PACKSSDW128,
27419 IX86_BUILTIN_PACKUSWB128,
27420
27421 IX86_BUILTIN_PADDB128,
27422 IX86_BUILTIN_PADDW128,
27423 IX86_BUILTIN_PADDD128,
27424 IX86_BUILTIN_PADDQ128,
27425 IX86_BUILTIN_PADDSB128,
27426 IX86_BUILTIN_PADDSW128,
27427 IX86_BUILTIN_PADDUSB128,
27428 IX86_BUILTIN_PADDUSW128,
27429 IX86_BUILTIN_PSUBB128,
27430 IX86_BUILTIN_PSUBW128,
27431 IX86_BUILTIN_PSUBD128,
27432 IX86_BUILTIN_PSUBQ128,
27433 IX86_BUILTIN_PSUBSB128,
27434 IX86_BUILTIN_PSUBSW128,
27435 IX86_BUILTIN_PSUBUSB128,
27436 IX86_BUILTIN_PSUBUSW128,
27437
27438 IX86_BUILTIN_PAND128,
27439 IX86_BUILTIN_PANDN128,
27440 IX86_BUILTIN_POR128,
27441 IX86_BUILTIN_PXOR128,
27442
27443 IX86_BUILTIN_PAVGB128,
27444 IX86_BUILTIN_PAVGW128,
27445
27446 IX86_BUILTIN_PCMPEQB128,
27447 IX86_BUILTIN_PCMPEQW128,
27448 IX86_BUILTIN_PCMPEQD128,
27449 IX86_BUILTIN_PCMPGTB128,
27450 IX86_BUILTIN_PCMPGTW128,
27451 IX86_BUILTIN_PCMPGTD128,
27452
27453 IX86_BUILTIN_PMADDWD128,
27454
27455 IX86_BUILTIN_PMAXSW128,
27456 IX86_BUILTIN_PMAXUB128,
27457 IX86_BUILTIN_PMINSW128,
27458 IX86_BUILTIN_PMINUB128,
27459
27460 IX86_BUILTIN_PMULUDQ,
27461 IX86_BUILTIN_PMULUDQ128,
27462 IX86_BUILTIN_PMULHUW128,
27463 IX86_BUILTIN_PMULHW128,
27464 IX86_BUILTIN_PMULLW128,
27465
27466 IX86_BUILTIN_PSADBW128,
27467 IX86_BUILTIN_PSHUFHW,
27468 IX86_BUILTIN_PSHUFLW,
27469 IX86_BUILTIN_PSHUFD,
27470
27471 IX86_BUILTIN_PSLLDQI128,
27472 IX86_BUILTIN_PSLLWI128,
27473 IX86_BUILTIN_PSLLDI128,
27474 IX86_BUILTIN_PSLLQI128,
27475 IX86_BUILTIN_PSRAWI128,
27476 IX86_BUILTIN_PSRADI128,
27477 IX86_BUILTIN_PSRLDQI128,
27478 IX86_BUILTIN_PSRLWI128,
27479 IX86_BUILTIN_PSRLDI128,
27480 IX86_BUILTIN_PSRLQI128,
27481
27482 IX86_BUILTIN_PSLLDQ128,
27483 IX86_BUILTIN_PSLLW128,
27484 IX86_BUILTIN_PSLLD128,
27485 IX86_BUILTIN_PSLLQ128,
27486 IX86_BUILTIN_PSRAW128,
27487 IX86_BUILTIN_PSRAD128,
27488 IX86_BUILTIN_PSRLW128,
27489 IX86_BUILTIN_PSRLD128,
27490 IX86_BUILTIN_PSRLQ128,
27491
27492 IX86_BUILTIN_PUNPCKHBW128,
27493 IX86_BUILTIN_PUNPCKHWD128,
27494 IX86_BUILTIN_PUNPCKHDQ128,
27495 IX86_BUILTIN_PUNPCKHQDQ128,
27496 IX86_BUILTIN_PUNPCKLBW128,
27497 IX86_BUILTIN_PUNPCKLWD128,
27498 IX86_BUILTIN_PUNPCKLDQ128,
27499 IX86_BUILTIN_PUNPCKLQDQ128,
27500
27501 IX86_BUILTIN_CLFLUSH,
27502 IX86_BUILTIN_MFENCE,
27503 IX86_BUILTIN_LFENCE,
27504 IX86_BUILTIN_PAUSE,
27505
27506 IX86_BUILTIN_FNSTENV,
27507 IX86_BUILTIN_FLDENV,
27508 IX86_BUILTIN_FNSTSW,
27509 IX86_BUILTIN_FNCLEX,
27510
27511 IX86_BUILTIN_BSRSI,
27512 IX86_BUILTIN_BSRDI,
27513 IX86_BUILTIN_RDPMC,
27514 IX86_BUILTIN_RDTSC,
27515 IX86_BUILTIN_RDTSCP,
27516 IX86_BUILTIN_ROLQI,
27517 IX86_BUILTIN_ROLHI,
27518 IX86_BUILTIN_RORQI,
27519 IX86_BUILTIN_RORHI,
27520
27521 /* SSE3. */
27522 IX86_BUILTIN_ADDSUBPS,
27523 IX86_BUILTIN_HADDPS,
27524 IX86_BUILTIN_HSUBPS,
27525 IX86_BUILTIN_MOVSHDUP,
27526 IX86_BUILTIN_MOVSLDUP,
27527 IX86_BUILTIN_ADDSUBPD,
27528 IX86_BUILTIN_HADDPD,
27529 IX86_BUILTIN_HSUBPD,
27530 IX86_BUILTIN_LDDQU,
27531
27532 IX86_BUILTIN_MONITOR,
27533 IX86_BUILTIN_MWAIT,
27534
27535 /* SSSE3. */
27536 IX86_BUILTIN_PHADDW,
27537 IX86_BUILTIN_PHADDD,
27538 IX86_BUILTIN_PHADDSW,
27539 IX86_BUILTIN_PHSUBW,
27540 IX86_BUILTIN_PHSUBD,
27541 IX86_BUILTIN_PHSUBSW,
27542 IX86_BUILTIN_PMADDUBSW,
27543 IX86_BUILTIN_PMULHRSW,
27544 IX86_BUILTIN_PSHUFB,
27545 IX86_BUILTIN_PSIGNB,
27546 IX86_BUILTIN_PSIGNW,
27547 IX86_BUILTIN_PSIGND,
27548 IX86_BUILTIN_PALIGNR,
27549 IX86_BUILTIN_PABSB,
27550 IX86_BUILTIN_PABSW,
27551 IX86_BUILTIN_PABSD,
27552
27553 IX86_BUILTIN_PHADDW128,
27554 IX86_BUILTIN_PHADDD128,
27555 IX86_BUILTIN_PHADDSW128,
27556 IX86_BUILTIN_PHSUBW128,
27557 IX86_BUILTIN_PHSUBD128,
27558 IX86_BUILTIN_PHSUBSW128,
27559 IX86_BUILTIN_PMADDUBSW128,
27560 IX86_BUILTIN_PMULHRSW128,
27561 IX86_BUILTIN_PSHUFB128,
27562 IX86_BUILTIN_PSIGNB128,
27563 IX86_BUILTIN_PSIGNW128,
27564 IX86_BUILTIN_PSIGND128,
27565 IX86_BUILTIN_PALIGNR128,
27566 IX86_BUILTIN_PABSB128,
27567 IX86_BUILTIN_PABSW128,
27568 IX86_BUILTIN_PABSD128,
27569
27570 /* AMDFAM10 - SSE4A New Instructions. */
27571 IX86_BUILTIN_MOVNTSD,
27572 IX86_BUILTIN_MOVNTSS,
27573 IX86_BUILTIN_EXTRQI,
27574 IX86_BUILTIN_EXTRQ,
27575 IX86_BUILTIN_INSERTQI,
27576 IX86_BUILTIN_INSERTQ,
27577
27578 /* SSE4.1. */
27579 IX86_BUILTIN_BLENDPD,
27580 IX86_BUILTIN_BLENDPS,
27581 IX86_BUILTIN_BLENDVPD,
27582 IX86_BUILTIN_BLENDVPS,
27583 IX86_BUILTIN_PBLENDVB128,
27584 IX86_BUILTIN_PBLENDW128,
27585
27586 IX86_BUILTIN_DPPD,
27587 IX86_BUILTIN_DPPS,
27588
27589 IX86_BUILTIN_INSERTPS128,
27590
27591 IX86_BUILTIN_MOVNTDQA,
27592 IX86_BUILTIN_MPSADBW128,
27593 IX86_BUILTIN_PACKUSDW128,
27594 IX86_BUILTIN_PCMPEQQ,
27595 IX86_BUILTIN_PHMINPOSUW128,
27596
27597 IX86_BUILTIN_PMAXSB128,
27598 IX86_BUILTIN_PMAXSD128,
27599 IX86_BUILTIN_PMAXUD128,
27600 IX86_BUILTIN_PMAXUW128,
27601
27602 IX86_BUILTIN_PMINSB128,
27603 IX86_BUILTIN_PMINSD128,
27604 IX86_BUILTIN_PMINUD128,
27605 IX86_BUILTIN_PMINUW128,
27606
27607 IX86_BUILTIN_PMOVSXBW128,
27608 IX86_BUILTIN_PMOVSXBD128,
27609 IX86_BUILTIN_PMOVSXBQ128,
27610 IX86_BUILTIN_PMOVSXWD128,
27611 IX86_BUILTIN_PMOVSXWQ128,
27612 IX86_BUILTIN_PMOVSXDQ128,
27613
27614 IX86_BUILTIN_PMOVZXBW128,
27615 IX86_BUILTIN_PMOVZXBD128,
27616 IX86_BUILTIN_PMOVZXBQ128,
27617 IX86_BUILTIN_PMOVZXWD128,
27618 IX86_BUILTIN_PMOVZXWQ128,
27619 IX86_BUILTIN_PMOVZXDQ128,
27620
27621 IX86_BUILTIN_PMULDQ128,
27622 IX86_BUILTIN_PMULLD128,
27623
27624 IX86_BUILTIN_ROUNDSD,
27625 IX86_BUILTIN_ROUNDSS,
27626
27627 IX86_BUILTIN_ROUNDPD,
27628 IX86_BUILTIN_ROUNDPS,
27629
27630 IX86_BUILTIN_FLOORPD,
27631 IX86_BUILTIN_CEILPD,
27632 IX86_BUILTIN_TRUNCPD,
27633 IX86_BUILTIN_RINTPD,
27634 IX86_BUILTIN_ROUNDPD_AZ,
27635
27636 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX,
27637 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX,
27638 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX,
27639
27640 IX86_BUILTIN_FLOORPS,
27641 IX86_BUILTIN_CEILPS,
27642 IX86_BUILTIN_TRUNCPS,
27643 IX86_BUILTIN_RINTPS,
27644 IX86_BUILTIN_ROUNDPS_AZ,
27645
27646 IX86_BUILTIN_FLOORPS_SFIX,
27647 IX86_BUILTIN_CEILPS_SFIX,
27648 IX86_BUILTIN_ROUNDPS_AZ_SFIX,
27649
27650 IX86_BUILTIN_PTESTZ,
27651 IX86_BUILTIN_PTESTC,
27652 IX86_BUILTIN_PTESTNZC,
27653
27654 IX86_BUILTIN_VEC_INIT_V2SI,
27655 IX86_BUILTIN_VEC_INIT_V4HI,
27656 IX86_BUILTIN_VEC_INIT_V8QI,
27657 IX86_BUILTIN_VEC_EXT_V2DF,
27658 IX86_BUILTIN_VEC_EXT_V2DI,
27659 IX86_BUILTIN_VEC_EXT_V4SF,
27660 IX86_BUILTIN_VEC_EXT_V4SI,
27661 IX86_BUILTIN_VEC_EXT_V8HI,
27662 IX86_BUILTIN_VEC_EXT_V2SI,
27663 IX86_BUILTIN_VEC_EXT_V4HI,
27664 IX86_BUILTIN_VEC_EXT_V16QI,
27665 IX86_BUILTIN_VEC_SET_V2DI,
27666 IX86_BUILTIN_VEC_SET_V4SF,
27667 IX86_BUILTIN_VEC_SET_V4SI,
27668 IX86_BUILTIN_VEC_SET_V8HI,
27669 IX86_BUILTIN_VEC_SET_V4HI,
27670 IX86_BUILTIN_VEC_SET_V16QI,
27671
27672 IX86_BUILTIN_VEC_PACK_SFIX,
27673 IX86_BUILTIN_VEC_PACK_SFIX256,
27674
27675 /* SSE4.2. */
27676 IX86_BUILTIN_CRC32QI,
27677 IX86_BUILTIN_CRC32HI,
27678 IX86_BUILTIN_CRC32SI,
27679 IX86_BUILTIN_CRC32DI,
27680
27681 IX86_BUILTIN_PCMPESTRI128,
27682 IX86_BUILTIN_PCMPESTRM128,
27683 IX86_BUILTIN_PCMPESTRA128,
27684 IX86_BUILTIN_PCMPESTRC128,
27685 IX86_BUILTIN_PCMPESTRO128,
27686 IX86_BUILTIN_PCMPESTRS128,
27687 IX86_BUILTIN_PCMPESTRZ128,
27688 IX86_BUILTIN_PCMPISTRI128,
27689 IX86_BUILTIN_PCMPISTRM128,
27690 IX86_BUILTIN_PCMPISTRA128,
27691 IX86_BUILTIN_PCMPISTRC128,
27692 IX86_BUILTIN_PCMPISTRO128,
27693 IX86_BUILTIN_PCMPISTRS128,
27694 IX86_BUILTIN_PCMPISTRZ128,
27695
27696 IX86_BUILTIN_PCMPGTQ,
27697
27698 /* AES instructions */
27699 IX86_BUILTIN_AESENC128,
27700 IX86_BUILTIN_AESENCLAST128,
27701 IX86_BUILTIN_AESDEC128,
27702 IX86_BUILTIN_AESDECLAST128,
27703 IX86_BUILTIN_AESIMC128,
27704 IX86_BUILTIN_AESKEYGENASSIST128,
27705
27706 /* PCLMUL instruction */
27707 IX86_BUILTIN_PCLMULQDQ128,
27708
27709 /* AVX */
27710 IX86_BUILTIN_ADDPD256,
27711 IX86_BUILTIN_ADDPS256,
27712 IX86_BUILTIN_ADDSUBPD256,
27713 IX86_BUILTIN_ADDSUBPS256,
27714 IX86_BUILTIN_ANDPD256,
27715 IX86_BUILTIN_ANDPS256,
27716 IX86_BUILTIN_ANDNPD256,
27717 IX86_BUILTIN_ANDNPS256,
27718 IX86_BUILTIN_BLENDPD256,
27719 IX86_BUILTIN_BLENDPS256,
27720 IX86_BUILTIN_BLENDVPD256,
27721 IX86_BUILTIN_BLENDVPS256,
27722 IX86_BUILTIN_DIVPD256,
27723 IX86_BUILTIN_DIVPS256,
27724 IX86_BUILTIN_DPPS256,
27725 IX86_BUILTIN_HADDPD256,
27726 IX86_BUILTIN_HADDPS256,
27727 IX86_BUILTIN_HSUBPD256,
27728 IX86_BUILTIN_HSUBPS256,
27729 IX86_BUILTIN_MAXPD256,
27730 IX86_BUILTIN_MAXPS256,
27731 IX86_BUILTIN_MINPD256,
27732 IX86_BUILTIN_MINPS256,
27733 IX86_BUILTIN_MULPD256,
27734 IX86_BUILTIN_MULPS256,
27735 IX86_BUILTIN_ORPD256,
27736 IX86_BUILTIN_ORPS256,
27737 IX86_BUILTIN_SHUFPD256,
27738 IX86_BUILTIN_SHUFPS256,
27739 IX86_BUILTIN_SUBPD256,
27740 IX86_BUILTIN_SUBPS256,
27741 IX86_BUILTIN_XORPD256,
27742 IX86_BUILTIN_XORPS256,
27743 IX86_BUILTIN_CMPSD,
27744 IX86_BUILTIN_CMPSS,
27745 IX86_BUILTIN_CMPPD,
27746 IX86_BUILTIN_CMPPS,
27747 IX86_BUILTIN_CMPPD256,
27748 IX86_BUILTIN_CMPPS256,
27749 IX86_BUILTIN_CVTDQ2PD256,
27750 IX86_BUILTIN_CVTDQ2PS256,
27751 IX86_BUILTIN_CVTPD2PS256,
27752 IX86_BUILTIN_CVTPS2DQ256,
27753 IX86_BUILTIN_CVTPS2PD256,
27754 IX86_BUILTIN_CVTTPD2DQ256,
27755 IX86_BUILTIN_CVTPD2DQ256,
27756 IX86_BUILTIN_CVTTPS2DQ256,
27757 IX86_BUILTIN_EXTRACTF128PD256,
27758 IX86_BUILTIN_EXTRACTF128PS256,
27759 IX86_BUILTIN_EXTRACTF128SI256,
27760 IX86_BUILTIN_VZEROALL,
27761 IX86_BUILTIN_VZEROUPPER,
27762 IX86_BUILTIN_VPERMILVARPD,
27763 IX86_BUILTIN_VPERMILVARPS,
27764 IX86_BUILTIN_VPERMILVARPD256,
27765 IX86_BUILTIN_VPERMILVARPS256,
27766 IX86_BUILTIN_VPERMILPD,
27767 IX86_BUILTIN_VPERMILPS,
27768 IX86_BUILTIN_VPERMILPD256,
27769 IX86_BUILTIN_VPERMILPS256,
27770 IX86_BUILTIN_VPERMIL2PD,
27771 IX86_BUILTIN_VPERMIL2PS,
27772 IX86_BUILTIN_VPERMIL2PD256,
27773 IX86_BUILTIN_VPERMIL2PS256,
27774 IX86_BUILTIN_VPERM2F128PD256,
27775 IX86_BUILTIN_VPERM2F128PS256,
27776 IX86_BUILTIN_VPERM2F128SI256,
27777 IX86_BUILTIN_VBROADCASTSS,
27778 IX86_BUILTIN_VBROADCASTSD256,
27779 IX86_BUILTIN_VBROADCASTSS256,
27780 IX86_BUILTIN_VBROADCASTPD256,
27781 IX86_BUILTIN_VBROADCASTPS256,
27782 IX86_BUILTIN_VINSERTF128PD256,
27783 IX86_BUILTIN_VINSERTF128PS256,
27784 IX86_BUILTIN_VINSERTF128SI256,
27785 IX86_BUILTIN_LOADUPD256,
27786 IX86_BUILTIN_LOADUPS256,
27787 IX86_BUILTIN_STOREUPD256,
27788 IX86_BUILTIN_STOREUPS256,
27789 IX86_BUILTIN_LDDQU256,
27790 IX86_BUILTIN_MOVNTDQ256,
27791 IX86_BUILTIN_MOVNTPD256,
27792 IX86_BUILTIN_MOVNTPS256,
27793 IX86_BUILTIN_LOADDQU256,
27794 IX86_BUILTIN_STOREDQU256,
27795 IX86_BUILTIN_MASKLOADPD,
27796 IX86_BUILTIN_MASKLOADPS,
27797 IX86_BUILTIN_MASKSTOREPD,
27798 IX86_BUILTIN_MASKSTOREPS,
27799 IX86_BUILTIN_MASKLOADPD256,
27800 IX86_BUILTIN_MASKLOADPS256,
27801 IX86_BUILTIN_MASKSTOREPD256,
27802 IX86_BUILTIN_MASKSTOREPS256,
27803 IX86_BUILTIN_MOVSHDUP256,
27804 IX86_BUILTIN_MOVSLDUP256,
27805 IX86_BUILTIN_MOVDDUP256,
27806
27807 IX86_BUILTIN_SQRTPD256,
27808 IX86_BUILTIN_SQRTPS256,
27809 IX86_BUILTIN_SQRTPS_NR256,
27810 IX86_BUILTIN_RSQRTPS256,
27811 IX86_BUILTIN_RSQRTPS_NR256,
27812
27813 IX86_BUILTIN_RCPPS256,
27814
27815 IX86_BUILTIN_ROUNDPD256,
27816 IX86_BUILTIN_ROUNDPS256,
27817
27818 IX86_BUILTIN_FLOORPD256,
27819 IX86_BUILTIN_CEILPD256,
27820 IX86_BUILTIN_TRUNCPD256,
27821 IX86_BUILTIN_RINTPD256,
27822 IX86_BUILTIN_ROUNDPD_AZ256,
27823
27824 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256,
27825 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256,
27826 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256,
27827
27828 IX86_BUILTIN_FLOORPS256,
27829 IX86_BUILTIN_CEILPS256,
27830 IX86_BUILTIN_TRUNCPS256,
27831 IX86_BUILTIN_RINTPS256,
27832 IX86_BUILTIN_ROUNDPS_AZ256,
27833
27834 IX86_BUILTIN_FLOORPS_SFIX256,
27835 IX86_BUILTIN_CEILPS_SFIX256,
27836 IX86_BUILTIN_ROUNDPS_AZ_SFIX256,
27837
27838 IX86_BUILTIN_UNPCKHPD256,
27839 IX86_BUILTIN_UNPCKLPD256,
27840 IX86_BUILTIN_UNPCKHPS256,
27841 IX86_BUILTIN_UNPCKLPS256,
27842
27843 IX86_BUILTIN_SI256_SI,
27844 IX86_BUILTIN_PS256_PS,
27845 IX86_BUILTIN_PD256_PD,
27846 IX86_BUILTIN_SI_SI256,
27847 IX86_BUILTIN_PS_PS256,
27848 IX86_BUILTIN_PD_PD256,
27849
27850 IX86_BUILTIN_VTESTZPD,
27851 IX86_BUILTIN_VTESTCPD,
27852 IX86_BUILTIN_VTESTNZCPD,
27853 IX86_BUILTIN_VTESTZPS,
27854 IX86_BUILTIN_VTESTCPS,
27855 IX86_BUILTIN_VTESTNZCPS,
27856 IX86_BUILTIN_VTESTZPD256,
27857 IX86_BUILTIN_VTESTCPD256,
27858 IX86_BUILTIN_VTESTNZCPD256,
27859 IX86_BUILTIN_VTESTZPS256,
27860 IX86_BUILTIN_VTESTCPS256,
27861 IX86_BUILTIN_VTESTNZCPS256,
27862 IX86_BUILTIN_PTESTZ256,
27863 IX86_BUILTIN_PTESTC256,
27864 IX86_BUILTIN_PTESTNZC256,
27865
27866 IX86_BUILTIN_MOVMSKPD256,
27867 IX86_BUILTIN_MOVMSKPS256,
27868
27869 /* AVX2 */
27870 IX86_BUILTIN_MPSADBW256,
27871 IX86_BUILTIN_PABSB256,
27872 IX86_BUILTIN_PABSW256,
27873 IX86_BUILTIN_PABSD256,
27874 IX86_BUILTIN_PACKSSDW256,
27875 IX86_BUILTIN_PACKSSWB256,
27876 IX86_BUILTIN_PACKUSDW256,
27877 IX86_BUILTIN_PACKUSWB256,
27878 IX86_BUILTIN_PADDB256,
27879 IX86_BUILTIN_PADDW256,
27880 IX86_BUILTIN_PADDD256,
27881 IX86_BUILTIN_PADDQ256,
27882 IX86_BUILTIN_PADDSB256,
27883 IX86_BUILTIN_PADDSW256,
27884 IX86_BUILTIN_PADDUSB256,
27885 IX86_BUILTIN_PADDUSW256,
27886 IX86_BUILTIN_PALIGNR256,
27887 IX86_BUILTIN_AND256I,
27888 IX86_BUILTIN_ANDNOT256I,
27889 IX86_BUILTIN_PAVGB256,
27890 IX86_BUILTIN_PAVGW256,
27891 IX86_BUILTIN_PBLENDVB256,
27892 IX86_BUILTIN_PBLENDVW256,
27893 IX86_BUILTIN_PCMPEQB256,
27894 IX86_BUILTIN_PCMPEQW256,
27895 IX86_BUILTIN_PCMPEQD256,
27896 IX86_BUILTIN_PCMPEQQ256,
27897 IX86_BUILTIN_PCMPGTB256,
27898 IX86_BUILTIN_PCMPGTW256,
27899 IX86_BUILTIN_PCMPGTD256,
27900 IX86_BUILTIN_PCMPGTQ256,
27901 IX86_BUILTIN_PHADDW256,
27902 IX86_BUILTIN_PHADDD256,
27903 IX86_BUILTIN_PHADDSW256,
27904 IX86_BUILTIN_PHSUBW256,
27905 IX86_BUILTIN_PHSUBD256,
27906 IX86_BUILTIN_PHSUBSW256,
27907 IX86_BUILTIN_PMADDUBSW256,
27908 IX86_BUILTIN_PMADDWD256,
27909 IX86_BUILTIN_PMAXSB256,
27910 IX86_BUILTIN_PMAXSW256,
27911 IX86_BUILTIN_PMAXSD256,
27912 IX86_BUILTIN_PMAXUB256,
27913 IX86_BUILTIN_PMAXUW256,
27914 IX86_BUILTIN_PMAXUD256,
27915 IX86_BUILTIN_PMINSB256,
27916 IX86_BUILTIN_PMINSW256,
27917 IX86_BUILTIN_PMINSD256,
27918 IX86_BUILTIN_PMINUB256,
27919 IX86_BUILTIN_PMINUW256,
27920 IX86_BUILTIN_PMINUD256,
27921 IX86_BUILTIN_PMOVMSKB256,
27922 IX86_BUILTIN_PMOVSXBW256,
27923 IX86_BUILTIN_PMOVSXBD256,
27924 IX86_BUILTIN_PMOVSXBQ256,
27925 IX86_BUILTIN_PMOVSXWD256,
27926 IX86_BUILTIN_PMOVSXWQ256,
27927 IX86_BUILTIN_PMOVSXDQ256,
27928 IX86_BUILTIN_PMOVZXBW256,
27929 IX86_BUILTIN_PMOVZXBD256,
27930 IX86_BUILTIN_PMOVZXBQ256,
27931 IX86_BUILTIN_PMOVZXWD256,
27932 IX86_BUILTIN_PMOVZXWQ256,
27933 IX86_BUILTIN_PMOVZXDQ256,
27934 IX86_BUILTIN_PMULDQ256,
27935 IX86_BUILTIN_PMULHRSW256,
27936 IX86_BUILTIN_PMULHUW256,
27937 IX86_BUILTIN_PMULHW256,
27938 IX86_BUILTIN_PMULLW256,
27939 IX86_BUILTIN_PMULLD256,
27940 IX86_BUILTIN_PMULUDQ256,
27941 IX86_BUILTIN_POR256,
27942 IX86_BUILTIN_PSADBW256,
27943 IX86_BUILTIN_PSHUFB256,
27944 IX86_BUILTIN_PSHUFD256,
27945 IX86_BUILTIN_PSHUFHW256,
27946 IX86_BUILTIN_PSHUFLW256,
27947 IX86_BUILTIN_PSIGNB256,
27948 IX86_BUILTIN_PSIGNW256,
27949 IX86_BUILTIN_PSIGND256,
27950 IX86_BUILTIN_PSLLDQI256,
27951 IX86_BUILTIN_PSLLWI256,
27952 IX86_BUILTIN_PSLLW256,
27953 IX86_BUILTIN_PSLLDI256,
27954 IX86_BUILTIN_PSLLD256,
27955 IX86_BUILTIN_PSLLQI256,
27956 IX86_BUILTIN_PSLLQ256,
27957 IX86_BUILTIN_PSRAWI256,
27958 IX86_BUILTIN_PSRAW256,
27959 IX86_BUILTIN_PSRADI256,
27960 IX86_BUILTIN_PSRAD256,
27961 IX86_BUILTIN_PSRLDQI256,
27962 IX86_BUILTIN_PSRLWI256,
27963 IX86_BUILTIN_PSRLW256,
27964 IX86_BUILTIN_PSRLDI256,
27965 IX86_BUILTIN_PSRLD256,
27966 IX86_BUILTIN_PSRLQI256,
27967 IX86_BUILTIN_PSRLQ256,
27968 IX86_BUILTIN_PSUBB256,
27969 IX86_BUILTIN_PSUBW256,
27970 IX86_BUILTIN_PSUBD256,
27971 IX86_BUILTIN_PSUBQ256,
27972 IX86_BUILTIN_PSUBSB256,
27973 IX86_BUILTIN_PSUBSW256,
27974 IX86_BUILTIN_PSUBUSB256,
27975 IX86_BUILTIN_PSUBUSW256,
27976 IX86_BUILTIN_PUNPCKHBW256,
27977 IX86_BUILTIN_PUNPCKHWD256,
27978 IX86_BUILTIN_PUNPCKHDQ256,
27979 IX86_BUILTIN_PUNPCKHQDQ256,
27980 IX86_BUILTIN_PUNPCKLBW256,
27981 IX86_BUILTIN_PUNPCKLWD256,
27982 IX86_BUILTIN_PUNPCKLDQ256,
27983 IX86_BUILTIN_PUNPCKLQDQ256,
27984 IX86_BUILTIN_PXOR256,
27985 IX86_BUILTIN_MOVNTDQA256,
27986 IX86_BUILTIN_VBROADCASTSS_PS,
27987 IX86_BUILTIN_VBROADCASTSS_PS256,
27988 IX86_BUILTIN_VBROADCASTSD_PD256,
27989 IX86_BUILTIN_VBROADCASTSI256,
27990 IX86_BUILTIN_PBLENDD256,
27991 IX86_BUILTIN_PBLENDD128,
27992 IX86_BUILTIN_PBROADCASTB256,
27993 IX86_BUILTIN_PBROADCASTW256,
27994 IX86_BUILTIN_PBROADCASTD256,
27995 IX86_BUILTIN_PBROADCASTQ256,
27996 IX86_BUILTIN_PBROADCASTB128,
27997 IX86_BUILTIN_PBROADCASTW128,
27998 IX86_BUILTIN_PBROADCASTD128,
27999 IX86_BUILTIN_PBROADCASTQ128,
28000 IX86_BUILTIN_VPERMVARSI256,
28001 IX86_BUILTIN_VPERMDF256,
28002 IX86_BUILTIN_VPERMVARSF256,
28003 IX86_BUILTIN_VPERMDI256,
28004 IX86_BUILTIN_VPERMTI256,
28005 IX86_BUILTIN_VEXTRACT128I256,
28006 IX86_BUILTIN_VINSERT128I256,
28007 IX86_BUILTIN_MASKLOADD,
28008 IX86_BUILTIN_MASKLOADQ,
28009 IX86_BUILTIN_MASKLOADD256,
28010 IX86_BUILTIN_MASKLOADQ256,
28011 IX86_BUILTIN_MASKSTORED,
28012 IX86_BUILTIN_MASKSTOREQ,
28013 IX86_BUILTIN_MASKSTORED256,
28014 IX86_BUILTIN_MASKSTOREQ256,
28015 IX86_BUILTIN_PSLLVV4DI,
28016 IX86_BUILTIN_PSLLVV2DI,
28017 IX86_BUILTIN_PSLLVV8SI,
28018 IX86_BUILTIN_PSLLVV4SI,
28019 IX86_BUILTIN_PSRAVV8SI,
28020 IX86_BUILTIN_PSRAVV4SI,
28021 IX86_BUILTIN_PSRLVV4DI,
28022 IX86_BUILTIN_PSRLVV2DI,
28023 IX86_BUILTIN_PSRLVV8SI,
28024 IX86_BUILTIN_PSRLVV4SI,
28025
28026 IX86_BUILTIN_GATHERSIV2DF,
28027 IX86_BUILTIN_GATHERSIV4DF,
28028 IX86_BUILTIN_GATHERDIV2DF,
28029 IX86_BUILTIN_GATHERDIV4DF,
28030 IX86_BUILTIN_GATHERSIV4SF,
28031 IX86_BUILTIN_GATHERSIV8SF,
28032 IX86_BUILTIN_GATHERDIV4SF,
28033 IX86_BUILTIN_GATHERDIV8SF,
28034 IX86_BUILTIN_GATHERSIV2DI,
28035 IX86_BUILTIN_GATHERSIV4DI,
28036 IX86_BUILTIN_GATHERDIV2DI,
28037 IX86_BUILTIN_GATHERDIV4DI,
28038 IX86_BUILTIN_GATHERSIV4SI,
28039 IX86_BUILTIN_GATHERSIV8SI,
28040 IX86_BUILTIN_GATHERDIV4SI,
28041 IX86_BUILTIN_GATHERDIV8SI,
28042
28043 /* AVX512F */
28044 IX86_BUILTIN_ADDPD512,
28045 IX86_BUILTIN_ADDPS512,
28046 IX86_BUILTIN_ADDSD_ROUND,
28047 IX86_BUILTIN_ADDSS_ROUND,
28048 IX86_BUILTIN_ALIGND512,
28049 IX86_BUILTIN_ALIGNQ512,
28050 IX86_BUILTIN_BLENDMD512,
28051 IX86_BUILTIN_BLENDMPD512,
28052 IX86_BUILTIN_BLENDMPS512,
28053 IX86_BUILTIN_BLENDMQ512,
28054 IX86_BUILTIN_BROADCASTF32X4_512,
28055 IX86_BUILTIN_BROADCASTF64X4_512,
28056 IX86_BUILTIN_BROADCASTI32X4_512,
28057 IX86_BUILTIN_BROADCASTI64X4_512,
28058 IX86_BUILTIN_BROADCASTSD512,
28059 IX86_BUILTIN_BROADCASTSS512,
28060 IX86_BUILTIN_CMPD512,
28061 IX86_BUILTIN_CMPPD512,
28062 IX86_BUILTIN_CMPPS512,
28063 IX86_BUILTIN_CMPQ512,
28064 IX86_BUILTIN_CMPSD_MASK,
28065 IX86_BUILTIN_CMPSS_MASK,
28066 IX86_BUILTIN_COMIDF,
28067 IX86_BUILTIN_COMISF,
28068 IX86_BUILTIN_COMPRESSPD512,
28069 IX86_BUILTIN_COMPRESSPDSTORE512,
28070 IX86_BUILTIN_COMPRESSPS512,
28071 IX86_BUILTIN_COMPRESSPSSTORE512,
28072 IX86_BUILTIN_CVTDQ2PD512,
28073 IX86_BUILTIN_CVTDQ2PS512,
28074 IX86_BUILTIN_CVTPD2DQ512,
28075 IX86_BUILTIN_CVTPD2PS512,
28076 IX86_BUILTIN_CVTPD2UDQ512,
28077 IX86_BUILTIN_CVTPH2PS512,
28078 IX86_BUILTIN_CVTPS2DQ512,
28079 IX86_BUILTIN_CVTPS2PD512,
28080 IX86_BUILTIN_CVTPS2PH512,
28081 IX86_BUILTIN_CVTPS2UDQ512,
28082 IX86_BUILTIN_CVTSD2SS_ROUND,
28083 IX86_BUILTIN_CVTSI2SD64,
28084 IX86_BUILTIN_CVTSI2SS32,
28085 IX86_BUILTIN_CVTSI2SS64,
28086 IX86_BUILTIN_CVTSS2SD_ROUND,
28087 IX86_BUILTIN_CVTTPD2DQ512,
28088 IX86_BUILTIN_CVTTPD2UDQ512,
28089 IX86_BUILTIN_CVTTPS2DQ512,
28090 IX86_BUILTIN_CVTTPS2UDQ512,
28091 IX86_BUILTIN_CVTUDQ2PD512,
28092 IX86_BUILTIN_CVTUDQ2PS512,
28093 IX86_BUILTIN_CVTUSI2SD32,
28094 IX86_BUILTIN_CVTUSI2SD64,
28095 IX86_BUILTIN_CVTUSI2SS32,
28096 IX86_BUILTIN_CVTUSI2SS64,
28097 IX86_BUILTIN_DIVPD512,
28098 IX86_BUILTIN_DIVPS512,
28099 IX86_BUILTIN_DIVSD_ROUND,
28100 IX86_BUILTIN_DIVSS_ROUND,
28101 IX86_BUILTIN_EXPANDPD512,
28102 IX86_BUILTIN_EXPANDPD512Z,
28103 IX86_BUILTIN_EXPANDPDLOAD512,
28104 IX86_BUILTIN_EXPANDPDLOAD512Z,
28105 IX86_BUILTIN_EXPANDPS512,
28106 IX86_BUILTIN_EXPANDPS512Z,
28107 IX86_BUILTIN_EXPANDPSLOAD512,
28108 IX86_BUILTIN_EXPANDPSLOAD512Z,
28109 IX86_BUILTIN_EXTRACTF32X4,
28110 IX86_BUILTIN_EXTRACTF64X4,
28111 IX86_BUILTIN_EXTRACTI32X4,
28112 IX86_BUILTIN_EXTRACTI64X4,
28113 IX86_BUILTIN_FIXUPIMMPD512_MASK,
28114 IX86_BUILTIN_FIXUPIMMPD512_MASKZ,
28115 IX86_BUILTIN_FIXUPIMMPS512_MASK,
28116 IX86_BUILTIN_FIXUPIMMPS512_MASKZ,
28117 IX86_BUILTIN_FIXUPIMMSD128_MASK,
28118 IX86_BUILTIN_FIXUPIMMSD128_MASKZ,
28119 IX86_BUILTIN_FIXUPIMMSS128_MASK,
28120 IX86_BUILTIN_FIXUPIMMSS128_MASKZ,
28121 IX86_BUILTIN_GETEXPPD512,
28122 IX86_BUILTIN_GETEXPPS512,
28123 IX86_BUILTIN_GETEXPSD128,
28124 IX86_BUILTIN_GETEXPSS128,
28125 IX86_BUILTIN_GETMANTPD512,
28126 IX86_BUILTIN_GETMANTPS512,
28127 IX86_BUILTIN_GETMANTSD128,
28128 IX86_BUILTIN_GETMANTSS128,
28129 IX86_BUILTIN_INSERTF32X4,
28130 IX86_BUILTIN_INSERTF64X4,
28131 IX86_BUILTIN_INSERTI32X4,
28132 IX86_BUILTIN_INSERTI64X4,
28133 IX86_BUILTIN_LOADAPD512,
28134 IX86_BUILTIN_LOADAPS512,
28135 IX86_BUILTIN_LOADDQUDI512,
28136 IX86_BUILTIN_LOADDQUSI512,
28137 IX86_BUILTIN_LOADUPD512,
28138 IX86_BUILTIN_LOADUPS512,
28139 IX86_BUILTIN_MAXPD512,
28140 IX86_BUILTIN_MAXPS512,
28141 IX86_BUILTIN_MAXSD_ROUND,
28142 IX86_BUILTIN_MAXSS_ROUND,
28143 IX86_BUILTIN_MINPD512,
28144 IX86_BUILTIN_MINPS512,
28145 IX86_BUILTIN_MINSD_ROUND,
28146 IX86_BUILTIN_MINSS_ROUND,
28147 IX86_BUILTIN_MOVAPD512,
28148 IX86_BUILTIN_MOVAPS512,
28149 IX86_BUILTIN_MOVDDUP512,
28150 IX86_BUILTIN_MOVDQA32LOAD512,
28151 IX86_BUILTIN_MOVDQA32STORE512,
28152 IX86_BUILTIN_MOVDQA32_512,
28153 IX86_BUILTIN_MOVDQA64LOAD512,
28154 IX86_BUILTIN_MOVDQA64STORE512,
28155 IX86_BUILTIN_MOVDQA64_512,
28156 IX86_BUILTIN_MOVNTDQ512,
28157 IX86_BUILTIN_MOVNTDQA512,
28158 IX86_BUILTIN_MOVNTPD512,
28159 IX86_BUILTIN_MOVNTPS512,
28160 IX86_BUILTIN_MOVSHDUP512,
28161 IX86_BUILTIN_MOVSLDUP512,
28162 IX86_BUILTIN_MULPD512,
28163 IX86_BUILTIN_MULPS512,
28164 IX86_BUILTIN_MULSD_ROUND,
28165 IX86_BUILTIN_MULSS_ROUND,
28166 IX86_BUILTIN_PABSD512,
28167 IX86_BUILTIN_PABSQ512,
28168 IX86_BUILTIN_PADDD512,
28169 IX86_BUILTIN_PADDQ512,
28170 IX86_BUILTIN_PANDD512,
28171 IX86_BUILTIN_PANDND512,
28172 IX86_BUILTIN_PANDNQ512,
28173 IX86_BUILTIN_PANDQ512,
28174 IX86_BUILTIN_PBROADCASTD512,
28175 IX86_BUILTIN_PBROADCASTD512_GPR,
28176 IX86_BUILTIN_PBROADCASTMB512,
28177 IX86_BUILTIN_PBROADCASTMW512,
28178 IX86_BUILTIN_PBROADCASTQ512,
28179 IX86_BUILTIN_PBROADCASTQ512_GPR,
28180 IX86_BUILTIN_PBROADCASTQ512_MEM,
28181 IX86_BUILTIN_PCMPEQD512_MASK,
28182 IX86_BUILTIN_PCMPEQQ512_MASK,
28183 IX86_BUILTIN_PCMPGTD512_MASK,
28184 IX86_BUILTIN_PCMPGTQ512_MASK,
28185 IX86_BUILTIN_PCOMPRESSD512,
28186 IX86_BUILTIN_PCOMPRESSDSTORE512,
28187 IX86_BUILTIN_PCOMPRESSQ512,
28188 IX86_BUILTIN_PCOMPRESSQSTORE512,
28189 IX86_BUILTIN_PEXPANDD512,
28190 IX86_BUILTIN_PEXPANDD512Z,
28191 IX86_BUILTIN_PEXPANDDLOAD512,
28192 IX86_BUILTIN_PEXPANDDLOAD512Z,
28193 IX86_BUILTIN_PEXPANDQ512,
28194 IX86_BUILTIN_PEXPANDQ512Z,
28195 IX86_BUILTIN_PEXPANDQLOAD512,
28196 IX86_BUILTIN_PEXPANDQLOAD512Z,
28197 IX86_BUILTIN_PMAXSD512,
28198 IX86_BUILTIN_PMAXSQ512,
28199 IX86_BUILTIN_PMAXUD512,
28200 IX86_BUILTIN_PMAXUQ512,
28201 IX86_BUILTIN_PMINSD512,
28202 IX86_BUILTIN_PMINSQ512,
28203 IX86_BUILTIN_PMINUD512,
28204 IX86_BUILTIN_PMINUQ512,
28205 IX86_BUILTIN_PMOVDB512,
28206 IX86_BUILTIN_PMOVDB512_MEM,
28207 IX86_BUILTIN_PMOVDW512,
28208 IX86_BUILTIN_PMOVDW512_MEM,
28209 IX86_BUILTIN_PMOVQB512,
28210 IX86_BUILTIN_PMOVQB512_MEM,
28211 IX86_BUILTIN_PMOVQD512,
28212 IX86_BUILTIN_PMOVQD512_MEM,
28213 IX86_BUILTIN_PMOVQW512,
28214 IX86_BUILTIN_PMOVQW512_MEM,
28215 IX86_BUILTIN_PMOVSDB512,
28216 IX86_BUILTIN_PMOVSDB512_MEM,
28217 IX86_BUILTIN_PMOVSDW512,
28218 IX86_BUILTIN_PMOVSDW512_MEM,
28219 IX86_BUILTIN_PMOVSQB512,
28220 IX86_BUILTIN_PMOVSQB512_MEM,
28221 IX86_BUILTIN_PMOVSQD512,
28222 IX86_BUILTIN_PMOVSQD512_MEM,
28223 IX86_BUILTIN_PMOVSQW512,
28224 IX86_BUILTIN_PMOVSQW512_MEM,
28225 IX86_BUILTIN_PMOVSXBD512,
28226 IX86_BUILTIN_PMOVSXBQ512,
28227 IX86_BUILTIN_PMOVSXDQ512,
28228 IX86_BUILTIN_PMOVSXWD512,
28229 IX86_BUILTIN_PMOVSXWQ512,
28230 IX86_BUILTIN_PMOVUSDB512,
28231 IX86_BUILTIN_PMOVUSDB512_MEM,
28232 IX86_BUILTIN_PMOVUSDW512,
28233 IX86_BUILTIN_PMOVUSDW512_MEM,
28234 IX86_BUILTIN_PMOVUSQB512,
28235 IX86_BUILTIN_PMOVUSQB512_MEM,
28236 IX86_BUILTIN_PMOVUSQD512,
28237 IX86_BUILTIN_PMOVUSQD512_MEM,
28238 IX86_BUILTIN_PMOVUSQW512,
28239 IX86_BUILTIN_PMOVUSQW512_MEM,
28240 IX86_BUILTIN_PMOVZXBD512,
28241 IX86_BUILTIN_PMOVZXBQ512,
28242 IX86_BUILTIN_PMOVZXDQ512,
28243 IX86_BUILTIN_PMOVZXWD512,
28244 IX86_BUILTIN_PMOVZXWQ512,
28245 IX86_BUILTIN_PMULDQ512,
28246 IX86_BUILTIN_PMULLD512,
28247 IX86_BUILTIN_PMULUDQ512,
28248 IX86_BUILTIN_PORD512,
28249 IX86_BUILTIN_PORQ512,
28250 IX86_BUILTIN_PROLD512,
28251 IX86_BUILTIN_PROLQ512,
28252 IX86_BUILTIN_PROLVD512,
28253 IX86_BUILTIN_PROLVQ512,
28254 IX86_BUILTIN_PRORD512,
28255 IX86_BUILTIN_PRORQ512,
28256 IX86_BUILTIN_PRORVD512,
28257 IX86_BUILTIN_PRORVQ512,
28258 IX86_BUILTIN_PSHUFD512,
28259 IX86_BUILTIN_PSLLD512,
28260 IX86_BUILTIN_PSLLDI512,
28261 IX86_BUILTIN_PSLLQ512,
28262 IX86_BUILTIN_PSLLQI512,
28263 IX86_BUILTIN_PSLLVV16SI,
28264 IX86_BUILTIN_PSLLVV8DI,
28265 IX86_BUILTIN_PSRAD512,
28266 IX86_BUILTIN_PSRADI512,
28267 IX86_BUILTIN_PSRAQ512,
28268 IX86_BUILTIN_PSRAQI512,
28269 IX86_BUILTIN_PSRAVV16SI,
28270 IX86_BUILTIN_PSRAVV8DI,
28271 IX86_BUILTIN_PSRLD512,
28272 IX86_BUILTIN_PSRLDI512,
28273 IX86_BUILTIN_PSRLQ512,
28274 IX86_BUILTIN_PSRLQI512,
28275 IX86_BUILTIN_PSRLVV16SI,
28276 IX86_BUILTIN_PSRLVV8DI,
28277 IX86_BUILTIN_PSUBD512,
28278 IX86_BUILTIN_PSUBQ512,
28279 IX86_BUILTIN_PTESTMD512,
28280 IX86_BUILTIN_PTESTMQ512,
28281 IX86_BUILTIN_PTESTNMD512,
28282 IX86_BUILTIN_PTESTNMQ512,
28283 IX86_BUILTIN_PUNPCKHDQ512,
28284 IX86_BUILTIN_PUNPCKHQDQ512,
28285 IX86_BUILTIN_PUNPCKLDQ512,
28286 IX86_BUILTIN_PUNPCKLQDQ512,
28287 IX86_BUILTIN_PXORD512,
28288 IX86_BUILTIN_PXORQ512,
28289 IX86_BUILTIN_RCP14PD512,
28290 IX86_BUILTIN_RCP14PS512,
28291 IX86_BUILTIN_RCP14SD,
28292 IX86_BUILTIN_RCP14SS,
28293 IX86_BUILTIN_RNDSCALEPD,
28294 IX86_BUILTIN_RNDSCALEPS,
28295 IX86_BUILTIN_RNDSCALESD,
28296 IX86_BUILTIN_RNDSCALESS,
28297 IX86_BUILTIN_RSQRT14PD512,
28298 IX86_BUILTIN_RSQRT14PS512,
28299 IX86_BUILTIN_RSQRT14SD,
28300 IX86_BUILTIN_RSQRT14SS,
28301 IX86_BUILTIN_SCALEFPD512,
28302 IX86_BUILTIN_SCALEFPS512,
28303 IX86_BUILTIN_SCALEFSD,
28304 IX86_BUILTIN_SCALEFSS,
28305 IX86_BUILTIN_SHUFPD512,
28306 IX86_BUILTIN_SHUFPS512,
28307 IX86_BUILTIN_SHUF_F32x4,
28308 IX86_BUILTIN_SHUF_F64x2,
28309 IX86_BUILTIN_SHUF_I32x4,
28310 IX86_BUILTIN_SHUF_I64x2,
28311 IX86_BUILTIN_SQRTPD512,
28312 IX86_BUILTIN_SQRTPD512_MASK,
28313 IX86_BUILTIN_SQRTPS512_MASK,
28314 IX86_BUILTIN_SQRTPS_NR512,
28315 IX86_BUILTIN_SQRTSD_ROUND,
28316 IX86_BUILTIN_SQRTSS_ROUND,
28317 IX86_BUILTIN_STOREAPD512,
28318 IX86_BUILTIN_STOREAPS512,
28319 IX86_BUILTIN_STOREDQUDI512,
28320 IX86_BUILTIN_STOREDQUSI512,
28321 IX86_BUILTIN_STOREUPD512,
28322 IX86_BUILTIN_STOREUPS512,
28323 IX86_BUILTIN_SUBPD512,
28324 IX86_BUILTIN_SUBPS512,
28325 IX86_BUILTIN_SUBSD_ROUND,
28326 IX86_BUILTIN_SUBSS_ROUND,
28327 IX86_BUILTIN_UCMPD512,
28328 IX86_BUILTIN_UCMPQ512,
28329 IX86_BUILTIN_UNPCKHPD512,
28330 IX86_BUILTIN_UNPCKHPS512,
28331 IX86_BUILTIN_UNPCKLPD512,
28332 IX86_BUILTIN_UNPCKLPS512,
28333 IX86_BUILTIN_VCVTSD2SI32,
28334 IX86_BUILTIN_VCVTSD2SI64,
28335 IX86_BUILTIN_VCVTSD2USI32,
28336 IX86_BUILTIN_VCVTSD2USI64,
28337 IX86_BUILTIN_VCVTSS2SI32,
28338 IX86_BUILTIN_VCVTSS2SI64,
28339 IX86_BUILTIN_VCVTSS2USI32,
28340 IX86_BUILTIN_VCVTSS2USI64,
28341 IX86_BUILTIN_VCVTTSD2SI32,
28342 IX86_BUILTIN_VCVTTSD2SI64,
28343 IX86_BUILTIN_VCVTTSD2USI32,
28344 IX86_BUILTIN_VCVTTSD2USI64,
28345 IX86_BUILTIN_VCVTTSS2SI32,
28346 IX86_BUILTIN_VCVTTSS2SI64,
28347 IX86_BUILTIN_VCVTTSS2USI32,
28348 IX86_BUILTIN_VCVTTSS2USI64,
28349 IX86_BUILTIN_VFMADDPD512_MASK,
28350 IX86_BUILTIN_VFMADDPD512_MASK3,
28351 IX86_BUILTIN_VFMADDPD512_MASKZ,
28352 IX86_BUILTIN_VFMADDPS512_MASK,
28353 IX86_BUILTIN_VFMADDPS512_MASK3,
28354 IX86_BUILTIN_VFMADDPS512_MASKZ,
28355 IX86_BUILTIN_VFMADDSD3_ROUND,
28356 IX86_BUILTIN_VFMADDSS3_ROUND,
28357 IX86_BUILTIN_VFMADDSUBPD512_MASK,
28358 IX86_BUILTIN_VFMADDSUBPD512_MASK3,
28359 IX86_BUILTIN_VFMADDSUBPD512_MASKZ,
28360 IX86_BUILTIN_VFMADDSUBPS512_MASK,
28361 IX86_BUILTIN_VFMADDSUBPS512_MASK3,
28362 IX86_BUILTIN_VFMADDSUBPS512_MASKZ,
28363 IX86_BUILTIN_VFMSUBADDPD512_MASK3,
28364 IX86_BUILTIN_VFMSUBADDPS512_MASK3,
28365 IX86_BUILTIN_VFMSUBPD512_MASK3,
28366 IX86_BUILTIN_VFMSUBPS512_MASK3,
28367 IX86_BUILTIN_VFMSUBSD3_MASK3,
28368 IX86_BUILTIN_VFMSUBSS3_MASK3,
28369 IX86_BUILTIN_VFNMADDPD512_MASK,
28370 IX86_BUILTIN_VFNMADDPS512_MASK,
28371 IX86_BUILTIN_VFNMSUBPD512_MASK,
28372 IX86_BUILTIN_VFNMSUBPD512_MASK3,
28373 IX86_BUILTIN_VFNMSUBPS512_MASK,
28374 IX86_BUILTIN_VFNMSUBPS512_MASK3,
28375 IX86_BUILTIN_VPCLZCNTD512,
28376 IX86_BUILTIN_VPCLZCNTQ512,
28377 IX86_BUILTIN_VPCONFLICTD512,
28378 IX86_BUILTIN_VPCONFLICTQ512,
28379 IX86_BUILTIN_VPERMDF512,
28380 IX86_BUILTIN_VPERMDI512,
28381 IX86_BUILTIN_VPERMI2VARD512,
28382 IX86_BUILTIN_VPERMI2VARPD512,
28383 IX86_BUILTIN_VPERMI2VARPS512,
28384 IX86_BUILTIN_VPERMI2VARQ512,
28385 IX86_BUILTIN_VPERMILPD512,
28386 IX86_BUILTIN_VPERMILPS512,
28387 IX86_BUILTIN_VPERMILVARPD512,
28388 IX86_BUILTIN_VPERMILVARPS512,
28389 IX86_BUILTIN_VPERMT2VARD512,
28390 IX86_BUILTIN_VPERMT2VARD512_MASKZ,
28391 IX86_BUILTIN_VPERMT2VARPD512,
28392 IX86_BUILTIN_VPERMT2VARPD512_MASKZ,
28393 IX86_BUILTIN_VPERMT2VARPS512,
28394 IX86_BUILTIN_VPERMT2VARPS512_MASKZ,
28395 IX86_BUILTIN_VPERMT2VARQ512,
28396 IX86_BUILTIN_VPERMT2VARQ512_MASKZ,
28397 IX86_BUILTIN_VPERMVARDF512,
28398 IX86_BUILTIN_VPERMVARDI512,
28399 IX86_BUILTIN_VPERMVARSF512,
28400 IX86_BUILTIN_VPERMVARSI512,
28401 IX86_BUILTIN_VTERNLOGD512_MASK,
28402 IX86_BUILTIN_VTERNLOGD512_MASKZ,
28403 IX86_BUILTIN_VTERNLOGQ512_MASK,
28404 IX86_BUILTIN_VTERNLOGQ512_MASKZ,
28405
28406 /* Mask arithmetic operations */
28407 IX86_BUILTIN_KAND16,
28408 IX86_BUILTIN_KANDN16,
28409 IX86_BUILTIN_KNOT16,
28410 IX86_BUILTIN_KOR16,
28411 IX86_BUILTIN_KORTESTC16,
28412 IX86_BUILTIN_KORTESTZ16,
28413 IX86_BUILTIN_KUNPCKBW,
28414 IX86_BUILTIN_KXNOR16,
28415 IX86_BUILTIN_KXOR16,
28416 IX86_BUILTIN_KMOV16,
28417
28418 /* Alternate 4- and 8-element gather/scatter for the vectorizer,
28419 where all operands are 32 or 64 bytes wide respectively. */
28420 IX86_BUILTIN_GATHERALTSIV4DF,
28421 IX86_BUILTIN_GATHERALTDIV8SF,
28422 IX86_BUILTIN_GATHERALTSIV4DI,
28423 IX86_BUILTIN_GATHERALTDIV8SI,
28424 IX86_BUILTIN_GATHER3ALTDIV16SF,
28425 IX86_BUILTIN_GATHER3ALTDIV16SI,
28426 IX86_BUILTIN_GATHER3ALTSIV8DF,
28427 IX86_BUILTIN_GATHER3ALTSIV8DI,
28428 IX86_BUILTIN_GATHER3DIV16SF,
28429 IX86_BUILTIN_GATHER3DIV16SI,
28430 IX86_BUILTIN_GATHER3DIV8DF,
28431 IX86_BUILTIN_GATHER3DIV8DI,
28432 IX86_BUILTIN_GATHER3SIV16SF,
28433 IX86_BUILTIN_GATHER3SIV16SI,
28434 IX86_BUILTIN_GATHER3SIV8DF,
28435 IX86_BUILTIN_GATHER3SIV8DI,
28436 IX86_BUILTIN_SCATTERDIV16SF,
28437 IX86_BUILTIN_SCATTERDIV16SI,
28438 IX86_BUILTIN_SCATTERDIV8DF,
28439 IX86_BUILTIN_SCATTERDIV8DI,
28440 IX86_BUILTIN_SCATTERSIV16SF,
28441 IX86_BUILTIN_SCATTERSIV16SI,
28442 IX86_BUILTIN_SCATTERSIV8DF,
28443 IX86_BUILTIN_SCATTERSIV8DI,
28444
28445 /* AVX512PF */
28446 IX86_BUILTIN_GATHERPFQPD,
28447 IX86_BUILTIN_GATHERPFDPS,
28448 IX86_BUILTIN_GATHERPFDPD,
28449 IX86_BUILTIN_GATHERPFQPS,
28450 IX86_BUILTIN_SCATTERPFDPD,
28451 IX86_BUILTIN_SCATTERPFDPS,
28452 IX86_BUILTIN_SCATTERPFQPD,
28453 IX86_BUILTIN_SCATTERPFQPS,
28454
28455 /* AVX-512ER */
28456 IX86_BUILTIN_EXP2PD_MASK,
28457 IX86_BUILTIN_EXP2PS_MASK,
28458 IX86_BUILTIN_EXP2PS,
28459 IX86_BUILTIN_RCP28PD,
28460 IX86_BUILTIN_RCP28PS,
28461 IX86_BUILTIN_RCP28SD,
28462 IX86_BUILTIN_RCP28SS,
28463 IX86_BUILTIN_RSQRT28PD,
28464 IX86_BUILTIN_RSQRT28PS,
28465 IX86_BUILTIN_RSQRT28SD,
28466 IX86_BUILTIN_RSQRT28SS,
28467
28468 /* SHA builtins. */
28469 IX86_BUILTIN_SHA1MSG1,
28470 IX86_BUILTIN_SHA1MSG2,
28471 IX86_BUILTIN_SHA1NEXTE,
28472 IX86_BUILTIN_SHA1RNDS4,
28473 IX86_BUILTIN_SHA256MSG1,
28474 IX86_BUILTIN_SHA256MSG2,
28475 IX86_BUILTIN_SHA256RNDS2,
28476
28477 /* TFmode support builtins. */
28478 IX86_BUILTIN_INFQ,
28479 IX86_BUILTIN_HUGE_VALQ,
28480 IX86_BUILTIN_FABSQ,
28481 IX86_BUILTIN_COPYSIGNQ,
28482
28483 /* Vectorizer support builtins. */
28484 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512,
28485 IX86_BUILTIN_CPYSGNPS,
28486 IX86_BUILTIN_CPYSGNPD,
28487 IX86_BUILTIN_CPYSGNPS256,
28488 IX86_BUILTIN_CPYSGNPS512,
28489 IX86_BUILTIN_CPYSGNPD256,
28490 IX86_BUILTIN_CPYSGNPD512,
28491 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512,
28492 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512,
28493
28494
28495 /* FMA4 instructions. */
28496 IX86_BUILTIN_VFMADDSS,
28497 IX86_BUILTIN_VFMADDSD,
28498 IX86_BUILTIN_VFMADDPS,
28499 IX86_BUILTIN_VFMADDPD,
28500 IX86_BUILTIN_VFMADDPS256,
28501 IX86_BUILTIN_VFMADDPD256,
28502 IX86_BUILTIN_VFMADDSUBPS,
28503 IX86_BUILTIN_VFMADDSUBPD,
28504 IX86_BUILTIN_VFMADDSUBPS256,
28505 IX86_BUILTIN_VFMADDSUBPD256,
28506
28507 /* FMA3 instructions. */
28508 IX86_BUILTIN_VFMADDSS3,
28509 IX86_BUILTIN_VFMADDSD3,
28510
28511 /* XOP instructions. */
28512 IX86_BUILTIN_VPCMOV,
28513 IX86_BUILTIN_VPCMOV_V2DI,
28514 IX86_BUILTIN_VPCMOV_V4SI,
28515 IX86_BUILTIN_VPCMOV_V8HI,
28516 IX86_BUILTIN_VPCMOV_V16QI,
28517 IX86_BUILTIN_VPCMOV_V4SF,
28518 IX86_BUILTIN_VPCMOV_V2DF,
28519 IX86_BUILTIN_VPCMOV256,
28520 IX86_BUILTIN_VPCMOV_V4DI256,
28521 IX86_BUILTIN_VPCMOV_V8SI256,
28522 IX86_BUILTIN_VPCMOV_V16HI256,
28523 IX86_BUILTIN_VPCMOV_V32QI256,
28524 IX86_BUILTIN_VPCMOV_V8SF256,
28525 IX86_BUILTIN_VPCMOV_V4DF256,
28526
28527 IX86_BUILTIN_VPPERM,
28528
28529 IX86_BUILTIN_VPMACSSWW,
28530 IX86_BUILTIN_VPMACSWW,
28531 IX86_BUILTIN_VPMACSSWD,
28532 IX86_BUILTIN_VPMACSWD,
28533 IX86_BUILTIN_VPMACSSDD,
28534 IX86_BUILTIN_VPMACSDD,
28535 IX86_BUILTIN_VPMACSSDQL,
28536 IX86_BUILTIN_VPMACSSDQH,
28537 IX86_BUILTIN_VPMACSDQL,
28538 IX86_BUILTIN_VPMACSDQH,
28539 IX86_BUILTIN_VPMADCSSWD,
28540 IX86_BUILTIN_VPMADCSWD,
28541
28542 IX86_BUILTIN_VPHADDBW,
28543 IX86_BUILTIN_VPHADDBD,
28544 IX86_BUILTIN_VPHADDBQ,
28545 IX86_BUILTIN_VPHADDWD,
28546 IX86_BUILTIN_VPHADDWQ,
28547 IX86_BUILTIN_VPHADDDQ,
28548 IX86_BUILTIN_VPHADDUBW,
28549 IX86_BUILTIN_VPHADDUBD,
28550 IX86_BUILTIN_VPHADDUBQ,
28551 IX86_BUILTIN_VPHADDUWD,
28552 IX86_BUILTIN_VPHADDUWQ,
28553 IX86_BUILTIN_VPHADDUDQ,
28554 IX86_BUILTIN_VPHSUBBW,
28555 IX86_BUILTIN_VPHSUBWD,
28556 IX86_BUILTIN_VPHSUBDQ,
28557
28558 IX86_BUILTIN_VPROTB,
28559 IX86_BUILTIN_VPROTW,
28560 IX86_BUILTIN_VPROTD,
28561 IX86_BUILTIN_VPROTQ,
28562 IX86_BUILTIN_VPROTB_IMM,
28563 IX86_BUILTIN_VPROTW_IMM,
28564 IX86_BUILTIN_VPROTD_IMM,
28565 IX86_BUILTIN_VPROTQ_IMM,
28566
28567 IX86_BUILTIN_VPSHLB,
28568 IX86_BUILTIN_VPSHLW,
28569 IX86_BUILTIN_VPSHLD,
28570 IX86_BUILTIN_VPSHLQ,
28571 IX86_BUILTIN_VPSHAB,
28572 IX86_BUILTIN_VPSHAW,
28573 IX86_BUILTIN_VPSHAD,
28574 IX86_BUILTIN_VPSHAQ,
28575
28576 IX86_BUILTIN_VFRCZSS,
28577 IX86_BUILTIN_VFRCZSD,
28578 IX86_BUILTIN_VFRCZPS,
28579 IX86_BUILTIN_VFRCZPD,
28580 IX86_BUILTIN_VFRCZPS256,
28581 IX86_BUILTIN_VFRCZPD256,
28582
28583 IX86_BUILTIN_VPCOMEQUB,
28584 IX86_BUILTIN_VPCOMNEUB,
28585 IX86_BUILTIN_VPCOMLTUB,
28586 IX86_BUILTIN_VPCOMLEUB,
28587 IX86_BUILTIN_VPCOMGTUB,
28588 IX86_BUILTIN_VPCOMGEUB,
28589 IX86_BUILTIN_VPCOMFALSEUB,
28590 IX86_BUILTIN_VPCOMTRUEUB,
28591
28592 IX86_BUILTIN_VPCOMEQUW,
28593 IX86_BUILTIN_VPCOMNEUW,
28594 IX86_BUILTIN_VPCOMLTUW,
28595 IX86_BUILTIN_VPCOMLEUW,
28596 IX86_BUILTIN_VPCOMGTUW,
28597 IX86_BUILTIN_VPCOMGEUW,
28598 IX86_BUILTIN_VPCOMFALSEUW,
28599 IX86_BUILTIN_VPCOMTRUEUW,
28600
28601 IX86_BUILTIN_VPCOMEQUD,
28602 IX86_BUILTIN_VPCOMNEUD,
28603 IX86_BUILTIN_VPCOMLTUD,
28604 IX86_BUILTIN_VPCOMLEUD,
28605 IX86_BUILTIN_VPCOMGTUD,
28606 IX86_BUILTIN_VPCOMGEUD,
28607 IX86_BUILTIN_VPCOMFALSEUD,
28608 IX86_BUILTIN_VPCOMTRUEUD,
28609
28610 IX86_BUILTIN_VPCOMEQUQ,
28611 IX86_BUILTIN_VPCOMNEUQ,
28612 IX86_BUILTIN_VPCOMLTUQ,
28613 IX86_BUILTIN_VPCOMLEUQ,
28614 IX86_BUILTIN_VPCOMGTUQ,
28615 IX86_BUILTIN_VPCOMGEUQ,
28616 IX86_BUILTIN_VPCOMFALSEUQ,
28617 IX86_BUILTIN_VPCOMTRUEUQ,
28618
28619 IX86_BUILTIN_VPCOMEQB,
28620 IX86_BUILTIN_VPCOMNEB,
28621 IX86_BUILTIN_VPCOMLTB,
28622 IX86_BUILTIN_VPCOMLEB,
28623 IX86_BUILTIN_VPCOMGTB,
28624 IX86_BUILTIN_VPCOMGEB,
28625 IX86_BUILTIN_VPCOMFALSEB,
28626 IX86_BUILTIN_VPCOMTRUEB,
28627
28628 IX86_BUILTIN_VPCOMEQW,
28629 IX86_BUILTIN_VPCOMNEW,
28630 IX86_BUILTIN_VPCOMLTW,
28631 IX86_BUILTIN_VPCOMLEW,
28632 IX86_BUILTIN_VPCOMGTW,
28633 IX86_BUILTIN_VPCOMGEW,
28634 IX86_BUILTIN_VPCOMFALSEW,
28635 IX86_BUILTIN_VPCOMTRUEW,
28636
28637 IX86_BUILTIN_VPCOMEQD,
28638 IX86_BUILTIN_VPCOMNED,
28639 IX86_BUILTIN_VPCOMLTD,
28640 IX86_BUILTIN_VPCOMLED,
28641 IX86_BUILTIN_VPCOMGTD,
28642 IX86_BUILTIN_VPCOMGED,
28643 IX86_BUILTIN_VPCOMFALSED,
28644 IX86_BUILTIN_VPCOMTRUED,
28645
28646 IX86_BUILTIN_VPCOMEQQ,
28647 IX86_BUILTIN_VPCOMNEQ,
28648 IX86_BUILTIN_VPCOMLTQ,
28649 IX86_BUILTIN_VPCOMLEQ,
28650 IX86_BUILTIN_VPCOMGTQ,
28651 IX86_BUILTIN_VPCOMGEQ,
28652 IX86_BUILTIN_VPCOMFALSEQ,
28653 IX86_BUILTIN_VPCOMTRUEQ,
28654
28655 /* LWP instructions. */
28656 IX86_BUILTIN_LLWPCB,
28657 IX86_BUILTIN_SLWPCB,
28658 IX86_BUILTIN_LWPVAL32,
28659 IX86_BUILTIN_LWPVAL64,
28660 IX86_BUILTIN_LWPINS32,
28661 IX86_BUILTIN_LWPINS64,
28662
28663 IX86_BUILTIN_CLZS,
28664
28665 /* RTM */
28666 IX86_BUILTIN_XBEGIN,
28667 IX86_BUILTIN_XEND,
28668 IX86_BUILTIN_XABORT,
28669 IX86_BUILTIN_XTEST,
28670
28671 /* BMI instructions. */
28672 IX86_BUILTIN_BEXTR32,
28673 IX86_BUILTIN_BEXTR64,
28674 IX86_BUILTIN_CTZS,
28675
28676 /* TBM instructions. */
28677 IX86_BUILTIN_BEXTRI32,
28678 IX86_BUILTIN_BEXTRI64,
28679
28680 /* BMI2 instructions. */
28681 IX86_BUILTIN_BZHI32,
28682 IX86_BUILTIN_BZHI64,
28683 IX86_BUILTIN_PDEP32,
28684 IX86_BUILTIN_PDEP64,
28685 IX86_BUILTIN_PEXT32,
28686 IX86_BUILTIN_PEXT64,
28687
28688 /* ADX instructions. */
28689 IX86_BUILTIN_ADDCARRYX32,
28690 IX86_BUILTIN_ADDCARRYX64,
28691
28692 /* FSGSBASE instructions. */
28693 IX86_BUILTIN_RDFSBASE32,
28694 IX86_BUILTIN_RDFSBASE64,
28695 IX86_BUILTIN_RDGSBASE32,
28696 IX86_BUILTIN_RDGSBASE64,
28697 IX86_BUILTIN_WRFSBASE32,
28698 IX86_BUILTIN_WRFSBASE64,
28699 IX86_BUILTIN_WRGSBASE32,
28700 IX86_BUILTIN_WRGSBASE64,
28701
28702 /* RDRND instructions. */
28703 IX86_BUILTIN_RDRAND16_STEP,
28704 IX86_BUILTIN_RDRAND32_STEP,
28705 IX86_BUILTIN_RDRAND64_STEP,
28706
28707 /* RDSEED instructions. */
28708 IX86_BUILTIN_RDSEED16_STEP,
28709 IX86_BUILTIN_RDSEED32_STEP,
28710 IX86_BUILTIN_RDSEED64_STEP,
28711
28712 /* F16C instructions. */
28713 IX86_BUILTIN_CVTPH2PS,
28714 IX86_BUILTIN_CVTPH2PS256,
28715 IX86_BUILTIN_CVTPS2PH,
28716 IX86_BUILTIN_CVTPS2PH256,
28717
28718 /* CFString built-in for darwin */
28719 IX86_BUILTIN_CFSTRING,
28720
28721 /* Builtins to get CPU type and supported features. */
28722 IX86_BUILTIN_CPU_INIT,
28723 IX86_BUILTIN_CPU_IS,
28724 IX86_BUILTIN_CPU_SUPPORTS,
28725
28726 /* Read/write FLAGS register built-ins. */
28727 IX86_BUILTIN_READ_FLAGS,
28728 IX86_BUILTIN_WRITE_FLAGS,
28729
28730 IX86_BUILTIN_MAX
28731 };
28732
28733 /* Table for the ix86 builtin decls. */
28734 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
28735
28736 /* Table of all of the builtin functions that are possible with different ISAs
28737 but are waiting to be built until a function is declared to use that
28738 ISA. */
28739 struct builtin_isa {
28740 const char *name; /* function name */
28741 enum ix86_builtin_func_type tcode; /* type to use in the declaration */
28742 HOST_WIDE_INT isa; /* isa_flags this builtin is defined for */
28743 bool const_p; /* true if the declaration is constant */
28744 bool set_and_not_built_p;
28745 };
28746
28747 static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
28748
28749
28750 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
28751 of isa_flags to use in the ix86_builtins_isa array. Stores the
28752 function decl in the ix86_builtins array. Returns the function decl or
28753 NULL_TREE if the builtin was not added.
28754
28755 If the front end has a special hook for builtin functions, delay adding
28756 builtin functions that aren't in the current ISA until the ISA is changed
28757 with function-specific optimization. Doing so can save about 300K for the
28758 default compiler. When the builtin is expanded, check at that time whether
28759 it is valid.
28760
28761 If the front end doesn't have a special hook, record all builtins, even if
28762 they aren't in the current ISA, in case the user uses
28763 function-specific options for a different ISA, so that we don't get scope
28764 errors if a builtin is added in the middle of a function scope. */
28765
28766 static inline tree
28767 def_builtin (HOST_WIDE_INT mask, const char *name,
28768 enum ix86_builtin_func_type tcode,
28769 enum ix86_builtins code)
28770 {
28771 tree decl = NULL_TREE;
28772
28773 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
28774 {
28775 ix86_builtins_isa[(int) code].isa = mask;
28776
28777 mask &= ~OPTION_MASK_ISA_64BIT;
28778 if (mask == 0
28779 || (mask & ix86_isa_flags) != 0
28780 || (lang_hooks.builtin_function
28781 == lang_hooks.builtin_function_ext_scope))
28782
28783 {
28784 tree type = ix86_get_builtin_func_type (tcode);
28785 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
28786 NULL, NULL_TREE);
28787 ix86_builtins[(int) code] = decl;
28788 ix86_builtins_isa[(int) code].set_and_not_built_p = false;
28789 }
28790 else
28791 {
28792 ix86_builtins[(int) code] = NULL_TREE;
28793 ix86_builtins_isa[(int) code].tcode = tcode;
28794 ix86_builtins_isa[(int) code].name = name;
28795 ix86_builtins_isa[(int) code].const_p = false;
28796 ix86_builtins_isa[(int) code].set_and_not_built_p = true;
28797 }
28798 }
28799
28800 return decl;
28801 }
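
/* Illustrative sketch only -- "__builtin_ia32_example" and IX86_BUILTIN_EXAMPLE
   below are hypothetical names, not entries in the real tables.  A call such as

     def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_example",
                  VOID_FTYPE_VOID, IX86_BUILTIN_EXAMPLE);

   either declares the builtin immediately (when SSE2 is already enabled, or
   when the front end uses the extended-scope builtin hook) or merely records
   the name, type code and ISA mask in ix86_builtins_isa so that
   ix86_add_new_builtins can declare it later, once the ISA becomes
   available.  */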
28802
28803 /* Like def_builtin, but also marks the function decl "const". */
28804
28805 static inline tree
28806 def_builtin_const (HOST_WIDE_INT mask, const char *name,
28807 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
28808 {
28809 tree decl = def_builtin (mask, name, tcode, code);
28810 if (decl)
28811 TREE_READONLY (decl) = 1;
28812 else
28813 ix86_builtins_isa[(int) code].const_p = true;
28814
28815 return decl;
28816 }
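
/* Usage note (not part of the original comments): builtins registered through
   def_builtin_const get TREE_READONLY set on the decl, the tree-level
   equivalent of __attribute__((const)), so calls to them can be CSEd and
   hoisted because they neither read global state nor have side effects.
   If the declaration is deferred, const_p is remembered above so the flag is
   applied when ix86_add_new_builtins finally builds the decl.  */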
28817
28818 /* Add any new builtin functions for a given ISA that may not have been
28819 declared. This saves a bit of space compared to adding all of the
28820 declarations to the tree whether or not they are used. */
28821
28822 static void
28823 ix86_add_new_builtins (HOST_WIDE_INT isa)
28824 {
28825 int i;
28826
28827 for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
28828 {
28829 if ((ix86_builtins_isa[i].isa & isa) != 0
28830 && ix86_builtins_isa[i].set_and_not_built_p)
28831 {
28832 tree decl, type;
28833
28834 /* Don't define the builtin again. */
28835 ix86_builtins_isa[i].set_and_not_built_p = false;
28836
28837 type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
28838 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
28839 type, i, BUILT_IN_MD, NULL,
28840 NULL_TREE);
28841
28842 ix86_builtins[i] = decl;
28843 if (ix86_builtins_isa[i].const_p)
28844 TREE_READONLY (decl) = 1;
28845 }
28846 }
28847 }
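
/* Sketch of the intended flow (an assumption based on the comments above,
   not a statement about every caller): when a function carries a target
   attribute or pragma that enables additional ISAs, e.g.

     __attribute__((target ("avx2")))
     void f (void) { ... }

   the option-switching code can call ix86_add_new_builtins with the newly
   enabled isa_flags so that any deferred builtins for those ISAs become
   visible before the function body is expanded.  */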
28848
28849 /* Bits for builtin_description.flag. */
28850
28851 /* Set when we don't support the comparison natively, and should
28852 swap the comparison operands in order to support it. */
28853 #define BUILTIN_DESC_SWAP_OPERANDS 1
28854
28855 struct builtin_description
28856 {
28857 const HOST_WIDE_INT mask;
28858 const enum insn_code icode;
28859 const char *const name;
28860 const enum ix86_builtins code;
28861 const enum rtx_code comparison;
28862 const int flag;
28863 };
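
/* Reading a row of the tables below (purely descriptive; the values are
   taken from the first bdesc_comi entry): mask = OPTION_MASK_ISA_SSE,
   icode = CODE_FOR_sse_comi, name = "__builtin_ia32_comieq",
   code = IX86_BUILTIN_COMIEQSS, comparison = UNEQ, flag = 0.
   Note that the tables overload FLAG: bdesc_comi uses it for
   BUILTIN_DESC_SWAP_OPERANDS, while bdesc_pcmpestr/bdesc_pcmpistr store a
   CC mode and bdesc_special_args/bdesc_args store a function-type code,
   each cast to int.  */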
28864
28865 static const struct builtin_description bdesc_comi[] =
28866 {
28867 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
28868 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
28869 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
28870 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
28871 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
28872 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
28873 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
28874 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
28875 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
28876 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
28877 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
28878 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
28879 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
28880 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
28881 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
28882 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
28883 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
28884 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
28885 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
28886 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
28887 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
28888 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
28889 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
28890 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
28891 };
28892
28893 static const struct builtin_description bdesc_pcmpestr[] =
28894 {
28895 /* SSE4.2 */
28896 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
28897 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
28898 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
28899 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
28900 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
28901 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
28902 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
28903 };
28904
28905 static const struct builtin_description bdesc_pcmpistr[] =
28906 {
28907 /* SSE4.2 */
28908 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
28909 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
28910 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
28911 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
28912 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
28913 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
28914 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
28915 };
28916
28917 /* Special builtins with a variable number of arguments. */
28918 static const struct builtin_description bdesc_special_args[] =
28919 {
28920 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
28921 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
28922 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_pause, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE, UNKNOWN, (int) VOID_FTYPE_VOID },
28923
28924 /* 80387 (for use internally for atomic compound assignment). */
28925 { 0, CODE_FOR_fnstenv, "__builtin_ia32_fnstenv", IX86_BUILTIN_FNSTENV, UNKNOWN, (int) VOID_FTYPE_PVOID },
28926 { 0, CODE_FOR_fldenv, "__builtin_ia32_fldenv", IX86_BUILTIN_FLDENV, UNKNOWN, (int) VOID_FTYPE_PCVOID },
28927 { 0, CODE_FOR_fnstsw, "__builtin_ia32_fnstsw", IX86_BUILTIN_FNSTSW, UNKNOWN, (int) VOID_FTYPE_PUSHORT },
28928 { 0, CODE_FOR_fnclex, "__builtin_ia32_fnclex", IX86_BUILTIN_FNCLEX, UNKNOWN, (int) VOID_FTYPE_VOID },
28929
28930 /* MMX */
28931 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
28932
28933 /* 3DNow! */
28934 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
28935
28936 /* FXSR, XSAVE and XSAVEOPT */
28937 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxsave", IX86_BUILTIN_FXSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID },
28938 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxrstor", IX86_BUILTIN_FXRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID },
28939 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xsave", IX86_BUILTIN_XSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
28940 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xrstor", IX86_BUILTIN_XRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
28941 { OPTION_MASK_ISA_XSAVEOPT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt", IX86_BUILTIN_XSAVEOPT, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
28942
28943 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxsave64", IX86_BUILTIN_FXSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID },
28944 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxrstor64", IX86_BUILTIN_FXRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID },
28945 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsave64", IX86_BUILTIN_XSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
28946 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstor64", IX86_BUILTIN_XRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
28947 { OPTION_MASK_ISA_XSAVEOPT | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt64", IX86_BUILTIN_XSAVEOPT64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
28948
28949 /* SSE */
28950 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storeups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
28951 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
28952 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
28953
28954 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
28955 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
28956 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
28957 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
28958
28959 /* SSE or 3DNow!A */
28960 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
28961 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntq, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },
28962
28963 /* SSE2 */
28964 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
28965 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
28966 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storeupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
28967 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storedquv16qi, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
28968 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
28969 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
28970 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntisi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
28971 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_movntidi, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64, UNKNOWN, (int) VOID_FTYPE_PLONGLONG_LONGLONG },
28972 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
28973 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loaddquv16qi, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
28974
28975 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
28976 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
28977
28978 /* SSE3 */
28979 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
28980
28981 /* SSE4.1 */
28982 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
28983
28984 /* SSE4A */
28985 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
28986 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
28987
28988 /* AVX */
28989 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
28990 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
28991
28992 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
28993 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
28994 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
28995 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
28996 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
28997
28998 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
28999 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
29000 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
29001 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
29002 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loaddquv32qi, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
29003 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storedquv32qi, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
29004 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
29005
29006 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
29007 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
29008 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
29009
29010 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
29011 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
29012 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
29013 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
29014 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
29015 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
29016 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
29017 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },
29018
29019 /* AVX2 */
29020 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_movntdqa, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256, UNKNOWN, (int) V4DI_FTYPE_PV4DI },
29021 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI },
29022 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI },
29023 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd256, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI },
29024 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq256, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI },
29025 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_V4SI },
29026 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_V2DI },
29027 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored256, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_V8SI },
29028 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq256, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_V4DI },
29029
29030 /* AVX512F */
29031 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16sf_mask, "__builtin_ia32_compressstoresf512_mask", IX86_BUILTIN_COMPRESSPSSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
29032 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16si_mask, "__builtin_ia32_compressstoresi512_mask", IX86_BUILTIN_PCOMPRESSDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
29033 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8df_mask, "__builtin_ia32_compressstoredf512_mask", IX86_BUILTIN_COMPRESSPDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
29034 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8di_mask, "__builtin_ia32_compressstoredi512_mask", IX86_BUILTIN_PCOMPRESSQSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
29035 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandloadsf512_mask", IX86_BUILTIN_EXPANDPSLOAD512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
29036 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandloadsf512_maskz", IX86_BUILTIN_EXPANDPSLOAD512Z, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
29037 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandloadsi512_mask", IX86_BUILTIN_PEXPANDDLOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
29038 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandloadsi512_maskz", IX86_BUILTIN_PEXPANDDLOAD512Z, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
29039 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expandloaddf512_mask", IX86_BUILTIN_EXPANDPDLOAD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
29040 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expandloaddf512_maskz", IX86_BUILTIN_EXPANDPDLOAD512Z, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
29041 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expandloaddi512_mask", IX86_BUILTIN_PEXPANDQLOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
29042 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expandloaddi512_maskz", IX86_BUILTIN_PEXPANDQLOAD512Z, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
29043 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv16si_mask, "__builtin_ia32_loaddqusi512_mask", IX86_BUILTIN_LOADDQUSI512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
29044 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv8di_mask, "__builtin_ia32_loaddqudi512_mask", IX86_BUILTIN_LOADDQUDI512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
29045 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadupd512_mask, "__builtin_ia32_loadupd512_mask", IX86_BUILTIN_LOADUPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
29046 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadups512_mask, "__builtin_ia32_loadups512_mask", IX86_BUILTIN_LOADUPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
29047 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_loadaps512_mask", IX86_BUILTIN_LOADAPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
29048 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32load512_mask", IX86_BUILTIN_MOVDQA32LOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
29049 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_loadapd512_mask", IX86_BUILTIN_LOADAPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
29050 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64load512_mask", IX86_BUILTIN_MOVDQA64LOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
29051 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv16sf, "__builtin_ia32_movntps512", IX86_BUILTIN_MOVNTPS512, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V16SF },
29052 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8df, "__builtin_ia32_movntpd512", IX86_BUILTIN_MOVNTPD512, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V8DF },
29053 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8di, "__builtin_ia32_movntdq512", IX86_BUILTIN_MOVNTDQ512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI },
29054 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntdqa, "__builtin_ia32_movntdqa512", IX86_BUILTIN_MOVNTDQA512, UNKNOWN, (int) V8DI_FTYPE_PV8DI },
29055 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv16si_mask, "__builtin_ia32_storedqusi512_mask", IX86_BUILTIN_STOREDQUSI512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
29056 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv8di_mask, "__builtin_ia32_storedqudi512_mask", IX86_BUILTIN_STOREDQUDI512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
29057 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeupd512_mask, "__builtin_ia32_storeupd512_mask", IX86_BUILTIN_STOREUPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
29058 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask_store, "__builtin_ia32_pmovusqd512mem_mask", IX86_BUILTIN_PMOVUSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
29059 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask_store, "__builtin_ia32_pmovsqd512mem_mask", IX86_BUILTIN_PMOVSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
29060 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask_store, "__builtin_ia32_pmovqd512mem_mask", IX86_BUILTIN_PMOVQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
29061 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovusqw512mem_mask", IX86_BUILTIN_PMOVUSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
29062 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovsqw512mem_mask", IX86_BUILTIN_PMOVSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
29063 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovqw512mem_mask", IX86_BUILTIN_PMOVQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
29064 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovusdw512mem_mask", IX86_BUILTIN_PMOVUSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
29065 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovsdw512mem_mask", IX86_BUILTIN_PMOVSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
29066 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovdw512mem_mask", IX86_BUILTIN_PMOVDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
29067 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovqb512mem_mask", IX86_BUILTIN_PMOVQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
29068 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovusqb512mem_mask", IX86_BUILTIN_PMOVUSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
29069 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovsqb512mem_mask", IX86_BUILTIN_PMOVSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
29070 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovusdb512mem_mask", IX86_BUILTIN_PMOVUSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
29071 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovsdb512mem_mask", IX86_BUILTIN_PMOVSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
29072 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovdb512mem_mask", IX86_BUILTIN_PMOVDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
29073 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeups512_mask, "__builtin_ia32_storeups512_mask", IX86_BUILTIN_STOREUPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
29074 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16sf_mask, "__builtin_ia32_storeaps512_mask", IX86_BUILTIN_STOREAPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
29075 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16si_mask, "__builtin_ia32_movdqa32store512_mask", IX86_BUILTIN_MOVDQA32STORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
29076 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8df_mask, "__builtin_ia32_storeapd512_mask", IX86_BUILTIN_STOREAPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
29077 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8di_mask, "__builtin_ia32_movdqa64store512_mask", IX86_BUILTIN_MOVDQA64STORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
29078
29079 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
29080 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
29081 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
29082 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
29083 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
29084 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },
29085
29086 /* FSGSBASE */
29087 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
29088 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
29089 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
29090 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
29091 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
29092 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
29093 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
29094 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
29095
29096 /* RTM */
29097 { OPTION_MASK_ISA_RTM, CODE_FOR_xbegin, "__builtin_ia32_xbegin", IX86_BUILTIN_XBEGIN, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
29098 { OPTION_MASK_ISA_RTM, CODE_FOR_xend, "__builtin_ia32_xend", IX86_BUILTIN_XEND, UNKNOWN, (int) VOID_FTYPE_VOID },
29099 { OPTION_MASK_ISA_RTM, CODE_FOR_xtest, "__builtin_ia32_xtest", IX86_BUILTIN_XTEST, UNKNOWN, (int) INT_FTYPE_VOID },
29100 };
29101
29102 /* Builtins with a variable number of arguments. */
29103 static const struct builtin_description bdesc_args[] =
29104 {
29105 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
29106 { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
29107 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
29108 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
29109 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
29110 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
29111 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
29112
29113 /* MMX */
29114 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29115 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29116 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29117 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29118 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29119 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29120
29121 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29122 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29123 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29124 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29125 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29126 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29127 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29128 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29129
29130 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29131 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29132
29133 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29134 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29135 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29136 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29137
29138 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29139 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29140 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29141 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29142 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29143 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29144
29145 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29146 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29147 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29148 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29149 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29150 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29151
29152 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
29153 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
29154 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
29155
29156 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
29157
29158 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
29159 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
29160 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
29161 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
29162 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
29163 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
29164
29165 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
29166 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
29167 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
29168 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
29169 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
29170 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
29171
29172 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
29173 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
29174 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
29175 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
29176
29177 /* 3DNow! */
29178 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
29179 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
29180 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
29181 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
29182
29183 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29184 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29185 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29186 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
29187 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
29188 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
29189 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29190 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29191 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29192 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29193 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29194 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29195 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29196 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29197 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29198
29199 /* 3DNow!A */
29200 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
29201 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
29202 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
29203 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
29204 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29205 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29206
29207 /* SSE */
29208 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
29209 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29210 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29211 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29212 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29213 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29214 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
29215 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
29216 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
29217 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
29218 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
29219 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
29220
29221 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29222
29223 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29224 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29225 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29226 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29227 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29228 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29229 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29230 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29231
29232 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
29233 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
29234 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
29235 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
29236 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
29237 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
29238 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
29239 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
29240 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
29241 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
29242 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP},
29243 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
29244 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
29245 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
29246 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
29247 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
29248 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
29249 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
29250 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
29251 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
29252
29253 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29254 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29255 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29256 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29257
29258 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29259 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29260 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29261 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29262
29263 { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29264
29265 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29266 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29267 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29268 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29269 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29270
29271 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
29272 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
29273 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },
29274
29275 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
29276
29277 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
29278 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
29279 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
29280
29281 { OPTION_MASK_ISA_SSE, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
29282 { OPTION_MASK_ISA_SSE, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
29283
29284 /* SSE MMX or 3DNow!A */
29285 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29286 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29287 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29288
29289 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29290 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29291 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29292 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29293
29294 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
29295 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
29296
29297 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
29298
29299 /* SSE2 */
29300 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29301
29302 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
29303 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
29304 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
29305 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
29306 { OPTION_MASK_ISA_SSE2, CODE_FOR_floatv4siv4sf2, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
29307
29308 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
29309 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
29310 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
29311 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
29312 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
29313
29314 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
29315
29316 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
29317 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
29318 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
29319 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
29320
29321 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_fix_notruncv4sfv4si, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
29322 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
29323 { OPTION_MASK_ISA_SSE2, CODE_FOR_fix_truncv4sfv4si2, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
29324
29325 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29326 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29327 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29328 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29329 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29330 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29331 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29332 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29333
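/* The compare rows below all reuse one mask-compare pattern per vector
   width; the fifth field holds the rtx code that specializes it, and the
   *_SWAP signatures synthesize GT/GE by swapping operands of the LT/LE
   forms.  A hedged usage sketch, assuming the standard <emmintrin.h>
   wrappers:

	__m128d lt_mask (__m128d a, __m128d b)
	{
	  return _mm_cmplt_pd (a, b);	// __builtin_ia32_cmpltpd, LT
	}
*/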
29334 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
29335 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
29336 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
29337 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
29338 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
29339 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
29340 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
29341 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
29342 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
29343 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
29344 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
29345 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
29346 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
29347 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
29348 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
29349 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
29350 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
29351 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
29352 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
29353 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
29354
29355 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29356 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29357 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29358 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29359
29360 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29361 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29362 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29363 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29364
29365 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29366
29367 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29368 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29369 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29370
29371 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
29372
29373 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29374 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29375 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29376 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29377 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29378 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29379 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29380 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29381
29382 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29383 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29384 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29385 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29386 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29387 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29388 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29389 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29390
29391 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29392 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29393
29394 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29395 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29396 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29397 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29398
29399 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29400 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29401
29402 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29403 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29404 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29405 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29406 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29407 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29408
29409 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29410 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29411 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29412 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29413
29414 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29415 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29416 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29417 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29418 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29419 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29420 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29421 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29422
29423 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
29424 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
29425 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
29426
29427 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29428 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
29429
29430 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
29431 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_widen_umult_even_v4si, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
29432
29433 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
29434
29435 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
29436 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
29437 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
29438 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
29439
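/* Three flavors of shift builtins follow: the *_INT_CONVERT rows are the
   whole-register byte shifts (pslldq/psrldq), the *_SI_COUNT rows take
   the shift count as a scalar integer, and the *_V*_COUNT rows take it
   in the low quadword of a vector operand; the suffixes tell the
   expander how to massage the count.  Sketch, assuming <emmintrin.h>:

	__m128i shl_imm (__m128i x)		// __builtin_ia32_pslldi128
	{
	  return _mm_slli_epi32 (x, 3);
	}
	__m128i shl_vec (__m128i x, __m128i n)	// __builtin_ia32_pslld128
	{
	  return _mm_sll_epi32 (x, n);
	}
*/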
29440 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
29441 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
29442 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
29443 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
29444 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
29445 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
29446 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
29447
29448 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
29449 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
29450 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
29451 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
29452 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
29453 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
29454 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
29455
29456 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
29457 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
29458 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
29459 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
29460
29461 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
29462 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
29463 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
29464
29465 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
29466
29467 { OPTION_MASK_ISA_SSE, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
29468
29469 /* SSE2 MMX */
29470 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
29471 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
29472
29473 /* SSE3 */
29474 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29475 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29476
29477 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29478 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29479 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29480 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29481 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29482 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29483
29484 /* SSSE3 */
29485 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
29486 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
29487 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
29488 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
29489 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
29490 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
29491
29492 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29493 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29494 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29495 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29496 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29497 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29498 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29499 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29500 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29501 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29502 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29503 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29504 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
29505 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
29506 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29507 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29508 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29509 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29510 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29511 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29512 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29513 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29514 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29515 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29516
29517 /* SSSE3. */
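/* The palignr rows take their byte count through the *_INT_CONVERT
   signatures: the builtins are declared on V2DI/V1DI operands while the
   underlying patterns work on TImode/DImode, so the expander presumably
   converts the modes and checks the trailing immediate.  Sketch,
   assuming the <tmmintrin.h> wrapper (which scales the count to bits):

	__m128i align4 (__m128i a, __m128i b)
	{
	  return _mm_alignr_epi8 (a, b, 4);	// __builtin_ia32_palignr128
	}
*/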
29518 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
29519 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
29520
29521 /* SSE4.1 */
29522 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29523 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29524 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
29525 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
29526 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29527 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29528 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29529 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
29530 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
29531 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
29532
29533 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
29534 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
29535 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
29536 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
29537 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
29538 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
29539 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
29540 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
29541 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
29542 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
29543 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
29544 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
29545 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
29546
29547 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
29548 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29549 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29550 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29551 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29552 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29553 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29554 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29555 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29556 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29557 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
29558 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29559
29560 /* SSE4.1 */
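/* This second SSE4.1 group covers the roundpd/roundps patterns.  The
   floor/ceil/trunc/rint rows reuse the same insn codes with a fixed
   rounding immediate (ROUND_FLOOR etc.) carried in the comparison-code
   slot; they presumably back the vectorized expansion of floor(),
   ceil(), trunc() and rint() rather than any <smmintrin.h> intrinsic.
   For example, a loop such as

	void floor_all (double *a, int n)
	{
	  for (int i = 0; i < n; i++)
	    a[i] = __builtin_floor (a[i]);	// can vectorize to roundpd
	}

   can be expanded through IX86_BUILTIN_FLOORPD.  */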
29561 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
29562 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
29563 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29564 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29565
29566 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
29567 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
29568 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
29569 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },
29570
29571 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_floorpd_vec_pack_sfix", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
29572 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_ceilpd_vec_pack_sfix", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
29573
29574 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ, UNKNOWN, (int) V2DF_FTYPE_V2DF },
29575 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
29576
29577 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
29578 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
29579 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
29580 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },
29581
29582 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_floorps_sfix", IX86_BUILTIN_FLOORPS_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V4SF_ROUND },
29583 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_ceilps_sfix", IX86_BUILTIN_CEILPS_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V4SF_ROUND },
29584
29585 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29586 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2_sfix, "__builtin_ia32_roundps_az_sfix", IX86_BUILTIN_ROUNDPS_AZ_SFIX, UNKNOWN, (int) V4SI_FTYPE_V4SF },
29587
29588 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
29589 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
29590 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
29591
29592 /* SSE4.2 */
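/* The crc32 builtins accumulate a CRC-32C value one element at a time;
   their signatures (UINT_FTYPE_UINT_UCHAR and friends) come straight
   from the rows below.  One common usage pattern, assuming -msse4.2 (the
   <smmintrin.h> name for the byte step is _mm_crc32_u8):

	unsigned crc32c_bytes (const unsigned char *p, int n)
	{
	  unsigned c = ~0u;
	  for (int i = 0; i < n; i++)
	    c = __builtin_ia32_crc32qi (c, p[i]);
	  return ~c;
	}
*/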
29593 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29594 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
29595 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
29596 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
29597 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
29598
29599 /* SSE4A */
29600 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
29601 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
29602 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
29603 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29604
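/* The AES and PCLMUL rows below deliberately leave the name field 0;
   presumably the user-visible __builtin_ia32_aes* and
   __builtin_ia32_pclmulqdq128 names are registered elsewhere in this
   file, and these rows only drive expansion of the already-defined
   builtin codes.  */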
29605 /* AES */
29606 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
29607 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
29608
29609 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29610 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29611 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29612 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29613
29614 /* PCLMUL */
29615 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
29616
29617 /* AVX */
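/* The AVX rows mirror the 128-bit floating-point arithmetic and logic
   set at 256-bit width.  A minimal sketch of the usual <immintrin.h>
   surface:

	#include <immintrin.h>
	__m256d vadd (__m256d a, __m256d b)
	{
	  return _mm256_add_pd (a, b);	// __builtin_ia32_addpd256
	}
*/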
29618 { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29619 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29620 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29621 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29622 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29623 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29624 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29625 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29626 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29627 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29628 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29629 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29630 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29631 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29632 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29633 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29634 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29635 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29636 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29637 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29638 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29639 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29640 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29641 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29642 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29643 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29644
29645 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
29646 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
29647 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
29648 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
29649
29650 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
29651 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29652 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
29653 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
29654 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29655 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
29656 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29657 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29658 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29659 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29660 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29661 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
29662 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29663 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
29664 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
29665 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
29666 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv4siv4df2, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
29667 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv8siv8sf2, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
29668 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
29669 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_fix_notruncv8sfv8si, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
29670 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
29671 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv4dfv4si2, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
29672 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
29673 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv8sfv8si2, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
29674 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
29675 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29676 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
29677 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
29678 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
29679 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
29680 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
29681 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
29682 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
29683 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
29684
29685 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29686 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29687 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
29688
29689 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
29690 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29691 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29692 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29693 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29694
29695 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29696
29697 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
29698 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
29699
29700 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND },
29701 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND },
29702 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
29703 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },
29704
29705 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
29706 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix256", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
29707
29708 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_floorpd_vec_pack_sfix256", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
29709 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_ceilpd_vec_pack_sfix256", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
29710
29711 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
29712 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
29713 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
29714 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },
29715
29716 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_floorps_sfix256", IX86_BUILTIN_FLOORPS_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V8SF_ROUND },
29717 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_ceilps_sfix256", IX86_BUILTIN_CEILPS_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V8SF_ROUND },
29718
29719 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29720 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2_sfix, "__builtin_ia32_roundps_az_sfix256", IX86_BUILTIN_ROUNDPS_AZ_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
29721
29722 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29723 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29724 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29725 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29726
29727 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
29728 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
29729 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
29730 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
29731 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
29732 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
29733
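/* For the vtestp[sd]/ptest rows the comparison-code slot encodes which
   EFLAGS bit the builtin reads: EQ for ZF (testz), LTU for CF (testc)
   and GTU for neither set (testnzc).  Sketch, assuming <immintrin.h>:

	int all_sign_bits_clear (__m256d m)
	{
	  return _mm256_testz_pd (m, m);	// __builtin_ia32_vtestzpd256
	}
*/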
29734 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
29735 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
29736 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
29737 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
29738 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
29739 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
29740 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
29741 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
29742 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
29743 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
29744 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
29745 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
29746 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
29747 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
29748 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
29749
29750 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
29751 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
29752
29753 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29754 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29755
29756 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_pack_sfix_v4df, "__builtin_ia32_vec_pack_sfix256", IX86_BUILTIN_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
29757
29758 /* AVX2 */
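/* The AVX2 rows extend the SSE2/SSSE3/SSE4.1 integer builtins to 256
   bits.  A hedged sketch of the typical <immintrin.h> mapping:

	__m256i vadd32 (__m256i a, __m256i b)
	{
	  return _mm256_add_epi32 (a, b);	// __builtin_ia32_paddd256
	}
*/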
29759 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mpsadbw, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT },
29760 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv32qi2, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI },
29761 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv16hi2, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI },
29762 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv8si2, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI },
29763 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packssdw, "__builtin_ia32_packssdw256", IX86_BUILTIN_PACKSSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
29764 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packsswb, "__builtin_ia32_packsswb256", IX86_BUILTIN_PACKSSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
29765 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packusdw, "__builtin_ia32_packusdw256", IX86_BUILTIN_PACKUSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
29766 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packuswb, "__builtin_ia32_packuswb256", IX86_BUILTIN_PACKUSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
29767 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv32qi3, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29768 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv16hi3, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29769 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv8si3, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29770 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv4di3, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29771 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv32qi3, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29772 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv16hi3, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29773 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv32qi3, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29774 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv16hi3, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29775 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_palignrv2ti, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT },
29776 { OPTION_MASK_ISA_AVX2, CODE_FOR_andv4di3, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29777 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_andnotv4di3, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29778 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv32qi3, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29779 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv16hi3, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29780 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendvb, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI },
29781 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendw, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT },
29782 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv32qi3, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29783 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv16hi3, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29784 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv8si3, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29785 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv4di3, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29786 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv32qi3, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29787 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv16hi3, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29788 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv8si3, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29789 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv4di3, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29790 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddwv16hi3, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29791 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phadddv8si3, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29792 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddswv16hi3, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29793 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubwv16hi3, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29794 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubdv8si3, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29795 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubswv16hi3, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29796 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddubsw256, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
29797 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddwd, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI },
29798 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv32qi3, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29799 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv16hi3, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29800 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv8si3, "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29801 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv32qi3, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29802 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv16hi3, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29803 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv8si3, "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29804 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv32qi3, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29805 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv16hi3, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29806 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv8si3, "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29807 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv32qi3, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29808 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv16hi3, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29809 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv8si3, "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29810 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmovmskb, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256, UNKNOWN, (int) INT_FTYPE_V32QI },
29811 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv16qiv16hi2, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
29812 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8qiv8si2, "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
29813 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4qiv4di2, "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
29814 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8hiv8si2, "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
29815 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4hiv4di2, "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
29816 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4siv4di2, "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
29817 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv16qiv16hi2, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
29818 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8qiv8si2, "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
29819 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4qiv4di2, "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
29820 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8hiv8si2, "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
29821 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4hiv4di2, "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
29822 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4siv4di2, "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
29823 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_smult_even_v8si, "__builtin_ia32_pmuldq256", IX86_BUILTIN_PMULDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
29824 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmulhrswv16hi3, "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29825 { OPTION_MASK_ISA_AVX2, CODE_FOR_umulv16hi3_highpart, "__builtin_ia32_pmulhuw256", IX86_BUILTIN_PMULHUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29826 { OPTION_MASK_ISA_AVX2, CODE_FOR_smulv16hi3_highpart, "__builtin_ia32_pmulhw256", IX86_BUILTIN_PMULHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29827 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv16hi3, "__builtin_ia32_pmullw256", IX86_BUILTIN_PMULLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29828 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv8si3, "__builtin_ia32_pmulld256", IX86_BUILTIN_PMULLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29829 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_umult_even_v8si, "__builtin_ia32_pmuludq256", IX86_BUILTIN_PMULUDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
29830 { OPTION_MASK_ISA_AVX2, CODE_FOR_iorv4di3, "__builtin_ia32_por256", IX86_BUILTIN_POR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29831 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psadbw, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
29832 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufbv32qi3, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29833 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufdv3, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT },
29834 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufhwv3, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
29835 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshuflwv3, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
29836 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv32qi3, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29837 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv16hi3, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29838 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv8si3, "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29839 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlv2ti3, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
29840 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
29841 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
29842 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
29843 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
29844 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
29845 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
29846 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
29847 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
29848 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
29849 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
29850 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrv2ti3, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
29851 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
29852 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
29853 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
29854 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
29855 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
29856 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
29857 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv32qi3, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29858 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv16hi3, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29859 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv8si3, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29860 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv4di3, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29861 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv32qi3, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29862 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv16hi3, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29863 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv32qi3, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29864 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv16hi3, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29865 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv32qi, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29866 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv16hi, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29867 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv8si, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29868 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv4di, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29869 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv32qi, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29870 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv16hi, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29871 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv8si, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29872 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv4di, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29873 { OPTION_MASK_ISA_AVX2, CODE_FOR_xorv4di3, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29874 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4sf, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29875 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv8sf, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256, UNKNOWN, (int) V8SF_FTYPE_V4SF },
29876 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4df, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256, UNKNOWN, (int) V4DF_FTYPE_V2DF },
29877 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vbroadcasti128_v4di, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
29878 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv4si, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
29879 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv8si, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
29880 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv32qi, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256, UNKNOWN, (int) V32QI_FTYPE_V16QI },
29881 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16hi, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256, UNKNOWN, (int) V16HI_FTYPE_V8HI },
29882 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8si, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256, UNKNOWN, (int) V8SI_FTYPE_V4SI },
29883 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4di, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
29884 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16qi, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
29885 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8hi, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
29886 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4si, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
29887 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv2di, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
29888 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8si, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29889 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8sf, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
29890 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4df, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
29891 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4di, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
29892 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv2ti, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT },
29893 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_extracti128, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT },
29894 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_inserti128, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT },
29895 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4di, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29896 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv2di, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29897 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv8si, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29898 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4si, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29899 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv8si, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29900 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv4si, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29901 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4di, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29902 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv2di, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29903 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv8si, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29904 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4si, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29905
29906 { OPTION_MASK_ISA_LZCNT, CODE_FOR_clzhi2_lzcnt, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
29907
29908 /* BMI */
29909 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
29910 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
29911 { OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
29912
29913 /* TBM */
29914 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
29915 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
29916
29917 /* F16C */
29918 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
29919 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
29920 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
29921 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
29922
29923 /* BMI2 */
29924 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_si3, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
29925 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_di3, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
29926 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_si3, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
29927 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_di3, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
29928 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_si3, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
29929 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_di3, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
29930
29931 /* AVX512F */
29932 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv16si_mask, "__builtin_ia32_alignd512_mask", IX86_BUILTIN_ALIGND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
29933 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv8di_mask, "__builtin_ia32_alignq512_mask", IX86_BUILTIN_ALIGNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
29934 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16si, "__builtin_ia32_blendmd_512_mask", IX86_BUILTIN_BLENDMD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
29935 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8df, "__builtin_ia32_blendmpd_512_mask", IX86_BUILTIN_BLENDMPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
29936 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16sf, "__builtin_ia32_blendmps_512_mask", IX86_BUILTIN_BLENDMPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
29937 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8di, "__builtin_ia32_blendmq_512_mask", IX86_BUILTIN_BLENDMQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
29938 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x4_512", IX86_BUILTIN_BROADCASTF32X4_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
29939 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8df_mask, "__builtin_ia32_broadcastf64x4_512", IX86_BUILTIN_BROADCASTF64X4_512, UNKNOWN, (int) V8DF_FTYPE_V4DF_V8DF_QI },
29940 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16si_mask, "__builtin_ia32_broadcasti32x4_512", IX86_BUILTIN_BROADCASTI32X4_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
29941 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8di_mask, "__builtin_ia32_broadcasti64x4_512", IX86_BUILTIN_BROADCASTI64X4_512, UNKNOWN, (int) V8DI_FTYPE_V4DI_V8DI_QI },
29942 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8df_mask, "__builtin_ia32_broadcastsd512", IX86_BUILTIN_BROADCASTSD512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_QI },
29943 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16sf_mask, "__builtin_ia32_broadcastss512", IX86_BUILTIN_BROADCASTSS512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
29944 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16si3_mask, "__builtin_ia32_cmpd512_mask", IX86_BUILTIN_CMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
29945 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8di3_mask, "__builtin_ia32_cmpq512_mask", IX86_BUILTIN_CMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
29946 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8df_mask, "__builtin_ia32_compressdf512_mask", IX86_BUILTIN_COMPRESSPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
29947 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16sf_mask, "__builtin_ia32_compresssf512_mask", IX86_BUILTIN_COMPRESSPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
29948 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv8siv8df2_mask, "__builtin_ia32_cvtdq2pd512_mask", IX86_BUILTIN_CVTDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
29949 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtps2ph512_mask, "__builtin_ia32_vcvtps2ph512_mask", IX86_BUILTIN_CVTPS2PH512, UNKNOWN, (int) V16HI_FTYPE_V16SF_INT_V16HI_HI },
29950 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv8siv8df_mask, "__builtin_ia32_cvtudq2pd512_mask", IX86_BUILTIN_CVTUDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
29951 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2sd32, "__builtin_ia32_cvtusi2sd32", IX86_BUILTIN_CVTUSI2SD32, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT },
29952 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expanddf512_mask", IX86_BUILTIN_EXPANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
29953 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expanddf512_maskz", IX86_BUILTIN_EXPANDPD512Z, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
29954 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandsf512_mask", IX86_BUILTIN_EXPANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
29955 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandsf512_maskz", IX86_BUILTIN_EXPANDPS512Z, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
29956 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf32x4_mask, "__builtin_ia32_extractf32x4_mask", IX86_BUILTIN_EXTRACTF32X4, UNKNOWN, (int) V4SF_FTYPE_V16SF_INT_V4SF_QI },
29957 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf64x4_mask, "__builtin_ia32_extractf64x4_mask", IX86_BUILTIN_EXTRACTF64X4, UNKNOWN, (int) V4DF_FTYPE_V8DF_INT_V4DF_QI },
29958 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti32x4_mask, "__builtin_ia32_extracti32x4_mask", IX86_BUILTIN_EXTRACTI32X4, UNKNOWN, (int) V4SI_FTYPE_V16SI_INT_V4SI_QI },
29959 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti64x4_mask, "__builtin_ia32_extracti64x4_mask", IX86_BUILTIN_EXTRACTI64X4, UNKNOWN, (int) V4DI_FTYPE_V8DI_INT_V4DI_QI },
29960 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf32x4_mask, "__builtin_ia32_insertf32x4_mask", IX86_BUILTIN_INSERTF32X4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI },
29961 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf64x4_mask, "__builtin_ia32_insertf64x4_mask", IX86_BUILTIN_INSERTF64X4, UNKNOWN, (int) V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI },
29962 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti32x4_mask, "__builtin_ia32_inserti32x4_mask", IX86_BUILTIN_INSERTI32X4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI },
29963 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti64x4_mask, "__builtin_ia32_inserti64x4_mask", IX86_BUILTIN_INSERTI64X4, UNKNOWN, (int) V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI },
29964 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_movapd512_mask", IX86_BUILTIN_MOVAPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
29965 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_movaps512_mask", IX86_BUILTIN_MOVAPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
29966 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movddup512_mask, "__builtin_ia32_movddup512_mask", IX86_BUILTIN_MOVDDUP512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
29967 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32_512_mask", IX86_BUILTIN_MOVDQA32_512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
29968 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64_512_mask", IX86_BUILTIN_MOVDQA64_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
29969 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movshdup512_mask, "__builtin_ia32_movshdup512_mask", IX86_BUILTIN_MOVSHDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
29970 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movsldup512_mask, "__builtin_ia32_movsldup512_mask", IX86_BUILTIN_MOVSLDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
29971 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv16si2_mask, "__builtin_ia32_pabsd512_mask", IX86_BUILTIN_PABSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
29972 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv8di2_mask, "__builtin_ia32_pabsq512_mask", IX86_BUILTIN_PABSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
29973 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16si3_mask, "__builtin_ia32_paddd512_mask", IX86_BUILTIN_PADDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
29974 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8di3_mask, "__builtin_ia32_paddq512_mask", IX86_BUILTIN_PADDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
29975 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv16si3_mask, "__builtin_ia32_pandd512_mask", IX86_BUILTIN_PANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
29976 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv16si3_mask, "__builtin_ia32_pandnd512_mask", IX86_BUILTIN_PANDND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
29977 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv8di3_mask, "__builtin_ia32_pandnq512_mask", IX86_BUILTIN_PANDNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
29978 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv8di3_mask, "__builtin_ia32_pandq512_mask", IX86_BUILTIN_PANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
29979 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16si_mask, "__builtin_ia32_pbroadcastd512", IX86_BUILTIN_PBROADCASTD512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
29980 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv16si_mask, "__builtin_ia32_pbroadcastd512_gpr_mask", IX86_BUILTIN_PBROADCASTD512_GPR, UNKNOWN, (int) V16SI_FTYPE_SI_V16SI_HI },
29981 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv8di, "__builtin_ia32_broadcastmb512", IX86_BUILTIN_PBROADCASTMB512, UNKNOWN, (int) V8DI_FTYPE_QI },
29982 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv16si, "__builtin_ia32_broadcastmw512", IX86_BUILTIN_PBROADCASTMW512, UNKNOWN, (int) V16SI_FTYPE_HI },
29983 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8di_mask, "__builtin_ia32_pbroadcastq512", IX86_BUILTIN_PBROADCASTQ512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_QI },
29984 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vec_dup_gprv8di_mask, "__builtin_ia32_pbroadcastq512_gpr_mask", IX86_BUILTIN_PBROADCASTQ512_GPR, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI },
29985 { OPTION_MASK_ISA_AVX512F & ~OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vec_dup_memv8di_mask, "__builtin_ia32_pbroadcastq512_mem_mask", IX86_BUILTIN_PBROADCASTQ512_MEM, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI },
29986 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv16si3_mask, "__builtin_ia32_pcmpeqd512_mask", IX86_BUILTIN_PCMPEQD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
29987 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv8di3_mask, "__builtin_ia32_pcmpeqq512_mask", IX86_BUILTIN_PCMPEQQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
29988 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv16si3_mask, "__builtin_ia32_pcmpgtd512_mask", IX86_BUILTIN_PCMPGTD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
29989 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv8di3_mask, "__builtin_ia32_pcmpgtq512_mask", IX86_BUILTIN_PCMPGTQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
29990 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16si_mask, "__builtin_ia32_compresssi512_mask", IX86_BUILTIN_PCOMPRESSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
29991 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8di_mask, "__builtin_ia32_compressdi512_mask", IX86_BUILTIN_PCOMPRESSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
29992 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandsi512_mask", IX86_BUILTIN_PEXPANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
29993 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandsi512_maskz", IX86_BUILTIN_PEXPANDD512Z, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
29994 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expanddi512_mask", IX86_BUILTIN_PEXPANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
29995 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expanddi512_maskz", IX86_BUILTIN_PEXPANDQ512Z, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
29996 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16si3_mask, "__builtin_ia32_pmaxsd512_mask", IX86_BUILTIN_PMAXSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
29997 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8di3_mask, "__builtin_ia32_pmaxsq512_mask", IX86_BUILTIN_PMAXSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
29998 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv16si3_mask, "__builtin_ia32_pmaxud512_mask", IX86_BUILTIN_PMAXUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
29999 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv8di3_mask, "__builtin_ia32_pmaxuq512_mask", IX86_BUILTIN_PMAXUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30000 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16si3_mask, "__builtin_ia32_pminsd512_mask", IX86_BUILTIN_PMINSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30001 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8di3_mask, "__builtin_ia32_pminsq512_mask", IX86_BUILTIN_PMINSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30002 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv16si3_mask, "__builtin_ia32_pminud512_mask", IX86_BUILTIN_PMINUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30003 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv8di3_mask, "__builtin_ia32_pminuq512_mask", IX86_BUILTIN_PMINUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30004 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask, "__builtin_ia32_pmovdb512_mask", IX86_BUILTIN_PMOVDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
30005 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask, "__builtin_ia32_pmovdw512_mask", IX86_BUILTIN_PMOVDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
30006 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask, "__builtin_ia32_pmovqb512_mask", IX86_BUILTIN_PMOVQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
30007 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask, "__builtin_ia32_pmovqd512_mask", IX86_BUILTIN_PMOVQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
30008 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask, "__builtin_ia32_pmovqw512_mask", IX86_BUILTIN_PMOVQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
30009 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask, "__builtin_ia32_pmovsdb512_mask", IX86_BUILTIN_PMOVSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
30010 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask, "__builtin_ia32_pmovsdw512_mask", IX86_BUILTIN_PMOVSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
30011 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask, "__builtin_ia32_pmovsqb512_mask", IX86_BUILTIN_PMOVSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
30012 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask, "__builtin_ia32_pmovsqd512_mask", IX86_BUILTIN_PMOVSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
30013 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask, "__builtin_ia32_pmovsqw512_mask", IX86_BUILTIN_PMOVSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
30014 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16qiv16si2_mask, "__builtin_ia32_pmovsxbd512_mask", IX86_BUILTIN_PMOVSXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
30015 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8qiv8di2_mask, "__builtin_ia32_pmovsxbq512_mask", IX86_BUILTIN_PMOVSXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
30016 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8siv8di2_mask, "__builtin_ia32_pmovsxdq512_mask", IX86_BUILTIN_PMOVSXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
30017 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16hiv16si2_mask, "__builtin_ia32_pmovsxwd512_mask", IX86_BUILTIN_PMOVSXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
30018 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8hiv8di2_mask, "__builtin_ia32_pmovsxwq512_mask", IX86_BUILTIN_PMOVSXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
30019 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask, "__builtin_ia32_pmovusdb512_mask", IX86_BUILTIN_PMOVUSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
30020 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask, "__builtin_ia32_pmovusdw512_mask", IX86_BUILTIN_PMOVUSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
30021 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask, "__builtin_ia32_pmovusqb512_mask", IX86_BUILTIN_PMOVUSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
30022 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask, "__builtin_ia32_pmovusqd512_mask", IX86_BUILTIN_PMOVUSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
30023 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask, "__builtin_ia32_pmovusqw512_mask", IX86_BUILTIN_PMOVUSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
30024 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16qiv16si2_mask, "__builtin_ia32_pmovzxbd512_mask", IX86_BUILTIN_PMOVZXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
30025 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8qiv8di2_mask, "__builtin_ia32_pmovzxbq512_mask", IX86_BUILTIN_PMOVZXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
30026 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8siv8di2_mask, "__builtin_ia32_pmovzxdq512_mask", IX86_BUILTIN_PMOVZXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
30027 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16hiv16si2_mask, "__builtin_ia32_pmovzxwd512_mask", IX86_BUILTIN_PMOVZXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
30028 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8hiv8di2_mask, "__builtin_ia32_pmovzxwq512_mask", IX86_BUILTIN_PMOVZXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
30029 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_smult_even_v16si_mask, "__builtin_ia32_pmuldq512_mask", IX86_BUILTIN_PMULDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
30030 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16si3_mask, "__builtin_ia32_pmulld512_mask", IX86_BUILTIN_PMULLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30031 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_umult_even_v16si_mask, "__builtin_ia32_pmuludq512_mask", IX86_BUILTIN_PMULUDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
30032 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv16si3_mask, "__builtin_ia32_pord512_mask", IX86_BUILTIN_PORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30033 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv8di3_mask, "__builtin_ia32_porq512_mask", IX86_BUILTIN_PORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30034 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv16si_mask, "__builtin_ia32_prold512_mask", IX86_BUILTIN_PROLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30035 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv8di_mask, "__builtin_ia32_prolq512_mask", IX86_BUILTIN_PROLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30036 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv16si_mask, "__builtin_ia32_prolvd512_mask", IX86_BUILTIN_PROLVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30037 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv8di_mask, "__builtin_ia32_prolvq512_mask", IX86_BUILTIN_PROLVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30038 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv16si_mask, "__builtin_ia32_prord512_mask", IX86_BUILTIN_PRORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30039 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv8di_mask, "__builtin_ia32_prorq512_mask", IX86_BUILTIN_PRORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30040 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv16si_mask, "__builtin_ia32_prorvd512_mask", IX86_BUILTIN_PRORVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30041 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv8di_mask, "__builtin_ia32_prorvq512_mask", IX86_BUILTIN_PRORVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30042 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pshufdv3_mask, "__builtin_ia32_pshufd512_mask", IX86_BUILTIN_PSHUFD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30043 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslld512_mask", IX86_BUILTIN_PSLLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
30044 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslldi512_mask", IX86_BUILTIN_PSLLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30045 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllq512_mask", IX86_BUILTIN_PSLLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
30046 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllqi512_mask", IX86_BUILTIN_PSLLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30047 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv16si_mask, "__builtin_ia32_psllv16si_mask", IX86_BUILTIN_PSLLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30048 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv8di_mask, "__builtin_ia32_psllv8di_mask", IX86_BUILTIN_PSLLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30049 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psrad512_mask", IX86_BUILTIN_PSRAD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
30050 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psradi512_mask", IX86_BUILTIN_PSRADI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30051 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraq512_mask", IX86_BUILTIN_PSRAQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
30052 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraqi512_mask", IX86_BUILTIN_PSRAQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30053 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv16si_mask, "__builtin_ia32_psrav16si_mask", IX86_BUILTIN_PSRAVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30054 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv8di_mask, "__builtin_ia32_psrav8di_mask", IX86_BUILTIN_PSRAVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30055 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrld512_mask", IX86_BUILTIN_PSRLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
30056 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrldi512_mask", IX86_BUILTIN_PSRLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30057 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlq512_mask", IX86_BUILTIN_PSRLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
30058 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlqi512_mask", IX86_BUILTIN_PSRLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30059 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv16si_mask, "__builtin_ia32_psrlv16si_mask", IX86_BUILTIN_PSRLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30060 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv8di_mask, "__builtin_ia32_psrlv8di_mask", IX86_BUILTIN_PSRLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30061 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16si3_mask, "__builtin_ia32_psubd512_mask", IX86_BUILTIN_PSUBD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30062 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8di3_mask, "__builtin_ia32_psubq512_mask", IX86_BUILTIN_PSUBQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30063 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv16si3_mask, "__builtin_ia32_ptestmd512", IX86_BUILTIN_PTESTMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
30064 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv8di3_mask, "__builtin_ia32_ptestmq512", IX86_BUILTIN_PTESTMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
30065 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv16si3_mask, "__builtin_ia32_ptestnmd512", IX86_BUILTIN_PTESTNMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
30066 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv8di3_mask, "__builtin_ia32_ptestnmq512", IX86_BUILTIN_PTESTNMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
30067 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv16si_mask, "__builtin_ia32_punpckhdq512_mask", IX86_BUILTIN_PUNPCKHDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30068 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv8di_mask, "__builtin_ia32_punpckhqdq512_mask", IX86_BUILTIN_PUNPCKHQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30069 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv16si_mask, "__builtin_ia32_punpckldq512_mask", IX86_BUILTIN_PUNPCKLDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30070 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv8di_mask, "__builtin_ia32_punpcklqdq512_mask", IX86_BUILTIN_PUNPCKLQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30071 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv16si3_mask, "__builtin_ia32_pxord512_mask", IX86_BUILTIN_PXORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30072 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv8di3_mask, "__builtin_ia32_pxorq512_mask", IX86_BUILTIN_PXORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30073 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v8df_mask, "__builtin_ia32_rcp14pd512_mask", IX86_BUILTIN_RCP14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30074 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v16sf_mask, "__builtin_ia32_rcp14ps512_mask", IX86_BUILTIN_RCP14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30075 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v2df, "__builtin_ia32_rcp14sd", IX86_BUILTIN_RCP14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
30076 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v4sf, "__builtin_ia32_rcp14ss", IX86_BUILTIN_RCP14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30077 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v8df_mask, "__builtin_ia32_rsqrt14pd512_mask", IX86_BUILTIN_RSQRT14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30078 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v16sf_mask, "__builtin_ia32_rsqrt14ps512_mask", IX86_BUILTIN_RSQRT14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30079 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v2df, "__builtin_ia32_rsqrt14sd", IX86_BUILTIN_RSQRT14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
30080 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v4sf, "__builtin_ia32_rsqrt14ss", IX86_BUILTIN_RSQRT14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30081 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufpd512_mask, "__builtin_ia32_shufpd512_mask", IX86_BUILTIN_SHUFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
30082 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufps512_mask, "__builtin_ia32_shufps512_mask", IX86_BUILTIN_SHUFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
30083 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_mask", IX86_BUILTIN_SHUF_F32x4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
30084 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_mask", IX86_BUILTIN_SHUF_F64x2, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
30085 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_mask", IX86_BUILTIN_SHUF_I32x4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
30086 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_mask", IX86_BUILTIN_SHUF_I64x2, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
30087 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv16si3_mask, "__builtin_ia32_ucmpd512_mask", IX86_BUILTIN_UCMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
30088 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv8di3_mask, "__builtin_ia32_ucmpq512_mask", IX86_BUILTIN_UCMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
30089 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhpd512_mask, "__builtin_ia32_unpckhpd512_mask", IX86_BUILTIN_UNPCKHPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
30090 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhps512_mask, "__builtin_ia32_unpckhps512_mask", IX86_BUILTIN_UNPCKHPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
30091 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklpd512_mask, "__builtin_ia32_unpcklpd512_mask", IX86_BUILTIN_UNPCKLPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
30092 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklps512_mask, "__builtin_ia32_unpcklps512_mask", IX86_BUILTIN_UNPCKLPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
30093 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv16si2_mask, "__builtin_ia32_vplzcntd_512_mask", IX86_BUILTIN_VPCLZCNTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30094 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv8di2_mask, "__builtin_ia32_vplzcntq_512_mask", IX86_BUILTIN_VPCLZCNTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30095 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv16si_mask, "__builtin_ia32_vpconflictsi_512_mask", IX86_BUILTIN_VPCONFLICTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30096 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv8di_mask, "__builtin_ia32_vpconflictdi_512_mask", IX86_BUILTIN_VPCONFLICTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30097 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8df_mask, "__builtin_ia32_permdf512_mask", IX86_BUILTIN_VPERMDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
30098 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8di_mask, "__builtin_ia32_permdi512_mask", IX86_BUILTIN_VPERMDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30099 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16si3_mask, "__builtin_ia32_vpermi2vard512_mask", IX86_BUILTIN_VPERMI2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30100 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8df3_mask, "__builtin_ia32_vpermi2varpd512_mask", IX86_BUILTIN_VPERMI2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
30101 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16sf3_mask, "__builtin_ia32_vpermi2varps512_mask", IX86_BUILTIN_VPERMI2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
30102 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8di3_mask, "__builtin_ia32_vpermi2varq512_mask", IX86_BUILTIN_VPERMI2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30103 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv8df_mask, "__builtin_ia32_vpermilpd512_mask", IX86_BUILTIN_VPERMILPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
30104 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv16sf_mask, "__builtin_ia32_vpermilps512_mask", IX86_BUILTIN_VPERMILPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI },
30105 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv8df3_mask, "__builtin_ia32_vpermilvarpd512_mask", IX86_BUILTIN_VPERMILVARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
30106 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv16sf3_mask, "__builtin_ia32_vpermilvarps512_mask", IX86_BUILTIN_VPERMILVARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
30107 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_mask, "__builtin_ia32_vpermt2vard512_mask", IX86_BUILTIN_VPERMT2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30108 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_maskz, "__builtin_ia32_vpermt2vard512_maskz", IX86_BUILTIN_VPERMT2VARD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30109 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_mask, "__builtin_ia32_vpermt2varpd512_mask", IX86_BUILTIN_VPERMT2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
30110 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_maskz, "__builtin_ia32_vpermt2varpd512_maskz", IX86_BUILTIN_VPERMT2VARPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
30111 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_mask, "__builtin_ia32_vpermt2varps512_mask", IX86_BUILTIN_VPERMT2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
30112 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_maskz, "__builtin_ia32_vpermt2varps512_maskz", IX86_BUILTIN_VPERMT2VARPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
30113 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_mask, "__builtin_ia32_vpermt2varq512_mask", IX86_BUILTIN_VPERMT2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30114 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_maskz, "__builtin_ia32_vpermt2varq512_maskz", IX86_BUILTIN_VPERMT2VARQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30115 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8df_mask, "__builtin_ia32_permvardf512_mask", IX86_BUILTIN_VPERMVARDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
30116 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8di_mask, "__builtin_ia32_permvardi512_mask", IX86_BUILTIN_VPERMVARDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30117 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16sf_mask, "__builtin_ia32_permvarsf512_mask", IX86_BUILTIN_VPERMVARSF512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
30118 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16si_mask, "__builtin_ia32_permvarsi512_mask", IX86_BUILTIN_VPERMVARSI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30119 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_mask, "__builtin_ia32_pternlogd512_mask", IX86_BUILTIN_VTERNLOGD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
30120 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_maskz, "__builtin_ia32_pternlogd512_maskz", IX86_BUILTIN_VTERNLOGD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
30121 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_mask, "__builtin_ia32_pternlogq512_mask", IX86_BUILTIN_VTERNLOGQ512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
30122 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_maskz, "__builtin_ia32_pternlogq512_maskz", IX86_BUILTIN_VTERNLOGQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
30123
30124 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv16sf3, "__builtin_ia32_copysignps512", IX86_BUILTIN_CPYSGNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF },
30125 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv8df3, "__builtin_ia32_copysignpd512", IX86_BUILTIN_CPYSGNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF },
30126 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2, "__builtin_ia32_sqrtpd512", IX86_BUILTIN_SQRTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF },
30127 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sqrtv16sf2, "__builtin_ia32_sqrtps512", IX86_BUILTIN_SQRTPS_NR512, UNKNOWN, (int) V16SF_FTYPE_V16SF },
30128 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf, "__builtin_ia32_exp2ps", IX86_BUILTIN_EXP2PS, UNKNOWN, (int) V16SF_FTYPE_V16SF },
30129 { OPTION_MASK_ISA_AVX512F, CODE_FOR_roundv8df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix512", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V8DF_V8DF },
30130 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_floorpd_vec_pack_sfix512", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_FLOOR, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
30131 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_ceilpd_vec_pack_sfix512", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_CEIL, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
30132
30133 /* Mask arithmetic operations */
30134 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andhi3, "__builtin_ia32_kandhi", IX86_BUILTIN_KAND16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30135 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kandnhi, "__builtin_ia32_kandnhi", IX86_BUILTIN_KANDN16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30136 { OPTION_MASK_ISA_AVX512F, CODE_FOR_one_cmplhi2, "__builtin_ia32_knothi", IX86_BUILTIN_KNOT16, UNKNOWN, (int) HI_FTYPE_HI },
30137 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorhi3, "__builtin_ia32_korhi", IX86_BUILTIN_KOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30138 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestchi, "__builtin_ia32_kortestchi", IX86_BUILTIN_KORTESTC16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30139 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestzhi, "__builtin_ia32_kortestzhi", IX86_BUILTIN_KORTESTZ16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30140 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kunpckhi, "__builtin_ia32_kunpckhi", IX86_BUILTIN_KUNPCKBW, UNKNOWN, (int) HI_FTYPE_HI_HI },
30141 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kxnorhi, "__builtin_ia32_kxnorhi", IX86_BUILTIN_KXNOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30142 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorhi3, "__builtin_ia32_kxorhi", IX86_BUILTIN_KXOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30143 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kmovw, "__builtin_ia32_kmov16", IX86_BUILTIN_KMOV16, UNKNOWN, (int) HI_FTYPE_HI },
30144
30145 /* SHA */
30146 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg1, 0, IX86_BUILTIN_SHA1MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30147 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg2, 0, IX86_BUILTIN_SHA1MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30148 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1nexte, 0, IX86_BUILTIN_SHA1NEXTE, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30149 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1rnds4, 0, IX86_BUILTIN_SHA1RNDS4, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
30150 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg1, 0, IX86_BUILTIN_SHA256MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30151 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg2, 0, IX86_BUILTIN_SHA256MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30152 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256rnds2, 0, IX86_BUILTIN_SHA256RNDS2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
30153 };
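
/* Illustrative example (not part of this file): the mask-arithmetic
   entries above surface to users through the AVX-512 mask intrinsics in
   <immintrin.h>.  A minimal sketch, assuming GCC's usual intrinsic
   wrappers, showing __builtin_ia32_kandhi reached via _mm512_kand:

     #include <immintrin.h>

     __mmask16
     combine_masks (__mmask16 a, __mmask16 b)
     {
       /* _mm512_kand expands to __builtin_ia32_kandhi (IX86_BUILTIN_KAND16).  */
       return _mm512_kand (a, b);
     }

   Compile with -mavx512f so that OPTION_MASK_ISA_AVX512F is set.  */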
30154
30155 /* Builtins with rounding support. */
30156 static const struct builtin_description bdesc_round_args[] =
30157 {
30158 /* AVX512F */
30159 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8df3_mask_round, "__builtin_ia32_addpd512_mask", IX86_BUILTIN_ADDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30160 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16sf3_mask_round, "__builtin_ia32_addps512_mask", IX86_BUILTIN_ADDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30161 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmaddv2df3_round, "__builtin_ia32_addsd_round", IX86_BUILTIN_ADDSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30162 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmaddv4sf3_round, "__builtin_ia32_addss_round", IX86_BUILTIN_ADDSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30163 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8df3_mask_round, "__builtin_ia32_cmppd512_mask", IX86_BUILTIN_CMPPD512, UNKNOWN, (int) QI_FTYPE_V8DF_V8DF_INT_QI_INT },
30164 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16sf3_mask_round, "__builtin_ia32_cmpps512_mask", IX86_BUILTIN_CMPPS512, UNKNOWN, (int) HI_FTYPE_V16SF_V16SF_INT_HI_INT },
30165 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv2df3_mask_round, "__builtin_ia32_cmpsd_mask", IX86_BUILTIN_CMPSD_MASK, UNKNOWN, (int) QI_FTYPE_V2DF_V2DF_INT_QI_INT },
30166 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv4sf3_mask_round, "__builtin_ia32_cmpss_mask", IX86_BUILTIN_CMPSS_MASK, UNKNOWN, (int) QI_FTYPE_V4SF_V4SF_INT_QI_INT },
30167 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_comi_round, "__builtin_ia32_vcomisd", IX86_BUILTIN_COMIDF, UNKNOWN, (int) INT_FTYPE_V2DF_V2DF_INT_INT },
30168 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_comi_round, "__builtin_ia32_vcomiss", IX86_BUILTIN_COMISF, UNKNOWN, (int) INT_FTYPE_V4SF_V4SF_INT_INT },
30169 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv16siv16sf2_mask_round, "__builtin_ia32_cvtdq2ps512_mask", IX86_BUILTIN_CVTDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
30170 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2dq512_mask_round, "__builtin_ia32_cvtpd2dq512_mask", IX86_BUILTIN_CVTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
30171 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2ps512_mask_round, "__builtin_ia32_cvtpd2ps512_mask", IX86_BUILTIN_CVTPD2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DF_V8SF_QI_INT },
30172 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ufix_notruncv8dfv8si_mask_round, "__builtin_ia32_cvtpd2udq512_mask", IX86_BUILTIN_CVTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
30173 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtph2ps512_mask_round, "__builtin_ia32_vcvtph2ps512_mask", IX86_BUILTIN_CVTPH2PS512, UNKNOWN, (int) V16SF_FTYPE_V16HI_V16SF_HI_INT },
30174 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2dq512_mask", IX86_BUILTIN_CVTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30175 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtps2pd512_mask_round, "__builtin_ia32_cvtps2pd512_mask", IX86_BUILTIN_CVTPS2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SF_V8DF_QI_INT },
30176 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ufix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2udq512_mask", IX86_BUILTIN_CVTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30177 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2ss_round, "__builtin_ia32_cvtsd2ss_round", IX86_BUILTIN_CVTSD2SS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF_INT },
30178 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq_round, "__builtin_ia32_cvtsi2sd64", IX86_BUILTIN_CVTSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT64_INT },
30179 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtsi2ss_round, "__builtin_ia32_cvtsi2ss32", IX86_BUILTIN_CVTSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_INT },
30180 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq_round, "__builtin_ia32_cvtsi2ss64", IX86_BUILTIN_CVTSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT64_INT },
30181 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtss2sd_round, "__builtin_ia32_cvtss2sd_round", IX86_BUILTIN_CVTSS2SD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF_INT },
30182 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2dq512_mask", IX86_BUILTIN_CVTTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
30183 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2udq512_mask", IX86_BUILTIN_CVTTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
30184 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2dq512_mask", IX86_BUILTIN_CVTTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30185 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2udq512_mask", IX86_BUILTIN_CVTTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30186 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv16siv16sf2_mask_round, "__builtin_ia32_cvtudq2ps512_mask", IX86_BUILTIN_CVTUDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
30187 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2sd64_round, "__builtin_ia32_cvtusi2sd64", IX86_BUILTIN_CVTUSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT64_INT },
30188 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2ss32_round, "__builtin_ia32_cvtusi2ss32", IX86_BUILTIN_CVTUSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT_INT },
30189 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2ss64_round, "__builtin_ia32_cvtusi2ss64", IX86_BUILTIN_CVTUSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT64_INT },
30190 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv8df3_mask_round, "__builtin_ia32_divpd512_mask", IX86_BUILTIN_DIVPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30191 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv16sf3_mask_round, "__builtin_ia32_divps512_mask", IX86_BUILTIN_DIVPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30192 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmdivv2df3_round, "__builtin_ia32_divsd_round", IX86_BUILTIN_DIVSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30193 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmdivv4sf3_round, "__builtin_ia32_divss_round", IX86_BUILTIN_DIVSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30194 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_mask_round, "__builtin_ia32_fixupimmpd512_mask", IX86_BUILTIN_FIXUPIMMPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
30195 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_maskz_round, "__builtin_ia32_fixupimmpd512_maskz", IX86_BUILTIN_FIXUPIMMPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
30196 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_mask_round, "__builtin_ia32_fixupimmps512_mask", IX86_BUILTIN_FIXUPIMMPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
30197 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_maskz_round, "__builtin_ia32_fixupimmps512_maskz", IX86_BUILTIN_FIXUPIMMPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
30198 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_mask_round, "__builtin_ia32_fixupimmsd_mask", IX86_BUILTIN_FIXUPIMMSD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
30199 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_maskz_round, "__builtin_ia32_fixupimmsd_maskz", IX86_BUILTIN_FIXUPIMMSD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
30200 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_mask_round, "__builtin_ia32_fixupimmss_mask", IX86_BUILTIN_FIXUPIMMSS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
30201 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_maskz_round, "__builtin_ia32_fixupimmss_maskz", IX86_BUILTIN_FIXUPIMMSS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
30202 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv8df_mask_round, "__builtin_ia32_getexppd512_mask", IX86_BUILTIN_GETEXPPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30203 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv16sf_mask_round, "__builtin_ia32_getexpps512_mask", IX86_BUILTIN_GETEXPPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30204 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv2df_round, "__builtin_ia32_getexpsd128_round", IX86_BUILTIN_GETEXPSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30205 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv4sf_round, "__builtin_ia32_getexpss128_round", IX86_BUILTIN_GETEXPSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30206 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv8df_mask_round, "__builtin_ia32_getmantpd512_mask", IX86_BUILTIN_GETMANTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
30207 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv16sf_mask_round, "__builtin_ia32_getmantps512_mask", IX86_BUILTIN_GETMANTPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
30208 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv2df_round, "__builtin_ia32_getmantsd_round", IX86_BUILTIN_GETMANTSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
30209 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv4sf_round, "__builtin_ia32_getmantss_round", IX86_BUILTIN_GETMANTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
30210 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8df3_mask_round, "__builtin_ia32_maxpd512_mask", IX86_BUILTIN_MAXPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30211 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16sf3_mask_round, "__builtin_ia32_maxps512_mask", IX86_BUILTIN_MAXPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30212 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsmaxv2df3_round, "__builtin_ia32_maxsd_round", IX86_BUILTIN_MAXSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30213 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsmaxv4sf3_round, "__builtin_ia32_maxss_round", IX86_BUILTIN_MAXSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30214 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8df3_mask_round, "__builtin_ia32_minpd512_mask", IX86_BUILTIN_MINPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30215 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16sf3_mask_round, "__builtin_ia32_minps512_mask", IX86_BUILTIN_MINPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30216 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsminv2df3_round, "__builtin_ia32_minsd_round", IX86_BUILTIN_MINSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30217 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsminv4sf3_round, "__builtin_ia32_minss_round", IX86_BUILTIN_MINSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30218 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv8df3_mask_round, "__builtin_ia32_mulpd512_mask", IX86_BUILTIN_MULPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30219 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16sf3_mask_round, "__builtin_ia32_mulps512_mask", IX86_BUILTIN_MULPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30220 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmmulv2df3_round, "__builtin_ia32_mulsd_round", IX86_BUILTIN_MULSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30221 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmmulv4sf3_round, "__builtin_ia32_mulss_round", IX86_BUILTIN_MULSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30222 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev8df_mask_round, "__builtin_ia32_rndscalepd_mask", IX86_BUILTIN_RNDSCALEPD, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
30223 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev16sf_mask_round, "__builtin_ia32_rndscaleps_mask", IX86_BUILTIN_RNDSCALEPS, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
30224 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev2df_round, "__builtin_ia32_rndscalesd_round", IX86_BUILTIN_RNDSCALESD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
30225 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev4sf_round, "__builtin_ia32_rndscaless_round", IX86_BUILTIN_RNDSCALESS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
30226 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv8df_mask_round, "__builtin_ia32_scalefpd512_mask", IX86_BUILTIN_SCALEFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30227 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv16sf_mask_round, "__builtin_ia32_scalefps512_mask", IX86_BUILTIN_SCALEFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30228 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv2df_round, "__builtin_ia32_scalefsd_round", IX86_BUILTIN_SCALEFSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30229 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv4sf_round, "__builtin_ia32_scalefss_round", IX86_BUILTIN_SCALEFSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30230 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2_mask_round, "__builtin_ia32_sqrtpd512_mask", IX86_BUILTIN_SQRTPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30231 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv16sf2_mask_round, "__builtin_ia32_sqrtps512_mask", IX86_BUILTIN_SQRTPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30232 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsqrtv2df2_round, "__builtin_ia32_sqrtsd_round", IX86_BUILTIN_SQRTSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30233 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsqrtv4sf2_round, "__builtin_ia32_sqrtss_round", IX86_BUILTIN_SQRTSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30234 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8df3_mask_round, "__builtin_ia32_subpd512_mask", IX86_BUILTIN_SUBPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30235 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16sf3_mask_round, "__builtin_ia32_subps512_mask", IX86_BUILTIN_SUBPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30236 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsubv2df3_round, "__builtin_ia32_subsd_round", IX86_BUILTIN_SUBSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30237 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsubv4sf3_round, "__builtin_ia32_subss_round", IX86_BUILTIN_SUBSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30238 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2si_round, "__builtin_ia32_vcvtsd2si32", IX86_BUILTIN_VCVTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
30239 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq_round, "__builtin_ia32_vcvtsd2si64", IX86_BUILTIN_VCVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
30240 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtsd2usi_round, "__builtin_ia32_vcvtsd2usi32", IX86_BUILTIN_VCVTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
30241 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtsd2usiq_round, "__builtin_ia32_vcvtsd2usi64", IX86_BUILTIN_VCVTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
30242 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtss2si_round, "__builtin_ia32_vcvtss2si32", IX86_BUILTIN_VCVTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
30243 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq_round, "__builtin_ia32_vcvtss2si64", IX86_BUILTIN_VCVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
30244 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtss2usi_round, "__builtin_ia32_vcvtss2usi32", IX86_BUILTIN_VCVTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
30245 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtss2usiq_round, "__builtin_ia32_vcvtss2usi64", IX86_BUILTIN_VCVTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
30246 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvttsd2si_round, "__builtin_ia32_vcvttsd2si32", IX86_BUILTIN_VCVTTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
30247 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq_round, "__builtin_ia32_vcvttsd2si64", IX86_BUILTIN_VCVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
30248 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttsd2usi_round, "__builtin_ia32_vcvttsd2usi32", IX86_BUILTIN_VCVTTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
30249 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttsd2usiq_round, "__builtin_ia32_vcvttsd2usi64", IX86_BUILTIN_VCVTTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
30250 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvttss2si_round, "__builtin_ia32_vcvttss2si32", IX86_BUILTIN_VCVTTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
30251 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq_round, "__builtin_ia32_vcvttss2si64", IX86_BUILTIN_VCVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
30252 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttss2usi_round, "__builtin_ia32_vcvttss2usi32", IX86_BUILTIN_VCVTTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
30253 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttss2usiq_round, "__builtin_ia32_vcvttss2usi64", IX86_BUILTIN_VCVTTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
30254 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask_round, "__builtin_ia32_vfmaddpd512_mask", IX86_BUILTIN_VFMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30255 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask3_round, "__builtin_ia32_vfmaddpd512_mask3", IX86_BUILTIN_VFMADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30256 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_maskz_round, "__builtin_ia32_vfmaddpd512_maskz", IX86_BUILTIN_VFMADDPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30257 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask_round, "__builtin_ia32_vfmaddps512_mask", IX86_BUILTIN_VFMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30258 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask3_round, "__builtin_ia32_vfmaddps512_mask3", IX86_BUILTIN_VFMADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30259 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_maskz_round, "__builtin_ia32_vfmaddps512_maskz", IX86_BUILTIN_VFMADDPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30260 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v2df_round, "__builtin_ia32_vfmaddsd3_round", IX86_BUILTIN_VFMADDSD3_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_INT },
30261 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v4sf_round, "__builtin_ia32_vfmaddss3_round", IX86_BUILTIN_VFMADDSS3_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_INT },
30262 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask_round, "__builtin_ia32_vfmaddsubpd512_mask", IX86_BUILTIN_VFMADDSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30263 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask3_round, "__builtin_ia32_vfmaddsubpd512_mask3", IX86_BUILTIN_VFMADDSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30264 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_maskz_round, "__builtin_ia32_vfmaddsubpd512_maskz", IX86_BUILTIN_VFMADDSUBPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30265 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask_round, "__builtin_ia32_vfmaddsubps512_mask", IX86_BUILTIN_VFMADDSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30266 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask3_round, "__builtin_ia32_vfmaddsubps512_mask3", IX86_BUILTIN_VFMADDSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30267 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_maskz_round, "__builtin_ia32_vfmaddsubps512_maskz", IX86_BUILTIN_VFMADDSUBPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30268 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v8df_mask3_round, "__builtin_ia32_vfmsubaddpd512_mask3", IX86_BUILTIN_VFMSUBADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30269 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v16sf_mask3_round, "__builtin_ia32_vfmsubaddps512_mask3", IX86_BUILTIN_VFMSUBADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30270 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v8df_mask3_round, "__builtin_ia32_vfmsubpd512_mask3", IX86_BUILTIN_VFMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30271 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v16sf_mask3_round, "__builtin_ia32_vfmsubps512_mask3", IX86_BUILTIN_VFMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30272 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v8df_mask_round, "__builtin_ia32_vfnmaddpd512_mask", IX86_BUILTIN_VFNMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30273 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v16sf_mask_round, "__builtin_ia32_vfnmaddps512_mask", IX86_BUILTIN_VFNMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30274 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask_round, "__builtin_ia32_vfnmsubpd512_mask", IX86_BUILTIN_VFNMSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30275 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask3_round, "__builtin_ia32_vfnmsubpd512_mask3", IX86_BUILTIN_VFNMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30276 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask_round, "__builtin_ia32_vfnmsubps512_mask", IX86_BUILTIN_VFNMSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30277 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask3_round, "__builtin_ia32_vfnmsubps512_mask3", IX86_BUILTIN_VFNMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30278
30279 /* AVX512ER */
30280 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v8df_mask_round, "__builtin_ia32_exp2pd_mask", IX86_BUILTIN_EXP2PD_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30281 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf_mask_round, "__builtin_ia32_exp2ps_mask", IX86_BUILTIN_EXP2PS_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30282 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v8df_mask_round, "__builtin_ia32_rcp28pd_mask", IX86_BUILTIN_RCP28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30283 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v16sf_mask_round, "__builtin_ia32_rcp28ps_mask", IX86_BUILTIN_RCP28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30284 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v2df_round, "__builtin_ia32_rcp28sd_round", IX86_BUILTIN_RCP28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30285 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v4sf_round, "__builtin_ia32_rcp28ss_round", IX86_BUILTIN_RCP28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30286 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v8df_mask_round, "__builtin_ia32_rsqrt28pd_mask", IX86_BUILTIN_RSQRT28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30287 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v16sf_mask_round, "__builtin_ia32_rsqrt28ps_mask", IX86_BUILTIN_RSQRT28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30288 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v2df_round, "__builtin_ia32_rsqrt28sd_round", IX86_BUILTIN_RSQRT28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30289 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v4sf_round, "__builtin_ia32_rsqrt28ss_round", IX86_BUILTIN_RSQRT28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30290 };
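
/* Illustrative example (not part of this file): entries in
   bdesc_round_args carry an extra trailing INT operand for the rounding
   mode.  A minimal sketch, assuming GCC's <immintrin.h> wrappers, showing
   __builtin_ia32_addpd512_mask reached with an explicit rounding mode:

     #include <immintrin.h>

     __m512d
     add_rn (__m512d a, __m512d b)
     {
       /* The last argument is the rounding-mode immediate that is
	  validated when the builtin is expanded.  */
       return _mm512_add_round_pd (a, b,
				   _MM_FROUND_TO_NEAREST_INT
				   | _MM_FROUND_NO_EXC);
     }

   Compile with -mavx512f.  */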
30291
30292 /* FMA4 and XOP. */
30293 #define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
30294 #define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
30295 #define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
30296 #define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
30297 #define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
30298 #define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
30299 #define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
30300 #define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
30301 #define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
30302 #define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
30303 #define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
30304 #define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
30305 #define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
30306 #define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
30307 #define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
30308 #define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
30309 #define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
30310 #define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
30311 #define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
30312 #define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
30313 #define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
30314 #define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
30315 #define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
30316 #define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
30317 #define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
30318 #define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
30319 #define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
30320 #define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
30321 #define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
30322 #define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
30323 #define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
30324 #define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
30325 #define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
30326 #define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
30327 #define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
30328 #define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
30329 #define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
30330 #define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
30331 #define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
30332 #define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
30333 #define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
30334 #define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
30335 #define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
30336 #define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
30337 #define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
30338 #define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
30339 #define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
30340 #define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
30341 #define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
30342 #define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
30343 #define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
30344 #define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
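
/* Reading the aliases above: each MULTI_ARG_* name abbreviates an
   ix86_builtin_func_type spelled RETURN_FTYPE_ARG1_ARG2_...  Two worked
   examples, derived directly from the defines:

     MULTI_ARG_4_DF2_DI_I == V2DF_FTYPE_V2DF_V2DF_V2DI_INT
       i.e. a builtin such as __builtin_ia32_vpermil2pd with the prototype
       v2df f (v2df, v2df, v2di, int);

     MULTI_ARG_2_QI_CMP   == V16QI_FTYPE_V16QI_V16QI_CMP
       i.e. a two-operand byte compare whose rtx_code (EQ, LT, ...) is
       stored in the comparison field of the table entry.  */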
30345
30346 static const struct builtin_description bdesc_multi_arg[] =
30347 {
30348 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
30349 "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
30350 UNKNOWN, (int)MULTI_ARG_3_SF },
30351 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
30352 "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
30353 UNKNOWN, (int)MULTI_ARG_3_DF },
30354
30355 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v4sf,
30356 "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3,
30357 UNKNOWN, (int)MULTI_ARG_3_SF },
30358 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v2df,
30359 "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3,
30360 UNKNOWN, (int)MULTI_ARG_3_DF },
30361
30362 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
30363 "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
30364 UNKNOWN, (int)MULTI_ARG_3_SF },
30365 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
30366 "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
30367 UNKNOWN, (int)MULTI_ARG_3_DF },
30368 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf,
30369 "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256,
30370 UNKNOWN, (int)MULTI_ARG_3_SF2 },
30371 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df,
30372 "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
30373 UNKNOWN, (int)MULTI_ARG_3_DF2 },
30374
30375 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
30376 "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
30377 UNKNOWN, (int)MULTI_ARG_3_SF },
30378 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df,
30379 "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,
30380 UNKNOWN, (int)MULTI_ARG_3_DF },
30381 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf,
30382 "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,
30383 UNKNOWN, (int)MULTI_ARG_3_SF2 },
30384 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df,
30385 "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,
30386 UNKNOWN, (int)MULTI_ARG_3_DF2 },
30387
30388 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
30389 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
30390 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
30391 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
30392 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi",IX86_BUILTIN_VPCMOV_V16QI,UNKNOWN, (int)MULTI_ARG_3_QI },
30393 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
30394 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },
30395
30396 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
30397 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
30398 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
30399 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
30400 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
30401 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
30402 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
30403
30404 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },
30405
30406 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
30407 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
30408 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
30409 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
30410 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
30411 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
30412 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30413 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30414 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30415 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30416 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
30417 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
30418
30419 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
30420 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
30421 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
30422 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
30423 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
30424 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
30425 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
30426 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
30427 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
30428 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
30429 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
30430 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
30431 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
30432 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
30433 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
30434 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
30435
30436 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_1_SF },
30437 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_1_DF },
30438 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
30439 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
30440 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
30441 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },
30442
30443 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
30444 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
30445 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
30446 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
30447 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
30448 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
30449 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
30450 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
30451 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
30452 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
30453 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
30454 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
30455 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
30456 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
30457 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
30458
30459 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
30460 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
30461 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
30462 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
30463 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
30464 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
30465 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
30466
30467 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
30468 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
30469 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
30470 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
30471 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
30472 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
30473 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
30474
30475 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
30476 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
30477 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
30478 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
30479 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
30480 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
30481 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
30482
30483 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
30484 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
30485 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
30486 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
30487 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
30488 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
30489 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
30490
30491 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
30492 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
30493 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
30494 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
30495 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
30496 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
30497 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
30498
30499 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
30500 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
30501 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
30502 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
30503 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
30504 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
30505 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
30506
30507 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
30508 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
30509 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
30510 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
30511 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
30512 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
30513 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
30514
30515 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
30516 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
30517 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
30518 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
30519 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
30520 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
30521 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
30522
30523 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
30524 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
30525 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
30526 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
30527 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub",IX86_BUILTIN_VPCOMFALSEUB,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
30528 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw",IX86_BUILTIN_VPCOMFALSEUW,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
30529 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud",IX86_BUILTIN_VPCOMFALSEUD,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
30530 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq",IX86_BUILTIN_VPCOMFALSEUQ,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
30531
30532 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
30533 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
30534 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
30535 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
30536 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
30537 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
30538 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
30539 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
30540
30541 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
30542 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
30543 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
30544 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
30545
30546 };
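
/* Illustrative example (not part of this file): a minimal sketch, assuming
   GCC's <x86intrin.h>/<xopintrin.h> wrappers, showing how an XOP entry such
   as __builtin_ia32_vpcmov (IX86_BUILTIN_VPCMOV) is reached from user code:

     #include <x86intrin.h>

     __m128i
     select_bits (__m128i a, __m128i b, __m128i mask)
     {
       /* _mm_cmov_si128 expands to __builtin_ia32_vpcmov: each bit of MASK
	  selects between the corresponding bits of A and B.  */
       return _mm_cmov_si128 (a, b, mask);
     }

   Compile with -mxop so that OPTION_MASK_ISA_XOP is set.  */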
30547 \f
30548 /* TM vector builtins. */
30549
30550 /* Reuse the existing x86-specific `struct builtin_description' because
30551    we're lazy. Add casts to make them fit. */
30552 static const struct builtin_description bdesc_tm[] =
30553 {
30554 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
30555 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
30556 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
30557 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30558 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30559 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30560 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30561
30562 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
30563 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
30564 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaWM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
30565 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30566 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaRM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30567 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30568 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RfWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30569
30570 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WM256", (enum ix86_builtins) BUILT_IN_TM_STORE_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
30571 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaRM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
30572 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaWM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
30573 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30574 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaRM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30575 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30576 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30577
30578 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID },
30579 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID },
30580 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_LM256", (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID },
30581 };
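
/* Illustrative example (not part of this file, and only a hedged sketch of
   the -fgnu-tm lowering): a vector store performed inside a transaction may
   be routed through one of the entries above, e.g.

     __m128 g;

     void
     f (__m128 v)
     {
       __transaction_atomic { g = v; }   // the store of V may become _ITM_WM128
     }

   The BUILT_IN_TM_* codes used here are the generic transactional-memory
   builtins; this table only gives them x86 vector prototypes.  */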
30582
30583 /* TM callbacks. */
30584
30585 /* Return the builtin decl needed to load a vector of TYPE. */
30586
30587 static tree
30588 ix86_builtin_tm_load (tree type)
30589 {
30590 if (TREE_CODE (type) == VECTOR_TYPE)
30591 {
30592 switch (tree_to_uhwi (TYPE_SIZE (type)))
30593 {
30594 case 64:
30595 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M64);
30596 case 128:
30597 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M128);
30598 case 256:
30599 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M256);
30600 }
30601 }
30602 return NULL_TREE;
30603 }
30604
30605 /* Return the builtin decl needed to store a vector of TYPE. */
30606
30607 static tree
30608 ix86_builtin_tm_store (tree type)
30609 {
30610 if (TREE_CODE (type) == VECTOR_TYPE)
30611 {
30612 switch (tree_to_uhwi (TYPE_SIZE (type)))
30613 {
30614 case 64:
30615 return builtin_decl_explicit (BUILT_IN_TM_STORE_M64);
30616 case 128:
30617 return builtin_decl_explicit (BUILT_IN_TM_STORE_M128);
30618 case 256:
30619 return builtin_decl_explicit (BUILT_IN_TM_STORE_M256);
30620 }
30621 }
30622 return NULL_TREE;
30623 }
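
/* Worked example for the two helpers above: for a type such as

     typedef float v4sf __attribute__ ((vector_size (16)));

   TYPE_SIZE is 128 bits, so ix86_builtin_tm_load returns the decl for
   BUILT_IN_TM_LOAD_M128 and ix86_builtin_tm_store the decl for
   BUILT_IN_TM_STORE_M128; any other vector size falls through and
   NULL_TREE is returned.  */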
30624 \f
30625 /* Initialize the transactional memory vector load/store builtins. */
30626
30627 static void
30628 ix86_init_tm_builtins (void)
30629 {
30630 enum ix86_builtin_func_type ftype;
30631 const struct builtin_description *d;
30632 size_t i;
30633 tree decl;
30634 tree attrs_load, attrs_type_load, attrs_store, attrs_type_store;
30635 tree attrs_log, attrs_type_log;
30636
30637 if (!flag_tm)
30638 return;
30639
30640 /* If there are no builtins defined, we must be compiling in a
30641 language without trans-mem support. */
30642 if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1))
30643 return;
30644
30645 /* Use whatever attributes a normal TM load has. */
30646 decl = builtin_decl_explicit (BUILT_IN_TM_LOAD_1);
30647 attrs_load = DECL_ATTRIBUTES (decl);
30648 attrs_type_load = TYPE_ATTRIBUTES (TREE_TYPE (decl));
30649 /* Use whatever attributes a normal TM store has. */
30650 decl = builtin_decl_explicit (BUILT_IN_TM_STORE_1);
30651 attrs_store = DECL_ATTRIBUTES (decl);
30652 attrs_type_store = TYPE_ATTRIBUTES (TREE_TYPE (decl));
30653 /* Use whatever attributes a normal TM log has. */
30654 decl = builtin_decl_explicit (BUILT_IN_TM_LOG);
30655 attrs_log = DECL_ATTRIBUTES (decl);
30656 attrs_type_log = TYPE_ATTRIBUTES (TREE_TYPE (decl));
30657
30658 for (i = 0, d = bdesc_tm;
30659 i < ARRAY_SIZE (bdesc_tm);
30660 i++, d++)
30661 {
30662 if ((d->mask & ix86_isa_flags) != 0
30663 || (lang_hooks.builtin_function
30664 == lang_hooks.builtin_function_ext_scope))
30665 {
30666 tree type, attrs, attrs_type;
30667 enum built_in_function code = (enum built_in_function) d->code;
30668
30669 ftype = (enum ix86_builtin_func_type) d->flag;
30670 type = ix86_get_builtin_func_type (ftype);
30671
30672 if (BUILTIN_TM_LOAD_P (code))
30673 {
30674 attrs = attrs_load;
30675 attrs_type = attrs_type_load;
30676 }
30677 else if (BUILTIN_TM_STORE_P (code))
30678 {
30679 attrs = attrs_store;
30680 attrs_type = attrs_type_store;
30681 }
30682 else
30683 {
30684 attrs = attrs_log;
30685 attrs_type = attrs_type_log;
30686 }
30687 decl = add_builtin_function (d->name, type, code, BUILT_IN_NORMAL,
30688 /* The builtin name without the prefix,
30689 for calling it directly. */
30690 d->name + strlen ("__builtin_"),
30691 attrs);
30692 /* add_builtin_function () will set the DECL_ATTRIBUTES; now
30693 set the TYPE_ATTRIBUTES. */
30694 decl_attributes (&TREE_TYPE (decl), attrs_type, ATTR_FLAG_BUILT_IN);
30695
30696 set_builtin_decl (code, decl, false);
30697 }
30698 }
30699 }
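
/* Worked example for the registration loop above: for the first bdesc_tm
   entry, d->name is "__builtin__ITM_WM64", so the extra name passed to
   add_builtin_function, d->name + strlen ("__builtin_"), is "_ITM_WM64".
   The builtin is therefore reachable both as __builtin__ITM_WM64 and, via
   that unprefixed name, as a direct call to _ITM_WM64.  */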
30700
30701 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
30702 in the current target ISA, to allow the user to compile particular modules
30703 with target-specific options that differ from the command-line
30704 options. */
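/* Hedged usage sketch (illustration only; not from the original file):
   once registered here, these builtins underlie the intrinsics provided
   by the x86 intrinsic headers.  For example, user code such as

     unsigned int r;
     if (__builtin_ia32_rdrand32_step (&r))
       consume (r);   /* consume() is a hypothetical user function.  */

   is only accepted when the gating ISA bit (OPTION_MASK_ISA_RDRND in
   this case) is enabled, e.g. via -mrdrnd or a target attribute.  */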
30705 static void
30706 ix86_init_mmx_sse_builtins (void)
30707 {
30708 const struct builtin_description * d;
30709 enum ix86_builtin_func_type ftype;
30710 size_t i;
30711
30712 /* Add all special builtins with variable number of operands. */
30713 for (i = 0, d = bdesc_special_args;
30714 i < ARRAY_SIZE (bdesc_special_args);
30715 i++, d++)
30716 {
30717 if (d->name == 0)
30718 continue;
30719
30720 ftype = (enum ix86_builtin_func_type) d->flag;
30721 def_builtin (d->mask, d->name, ftype, d->code);
30722 }
30723
30724 /* Add all builtins with variable number of operands. */
30725 for (i = 0, d = bdesc_args;
30726 i < ARRAY_SIZE (bdesc_args);
30727 i++, d++)
30728 {
30729 if (d->name == 0)
30730 continue;
30731
30732 ftype = (enum ix86_builtin_func_type) d->flag;
30733 def_builtin_const (d->mask, d->name, ftype, d->code);
30734 }
30735
30736 /* Add all builtins with rounding. */
30737 for (i = 0, d = bdesc_round_args;
30738 i < ARRAY_SIZE (bdesc_round_args);
30739 i++, d++)
30740 {
30741 if (d->name == 0)
30742 continue;
30743
30744 ftype = (enum ix86_builtin_func_type) d->flag;
30745 def_builtin_const (d->mask, d->name, ftype, d->code);
30746 }
30747
30748 /* pcmpestr[im] insns. */
30749 for (i = 0, d = bdesc_pcmpestr;
30750 i < ARRAY_SIZE (bdesc_pcmpestr);
30751 i++, d++)
30752 {
30753 if (d->code == IX86_BUILTIN_PCMPESTRM128)
30754 ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
30755 else
30756 ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
30757 def_builtin_const (d->mask, d->name, ftype, d->code);
30758 }
30759
30760 /* pcmpistr[im] insns. */
30761 for (i = 0, d = bdesc_pcmpistr;
30762 i < ARRAY_SIZE (bdesc_pcmpistr);
30763 i++, d++)
30764 {
30765 if (d->code == IX86_BUILTIN_PCMPISTRM128)
30766 ftype = V16QI_FTYPE_V16QI_V16QI_INT;
30767 else
30768 ftype = INT_FTYPE_V16QI_V16QI_INT;
30769 def_builtin_const (d->mask, d->name, ftype, d->code);
30770 }
30771
30772 /* comi/ucomi insns. */
30773 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
30774 {
30775 if (d->mask == OPTION_MASK_ISA_SSE2)
30776 ftype = INT_FTYPE_V2DF_V2DF;
30777 else
30778 ftype = INT_FTYPE_V4SF_V4SF;
30779 def_builtin_const (d->mask, d->name, ftype, d->code);
30780 }
30781
30782 /* SSE */
30783 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
30784 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
30785 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
30786 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
30787
30788 /* SSE or 3DNow!A */
30789 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
30790 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
30791 IX86_BUILTIN_MASKMOVQ);
30792
30793 /* SSE2 */
30794 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
30795 VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
30796
30797 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
30798 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
30799 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
30800 VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
30801
30802 /* SSE3. */
30803 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
30804 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
30805 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
30806 VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
30807
30808 /* AES */
30809 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
30810 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
30811 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
30812 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
30813 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
30814 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
30815 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
30816 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
30817 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
30818 V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
30819 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
30820 V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
30821
30822 /* PCLMUL */
30823 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
30824 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
30825
30826 /* RDRND */
30827 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
30828 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
30829 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
30830 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
30831 def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
30832 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
30833 IX86_BUILTIN_RDRAND64_STEP);
30834
30835 /* AVX2 */
30836 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2df",
30837 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT,
30838 IX86_BUILTIN_GATHERSIV2DF);
30839
30840 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4df",
30841 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT,
30842 IX86_BUILTIN_GATHERSIV4DF);
30843
30844 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2df",
30845 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT,
30846 IX86_BUILTIN_GATHERDIV2DF);
30847
30848 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4df",
30849 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT,
30850 IX86_BUILTIN_GATHERDIV4DF);
30851
30852 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4sf",
30853 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT,
30854 IX86_BUILTIN_GATHERSIV4SF);
30855
30856 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8sf",
30857 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT,
30858 IX86_BUILTIN_GATHERSIV8SF);
30859
30860 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf",
30861 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT,
30862 IX86_BUILTIN_GATHERDIV4SF);
30863
30864 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf256",
30865 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT,
30866 IX86_BUILTIN_GATHERDIV8SF);
30867
30868 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2di",
30869 V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT,
30870 IX86_BUILTIN_GATHERSIV2DI);
30871
30872 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4di",
30873 V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT,
30874 IX86_BUILTIN_GATHERSIV4DI);
30875
30876 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2di",
30877 V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT,
30878 IX86_BUILTIN_GATHERDIV2DI);
30879
30880 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4di",
30881 V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT,
30882 IX86_BUILTIN_GATHERDIV4DI);
30883
30884 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4si",
30885 V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT,
30886 IX86_BUILTIN_GATHERSIV4SI);
30887
30888 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8si",
30889 V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT,
30890 IX86_BUILTIN_GATHERSIV8SI);
30891
30892 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si",
30893 V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT,
30894 IX86_BUILTIN_GATHERDIV4SI);
30895
30896 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si256",
30897 V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT,
30898 IX86_BUILTIN_GATHERDIV8SI);
30899
30900 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4df ",
30901 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT,
30902 IX86_BUILTIN_GATHERALTSIV4DF);
30903
30904 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4sf256 ",
30905 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT,
30906 IX86_BUILTIN_GATHERALTDIV8SF);
30907
30908 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4di ",
30909 V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT,
30910 IX86_BUILTIN_GATHERALTSIV4DI);
30911
30912 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4si256 ",
30913 V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT,
30914 IX86_BUILTIN_GATHERALTDIV8SI);
30915
30916 /* AVX512F */
30917 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16sf",
30918 V16SF_FTYPE_V16SF_PCFLOAT_V16SI_HI_INT,
30919 IX86_BUILTIN_GATHER3SIV16SF);
30920
30921 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8df",
30922 V8DF_FTYPE_V8DF_PCDOUBLE_V8SI_QI_INT,
30923 IX86_BUILTIN_GATHER3SIV8DF);
30924
30925 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16sf",
30926 V8SF_FTYPE_V8SF_PCFLOAT_V8DI_QI_INT,
30927 IX86_BUILTIN_GATHER3DIV16SF);
30928
30929 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8df",
30930 V8DF_FTYPE_V8DF_PCDOUBLE_V8DI_QI_INT,
30931 IX86_BUILTIN_GATHER3DIV8DF);
30932
30933 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16si",
30934 V16SI_FTYPE_V16SI_PCINT_V16SI_HI_INT,
30935 IX86_BUILTIN_GATHER3SIV16SI);
30936
30937 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8di",
30938 V8DI_FTYPE_V8DI_PCINT64_V8SI_QI_INT,
30939 IX86_BUILTIN_GATHER3SIV8DI);
30940
30941 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16si",
30942 V8SI_FTYPE_V8SI_PCINT_V8DI_QI_INT,
30943 IX86_BUILTIN_GATHER3DIV16SI);
30944
30945 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8di",
30946 V8DI_FTYPE_V8DI_PCINT64_V8DI_QI_INT,
30947 IX86_BUILTIN_GATHER3DIV8DI);
30948
30949 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8df ",
30950 V8DF_FTYPE_V8DF_PCDOUBLE_V16SI_QI_INT,
30951 IX86_BUILTIN_GATHER3ALTSIV8DF);
30952
30953 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8sf ",
30954 V16SF_FTYPE_V16SF_PCFLOAT_V8DI_HI_INT,
30955 IX86_BUILTIN_GATHER3ALTDIV16SF);
30956
30957 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8di ",
30958 V8DI_FTYPE_V8DI_PCINT64_V16SI_QI_INT,
30959 IX86_BUILTIN_GATHER3ALTSIV8DI);
30960
30961 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8si ",
30962 V16SI_FTYPE_V16SI_PCINT_V8DI_HI_INT,
30963 IX86_BUILTIN_GATHER3ALTDIV16SI);
30964
30965 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16sf",
30966 VOID_FTYPE_PFLOAT_HI_V16SI_V16SF_INT,
30967 IX86_BUILTIN_SCATTERSIV16SF);
30968
30969 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8df",
30970 VOID_FTYPE_PDOUBLE_QI_V8SI_V8DF_INT,
30971 IX86_BUILTIN_SCATTERSIV8DF);
30972
30973 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16sf",
30974 VOID_FTYPE_PFLOAT_QI_V8DI_V8SF_INT,
30975 IX86_BUILTIN_SCATTERDIV16SF);
30976
30977 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8df",
30978 VOID_FTYPE_PDOUBLE_QI_V8DI_V8DF_INT,
30979 IX86_BUILTIN_SCATTERDIV8DF);
30980
30981 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16si",
30982 VOID_FTYPE_PINT_HI_V16SI_V16SI_INT,
30983 IX86_BUILTIN_SCATTERSIV16SI);
30984
30985 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8di",
30986 VOID_FTYPE_PLONGLONG_QI_V8SI_V8DI_INT,
30987 IX86_BUILTIN_SCATTERSIV8DI);
30988
30989 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16si",
30990 VOID_FTYPE_PINT_QI_V8DI_V8SI_INT,
30991 IX86_BUILTIN_SCATTERDIV16SI);
30992
30993 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8di",
30994 VOID_FTYPE_PLONGLONG_QI_V8DI_V8DI_INT,
30995 IX86_BUILTIN_SCATTERDIV8DI);
30996
30997 /* AVX512PF */
30998 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdpd",
30999 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
31000 IX86_BUILTIN_GATHERPFDPD);
31001 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdps",
31002 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
31003 IX86_BUILTIN_GATHERPFDPS);
31004 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqpd",
31005 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
31006 IX86_BUILTIN_GATHERPFQPD);
31007 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqps",
31008 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
31009 IX86_BUILTIN_GATHERPFQPS);
31010 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdpd",
31011 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
31012 IX86_BUILTIN_SCATTERPFDPD);
31013 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdps",
31014 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
31015 IX86_BUILTIN_SCATTERPFDPS);
31016 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqpd",
31017 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
31018 IX86_BUILTIN_SCATTERPFQPD);
31019 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqps",
31020 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
31021 IX86_BUILTIN_SCATTERPFQPS);
31022
31023 /* SHA */
31024 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg1",
31025 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG1);
31026 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg2",
31027 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG2);
31028 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1nexte",
31029 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1NEXTE);
31030 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1rnds4",
31031 V4SI_FTYPE_V4SI_V4SI_INT, IX86_BUILTIN_SHA1RNDS4);
31032 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg1",
31033 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG1);
31034 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg2",
31035 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG2);
31036 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256rnds2",
31037 V4SI_FTYPE_V4SI_V4SI_V4SI, IX86_BUILTIN_SHA256RNDS2);
31038
31039 /* RTM. */
31040 def_builtin (OPTION_MASK_ISA_RTM, "__builtin_ia32_xabort",
31041 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_XABORT);
31042
31043 /* MMX access to the vec_init patterns. */
31044 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
31045 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
31046
31047 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
31048 V4HI_FTYPE_HI_HI_HI_HI,
31049 IX86_BUILTIN_VEC_INIT_V4HI);
31050
31051 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
31052 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
31053 IX86_BUILTIN_VEC_INIT_V8QI);
31054
31055 /* Access to the vec_extract patterns. */
31056 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
31057 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
31058 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
31059 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
31060 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
31061 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
31062 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
31063 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
31064 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
31065 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
31066
31067 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
31068 "__builtin_ia32_vec_ext_v4hi",
31069 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
31070
31071 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
31072 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
31073
31074 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
31075 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
31076
31077 /* Access to the vec_set patterns. */
31078 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
31079 "__builtin_ia32_vec_set_v2di",
31080 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
31081
31082 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
31083 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
31084
31085 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
31086 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
31087
31088 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
31089 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
31090
31091 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
31092 "__builtin_ia32_vec_set_v4hi",
31093 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
31094
31095 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
31096 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
31097
31098 /* RDSEED */
31099 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_hi_step",
31100 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDSEED16_STEP);
31101 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_si_step",
31102 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDSEED32_STEP);
31103 def_builtin (OPTION_MASK_ISA_RDSEED | OPTION_MASK_ISA_64BIT,
31104 "__builtin_ia32_rdseed_di_step",
31105 INT_FTYPE_PULONGLONG, IX86_BUILTIN_RDSEED64_STEP);
31106
31107 /* ADCX */
31108 def_builtin (0, "__builtin_ia32_addcarryx_u32",
31109 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_ADDCARRYX32);
31110 def_builtin (OPTION_MASK_ISA_64BIT,
31111 "__builtin_ia32_addcarryx_u64",
31112 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
31113 IX86_BUILTIN_ADDCARRYX64);
31114
31115 /* Read/write FLAGS. */
31116 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u32",
31117 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
31118 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u64",
31119 UINT64_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
31120 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u32",
31121 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_WRITE_FLAGS);
31122 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u64",
31123 VOID_FTYPE_UINT64, IX86_BUILTIN_WRITE_FLAGS);
31124
31125
31126 /* Add FMA4 multi-arg instructions. */
31127 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
31128 {
31129 if (d->name == 0)
31130 continue;
31131
31132 ftype = (enum ix86_builtin_func_type) d->flag;
31133 def_builtin_const (d->mask, d->name, ftype, d->code);
31134 }
31135 }
31136
31137 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
31138 to return a pointer to VERSION_DECL if the outcome of the expression
31139 formed by PREDICATE_CHAIN is true. This function will be called during
31140 version dispatch to decide which function version to execute. It returns
31141 the basic block at the end, to which more conditions can be added. */
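/* Rough sketch of the code this helper appends to NEW_BB (illustrative
   only; the exact GIMPLE is built below):

     cond_1 = predicate_1 (arg_1);
     cond_2 = predicate_2 (arg_2);
     and_expr = MIN_EXPR <cond_2, cond_1>;   (nonzero iff all are nonzero)
     if (and_expr > 0)
       return (void *) &version_decl;
     ... execution falls through to the condition added by the next call ...

   When PREDICATE_CHAIN is empty, only the return is emitted, which is
   how the default version is dispatched unconditionally at the end.  */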
31142
31143 static basic_block
31144 add_condition_to_bb (tree function_decl, tree version_decl,
31145 tree predicate_chain, basic_block new_bb)
31146 {
31147 gimple return_stmt;
31148 tree convert_expr, result_var;
31149 gimple convert_stmt;
31150 gimple call_cond_stmt;
31151 gimple if_else_stmt;
31152
31153 basic_block bb1, bb2, bb3;
31154 edge e12, e23;
31155
31156 tree cond_var, and_expr_var = NULL_TREE;
31157 gimple_seq gseq;
31158
31159 tree predicate_decl, predicate_arg;
31160
31161 push_cfun (DECL_STRUCT_FUNCTION (function_decl));
31162
31163 gcc_assert (new_bb != NULL);
31164 gseq = bb_seq (new_bb);
31165
31166
31167 convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
31168 build_fold_addr_expr (version_decl));
31169 result_var = create_tmp_var (ptr_type_node, NULL);
31170 convert_stmt = gimple_build_assign (result_var, convert_expr);
31171 return_stmt = gimple_build_return (result_var);
31172
31173 if (predicate_chain == NULL_TREE)
31174 {
31175 gimple_seq_add_stmt (&gseq, convert_stmt);
31176 gimple_seq_add_stmt (&gseq, return_stmt);
31177 set_bb_seq (new_bb, gseq);
31178 gimple_set_bb (convert_stmt, new_bb);
31179 gimple_set_bb (return_stmt, new_bb);
31180 pop_cfun ();
31181 return new_bb;
31182 }
31183
31184 while (predicate_chain != NULL)
31185 {
31186 cond_var = create_tmp_var (integer_type_node, NULL);
31187 predicate_decl = TREE_PURPOSE (predicate_chain);
31188 predicate_arg = TREE_VALUE (predicate_chain);
31189 call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
31190 gimple_call_set_lhs (call_cond_stmt, cond_var);
31191
31192 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
31193 gimple_set_bb (call_cond_stmt, new_bb);
31194 gimple_seq_add_stmt (&gseq, call_cond_stmt);
31195
31196 predicate_chain = TREE_CHAIN (predicate_chain);
31197
31198 if (and_expr_var == NULL)
31199 and_expr_var = cond_var;
31200 else
31201 {
31202 gimple assign_stmt;
31203 /* Use MIN_EXPR to combine the conditions: nonzero only if all are nonzero.
31204 and_expr_var = min_expr <cond_var, and_expr_var> */
31205 assign_stmt = gimple_build_assign (and_expr_var,
31206 build2 (MIN_EXPR, integer_type_node,
31207 cond_var, and_expr_var));
31208
31209 gimple_set_block (assign_stmt, DECL_INITIAL (function_decl));
31210 gimple_set_bb (assign_stmt, new_bb);
31211 gimple_seq_add_stmt (&gseq, assign_stmt);
31212 }
31213 }
31214
31215 if_else_stmt = gimple_build_cond (GT_EXPR, and_expr_var,
31216 integer_zero_node,
31217 NULL_TREE, NULL_TREE);
31218 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
31219 gimple_set_bb (if_else_stmt, new_bb);
31220 gimple_seq_add_stmt (&gseq, if_else_stmt);
31221
31222 gimple_seq_add_stmt (&gseq, convert_stmt);
31223 gimple_seq_add_stmt (&gseq, return_stmt);
31224 set_bb_seq (new_bb, gseq);
31225
31226 bb1 = new_bb;
31227 e12 = split_block (bb1, if_else_stmt);
31228 bb2 = e12->dest;
31229 e12->flags &= ~EDGE_FALLTHRU;
31230 e12->flags |= EDGE_TRUE_VALUE;
31231
31232 e23 = split_block (bb2, return_stmt);
31233
31234 gimple_set_bb (convert_stmt, bb2);
31235 gimple_set_bb (return_stmt, bb2);
31236
31237 bb3 = e23->dest;
31238 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
31239
31240 remove_edge (e23);
31241 make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
31242
31243 pop_cfun ();
31244
31245 return bb3;
31246 }
31247
31248 /* This parses the attribute arguments to target in DECL and determines
31249 the right builtin to use to match the platform specification.
31250 It returns the priority value for this version decl. If PREDICATE_LIST
31251 is not NULL, it stores the list of cpu features that need to be checked
31252 before dispatching this function. */
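/* Illustrative example (assumed, not part of the original comments):

     __attribute__ ((target ("arch=core2")))    int foo (void);
     __attribute__ ((target ("sse4.2,popcnt"))) int foo (void);

   The first decl yields priority P_PROC_SSSE3 and a predicate chain with
   one __builtin_cpu_is ("core2") check; the second yields P_POPCNT (the
   highest-priority feature named) and two __builtin_cpu_supports checks,
   one per feature token.  */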
31253
31254 static unsigned int
31255 get_builtin_code_for_version (tree decl, tree *predicate_list)
31256 {
31257 tree attrs;
31258 struct cl_target_option cur_target;
31259 tree target_node;
31260 struct cl_target_option *new_target;
31261 const char *arg_str = NULL;
31262 const char *attrs_str = NULL;
31263 char *tok_str = NULL;
31264 char *token;
31265
31266 /* Priority of i386 features, where a greater value means a higher priority. This is
31267 used to decide the order in which function dispatch must happen. For
31268 instance, a version specialized for SSE4.2 should be checked for dispatch
31269 before a version for SSE3, as SSE4.2 implies SSE3. */
31270 enum feature_priority
31271 {
31272 P_ZERO = 0,
31273 P_MMX,
31274 P_SSE,
31275 P_SSE2,
31276 P_SSE3,
31277 P_SSSE3,
31278 P_PROC_SSSE3,
31279 P_SSE4_A,
31280 P_PROC_SSE4_A,
31281 P_SSE4_1,
31282 P_SSE4_2,
31283 P_PROC_SSE4_2,
31284 P_POPCNT,
31285 P_AVX,
31286 P_PROC_AVX,
31287 P_FMA4,
31288 P_XOP,
31289 P_PROC_XOP,
31290 P_FMA,
31291 P_PROC_FMA,
31292 P_AVX2,
31293 P_PROC_AVX2
31294 };
31295
31296 enum feature_priority priority = P_ZERO;
31297
31298 /* These are the target attribute strings for which a dispatcher is
31299 available, from fold_builtin_cpu. */
31300
31301 static struct _feature_list
31302 {
31303 const char *const name;
31304 const enum feature_priority priority;
31305 }
31306 const feature_list[] =
31307 {
31308 {"mmx", P_MMX},
31309 {"sse", P_SSE},
31310 {"sse2", P_SSE2},
31311 {"sse3", P_SSE3},
31312 {"sse4a", P_SSE4_A},
31313 {"ssse3", P_SSSE3},
31314 {"sse4.1", P_SSE4_1},
31315 {"sse4.2", P_SSE4_2},
31316 {"popcnt", P_POPCNT},
31317 {"avx", P_AVX},
31318 {"fma4", P_FMA4},
31319 {"xop", P_XOP},
31320 {"fma", P_FMA},
31321 {"avx2", P_AVX2}
31322 };
31323
31324
31325 static unsigned int NUM_FEATURES
31326 = sizeof (feature_list) / sizeof (struct _feature_list);
31327
31328 unsigned int i;
31329
31330 tree predicate_chain = NULL_TREE;
31331 tree predicate_decl, predicate_arg;
31332
31333 attrs = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
31334 gcc_assert (attrs != NULL);
31335
31336 attrs = TREE_VALUE (TREE_VALUE (attrs));
31337
31338 gcc_assert (TREE_CODE (attrs) == STRING_CST);
31339 attrs_str = TREE_STRING_POINTER (attrs);
31340
31341 /* Return priority zero for default function. */
31342 if (strcmp (attrs_str, "default") == 0)
31343 return 0;
31344
31345 /* Handle arch= if specified. For priority, set it to be 1 more than
31346 the best instruction set the processor can handle. For instance, if
31347 there is a version for atom and a version for ssse3 (the highest ISA
31348 priority for atom), the atom version must be checked for dispatch
31349 before the ssse3 version. */
31350 if (strstr (attrs_str, "arch=") != NULL)
31351 {
31352 cl_target_option_save (&cur_target, &global_options);
31353 target_node = ix86_valid_target_attribute_tree (attrs, &global_options,
31354 &global_options_set);
31355
31356 gcc_assert (target_node);
31357 new_target = TREE_TARGET_OPTION (target_node);
31358 gcc_assert (new_target);
31359
31360 if (new_target->arch_specified && new_target->arch > 0)
31361 {
31362 switch (new_target->arch)
31363 {
31364 case PROCESSOR_CORE2:
31365 arg_str = "core2";
31366 priority = P_PROC_SSSE3;
31367 break;
31368 case PROCESSOR_NEHALEM:
31369 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_AES)
31370 arg_str = "westmere";
31371 else
31372 /* We translate "arch=corei7" and "arch=nehalem" to
31373 "corei7" so that it will be mapped to M_INTEL_COREI7
31374 as cpu type to cover all M_INTEL_COREI7_XXXs. */
31375 arg_str = "corei7";
31376 priority = P_PROC_SSE4_2;
31377 break;
31378 case PROCESSOR_SANDYBRIDGE:
31379 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_F16C)
31380 arg_str = "ivybridge";
31381 else
31382 arg_str = "sandybridge";
31383 priority = P_PROC_AVX;
31384 break;
31385 case PROCESSOR_HASWELL:
31386 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_ADX)
31387 arg_str = "broadwell";
31388 else
31389 arg_str = "haswell";
31390 priority = P_PROC_AVX2;
31391 break;
31392 case PROCESSOR_BONNELL:
31393 arg_str = "bonnell";
31394 priority = P_PROC_SSSE3;
31395 break;
31396 case PROCESSOR_SILVERMONT:
31397 arg_str = "silvermont";
31398 priority = P_PROC_SSE4_2;
31399 break;
31400 case PROCESSOR_AMDFAM10:
31401 arg_str = "amdfam10h";
31402 priority = P_PROC_SSE4_A;
31403 break;
31404 case PROCESSOR_BTVER1:
31405 arg_str = "btver1";
31406 priority = P_PROC_SSE4_A;
31407 break;
31408 case PROCESSOR_BTVER2:
31409 arg_str = "btver2";
31410 priority = P_PROC_AVX;
31411 break;
31412 case PROCESSOR_BDVER1:
31413 arg_str = "bdver1";
31414 priority = P_PROC_XOP;
31415 break;
31416 case PROCESSOR_BDVER2:
31417 arg_str = "bdver2";
31418 priority = P_PROC_FMA;
31419 break;
31420 case PROCESSOR_BDVER3:
31421 arg_str = "bdver3";
31422 priority = P_PROC_FMA;
31423 break;
31424 case PROCESSOR_BDVER4:
31425 arg_str = "bdver4";
31426 priority = P_PROC_AVX2;
31427 break;
31428 }
31429 }
31430
31431 cl_target_option_restore (&global_options, &cur_target);
31432
31433 if (predicate_list && arg_str == NULL)
31434 {
31435 error_at (DECL_SOURCE_LOCATION (decl),
31436 "No dispatcher found for the versioning attributes");
31437 return 0;
31438 }
31439
31440 if (predicate_list)
31441 {
31442 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_IS];
31443 /* For a C string literal the length includes the trailing NULL. */
31444 predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
31445 predicate_chain = tree_cons (predicate_decl, predicate_arg,
31446 predicate_chain);
31447 }
31448 }
31449
31450 /* Process feature name. */
31451 tok_str = (char *) xmalloc (strlen (attrs_str) + 1);
31452 strcpy (tok_str, attrs_str);
31453 token = strtok (tok_str, ",");
31454 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_SUPPORTS];
31455
31456 while (token != NULL)
31457 {
31458 /* Do not process "arch=" */
31459 if (strncmp (token, "arch=", 5) == 0)
31460 {
31461 token = strtok (NULL, ",");
31462 continue;
31463 }
31464 for (i = 0; i < NUM_FEATURES; ++i)
31465 {
31466 if (strcmp (token, feature_list[i].name) == 0)
31467 {
31468 if (predicate_list)
31469 {
31470 predicate_arg = build_string_literal (
31471 strlen (feature_list[i].name) + 1,
31472 feature_list[i].name);
31473 predicate_chain = tree_cons (predicate_decl, predicate_arg,
31474 predicate_chain);
31475 }
31476 /* Find the maximum priority feature. */
31477 if (feature_list[i].priority > priority)
31478 priority = feature_list[i].priority;
31479
31480 break;
31481 }
31482 }
31483 if (predicate_list && i == NUM_FEATURES)
31484 {
31485 error_at (DECL_SOURCE_LOCATION (decl),
31486 "No dispatcher found for %s", token);
31487 return 0;
31488 }
31489 token = strtok (NULL, ",");
31490 }
31491 free (tok_str);
31492
31493 if (predicate_list && predicate_chain == NULL_TREE)
31494 {
31495 error_at (DECL_SOURCE_LOCATION (decl),
31496 "No dispatcher found for the versioning attributes : %s",
31497 attrs_str);
31498 return 0;
31499 }
31500 else if (predicate_list)
31501 {
31502 predicate_chain = nreverse (predicate_chain);
31503 *predicate_list = predicate_chain;
31504 }
31505
31506 return priority;
31507 }
31508
31509 /* This compares the priority of target features in function DECL1
31510 and DECL2. It returns positive value if DECL1 is higher priority,
31511 negative value if DECL2 is higher priority and 0 if they are the
31512 same. */
31513
31514 static int
31515 ix86_compare_version_priority (tree decl1, tree decl2)
31516 {
31517 unsigned int priority1 = get_builtin_code_for_version (decl1, NULL);
31518 unsigned int priority2 = get_builtin_code_for_version (decl2, NULL);
31519
31520 return (int)priority1 - (int)priority2;
31521 }
31522
31523 /* V1 and V2 point to function versions with different priorities
31524 based on the target ISA. This function compares their priorities. */
31525
31526 static int
31527 feature_compare (const void *v1, const void *v2)
31528 {
31529 typedef struct _function_version_info
31530 {
31531 tree version_decl;
31532 tree predicate_chain;
31533 unsigned int dispatch_priority;
31534 } function_version_info;
31535
31536 const function_version_info c1 = *(const function_version_info *)v1;
31537 const function_version_info c2 = *(const function_version_info *)v2;
31538 return (c2.dispatch_priority - c1.dispatch_priority);
31539 }
31540
31541 /* This function generates the dispatch function for
31542 multi-versioned functions. DISPATCH_DECL is the function which will
31543 contain the dispatch logic. FNDECLS are the function choices for
31544 dispatch, and is a tree chain. EMPTY_BB is the basic block pointer
31545 in DISPATCH_DECL in which the dispatch code is generated. */
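/* Sketch of the resolver body this produces (for illustration; the
   function and version names are hypothetical):

     void *foo_resolver (void)
     {
       __builtin_cpu_init ();
       if (__builtin_cpu_is ("haswell"))       return foo_avx2;
       if (__builtin_cpu_supports ("sse4.2"))  return foo_sse42;
       return foo_default;
     }

   Versions are tested in descending dispatch priority, and the default
   version is appended last with a NULL predicate chain.  */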
31546
31547 static int
31548 dispatch_function_versions (tree dispatch_decl,
31549 void *fndecls_p,
31550 basic_block *empty_bb)
31551 {
31552 tree default_decl;
31553 gimple ifunc_cpu_init_stmt;
31554 gimple_seq gseq;
31555 int ix;
31556 tree ele;
31557 vec<tree> *fndecls;
31558 unsigned int num_versions = 0;
31559 unsigned int actual_versions = 0;
31560 unsigned int i;
31561
31562 struct _function_version_info
31563 {
31564 tree version_decl;
31565 tree predicate_chain;
31566 unsigned int dispatch_priority;
31567 }*function_version_info;
31568
31569 gcc_assert (dispatch_decl != NULL
31570 && fndecls_p != NULL
31571 && empty_bb != NULL);
31572
31573 /* fndecls_p is actually a vector. */
31574 fndecls = static_cast<vec<tree> *> (fndecls_p);
31575
31576 /* At least one more version other than the default. */
31577 num_versions = fndecls->length ();
31578 gcc_assert (num_versions >= 2);
31579
31580 function_version_info = (struct _function_version_info *)
31581 XNEWVEC (struct _function_version_info, (num_versions - 1));
31582
31583 /* The first version in the vector is the default decl. */
31584 default_decl = (*fndecls)[0];
31585
31586 push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl));
31587
31588 gseq = bb_seq (*empty_bb);
31589 /* Function version dispatch is via IFUNC. IFUNC resolvers fire before
31590 constructors, so explicitly call __builtin_cpu_init here. */
31591 ifunc_cpu_init_stmt = gimple_build_call_vec (
31592 ix86_builtins [(int) IX86_BUILTIN_CPU_INIT], vNULL);
31593 gimple_seq_add_stmt (&gseq, ifunc_cpu_init_stmt);
31594 gimple_set_bb (ifunc_cpu_init_stmt, *empty_bb);
31595 set_bb_seq (*empty_bb, gseq);
31596
31597 pop_cfun ();
31598
31599
31600 for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
31601 {
31602 tree version_decl = ele;
31603 tree predicate_chain = NULL_TREE;
31604 unsigned int priority;
31605 /* Get attribute string, parse it and find the right predicate decl.
31606 The predicate function could be a lengthy combination of many
31607 features, like arch-type and various isa-variants. */
31608 priority = get_builtin_code_for_version (version_decl,
31609 &predicate_chain);
31610
31611 if (predicate_chain == NULL_TREE)
31612 continue;
31613
31614 function_version_info [actual_versions].version_decl = version_decl;
31615 function_version_info [actual_versions].predicate_chain
31616 = predicate_chain;
31617 function_version_info [actual_versions].dispatch_priority = priority;
31618 actual_versions++;
31619 }
31620
31621 /* Sort the versions according to descending order of dispatch priority. The
31622 priority is based on the ISA. This is not a perfect solution. There
31623 could still be ambiguity. If more than one function version is suitable
31624 to execute, which one should be dispatched? In future, allow the user
31625 to specify a dispatch priority next to the version. */
31626 qsort (function_version_info, actual_versions,
31627 sizeof (struct _function_version_info), feature_compare);
31628
31629 for (i = 0; i < actual_versions; ++i)
31630 *empty_bb = add_condition_to_bb (dispatch_decl,
31631 function_version_info[i].version_decl,
31632 function_version_info[i].predicate_chain,
31633 *empty_bb);
31634
31635 /* Dispatch the default version at the end. */
31636 *empty_bb = add_condition_to_bb (dispatch_decl, default_decl,
31637 NULL, *empty_bb);
31638
31639 free (function_version_info);
31640 return 0;
31641 }
31642
31643 /* Comparator function used by qsort to sort the attribute
31644 specification strings given to "target". */
31645
31646 static int
31647 attr_strcmp (const void *v1, const void *v2)
31648 {
31649 const char *c1 = *(char *const*)v1;
31650 const char *c2 = *(char *const*)v2;
31651 return strcmp (c1, c2);
31652 }
31653
31654 /* ARGLIST is the argument to target attribute. This function tokenizes
31655 the comma separated arguments, sorts them and returns a string which
31656 is a unique identifier for the comma separated arguments. It also
31657 replaces non-identifier characters "=,-" with "_". */
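/* Example (illustrative): for __attribute__ ((target ("sse4.2,arch=core2")))
   the '=' is rewritten to '_', the tokens are sorted, and the returned
   string is "arch_core2_sse4.2".  A single-argument string such as
   "avx2" is returned unchanged apart from the '=' and '-' rewriting.  */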
31658
31659 static char *
31660 sorted_attr_string (tree arglist)
31661 {
31662 tree arg;
31663 size_t str_len_sum = 0;
31664 char **args = NULL;
31665 char *attr_str, *ret_str;
31666 char *attr = NULL;
31667 unsigned int argnum = 1;
31668 unsigned int i;
31669
31670 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
31671 {
31672 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
31673 size_t len = strlen (str);
31674 str_len_sum += len + 1;
31675 if (arg != arglist)
31676 argnum++;
31677 for (i = 0; i < strlen (str); i++)
31678 if (str[i] == ',')
31679 argnum++;
31680 }
31681
31682 attr_str = XNEWVEC (char, str_len_sum);
31683 str_len_sum = 0;
31684 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
31685 {
31686 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
31687 size_t len = strlen (str);
31688 memcpy (attr_str + str_len_sum, str, len);
31689 attr_str[str_len_sum + len] = TREE_CHAIN (arg) ? ',' : '\0';
31690 str_len_sum += len + 1;
31691 }
31692
31693 /* Replace "=,-" with "_". */
31694 for (i = 0; i < strlen (attr_str); i++)
31695 if (attr_str[i] == '=' || attr_str[i]== '-')
31696 attr_str[i] = '_';
31697
31698 if (argnum == 1)
31699 return attr_str;
31700
31701 args = XNEWVEC (char *, argnum);
31702
31703 i = 0;
31704 attr = strtok (attr_str, ",");
31705 while (attr != NULL)
31706 {
31707 args[i] = attr;
31708 i++;
31709 attr = strtok (NULL, ",");
31710 }
31711
31712 qsort (args, argnum, sizeof (char *), attr_strcmp);
31713
31714 ret_str = XNEWVEC (char, str_len_sum);
31715 str_len_sum = 0;
31716 for (i = 0; i < argnum; i++)
31717 {
31718 size_t len = strlen (args[i]);
31719 memcpy (ret_str + str_len_sum, args[i], len);
31720 ret_str[str_len_sum + len] = i < argnum - 1 ? '_' : '\0';
31721 str_len_sum += len + 1;
31722 }
31723
31724 XDELETEVEC (args);
31725 XDELETEVEC (attr_str);
31726 return ret_str;
31727 }
31728
31729 /* This function changes the assembler name for functions that are
31730 versions. If DECL is a function version and has a "target"
31731 attribute, it appends the attribute string to its assembler name. */
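/* Illustrative example: for a non-default version declared with
   __attribute__ ((target ("avx"))), the sorted attribute string is
   appended after a '.', so a C++ decl mangled as "_Z3foov" becomes
   "_Z3foov.avx".  A version whose target string is "default" keeps its
   original assembler name, as handled below.  */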
31732
31733 static tree
31734 ix86_mangle_function_version_assembler_name (tree decl, tree id)
31735 {
31736 tree version_attr;
31737 const char *orig_name, *version_string;
31738 char *attr_str, *assembler_name;
31739
31740 if (DECL_DECLARED_INLINE_P (decl)
31741 && lookup_attribute ("gnu_inline",
31742 DECL_ATTRIBUTES (decl)))
31743 error_at (DECL_SOURCE_LOCATION (decl),
31744 "Function versions cannot be marked as gnu_inline,"
31745 " bodies have to be generated");
31746
31747 if (DECL_VIRTUAL_P (decl)
31748 || DECL_VINDEX (decl))
31749 sorry ("Virtual function multiversioning not supported");
31750
31751 version_attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
31752
31753 /* target attribute string cannot be NULL. */
31754 gcc_assert (version_attr != NULL_TREE);
31755
31756 orig_name = IDENTIFIER_POINTER (id);
31757 version_string
31758 = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr)));
31759
31760 if (strcmp (version_string, "default") == 0)
31761 return id;
31762
31763 attr_str = sorted_attr_string (TREE_VALUE (version_attr));
31764 assembler_name = XNEWVEC (char, strlen (orig_name) + strlen (attr_str) + 2);
31765
31766 sprintf (assembler_name, "%s.%s", orig_name, attr_str);
31767
31768 /* Allow assembler name to be modified if already set. */
31769 if (DECL_ASSEMBLER_NAME_SET_P (decl))
31770 SET_DECL_RTL (decl, NULL);
31771
31772 tree ret = get_identifier (assembler_name);
31773 XDELETEVEC (attr_str);
31774 XDELETEVEC (assembler_name);
31775 return ret;
31776 }
31777
31778 /* This function returns true if FN1 and FN2 are versions of the same function,
31779 that is, the target strings of the function decls are different. This assumes
31780 that FN1 and FN2 have the same signature. */
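/* Example (illustrative): decls of foo carrying target ("avx") and
   target ("sse4.2") have different sorted attribute strings and are
   therefore treated as versions; two decls both carrying
   target ("sse4.2") are not.  Redeclaring foo without any target
   attribute next to an already multi-versioned foo is diagnosed
   below.  */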
31781
31782 static bool
31783 ix86_function_versions (tree fn1, tree fn2)
31784 {
31785 tree attr1, attr2;
31786 char *target1, *target2;
31787 bool result;
31788
31789 if (TREE_CODE (fn1) != FUNCTION_DECL
31790 || TREE_CODE (fn2) != FUNCTION_DECL)
31791 return false;
31792
31793 attr1 = lookup_attribute ("target", DECL_ATTRIBUTES (fn1));
31794 attr2 = lookup_attribute ("target", DECL_ATTRIBUTES (fn2));
31795
31796 /* At least one function decl should have the target attribute specified. */
31797 if (attr1 == NULL_TREE && attr2 == NULL_TREE)
31798 return false;
31799
31800 /* Diagnose missing target attribute if one of the decls is already
31801 multi-versioned. */
31802 if (attr1 == NULL_TREE || attr2 == NULL_TREE)
31803 {
31804 if (DECL_FUNCTION_VERSIONED (fn1) || DECL_FUNCTION_VERSIONED (fn2))
31805 {
31806 if (attr2 != NULL_TREE)
31807 {
31808 tree tem = fn1;
31809 fn1 = fn2;
31810 fn2 = tem;
31811 attr1 = attr2;
31812 }
31813 error_at (DECL_SOURCE_LOCATION (fn2),
31814 "missing %<target%> attribute for multi-versioned %D",
31815 fn2);
31816 inform (DECL_SOURCE_LOCATION (fn1),
31817 "previous declaration of %D", fn1);
31818 /* Prevent diagnosing of the same error multiple times. */
31819 DECL_ATTRIBUTES (fn2)
31820 = tree_cons (get_identifier ("target"),
31821 copy_node (TREE_VALUE (attr1)),
31822 DECL_ATTRIBUTES (fn2));
31823 }
31824 return false;
31825 }
31826
31827 target1 = sorted_attr_string (TREE_VALUE (attr1));
31828 target2 = sorted_attr_string (TREE_VALUE (attr2));
31829
31830 /* The sorted target strings must be different for fn1 and fn2
31831 to be versions. */
31832 if (strcmp (target1, target2) == 0)
31833 result = false;
31834 else
31835 result = true;
31836
31837 XDELETEVEC (target1);
31838 XDELETEVEC (target2);
31839
31840 return result;
31841 }
31842
31843 static tree
31844 ix86_mangle_decl_assembler_name (tree decl, tree id)
31845 {
31846 /* For function version, add the target suffix to the assembler name. */
31847 if (TREE_CODE (decl) == FUNCTION_DECL
31848 && DECL_FUNCTION_VERSIONED (decl))
31849 id = ix86_mangle_function_version_assembler_name (decl, id);
31850 #ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME
31851 id = SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl, id);
31852 #endif
31853
31854 return id;
31855 }
31856
31857 /* Return a new name by appending SUFFIX to the DECL name. If make_unique
31858 is true, append the full path name of the source file. */
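/* Example (illustrative): for a TREE_PUBLIC decl named "foo",
   make_name (decl, "ifunc", false) yields "foo.ifunc".  With
   MAKE_UNIQUE true, the file-scope string from get_file_function_name
   is spliced in between -- roughly "foo.<unique-file-id>.ifunc", where
   <unique-file-id> is only a placeholder -- so local versions from
   different translation units cannot collide at link time.  */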
31859
31860 static char *
31861 make_name (tree decl, const char *suffix, bool make_unique)
31862 {
31863 char *global_var_name;
31864 int name_len;
31865 const char *name;
31866 const char *unique_name = NULL;
31867
31868 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
31869
31870 /* Get a unique name that can be used globally without any chances
31871 of collision at link time. */
31872 if (make_unique)
31873 unique_name = IDENTIFIER_POINTER (get_file_function_name ("\0"));
31874
31875 name_len = strlen (name) + strlen (suffix) + 2;
31876
31877 if (make_unique)
31878 name_len += strlen (unique_name) + 1;
31879 global_var_name = XNEWVEC (char, name_len);
31880
31881 /* Use '.' to concatenate names as it is demangler friendly. */
31882 if (make_unique)
31883 snprintf (global_var_name, name_len, "%s.%s.%s", name, unique_name,
31884 suffix);
31885 else
31886 snprintf (global_var_name, name_len, "%s.%s", name, suffix);
31887
31888 return global_var_name;
31889 }
31890
31891 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
31892
31893 /* Make a dispatcher declaration for the multi-versioned function DECL.
31894 Calls to the DECL function will be replaced with calls to the dispatcher
31895 by the front-end. Return the decl created. */
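/* Illustrative note (hedged): for a public versioned function "foo" the
   decl created here is named "foo.ifunc" via make_name above; it is the
   symbol call sites end up referring to, and the resolver built later
   selects the concrete version at load time through the ifunc
   mechanism.  */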
31896
31897 static tree
31898 make_dispatcher_decl (const tree decl)
31899 {
31900 tree func_decl;
31901 char *func_name;
31902 tree fn_type, func_type;
31903 bool is_uniq = false;
31904
31905 if (TREE_PUBLIC (decl) == 0)
31906 is_uniq = true;
31907
31908 func_name = make_name (decl, "ifunc", is_uniq);
31909
31910 fn_type = TREE_TYPE (decl);
31911 func_type = build_function_type (TREE_TYPE (fn_type),
31912 TYPE_ARG_TYPES (fn_type));
31913
31914 func_decl = build_fn_decl (func_name, func_type);
31915 XDELETEVEC (func_name);
31916 TREE_USED (func_decl) = 1;
31917 DECL_CONTEXT (func_decl) = NULL_TREE;
31918 DECL_INITIAL (func_decl) = error_mark_node;
31919 DECL_ARTIFICIAL (func_decl) = 1;
31920 /* Mark this function as external; the resolver will flip it again if
31921 it gets generated. */
31922 DECL_EXTERNAL (func_decl) = 1;
31923 /* This will be an IFUNC, and IFUNCs have to be externally visible. */
31924 TREE_PUBLIC (func_decl) = 1;
31925
31926 return func_decl;
31927 }
31928
31929 #endif
31930
31931 /* Returns true if DECL is multi-versioned and is the default function,
31932 that is, it is not tagged with a target-specific optimization. */
31933
31934 static bool
31935 is_function_default_version (const tree decl)
31936 {
31937 if (TREE_CODE (decl) != FUNCTION_DECL
31938 || !DECL_FUNCTION_VERSIONED (decl))
31939 return false;
31940 tree attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
31941 gcc_assert (attr);
31942 attr = TREE_VALUE (TREE_VALUE (attr));
31943 return (TREE_CODE (attr) == STRING_CST
31944 && strcmp (TREE_STRING_POINTER (attr), "default") == 0);
31945 }
31946
31947 /* Make a dispatcher declaration for the multi-versioned function DECL.
31948 Calls to the DECL function will be replaced with calls to the dispatcher
31949 by the front-end. Returns the decl of the dispatcher function. */
31950
31951 static tree
31952 ix86_get_function_versions_dispatcher (void *decl)
31953 {
31954 tree fn = (tree) decl;
31955 struct cgraph_node *node = NULL;
31956 struct cgraph_node *default_node = NULL;
31957 struct cgraph_function_version_info *node_v = NULL;
31958 struct cgraph_function_version_info *first_v = NULL;
31959
31960 tree dispatch_decl = NULL;
31961
31962 struct cgraph_function_version_info *default_version_info = NULL;
31963
31964 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
31965
31966 node = cgraph_get_node (fn);
31967 gcc_assert (node != NULL);
31968
31969 node_v = get_cgraph_node_version (node);
31970 gcc_assert (node_v != NULL);
31971
31972 if (node_v->dispatcher_resolver != NULL)
31973 return node_v->dispatcher_resolver;
31974
31975 /* Find the default version and make it the first node. */
31976 first_v = node_v;
31977 /* Go to the beginning of the chain. */
31978 while (first_v->prev != NULL)
31979 first_v = first_v->prev;
31980 default_version_info = first_v;
31981 while (default_version_info != NULL)
31982 {
31983 if (is_function_default_version
31984 (default_version_info->this_node->decl))
31985 break;
31986 default_version_info = default_version_info->next;
31987 }
31988
31989 /* If there is no default node, just return NULL. */
31990 if (default_version_info == NULL)
31991 return NULL;
31992
31993 /* Make default info the first node. */
31994 if (first_v != default_version_info)
31995 {
31996 default_version_info->prev->next = default_version_info->next;
31997 if (default_version_info->next)
31998 default_version_info->next->prev = default_version_info->prev;
31999 first_v->prev = default_version_info;
32000 default_version_info->next = first_v;
32001 default_version_info->prev = NULL;
32002 }
32003
32004 default_node = default_version_info->this_node;
32005
32006 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
32007 if (targetm.has_ifunc_p ())
32008 {
32009 struct cgraph_function_version_info *it_v = NULL;
32010 struct cgraph_node *dispatcher_node = NULL;
32011 struct cgraph_function_version_info *dispatcher_version_info = NULL;
32012
32013 /* Right now, the dispatching is done via ifunc. */
32014 dispatch_decl = make_dispatcher_decl (default_node->decl);
32015
32016 dispatcher_node = cgraph_get_create_node (dispatch_decl);
32017 gcc_assert (dispatcher_node != NULL);
32018 dispatcher_node->dispatcher_function = 1;
32019 dispatcher_version_info
32020 = insert_new_cgraph_node_version (dispatcher_node);
32021 dispatcher_version_info->next = default_version_info;
32022 dispatcher_node->definition = 1;
32023
32024 /* Set the dispatcher for all the versions. */
32025 it_v = default_version_info;
32026 while (it_v != NULL)
32027 {
32028 it_v->dispatcher_resolver = dispatch_decl;
32029 it_v = it_v->next;
32030 }
32031 }
32032 else
32033 #endif
32034 {
32035 error_at (DECL_SOURCE_LOCATION (default_node->decl),
32036 "multiversioning needs ifunc which is not supported "
32037 "on this target");
32038 }
32039
32040 return dispatch_decl;
32041 }
32042
32043 /* Makes a function attribute of the form NAME(ARG_NAME) and chains
32044 it to CHAIN. */
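/* Example (illustrative): make_attribute ("ifunc", "foo.resolver",
   NULL_TREE) builds the tree form of
   __attribute__ ((ifunc ("foo.resolver"))), which is how
   make_resolver_func below ties the dispatcher decl to its resolver.
   The name "foo.resolver" is a hypothetical example.  */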
32045
32046 static tree
32047 make_attribute (const char *name, const char *arg_name, tree chain)
32048 {
32049 tree attr_name;
32050 tree attr_arg_name;
32051 tree attr_args;
32052 tree attr;
32053
32054 attr_name = get_identifier (name);
32055 attr_arg_name = build_string (strlen (arg_name), arg_name);
32056 attr_args = tree_cons (NULL_TREE, attr_arg_name, NULL_TREE);
32057 attr = tree_cons (attr_name, attr_args, chain);
32058 return attr;
32059 }
32060
32061 /* Make the resolver function decl to dispatch the versions of
32062 a multi-versioned function, DEFAULT_DECL. Create an
32063 empty basic block in the resolver and store the pointer in
32064 EMPTY_BB. Return the decl of the resolver function. */
32065
32066 static tree
32067 make_resolver_func (const tree default_decl,
32068 const tree dispatch_decl,
32069 basic_block *empty_bb)
32070 {
32071 char *resolver_name;
32072 tree decl, type, decl_name, t;
32073 bool is_uniq = false;
32074
32075 /* IFUNCs have to be globally visible. So, if the default_decl is
32076 not, then the name of the IFUNC should be made unique. */
32077 if (TREE_PUBLIC (default_decl) == 0)
32078 is_uniq = true;
32079
32080 /* Append the filename to the resolver function if the versions are
32081 not externally visible. This is because the resolver function has
32082 to be externally visible for the loader to find it. So, appending
32083 the filename will prevent conflicts with a resolver function from
32084 another module which is based on the same version name. */
32085 resolver_name = make_name (default_decl, "resolver", is_uniq);
32086
32087 /* The resolver function should return a (void *). */
32088 type = build_function_type_list (ptr_type_node, NULL_TREE);
32089
32090 decl = build_fn_decl (resolver_name, type);
32091 decl_name = get_identifier (resolver_name);
32092 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
32093
32094 DECL_NAME (decl) = decl_name;
32095 TREE_USED (decl) = 1;
32096 DECL_ARTIFICIAL (decl) = 1;
32097 DECL_IGNORED_P (decl) = 0;
32098 /* IFUNC resolvers have to be externally visible. */
32099 TREE_PUBLIC (decl) = 1;
32100 DECL_UNINLINABLE (decl) = 1;
32101
32102 /* The resolver is not external; its body is generated. */
32103 DECL_EXTERNAL (decl) = 0;
32104 DECL_EXTERNAL (dispatch_decl) = 0;
32105
32106 DECL_CONTEXT (decl) = NULL_TREE;
32107 DECL_INITIAL (decl) = make_node (BLOCK);
32108 DECL_STATIC_CONSTRUCTOR (decl) = 0;
32109
32110 if (DECL_COMDAT_GROUP (default_decl)
32111 || TREE_PUBLIC (default_decl))
32112 {
32113 /* In this case, each translation unit with a call to this
32114 versioned function will put out a resolver. Ensure it
32115 is comdat to keep just one copy. */
32116 DECL_COMDAT (decl) = 1;
32117 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
32118 }
32119 /* Build result decl and add to function_decl. */
32120 t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
32121 DECL_ARTIFICIAL (t) = 1;
32122 DECL_IGNORED_P (t) = 1;
32123 DECL_RESULT (decl) = t;
32124
32125 gimplify_function_tree (decl);
32126 push_cfun (DECL_STRUCT_FUNCTION (decl));
32127 *empty_bb = init_lowered_empty_function (decl, false);
32128
32129 cgraph_add_new_function (decl, true);
32130 cgraph_call_function_insertion_hooks (cgraph_get_create_node (decl));
32131
32132 pop_cfun ();
32133
32134 gcc_assert (dispatch_decl != NULL);
32135 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
32136 DECL_ATTRIBUTES (dispatch_decl)
32137 = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
32138
32139 /* Create the alias for dispatch to resolver here. */
32140 /*cgraph_create_function_alias (dispatch_decl, decl);*/
32141 cgraph_same_body_alias (NULL, dispatch_decl, decl);
32142 XDELETEVEC (resolver_name);
32143 return decl;
32144 }
32145
32146 /* Generate the dispatching code body to dispatch multi-versioned function
32147 DECL. The target hook is called to process the "target" attributes and
32148 provide the code to dispatch the right function at run-time. NODE points
32149 to the dispatcher decl whose body will be created. */
32150
32151 static tree
32152 ix86_generate_version_dispatcher_body (void *node_p)
32153 {
32154 tree resolver_decl;
32155 basic_block empty_bb;
32156 tree default_ver_decl;
32157 struct cgraph_node *versn;
32158 struct cgraph_node *node;
32159
32160 struct cgraph_function_version_info *node_version_info = NULL;
32161 struct cgraph_function_version_info *versn_info = NULL;
32162
32163 node = (cgraph_node *)node_p;
32164
32165 node_version_info = get_cgraph_node_version (node);
32166 gcc_assert (node->dispatcher_function
32167 && node_version_info != NULL);
32168
32169 if (node_version_info->dispatcher_resolver)
32170 return node_version_info->dispatcher_resolver;
32171
32172 /* The first version in the chain corresponds to the default version. */
32173 default_ver_decl = node_version_info->next->this_node->decl;
32174
32175 /* node is going to be an alias, so remove the finalized bit. */
32176 node->definition = false;
32177
32178 resolver_decl = make_resolver_func (default_ver_decl,
32179 node->decl, &empty_bb);
32180
32181 node_version_info->dispatcher_resolver = resolver_decl;
32182
32183 push_cfun (DECL_STRUCT_FUNCTION (resolver_decl));
32184
32185 auto_vec<tree, 2> fn_ver_vec;
32186
32187 for (versn_info = node_version_info->next; versn_info;
32188 versn_info = versn_info->next)
32189 {
32190 versn = versn_info->this_node;
32191 /* Check for virtual functions here again, as by this time it should
32192 have been determined if this function needs a vtable index or
32193 not. This happens for methods in derived classes that override
32194 virtual methods in base classes but are not explicitly marked as
32195 virtual. */
32196 if (DECL_VINDEX (versn->decl))
32197 sorry ("Virtual function multiversioning not supported");
32198
32199 fn_ver_vec.safe_push (versn->decl);
32200 }
32201
32202 dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb);
32203 rebuild_cgraph_edges ();
32204 pop_cfun ();
32205 return resolver_decl;
32206 }
32207 /* This builds the processor_model struct type defined in
32208 libgcc/config/i386/cpuinfo.c */
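/* For reference, a hedged sketch of the libgcc structure being mirrored
   (see libgcc/config/i386/cpuinfo.c; the field names below come from
   the field_name array):

     struct __processor_model
     {
       unsigned int __cpu_vendor;
       unsigned int __cpu_type;
       unsigned int __cpu_subtype;
       unsigned int __cpu_features[1];
     };

   fold_builtin_cpu below folds __builtin_cpu_is/__builtin_cpu_supports
   into reads of the matching field of the global __cpu_model.  */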
32209
32210 static tree
32211 build_processor_model_struct (void)
32212 {
32213 const char *field_name[] = {"__cpu_vendor", "__cpu_type", "__cpu_subtype",
32214 "__cpu_features"};
32215 tree field = NULL_TREE, field_chain = NULL_TREE;
32216 int i;
32217 tree type = make_node (RECORD_TYPE);
32218
32219 /* The first 3 fields are unsigned int. */
32220 for (i = 0; i < 3; ++i)
32221 {
32222 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
32223 get_identifier (field_name[i]), unsigned_type_node);
32224 if (field_chain != NULL_TREE)
32225 DECL_CHAIN (field) = field_chain;
32226 field_chain = field;
32227 }
32228
32229 /* The last field is an array of unsigned integers of size one. */
32230 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
32231 get_identifier (field_name[3]),
32232 build_array_type (unsigned_type_node,
32233 build_index_type (size_one_node)));
32234 if (field_chain != NULL_TREE)
32235 DECL_CHAIN (field) = field_chain;
32236 field_chain = field;
32237
32238 finish_builtin_struct (type, "__processor_model", field_chain, NULL_TREE);
32239 return type;
32240 }
32241
32242 /* Returns an extern, comdat VAR_DECL of type TYPE and name NAME. */
32243
32244 static tree
32245 make_var_decl (tree type, const char *name)
32246 {
32247 tree new_decl;
32248
32249 new_decl = build_decl (UNKNOWN_LOCATION,
32250 VAR_DECL,
32251 get_identifier(name),
32252 type);
32253
32254 DECL_EXTERNAL (new_decl) = 1;
32255 TREE_STATIC (new_decl) = 1;
32256 TREE_PUBLIC (new_decl) = 1;
32257 DECL_INITIAL (new_decl) = 0;
32258 DECL_ARTIFICIAL (new_decl) = 0;
32259 DECL_PRESERVE_P (new_decl) = 1;
32260
32261 make_decl_one_only (new_decl, DECL_ASSEMBLER_NAME (new_decl));
32262 assemble_variable (new_decl, 0, 0, 0);
32263
32264 return new_decl;
32265 }
32266
32267 /* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call that is folded
32268 into an integer defined in libgcc/config/i386/cpuinfo.c */
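/* Hedged usage sketch (illustration only):

     if (__builtin_cpu_is ("westmere"))
       ...
     if (__builtin_cpu_supports ("avx2"))
       ...

   The first call is folded here into a comparison of
   __cpu_model.__cpu_subtype against
   M_INTEL_COREI7_WESTMERE - M_CPU_SUBTYPE_START; the second into a
   test of the F_AVX2 bit in __cpu_model.__cpu_features[0].  */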
32269
32270 static tree
32271 fold_builtin_cpu (tree fndecl, tree *args)
32272 {
32273 unsigned int i;
32274 enum ix86_builtins fn_code = (enum ix86_builtins)
32275 DECL_FUNCTION_CODE (fndecl);
32276 tree param_string_cst = NULL;
32277
32278 /* This is the order of bit-fields in __processor_features in cpuinfo.c */
32279 enum processor_features
32280 {
32281 F_CMOV = 0,
32282 F_MMX,
32283 F_POPCNT,
32284 F_SSE,
32285 F_SSE2,
32286 F_SSE3,
32287 F_SSSE3,
32288 F_SSE4_1,
32289 F_SSE4_2,
32290 F_AVX,
32291 F_AVX2,
32292 F_SSE4_A,
32293 F_FMA4,
32294 F_XOP,
32295 F_FMA,
32296 F_MAX
32297 };
32298
32299 /* These are the values for vendor types and cpu types and subtypes
32300 in cpuinfo.c. Cpu types and subtypes have the corresponding
32301 start value subtracted from them. */
32302 enum processor_model
32303 {
32304 M_INTEL = 1,
32305 M_AMD,
32306 M_CPU_TYPE_START,
32307 M_INTEL_BONNELL,
32308 M_INTEL_CORE2,
32309 M_INTEL_COREI7,
32310 M_AMDFAM10H,
32311 M_AMDFAM15H,
32312 M_INTEL_SILVERMONT,
32313 M_AMD_BTVER1,
32314 M_AMD_BTVER2,
32315 M_CPU_SUBTYPE_START,
32316 M_INTEL_COREI7_NEHALEM,
32317 M_INTEL_COREI7_WESTMERE,
32318 M_INTEL_COREI7_SANDYBRIDGE,
32319 M_AMDFAM10H_BARCELONA,
32320 M_AMDFAM10H_SHANGHAI,
32321 M_AMDFAM10H_ISTANBUL,
32322 M_AMDFAM15H_BDVER1,
32323 M_AMDFAM15H_BDVER2,
32324 M_AMDFAM15H_BDVER3,
32325 M_AMDFAM15H_BDVER4,
32326 M_INTEL_COREI7_IVYBRIDGE,
32327 M_INTEL_COREI7_HASWELL
32328 };
32329
32330 static struct _arch_names_table
32331 {
32332 const char *const name;
32333 const enum processor_model model;
32334 }
32335 const arch_names_table[] =
32336 {
32337 {"amd", M_AMD},
32338 {"intel", M_INTEL},
32339 {"atom", M_INTEL_BONNELL},
32340 {"slm", M_INTEL_SILVERMONT},
32341 {"core2", M_INTEL_CORE2},
32342 {"corei7", M_INTEL_COREI7},
32343 {"nehalem", M_INTEL_COREI7_NEHALEM},
32344 {"westmere", M_INTEL_COREI7_WESTMERE},
32345 {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE},
32346 {"ivybridge", M_INTEL_COREI7_IVYBRIDGE},
32347 {"haswell", M_INTEL_COREI7_HASWELL},
32348 {"bonnell", M_INTEL_BONNELL},
32349 {"silvermont", M_INTEL_SILVERMONT},
32350 {"amdfam10h", M_AMDFAM10H},
32351 {"barcelona", M_AMDFAM10H_BARCELONA},
32352 {"shanghai", M_AMDFAM10H_SHANGHAI},
32353 {"istanbul", M_AMDFAM10H_ISTANBUL},
32354 {"btver1", M_AMD_BTVER1},
32355 {"amdfam15h", M_AMDFAM15H},
32356 {"bdver1", M_AMDFAM15H_BDVER1},
32357 {"bdver2", M_AMDFAM15H_BDVER2},
32358 {"bdver3", M_AMDFAM15H_BDVER3},
32359 {"bdver4", M_AMDFAM15H_BDVER4},
32360 {"btver2", M_AMD_BTVER2},
32361 };
32362
32363 static struct _isa_names_table
32364 {
32365 const char *const name;
32366 const enum processor_features feature;
32367 }
32368 const isa_names_table[] =
32369 {
32370 {"cmov", F_CMOV},
32371 {"mmx", F_MMX},
32372 {"popcnt", F_POPCNT},
32373 {"sse", F_SSE},
32374 {"sse2", F_SSE2},
32375 {"sse3", F_SSE3},
32376 {"ssse3", F_SSSE3},
32377 {"sse4a", F_SSE4_A},
32378 {"sse4.1", F_SSE4_1},
32379 {"sse4.2", F_SSE4_2},
32380 {"avx", F_AVX},
32381 {"fma4", F_FMA4},
32382 {"xop", F_XOP},
32383 {"fma", F_FMA},
32384 {"avx2", F_AVX2}
32385 };
32386
32387 tree __processor_model_type = build_processor_model_struct ();
32388 tree __cpu_model_var = make_var_decl (__processor_model_type,
32389 "__cpu_model");
32390
32391
32392 varpool_add_new_variable (__cpu_model_var);
32393
32394 gcc_assert ((args != NULL) && (*args != NULL));
32395
32396 param_string_cst = *args;
32397 while (param_string_cst
32398 && TREE_CODE (param_string_cst) != STRING_CST)
32399 {
32400 /* *args must be an expr that can contain other EXPRS leading to a
32401 STRING_CST. */
32402 if (!EXPR_P (param_string_cst))
32403 {
32404 error ("Parameter to builtin must be a string constant or literal");
32405 return integer_zero_node;
32406 }
32407 param_string_cst = TREE_OPERAND (EXPR_CHECK (param_string_cst), 0);
32408 }
32409
32410 gcc_assert (param_string_cst);
32411
32412 if (fn_code == IX86_BUILTIN_CPU_IS)
32413 {
32414 tree ref;
32415 tree field;
32416 tree final;
32417
32418 unsigned int field_val = 0;
32419 unsigned int NUM_ARCH_NAMES
32420 = sizeof (arch_names_table) / sizeof (struct _arch_names_table);
32421
32422 for (i = 0; i < NUM_ARCH_NAMES; i++)
32423 if (strcmp (arch_names_table[i].name,
32424 TREE_STRING_POINTER (param_string_cst)) == 0)
32425 break;
32426
32427 if (i == NUM_ARCH_NAMES)
32428 {
32429 error ("Parameter to builtin not valid: %s",
32430 TREE_STRING_POINTER (param_string_cst));
32431 return integer_zero_node;
32432 }
32433
32434 field = TYPE_FIELDS (__processor_model_type);
32435 field_val = arch_names_table[i].model;
32436
32437 /* CPU types are stored in the next field. */
32438 if (field_val > M_CPU_TYPE_START
32439 && field_val < M_CPU_SUBTYPE_START)
32440 {
32441 field = DECL_CHAIN (field);
32442 field_val -= M_CPU_TYPE_START;
32443 }
32444
32445 /* CPU subtypes are stored in the next field. */
32446 if (field_val > M_CPU_SUBTYPE_START)
32447 {
32448 field = DECL_CHAIN (DECL_CHAIN (field));
32449 field_val -= M_CPU_SUBTYPE_START;
32450 }
32451
32452 /* Get the appropriate field in __cpu_model. */
32453 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
32454 field, NULL_TREE);
32455
32456 /* Check the value. */
32457 final = build2 (EQ_EXPR, unsigned_type_node, ref,
32458 build_int_cstu (unsigned_type_node, field_val));
32459 return build1 (CONVERT_EXPR, integer_type_node, final);
32460 }
32461 else if (fn_code == IX86_BUILTIN_CPU_SUPPORTS)
32462 {
32463 tree ref;
32464 tree array_elt;
32465 tree field;
32466 tree final;
32467
32468 unsigned int field_val = 0;
32469 unsigned int NUM_ISA_NAMES
32470 = sizeof (isa_names_table) / sizeof (struct _isa_names_table);
32471
32472 for (i = 0; i < NUM_ISA_NAMES; i++)
32473 if (strcmp (isa_names_table[i].name,
32474 TREE_STRING_POINTER (param_string_cst)) == 0)
32475 break;
32476
32477 if (i == NUM_ISA_NAMES)
32478 {
32479 error ("Parameter to builtin not valid: %s",
32480 TREE_STRING_POINTER (param_string_cst));
32481 return integer_zero_node;
32482 }
32483
32484 field = TYPE_FIELDS (__processor_model_type);
32485 /* Get the last field, which is __cpu_features. */
32486 while (DECL_CHAIN (field))
32487 field = DECL_CHAIN (field);
32488
32489 /* Get the appropriate field: __cpu_model.__cpu_features */
32490 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
32491 field, NULL_TREE);
32492
32493 /* Access the 0th element of __cpu_features array. */
32494 array_elt = build4 (ARRAY_REF, unsigned_type_node, ref,
32495 integer_zero_node, NULL_TREE, NULL_TREE);
32496
32497 field_val = (1 << isa_names_table[i].feature);
32498 /* Return __cpu_model.__cpu_features[0] & field_val */
32499 final = build2 (BIT_AND_EXPR, unsigned_type_node, array_elt,
32500 build_int_cstu (unsigned_type_node, field_val));
32501 return build1 (CONVERT_EXPR, integer_type_node, final);
32502 }
32503 gcc_unreachable ();
32504 }
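/* In C terms, the trees built above correspond roughly to:

     __builtin_cpu_is ("corei7")
       -> (int) (__cpu_model.__cpu_type == M_INTEL_COREI7 - M_CPU_TYPE_START)

     __builtin_cpu_supports ("avx2")
       -> (int) (__cpu_model.__cpu_features[0] & (1 << F_AVX2))
*/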
32505
32506 static tree
32507 ix86_fold_builtin (tree fndecl, int n_args,
32508 tree *args, bool ignore ATTRIBUTE_UNUSED)
32509 {
32510 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
32511 {
32512 enum ix86_builtins fn_code = (enum ix86_builtins)
32513 DECL_FUNCTION_CODE (fndecl);
32514 if (fn_code == IX86_BUILTIN_CPU_IS
32515 || fn_code == IX86_BUILTIN_CPU_SUPPORTS)
32516 {
32517 gcc_assert (n_args == 1);
32518 return fold_builtin_cpu (fndecl, args);
32519 }
32520 }
32521
32522 #ifdef SUBTARGET_FOLD_BUILTIN
32523 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
32524 #endif
32525
32526 return NULL_TREE;
32527 }
32528
32529 /* Make builtins to detect cpu type and features supported. NAME is
32530 the builtin name, CODE is the builtin code, and FTYPE is the function
32531 type of the builtin. */
32532
32533 static void
32534 make_cpu_type_builtin (const char* name, int code,
32535 enum ix86_builtin_func_type ftype, bool is_const)
32536 {
32537 tree decl;
32538 tree type;
32539
32540 type = ix86_get_builtin_func_type (ftype);
32541 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
32542 NULL, NULL_TREE);
32543 gcc_assert (decl != NULL_TREE);
32544 ix86_builtins[(int) code] = decl;
32545 TREE_READONLY (decl) = is_const;
32546 }
32547
32548 /* Make builtins to get CPU type and features supported. The created
32549 builtins are:
32550
32551 __builtin_cpu_init (), to detect cpu type and features,
32552 __builtin_cpu_is ("<CPUNAME>"), to check if cpu is of type <CPUNAME>,
32553 __builtin_cpu_supports ("<FEATURE>"), to check if cpu supports <FEATURE>
32554 */
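/* A minimal usage sketch (user code, not part of this file); the argument
   strings must be literals so that fold_builtin_cpu above can reach a
   STRING_CST:

     int
     pick_impl (void)
     {
       __builtin_cpu_init ();
       if (__builtin_cpu_is ("intel") && __builtin_cpu_supports ("sse4.2"))
         return 1;
       return 0;
     }
*/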
32555
32556 static void
32557 ix86_init_platform_type_builtins (void)
32558 {
32559 make_cpu_type_builtin ("__builtin_cpu_init", IX86_BUILTIN_CPU_INIT,
32560 INT_FTYPE_VOID, false);
32561 make_cpu_type_builtin ("__builtin_cpu_is", IX86_BUILTIN_CPU_IS,
32562 INT_FTYPE_PCCHAR, true);
32563 make_cpu_type_builtin ("__builtin_cpu_supports", IX86_BUILTIN_CPU_SUPPORTS,
32564 INT_FTYPE_PCCHAR, true);
32565 }
32566
32567 /* Internal method for ix86_init_builtins. */
32568
32569 static void
32570 ix86_init_builtins_va_builtins_abi (void)
32571 {
32572 tree ms_va_ref, sysv_va_ref;
32573 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
32574 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
32575 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
32576 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
32577
32578 if (!TARGET_64BIT)
32579 return;
32580 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
32581 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
32582 ms_va_ref = build_reference_type (ms_va_list_type_node);
32583 sysv_va_ref =
32584 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
32585
32586 fnvoid_va_end_ms =
32587 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
32588 fnvoid_va_start_ms =
32589 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
32590 fnvoid_va_end_sysv =
32591 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
32592 fnvoid_va_start_sysv =
32593 build_varargs_function_type_list (void_type_node, sysv_va_ref,
32594 NULL_TREE);
32595 fnvoid_va_copy_ms =
32596 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
32597 NULL_TREE);
32598 fnvoid_va_copy_sysv =
32599 build_function_type_list (void_type_node, sysv_va_ref,
32600 sysv_va_ref, NULL_TREE);
32601
32602 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
32603 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
32604 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
32605 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
32606 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
32607 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
32608 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
32609 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
32610 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
32611 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
32612 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
32613 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
32614 }
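/* One hedged sketch of the kind of user code these builtins support
   (the function name is invented for illustration):

     __attribute__ ((ms_abi)) int
     sum_ms (int n, ...)
     {
       __builtin_ms_va_list ap;
       int s = 0;

       __builtin_ms_va_start (ap, n);
       while (n-- > 0)
         s += __builtin_va_arg (ap, int);
       __builtin_ms_va_end (ap);
       return s;
     }
*/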
32615
32616 static void
32617 ix86_init_builtin_types (void)
32618 {
32619 tree float128_type_node, float80_type_node;
32620
32621 /* The __float80 type. */
32622 float80_type_node = long_double_type_node;
32623 if (TYPE_MODE (float80_type_node) != XFmode)
32624 {
32625 /* The __float80 type. */
32626 float80_type_node = make_node (REAL_TYPE);
32627
32628 TYPE_PRECISION (float80_type_node) = 80;
32629 layout_type (float80_type_node);
32630 }
32631 lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
32632
32633 /* The __float128 type. */
32634 float128_type_node = make_node (REAL_TYPE);
32635 TYPE_PRECISION (float128_type_node) = 128;
32636 layout_type (float128_type_node);
32637 lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
32638
32639 /* This macro is built by i386-builtin-types.awk. */
32640 DEFINE_BUILTIN_PRIMITIVE_TYPES;
32641 }
32642
32643 static void
32644 ix86_init_builtins (void)
32645 {
32646 tree t;
32647
32648 ix86_init_builtin_types ();
32649
32650 /* Builtins to get CPU type and features. */
32651 ix86_init_platform_type_builtins ();
32652
32653 /* TFmode support builtins. */
32654 def_builtin_const (0, "__builtin_infq",
32655 FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
32656 def_builtin_const (0, "__builtin_huge_valq",
32657 FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
32658
32659 /* We will expand them to a normal call if SSE isn't available since
32660 they are used by libgcc. */
32661 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
32662 t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
32663 BUILT_IN_MD, "__fabstf2", NULL_TREE);
32664 TREE_READONLY (t) = 1;
32665 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;
32666
32667 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
32668 t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
32669 BUILT_IN_MD, "__copysigntf3", NULL_TREE);
32670 TREE_READONLY (t) = 1;
32671 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
32672
32673 ix86_init_tm_builtins ();
32674 ix86_init_mmx_sse_builtins ();
32675
32676 if (TARGET_LP64)
32677 ix86_init_builtins_va_builtins_abi ();
32678
32679 #ifdef SUBTARGET_INIT_BUILTINS
32680 SUBTARGET_INIT_BUILTINS;
32681 #endif
32682 }
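/* The TFmode builtins registered above can be used directly from C; a
   minimal sketch, where __builtin_fabsq (-2.0q) yields 2.0q and
   __builtin_copysignq (1.0q, -3.0q) yields -1.0q:

     __float128 ax = __builtin_fabsq (-2.0q);
     __float128 cs = __builtin_copysignq (1.0q, -3.0q);

   Without SSE these expand to calls to __fabstf2 and __copysigntf3 in
   libgcc, as noted in the comment above.  */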
32683
32684 /* Return the ix86 builtin for CODE. */
32685
32686 static tree
32687 ix86_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
32688 {
32689 if (code >= IX86_BUILTIN_MAX)
32690 return error_mark_node;
32691
32692 return ix86_builtins[code];
32693 }
32694
32695 /* Errors in the source file can cause expand_expr to return const0_rtx
32696 where we expect a vector. To avoid crashing, use one of the vector
32697 clear instructions. */
32698 static rtx
32699 safe_vector_operand (rtx x, enum machine_mode mode)
32700 {
32701 if (x == const0_rtx)
32702 x = CONST0_RTX (mode);
32703 return x;
32704 }
32705
32706 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
32707
32708 static rtx
32709 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
32710 {
32711 rtx pat;
32712 tree arg0 = CALL_EXPR_ARG (exp, 0);
32713 tree arg1 = CALL_EXPR_ARG (exp, 1);
32714 rtx op0 = expand_normal (arg0);
32715 rtx op1 = expand_normal (arg1);
32716 enum machine_mode tmode = insn_data[icode].operand[0].mode;
32717 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
32718 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
32719
32720 if (VECTOR_MODE_P (mode0))
32721 op0 = safe_vector_operand (op0, mode0);
32722 if (VECTOR_MODE_P (mode1))
32723 op1 = safe_vector_operand (op1, mode1);
32724
32725 if (optimize || !target
32726 || GET_MODE (target) != tmode
32727 || !insn_data[icode].operand[0].predicate (target, tmode))
32728 target = gen_reg_rtx (tmode);
32729
32730 if (GET_MODE (op1) == SImode && mode1 == TImode)
32731 {
32732 rtx x = gen_reg_rtx (V4SImode);
32733 emit_insn (gen_sse2_loadd (x, op1));
32734 op1 = gen_lowpart (TImode, x);
32735 }
32736
32737 if (!insn_data[icode].operand[1].predicate (op0, mode0))
32738 op0 = copy_to_mode_reg (mode0, op0);
32739 if (!insn_data[icode].operand[2].predicate (op1, mode1))
32740 op1 = copy_to_mode_reg (mode1, op1);
32741
32742 pat = GEN_FCN (icode) (target, op0, op1);
32743 if (! pat)
32744 return 0;
32745
32746 emit_insn (pat);
32747
32748 return target;
32749 }
32750
32751 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
32752
32753 static rtx
32754 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
32755 enum ix86_builtin_func_type m_type,
32756 enum rtx_code sub_code)
32757 {
32758 rtx pat;
32759 int i;
32760 int nargs;
32761 bool comparison_p = false;
32762 bool tf_p = false;
32763 bool last_arg_constant = false;
32764 int num_memory = 0;
32765 struct {
32766 rtx op;
32767 enum machine_mode mode;
32768 } args[4];
32769
32770 enum machine_mode tmode = insn_data[icode].operand[0].mode;
32771
32772 switch (m_type)
32773 {
32774 case MULTI_ARG_4_DF2_DI_I:
32775 case MULTI_ARG_4_DF2_DI_I1:
32776 case MULTI_ARG_4_SF2_SI_I:
32777 case MULTI_ARG_4_SF2_SI_I1:
32778 nargs = 4;
32779 last_arg_constant = true;
32780 break;
32781
32782 case MULTI_ARG_3_SF:
32783 case MULTI_ARG_3_DF:
32784 case MULTI_ARG_3_SF2:
32785 case MULTI_ARG_3_DF2:
32786 case MULTI_ARG_3_DI:
32787 case MULTI_ARG_3_SI:
32788 case MULTI_ARG_3_SI_DI:
32789 case MULTI_ARG_3_HI:
32790 case MULTI_ARG_3_HI_SI:
32791 case MULTI_ARG_3_QI:
32792 case MULTI_ARG_3_DI2:
32793 case MULTI_ARG_3_SI2:
32794 case MULTI_ARG_3_HI2:
32795 case MULTI_ARG_3_QI2:
32796 nargs = 3;
32797 break;
32798
32799 case MULTI_ARG_2_SF:
32800 case MULTI_ARG_2_DF:
32801 case MULTI_ARG_2_DI:
32802 case MULTI_ARG_2_SI:
32803 case MULTI_ARG_2_HI:
32804 case MULTI_ARG_2_QI:
32805 nargs = 2;
32806 break;
32807
32808 case MULTI_ARG_2_DI_IMM:
32809 case MULTI_ARG_2_SI_IMM:
32810 case MULTI_ARG_2_HI_IMM:
32811 case MULTI_ARG_2_QI_IMM:
32812 nargs = 2;
32813 last_arg_constant = true;
32814 break;
32815
32816 case MULTI_ARG_1_SF:
32817 case MULTI_ARG_1_DF:
32818 case MULTI_ARG_1_SF2:
32819 case MULTI_ARG_1_DF2:
32820 case MULTI_ARG_1_DI:
32821 case MULTI_ARG_1_SI:
32822 case MULTI_ARG_1_HI:
32823 case MULTI_ARG_1_QI:
32824 case MULTI_ARG_1_SI_DI:
32825 case MULTI_ARG_1_HI_DI:
32826 case MULTI_ARG_1_HI_SI:
32827 case MULTI_ARG_1_QI_DI:
32828 case MULTI_ARG_1_QI_SI:
32829 case MULTI_ARG_1_QI_HI:
32830 nargs = 1;
32831 break;
32832
32833 case MULTI_ARG_2_DI_CMP:
32834 case MULTI_ARG_2_SI_CMP:
32835 case MULTI_ARG_2_HI_CMP:
32836 case MULTI_ARG_2_QI_CMP:
32837 nargs = 2;
32838 comparison_p = true;
32839 break;
32840
32841 case MULTI_ARG_2_SF_TF:
32842 case MULTI_ARG_2_DF_TF:
32843 case MULTI_ARG_2_DI_TF:
32844 case MULTI_ARG_2_SI_TF:
32845 case MULTI_ARG_2_HI_TF:
32846 case MULTI_ARG_2_QI_TF:
32847 nargs = 2;
32848 tf_p = true;
32849 break;
32850
32851 default:
32852 gcc_unreachable ();
32853 }
32854
32855 if (optimize || !target
32856 || GET_MODE (target) != tmode
32857 || !insn_data[icode].operand[0].predicate (target, tmode))
32858 target = gen_reg_rtx (tmode);
32859
32860 gcc_assert (nargs <= 4);
32861
32862 for (i = 0; i < nargs; i++)
32863 {
32864 tree arg = CALL_EXPR_ARG (exp, i);
32865 rtx op = expand_normal (arg);
32866 int adjust = (comparison_p) ? 1 : 0;
32867 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
32868
32869 if (last_arg_constant && i == nargs - 1)
32870 {
32871 if (!insn_data[icode].operand[i + 1].predicate (op, mode))
32872 {
32873 enum insn_code new_icode = icode;
32874 switch (icode)
32875 {
32876 case CODE_FOR_xop_vpermil2v2df3:
32877 case CODE_FOR_xop_vpermil2v4sf3:
32878 case CODE_FOR_xop_vpermil2v4df3:
32879 case CODE_FOR_xop_vpermil2v8sf3:
32880 error ("the last argument must be a 2-bit immediate");
32881 return gen_reg_rtx (tmode);
32882 case CODE_FOR_xop_rotlv2di3:
32883 new_icode = CODE_FOR_rotlv2di3;
32884 goto xop_rotl;
32885 case CODE_FOR_xop_rotlv4si3:
32886 new_icode = CODE_FOR_rotlv4si3;
32887 goto xop_rotl;
32888 case CODE_FOR_xop_rotlv8hi3:
32889 new_icode = CODE_FOR_rotlv8hi3;
32890 goto xop_rotl;
32891 case CODE_FOR_xop_rotlv16qi3:
32892 new_icode = CODE_FOR_rotlv16qi3;
32893 xop_rotl:
32894 if (CONST_INT_P (op))
32895 {
32896 int mask = GET_MODE_BITSIZE (GET_MODE_INNER (tmode)) - 1;
32897 op = GEN_INT (INTVAL (op) & mask);
32898 gcc_checking_assert
32899 (insn_data[icode].operand[i + 1].predicate (op, mode));
32900 }
32901 else
32902 {
32903 gcc_checking_assert
32904 (nargs == 2
32905 && insn_data[new_icode].operand[0].mode == tmode
32906 && insn_data[new_icode].operand[1].mode == tmode
32907 && insn_data[new_icode].operand[2].mode == mode
32908 && insn_data[new_icode].operand[0].predicate
32909 == insn_data[icode].operand[0].predicate
32910 && insn_data[new_icode].operand[1].predicate
32911 == insn_data[icode].operand[1].predicate);
32912 icode = new_icode;
32913 goto non_constant;
32914 }
32915 break;
32916 default:
32917 gcc_unreachable ();
32918 }
32919 }
32920 }
32921 else
32922 {
32923 non_constant:
32924 if (VECTOR_MODE_P (mode))
32925 op = safe_vector_operand (op, mode);
32926
32927 /* If we aren't optimizing, only allow one memory operand to be
32928 generated. */
32929 if (memory_operand (op, mode))
32930 num_memory++;
32931
32932 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
32933
32934 if (optimize
32935 || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
32936 || num_memory > 1)
32937 op = force_reg (mode, op);
32938 }
32939
32940 args[i].op = op;
32941 args[i].mode = mode;
32942 }
32943
32944 switch (nargs)
32945 {
32946 case 1:
32947 pat = GEN_FCN (icode) (target, args[0].op);
32948 break;
32949
32950 case 2:
32951 if (tf_p)
32952 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
32953 GEN_INT ((int)sub_code));
32954 else if (! comparison_p)
32955 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
32956 else
32957 {
32958 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
32959 args[0].op,
32960 args[1].op);
32961
32962 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
32963 }
32964 break;
32965
32966 case 3:
32967 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
32968 break;
32969
32970 case 4:
32971 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
32972 break;
32973
32974 default:
32975 gcc_unreachable ();
32976 }
32977
32978 if (! pat)
32979 return 0;
32980
32981 emit_insn (pat);
32982 return target;
32983 }
32984
32985 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
32986 insns with vec_merge. */
32987
32988 static rtx
32989 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
32990 rtx target)
32991 {
32992 rtx pat;
32993 tree arg0 = CALL_EXPR_ARG (exp, 0);
32994 rtx op1, op0 = expand_normal (arg0);
32995 enum machine_mode tmode = insn_data[icode].operand[0].mode;
32996 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
32997
32998 if (optimize || !target
32999 || GET_MODE (target) != tmode
33000 || !insn_data[icode].operand[0].predicate (target, tmode))
33001 target = gen_reg_rtx (tmode);
33002
33003 if (VECTOR_MODE_P (mode0))
33004 op0 = safe_vector_operand (op0, mode0);
33005
33006 if ((optimize && !register_operand (op0, mode0))
33007 || !insn_data[icode].operand[1].predicate (op0, mode0))
33008 op0 = copy_to_mode_reg (mode0, op0);
33009
33010 op1 = op0;
33011 if (!insn_data[icode].operand[2].predicate (op1, mode0))
33012 op1 = copy_to_mode_reg (mode0, op1);
33013
33014 pat = GEN_FCN (icode) (target, op0, op1);
33015 if (! pat)
33016 return 0;
33017 emit_insn (pat);
33018 return target;
33019 }
33020
33021 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
33022
33023 static rtx
33024 ix86_expand_sse_compare (const struct builtin_description *d,
33025 tree exp, rtx target, bool swap)
33026 {
33027 rtx pat;
33028 tree arg0 = CALL_EXPR_ARG (exp, 0);
33029 tree arg1 = CALL_EXPR_ARG (exp, 1);
33030 rtx op0 = expand_normal (arg0);
33031 rtx op1 = expand_normal (arg1);
33032 rtx op2;
33033 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
33034 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
33035 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
33036 enum rtx_code comparison = d->comparison;
33037
33038 if (VECTOR_MODE_P (mode0))
33039 op0 = safe_vector_operand (op0, mode0);
33040 if (VECTOR_MODE_P (mode1))
33041 op1 = safe_vector_operand (op1, mode1);
33042
33043 /* Swap operands if we have a comparison that isn't available in
33044 hardware. */
33045 if (swap)
33046 {
33047 rtx tmp = gen_reg_rtx (mode1);
33048 emit_move_insn (tmp, op1);
33049 op1 = op0;
33050 op0 = tmp;
33051 }
33052
33053 if (optimize || !target
33054 || GET_MODE (target) != tmode
33055 || !insn_data[d->icode].operand[0].predicate (target, tmode))
33056 target = gen_reg_rtx (tmode);
33057
33058 if ((optimize && !register_operand (op0, mode0))
33059 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
33060 op0 = copy_to_mode_reg (mode0, op0);
33061 if ((optimize && !register_operand (op1, mode1))
33062 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
33063 op1 = copy_to_mode_reg (mode1, op1);
33064
33065 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
33066 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
33067 if (! pat)
33068 return 0;
33069 emit_insn (pat);
33070 return target;
33071 }
33072
33073 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
33074
33075 static rtx
33076 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
33077 rtx target)
33078 {
33079 rtx pat;
33080 tree arg0 = CALL_EXPR_ARG (exp, 0);
33081 tree arg1 = CALL_EXPR_ARG (exp, 1);
33082 rtx op0 = expand_normal (arg0);
33083 rtx op1 = expand_normal (arg1);
33084 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
33085 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
33086 enum rtx_code comparison = d->comparison;
33087
33088 if (VECTOR_MODE_P (mode0))
33089 op0 = safe_vector_operand (op0, mode0);
33090 if (VECTOR_MODE_P (mode1))
33091 op1 = safe_vector_operand (op1, mode1);
33092
33093 /* Swap operands if we have a comparison that isn't available in
33094 hardware. */
33095 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
33096 {
33097 rtx tmp = op1;
33098 op1 = op0;
33099 op0 = tmp;
33100 }
33101
33102 target = gen_reg_rtx (SImode);
33103 emit_move_insn (target, const0_rtx);
33104 target = gen_rtx_SUBREG (QImode, target, 0);
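/* The comparison emitted below writes only the low QImode part of the
   SImode pseudo; pre-clearing the pseudo to const0_rtx means that
   SUBREG_REG (target) is already zero-extended when it is returned as
   the int result.  */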
33105
33106 if ((optimize && !register_operand (op0, mode0))
33107 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33108 op0 = copy_to_mode_reg (mode0, op0);
33109 if ((optimize && !register_operand (op1, mode1))
33110 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
33111 op1 = copy_to_mode_reg (mode1, op1);
33112
33113 pat = GEN_FCN (d->icode) (op0, op1);
33114 if (! pat)
33115 return 0;
33116 emit_insn (pat);
33117 emit_insn (gen_rtx_SET (VOIDmode,
33118 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33119 gen_rtx_fmt_ee (comparison, QImode,
33120 SET_DEST (pat),
33121 const0_rtx)));
33122
33123 return SUBREG_REG (target);
33124 }
33125
33126 /* Subroutines of ix86_expand_args_builtin to take care of round insns. */
33127
33128 static rtx
33129 ix86_expand_sse_round (const struct builtin_description *d, tree exp,
33130 rtx target)
33131 {
33132 rtx pat;
33133 tree arg0 = CALL_EXPR_ARG (exp, 0);
33134 rtx op1, op0 = expand_normal (arg0);
33135 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
33136 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
33137
33138 if (optimize || target == 0
33139 || GET_MODE (target) != tmode
33140 || !insn_data[d->icode].operand[0].predicate (target, tmode))
33141 target = gen_reg_rtx (tmode);
33142
33143 if (VECTOR_MODE_P (mode0))
33144 op0 = safe_vector_operand (op0, mode0);
33145
33146 if ((optimize && !register_operand (op0, mode0))
33147 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33148 op0 = copy_to_mode_reg (mode0, op0);
33149
33150 op1 = GEN_INT (d->comparison);
33151
33152 pat = GEN_FCN (d->icode) (target, op0, op1);
33153 if (! pat)
33154 return 0;
33155 emit_insn (pat);
33156 return target;
33157 }
33158
33159 static rtx
33160 ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description *d,
33161 tree exp, rtx target)
33162 {
33163 rtx pat;
33164 tree arg0 = CALL_EXPR_ARG (exp, 0);
33165 tree arg1 = CALL_EXPR_ARG (exp, 1);
33166 rtx op0 = expand_normal (arg0);
33167 rtx op1 = expand_normal (arg1);
33168 rtx op2;
33169 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
33170 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
33171 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
33172
33173 if (optimize || target == 0
33174 || GET_MODE (target) != tmode
33175 || !insn_data[d->icode].operand[0].predicate (target, tmode))
33176 target = gen_reg_rtx (tmode);
33177
33178 op0 = safe_vector_operand (op0, mode0);
33179 op1 = safe_vector_operand (op1, mode1);
33180
33181 if ((optimize && !register_operand (op0, mode0))
33182 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33183 op0 = copy_to_mode_reg (mode0, op0);
33184 if ((optimize && !register_operand (op1, mode1))
33185 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
33186 op1 = copy_to_mode_reg (mode1, op1);
33187
33188 op2 = GEN_INT (d->comparison);
33189
33190 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
33191 if (! pat)
33192 return 0;
33193 emit_insn (pat);
33194 return target;
33195 }
33196
33197 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
33198
33199 static rtx
33200 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
33201 rtx target)
33202 {
33203 rtx pat;
33204 tree arg0 = CALL_EXPR_ARG (exp, 0);
33205 tree arg1 = CALL_EXPR_ARG (exp, 1);
33206 rtx op0 = expand_normal (arg0);
33207 rtx op1 = expand_normal (arg1);
33208 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
33209 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
33210 enum rtx_code comparison = d->comparison;
33211
33212 if (VECTOR_MODE_P (mode0))
33213 op0 = safe_vector_operand (op0, mode0);
33214 if (VECTOR_MODE_P (mode1))
33215 op1 = safe_vector_operand (op1, mode1);
33216
33217 target = gen_reg_rtx (SImode);
33218 emit_move_insn (target, const0_rtx);
33219 target = gen_rtx_SUBREG (QImode, target, 0);
33220
33221 if ((optimize && !register_operand (op0, mode0))
33222 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33223 op0 = copy_to_mode_reg (mode0, op0);
33224 if ((optimize && !register_operand (op1, mode1))
33225 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
33226 op1 = copy_to_mode_reg (mode1, op1);
33227
33228 pat = GEN_FCN (d->icode) (op0, op1);
33229 if (! pat)
33230 return 0;
33231 emit_insn (pat);
33232 emit_insn (gen_rtx_SET (VOIDmode,
33233 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33234 gen_rtx_fmt_ee (comparison, QImode,
33235 SET_DEST (pat),
33236 const0_rtx)));
33237
33238 return SUBREG_REG (target);
33239 }
33240
33241 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
33242
33243 static rtx
33244 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
33245 tree exp, rtx target)
33246 {
33247 rtx pat;
33248 tree arg0 = CALL_EXPR_ARG (exp, 0);
33249 tree arg1 = CALL_EXPR_ARG (exp, 1);
33250 tree arg2 = CALL_EXPR_ARG (exp, 2);
33251 tree arg3 = CALL_EXPR_ARG (exp, 3);
33252 tree arg4 = CALL_EXPR_ARG (exp, 4);
33253 rtx scratch0, scratch1;
33254 rtx op0 = expand_normal (arg0);
33255 rtx op1 = expand_normal (arg1);
33256 rtx op2 = expand_normal (arg2);
33257 rtx op3 = expand_normal (arg3);
33258 rtx op4 = expand_normal (arg4);
33259 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
33260
33261 tmode0 = insn_data[d->icode].operand[0].mode;
33262 tmode1 = insn_data[d->icode].operand[1].mode;
33263 modev2 = insn_data[d->icode].operand[2].mode;
33264 modei3 = insn_data[d->icode].operand[3].mode;
33265 modev4 = insn_data[d->icode].operand[4].mode;
33266 modei5 = insn_data[d->icode].operand[5].mode;
33267 modeimm = insn_data[d->icode].operand[6].mode;
33268
33269 if (VECTOR_MODE_P (modev2))
33270 op0 = safe_vector_operand (op0, modev2);
33271 if (VECTOR_MODE_P (modev4))
33272 op2 = safe_vector_operand (op2, modev4);
33273
33274 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
33275 op0 = copy_to_mode_reg (modev2, op0);
33276 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
33277 op1 = copy_to_mode_reg (modei3, op1);
33278 if ((optimize && !register_operand (op2, modev4))
33279 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
33280 op2 = copy_to_mode_reg (modev4, op2);
33281 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
33282 op3 = copy_to_mode_reg (modei5, op3);
33283
33284 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
33285 {
33286 error ("the fifth argument must be an 8-bit immediate");
33287 return const0_rtx;
33288 }
33289
33290 if (d->code == IX86_BUILTIN_PCMPESTRI128)
33291 {
33292 if (optimize || !target
33293 || GET_MODE (target) != tmode0
33294 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
33295 target = gen_reg_rtx (tmode0);
33296
33297 scratch1 = gen_reg_rtx (tmode1);
33298
33299 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
33300 }
33301 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
33302 {
33303 if (optimize || !target
33304 || GET_MODE (target) != tmode1
33305 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
33306 target = gen_reg_rtx (tmode1);
33307
33308 scratch0 = gen_reg_rtx (tmode0);
33309
33310 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
33311 }
33312 else
33313 {
33314 gcc_assert (d->flag);
33315
33316 scratch0 = gen_reg_rtx (tmode0);
33317 scratch1 = gen_reg_rtx (tmode1);
33318
33319 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
33320 }
33321
33322 if (! pat)
33323 return 0;
33324
33325 emit_insn (pat);
33326
33327 if (d->flag)
33328 {
33329 target = gen_reg_rtx (SImode);
33330 emit_move_insn (target, const0_rtx);
33331 target = gen_rtx_SUBREG (QImode, target, 0);
33332
33333 emit_insn
33334 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33335 gen_rtx_fmt_ee (EQ, QImode,
33336 gen_rtx_REG ((enum machine_mode) d->flag,
33337 FLAGS_REG),
33338 const0_rtx)));
33339 return SUBREG_REG (target);
33340 }
33341 else
33342 return target;
33343 }
33344
33345
33346 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
33347
33348 static rtx
33349 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
33350 tree exp, rtx target)
33351 {
33352 rtx pat;
33353 tree arg0 = CALL_EXPR_ARG (exp, 0);
33354 tree arg1 = CALL_EXPR_ARG (exp, 1);
33355 tree arg2 = CALL_EXPR_ARG (exp, 2);
33356 rtx scratch0, scratch1;
33357 rtx op0 = expand_normal (arg0);
33358 rtx op1 = expand_normal (arg1);
33359 rtx op2 = expand_normal (arg2);
33360 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
33361
33362 tmode0 = insn_data[d->icode].operand[0].mode;
33363 tmode1 = insn_data[d->icode].operand[1].mode;
33364 modev2 = insn_data[d->icode].operand[2].mode;
33365 modev3 = insn_data[d->icode].operand[3].mode;
33366 modeimm = insn_data[d->icode].operand[4].mode;
33367
33368 if (VECTOR_MODE_P (modev2))
33369 op0 = safe_vector_operand (op0, modev2);
33370 if (VECTOR_MODE_P (modev3))
33371 op1 = safe_vector_operand (op1, modev3);
33372
33373 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
33374 op0 = copy_to_mode_reg (modev2, op0);
33375 if ((optimize && !register_operand (op1, modev3))
33376 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
33377 op1 = copy_to_mode_reg (modev3, op1);
33378
33379 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
33380 {
33381 error ("the third argument must be an 8-bit immediate");
33382 return const0_rtx;
33383 }
33384
33385 if (d->code == IX86_BUILTIN_PCMPISTRI128)
33386 {
33387 if (optimize || !target
33388 || GET_MODE (target) != tmode0
33389 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
33390 target = gen_reg_rtx (tmode0);
33391
33392 scratch1 = gen_reg_rtx (tmode1);
33393
33394 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
33395 }
33396 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
33397 {
33398 if (optimize || !target
33399 || GET_MODE (target) != tmode1
33400 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
33401 target = gen_reg_rtx (tmode1);
33402
33403 scratch0 = gen_reg_rtx (tmode0);
33404
33405 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
33406 }
33407 else
33408 {
33409 gcc_assert (d->flag);
33410
33411 scratch0 = gen_reg_rtx (tmode0);
33412 scratch1 = gen_reg_rtx (tmode1);
33413
33414 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
33415 }
33416
33417 if (! pat)
33418 return 0;
33419
33420 emit_insn (pat);
33421
33422 if (d->flag)
33423 {
33424 target = gen_reg_rtx (SImode);
33425 emit_move_insn (target, const0_rtx);
33426 target = gen_rtx_SUBREG (QImode, target, 0);
33427
33428 emit_insn
33429 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33430 gen_rtx_fmt_ee (EQ, QImode,
33431 gen_rtx_REG ((enum machine_mode) d->flag,
33432 FLAGS_REG),
33433 const0_rtx)));
33434 return SUBREG_REG (target);
33435 }
33436 else
33437 return target;
33438 }
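/* The user-visible entry points for these expanders are the SSE4.2 string
   intrinsics.  A hedged sketch (assuming <nmmintrin.h> and -msse4.2); the
   control operand must be a compile-time constant, matching the "8-bit
   immediate" checks above:

     #include <nmmintrin.h>

     int
     first_match (__m128i a, __m128i b)
     {
       return _mm_cmpistri (a, b, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_EACH);
     }
*/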
33439
33440 /* Subroutine of ix86_expand_builtin to take care of insns with
33441 variable number of operands. */
33442
33443 static rtx
33444 ix86_expand_args_builtin (const struct builtin_description *d,
33445 tree exp, rtx target)
33446 {
33447 rtx pat, real_target;
33448 unsigned int i, nargs;
33449 unsigned int nargs_constant = 0;
33450 unsigned int mask_pos = 0;
33451 int num_memory = 0;
33452 struct
33453 {
33454 rtx op;
33455 enum machine_mode mode;
33456 } args[6];
33457 bool last_arg_count = false;
33458 enum insn_code icode = d->icode;
33459 const struct insn_data_d *insn_p = &insn_data[icode];
33460 enum machine_mode tmode = insn_p->operand[0].mode;
33461 enum machine_mode rmode = VOIDmode;
33462 bool swap = false;
33463 enum rtx_code comparison = d->comparison;
33464
33465 switch ((enum ix86_builtin_func_type) d->flag)
33466 {
33467 case V2DF_FTYPE_V2DF_ROUND:
33468 case V4DF_FTYPE_V4DF_ROUND:
33469 case V4SF_FTYPE_V4SF_ROUND:
33470 case V8SF_FTYPE_V8SF_ROUND:
33471 case V4SI_FTYPE_V4SF_ROUND:
33472 case V8SI_FTYPE_V8SF_ROUND:
33473 return ix86_expand_sse_round (d, exp, target);
33474 case V4SI_FTYPE_V2DF_V2DF_ROUND:
33475 case V8SI_FTYPE_V4DF_V4DF_ROUND:
33476 case V16SI_FTYPE_V8DF_V8DF_ROUND:
33477 return ix86_expand_sse_round_vec_pack_sfix (d, exp, target);
33478 case INT_FTYPE_V8SF_V8SF_PTEST:
33479 case INT_FTYPE_V4DI_V4DI_PTEST:
33480 case INT_FTYPE_V4DF_V4DF_PTEST:
33481 case INT_FTYPE_V4SF_V4SF_PTEST:
33482 case INT_FTYPE_V2DI_V2DI_PTEST:
33483 case INT_FTYPE_V2DF_V2DF_PTEST:
33484 return ix86_expand_sse_ptest (d, exp, target);
33485 case FLOAT128_FTYPE_FLOAT128:
33486 case FLOAT_FTYPE_FLOAT:
33487 case INT_FTYPE_INT:
33488 case UINT64_FTYPE_INT:
33489 case UINT16_FTYPE_UINT16:
33490 case INT64_FTYPE_INT64:
33491 case INT64_FTYPE_V4SF:
33492 case INT64_FTYPE_V2DF:
33493 case INT_FTYPE_V16QI:
33494 case INT_FTYPE_V8QI:
33495 case INT_FTYPE_V8SF:
33496 case INT_FTYPE_V4DF:
33497 case INT_FTYPE_V4SF:
33498 case INT_FTYPE_V2DF:
33499 case INT_FTYPE_V32QI:
33500 case V16QI_FTYPE_V16QI:
33501 case V8SI_FTYPE_V8SF:
33502 case V8SI_FTYPE_V4SI:
33503 case V8HI_FTYPE_V8HI:
33504 case V8HI_FTYPE_V16QI:
33505 case V8QI_FTYPE_V8QI:
33506 case V8SF_FTYPE_V8SF:
33507 case V8SF_FTYPE_V8SI:
33508 case V8SF_FTYPE_V4SF:
33509 case V8SF_FTYPE_V8HI:
33510 case V4SI_FTYPE_V4SI:
33511 case V4SI_FTYPE_V16QI:
33512 case V4SI_FTYPE_V4SF:
33513 case V4SI_FTYPE_V8SI:
33514 case V4SI_FTYPE_V8HI:
33515 case V4SI_FTYPE_V4DF:
33516 case V4SI_FTYPE_V2DF:
33517 case V4HI_FTYPE_V4HI:
33518 case V4DF_FTYPE_V4DF:
33519 case V4DF_FTYPE_V4SI:
33520 case V4DF_FTYPE_V4SF:
33521 case V4DF_FTYPE_V2DF:
33522 case V4SF_FTYPE_V4SF:
33523 case V4SF_FTYPE_V4SI:
33524 case V4SF_FTYPE_V8SF:
33525 case V4SF_FTYPE_V4DF:
33526 case V4SF_FTYPE_V8HI:
33527 case V4SF_FTYPE_V2DF:
33528 case V2DI_FTYPE_V2DI:
33529 case V2DI_FTYPE_V16QI:
33530 case V2DI_FTYPE_V8HI:
33531 case V2DI_FTYPE_V4SI:
33532 case V2DF_FTYPE_V2DF:
33533 case V2DF_FTYPE_V4SI:
33534 case V2DF_FTYPE_V4DF:
33535 case V2DF_FTYPE_V4SF:
33536 case V2DF_FTYPE_V2SI:
33537 case V2SI_FTYPE_V2SI:
33538 case V2SI_FTYPE_V4SF:
33539 case V2SI_FTYPE_V2SF:
33540 case V2SI_FTYPE_V2DF:
33541 case V2SF_FTYPE_V2SF:
33542 case V2SF_FTYPE_V2SI:
33543 case V32QI_FTYPE_V32QI:
33544 case V32QI_FTYPE_V16QI:
33545 case V16HI_FTYPE_V16HI:
33546 case V16HI_FTYPE_V8HI:
33547 case V8SI_FTYPE_V8SI:
33548 case V16HI_FTYPE_V16QI:
33549 case V8SI_FTYPE_V16QI:
33550 case V4DI_FTYPE_V16QI:
33551 case V8SI_FTYPE_V8HI:
33552 case V4DI_FTYPE_V8HI:
33553 case V4DI_FTYPE_V4SI:
33554 case V4DI_FTYPE_V2DI:
33555 case HI_FTYPE_HI:
33556 case UINT_FTYPE_V2DF:
33557 case UINT_FTYPE_V4SF:
33558 case UINT64_FTYPE_V2DF:
33559 case UINT64_FTYPE_V4SF:
33560 case V16QI_FTYPE_V8DI:
33561 case V16HI_FTYPE_V16SI:
33562 case V16SI_FTYPE_HI:
33563 case V16SI_FTYPE_V16SI:
33564 case V16SI_FTYPE_INT:
33565 case V16SF_FTYPE_FLOAT:
33566 case V16SF_FTYPE_V4SF:
33567 case V16SF_FTYPE_V16SF:
33568 case V8HI_FTYPE_V8DI:
33569 case V8UHI_FTYPE_V8UHI:
33570 case V8SI_FTYPE_V8DI:
33571 case V8USI_FTYPE_V8USI:
33572 case V8SF_FTYPE_V8DF:
33573 case V8DI_FTYPE_QI:
33574 case V8DI_FTYPE_INT64:
33575 case V8DI_FTYPE_V4DI:
33576 case V8DI_FTYPE_V8DI:
33577 case V8DF_FTYPE_DOUBLE:
33578 case V8DF_FTYPE_V4DF:
33579 case V8DF_FTYPE_V8DF:
33580 case V8DF_FTYPE_V8SI:
33581 nargs = 1;
33582 break;
33583 case V4SF_FTYPE_V4SF_VEC_MERGE:
33584 case V2DF_FTYPE_V2DF_VEC_MERGE:
33585 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
33586 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
33587 case V16QI_FTYPE_V16QI_V16QI:
33588 case V16QI_FTYPE_V8HI_V8HI:
33589 case V16SI_FTYPE_V16SI_V16SI:
33590 case V16SF_FTYPE_V16SF_V16SF:
33591 case V16SF_FTYPE_V16SF_V16SI:
33592 case V8QI_FTYPE_V8QI_V8QI:
33593 case V8QI_FTYPE_V4HI_V4HI:
33594 case V8HI_FTYPE_V8HI_V8HI:
33595 case V8HI_FTYPE_V16QI_V16QI:
33596 case V8HI_FTYPE_V4SI_V4SI:
33597 case V8SF_FTYPE_V8SF_V8SF:
33598 case V8SF_FTYPE_V8SF_V8SI:
33599 case V8DI_FTYPE_V8DI_V8DI:
33600 case V8DF_FTYPE_V8DF_V8DF:
33601 case V8DF_FTYPE_V8DF_V8DI:
33602 case V4SI_FTYPE_V4SI_V4SI:
33603 case V4SI_FTYPE_V8HI_V8HI:
33604 case V4SI_FTYPE_V4SF_V4SF:
33605 case V4SI_FTYPE_V2DF_V2DF:
33606 case V4HI_FTYPE_V4HI_V4HI:
33607 case V4HI_FTYPE_V8QI_V8QI:
33608 case V4HI_FTYPE_V2SI_V2SI:
33609 case V4DF_FTYPE_V4DF_V4DF:
33610 case V4DF_FTYPE_V4DF_V4DI:
33611 case V4SF_FTYPE_V4SF_V4SF:
33612 case V4SF_FTYPE_V4SF_V4SI:
33613 case V4SF_FTYPE_V4SF_V2SI:
33614 case V4SF_FTYPE_V4SF_V2DF:
33615 case V4SF_FTYPE_V4SF_UINT:
33616 case V4SF_FTYPE_V4SF_UINT64:
33617 case V4SF_FTYPE_V4SF_DI:
33618 case V4SF_FTYPE_V4SF_SI:
33619 case V2DI_FTYPE_V2DI_V2DI:
33620 case V2DI_FTYPE_V16QI_V16QI:
33621 case V2DI_FTYPE_V4SI_V4SI:
33622 case V2UDI_FTYPE_V4USI_V4USI:
33623 case V2DI_FTYPE_V2DI_V16QI:
33624 case V2DI_FTYPE_V2DF_V2DF:
33625 case V2SI_FTYPE_V2SI_V2SI:
33626 case V2SI_FTYPE_V4HI_V4HI:
33627 case V2SI_FTYPE_V2SF_V2SF:
33628 case V2DF_FTYPE_V2DF_V2DF:
33629 case V2DF_FTYPE_V2DF_V4SF:
33630 case V2DF_FTYPE_V2DF_V2DI:
33631 case V2DF_FTYPE_V2DF_DI:
33632 case V2DF_FTYPE_V2DF_SI:
33633 case V2DF_FTYPE_V2DF_UINT:
33634 case V2DF_FTYPE_V2DF_UINT64:
33635 case V2SF_FTYPE_V2SF_V2SF:
33636 case V1DI_FTYPE_V1DI_V1DI:
33637 case V1DI_FTYPE_V8QI_V8QI:
33638 case V1DI_FTYPE_V2SI_V2SI:
33639 case V32QI_FTYPE_V16HI_V16HI:
33640 case V16HI_FTYPE_V8SI_V8SI:
33641 case V32QI_FTYPE_V32QI_V32QI:
33642 case V16HI_FTYPE_V32QI_V32QI:
33643 case V16HI_FTYPE_V16HI_V16HI:
33644 case V8SI_FTYPE_V4DF_V4DF:
33645 case V8SI_FTYPE_V8SI_V8SI:
33646 case V8SI_FTYPE_V16HI_V16HI:
33647 case V4DI_FTYPE_V4DI_V4DI:
33648 case V4DI_FTYPE_V8SI_V8SI:
33649 case V4UDI_FTYPE_V8USI_V8USI:
33650 case QI_FTYPE_V8DI_V8DI:
33651 case HI_FTYPE_V16SI_V16SI:
33652 if (comparison == UNKNOWN)
33653 return ix86_expand_binop_builtin (icode, exp, target);
33654 nargs = 2;
33655 break;
33656 case V4SF_FTYPE_V4SF_V4SF_SWAP:
33657 case V2DF_FTYPE_V2DF_V2DF_SWAP:
33658 gcc_assert (comparison != UNKNOWN);
33659 nargs = 2;
33660 swap = true;
33661 break;
33662 case V16HI_FTYPE_V16HI_V8HI_COUNT:
33663 case V16HI_FTYPE_V16HI_SI_COUNT:
33664 case V8SI_FTYPE_V8SI_V4SI_COUNT:
33665 case V8SI_FTYPE_V8SI_SI_COUNT:
33666 case V4DI_FTYPE_V4DI_V2DI_COUNT:
33667 case V4DI_FTYPE_V4DI_INT_COUNT:
33668 case V8HI_FTYPE_V8HI_V8HI_COUNT:
33669 case V8HI_FTYPE_V8HI_SI_COUNT:
33670 case V4SI_FTYPE_V4SI_V4SI_COUNT:
33671 case V4SI_FTYPE_V4SI_SI_COUNT:
33672 case V4HI_FTYPE_V4HI_V4HI_COUNT:
33673 case V4HI_FTYPE_V4HI_SI_COUNT:
33674 case V2DI_FTYPE_V2DI_V2DI_COUNT:
33675 case V2DI_FTYPE_V2DI_SI_COUNT:
33676 case V2SI_FTYPE_V2SI_V2SI_COUNT:
33677 case V2SI_FTYPE_V2SI_SI_COUNT:
33678 case V1DI_FTYPE_V1DI_V1DI_COUNT:
33679 case V1DI_FTYPE_V1DI_SI_COUNT:
33680 nargs = 2;
33681 last_arg_count = true;
33682 break;
33683 case UINT64_FTYPE_UINT64_UINT64:
33684 case UINT_FTYPE_UINT_UINT:
33685 case UINT_FTYPE_UINT_USHORT:
33686 case UINT_FTYPE_UINT_UCHAR:
33687 case UINT16_FTYPE_UINT16_INT:
33688 case UINT8_FTYPE_UINT8_INT:
33689 case HI_FTYPE_HI_HI:
33690 case V16SI_FTYPE_V8DF_V8DF:
33691 nargs = 2;
33692 break;
33693 case V2DI_FTYPE_V2DI_INT_CONVERT:
33694 nargs = 2;
33695 rmode = V1TImode;
33696 nargs_constant = 1;
33697 break;
33698 case V4DI_FTYPE_V4DI_INT_CONVERT:
33699 nargs = 2;
33700 rmode = V2TImode;
33701 nargs_constant = 1;
33702 break;
33703 case V8HI_FTYPE_V8HI_INT:
33704 case V8HI_FTYPE_V8SF_INT:
33705 case V16HI_FTYPE_V16SF_INT:
33706 case V8HI_FTYPE_V4SF_INT:
33707 case V8SF_FTYPE_V8SF_INT:
33708 case V4SF_FTYPE_V16SF_INT:
33709 case V16SF_FTYPE_V16SF_INT:
33710 case V4SI_FTYPE_V4SI_INT:
33711 case V4SI_FTYPE_V8SI_INT:
33712 case V4HI_FTYPE_V4HI_INT:
33713 case V4DF_FTYPE_V4DF_INT:
33714 case V4DF_FTYPE_V8DF_INT:
33715 case V4SF_FTYPE_V4SF_INT:
33716 case V4SF_FTYPE_V8SF_INT:
33717 case V2DI_FTYPE_V2DI_INT:
33718 case V2DF_FTYPE_V2DF_INT:
33719 case V2DF_FTYPE_V4DF_INT:
33720 case V16HI_FTYPE_V16HI_INT:
33721 case V8SI_FTYPE_V8SI_INT:
33722 case V16SI_FTYPE_V16SI_INT:
33723 case V4SI_FTYPE_V16SI_INT:
33724 case V4DI_FTYPE_V4DI_INT:
33725 case V2DI_FTYPE_V4DI_INT:
33726 case V4DI_FTYPE_V8DI_INT:
33727 case HI_FTYPE_HI_INT:
33728 nargs = 2;
33729 nargs_constant = 1;
33730 break;
33731 case V16QI_FTYPE_V16QI_V16QI_V16QI:
33732 case V8SF_FTYPE_V8SF_V8SF_V8SF:
33733 case V4DF_FTYPE_V4DF_V4DF_V4DF:
33734 case V4SF_FTYPE_V4SF_V4SF_V4SF:
33735 case V2DF_FTYPE_V2DF_V2DF_V2DF:
33736 case V32QI_FTYPE_V32QI_V32QI_V32QI:
33737 case HI_FTYPE_V16SI_V16SI_HI:
33738 case QI_FTYPE_V8DI_V8DI_QI:
33739 case V16HI_FTYPE_V16SI_V16HI_HI:
33740 case V16QI_FTYPE_V16SI_V16QI_HI:
33741 case V16QI_FTYPE_V8DI_V16QI_QI:
33742 case V16SF_FTYPE_V16SF_V16SF_HI:
33743 case V16SF_FTYPE_V16SF_V16SF_V16SF:
33744 case V16SF_FTYPE_V16SF_V16SI_V16SF:
33745 case V16SF_FTYPE_V16SI_V16SF_HI:
33746 case V16SF_FTYPE_V16SI_V16SF_V16SF:
33747 case V16SF_FTYPE_V4SF_V16SF_HI:
33748 case V16SI_FTYPE_SI_V16SI_HI:
33749 case V16SI_FTYPE_V16HI_V16SI_HI:
33750 case V16SI_FTYPE_V16QI_V16SI_HI:
33751 case V16SI_FTYPE_V16SF_V16SI_HI:
33752 case V16SI_FTYPE_V16SI_V16SI_HI:
33753 case V16SI_FTYPE_V16SI_V16SI_V16SI:
33754 case V16SI_FTYPE_V4SI_V16SI_HI:
33755 case V2DI_FTYPE_V2DI_V2DI_V2DI:
33756 case V4DI_FTYPE_V4DI_V4DI_V4DI:
33757 case V8DF_FTYPE_V2DF_V8DF_QI:
33758 case V8DF_FTYPE_V4DF_V8DF_QI:
33759 case V8DF_FTYPE_V8DF_V8DF_QI:
33760 case V8DF_FTYPE_V8DF_V8DF_V8DF:
33761 case V8DF_FTYPE_V8DF_V8DI_V8DF:
33762 case V8DF_FTYPE_V8DI_V8DF_V8DF:
33763 case V8DF_FTYPE_V8SF_V8DF_QI:
33764 case V8DF_FTYPE_V8SI_V8DF_QI:
33765 case V8DI_FTYPE_DI_V8DI_QI:
33766 case V8DI_FTYPE_V16QI_V8DI_QI:
33767 case V8DI_FTYPE_V2DI_V8DI_QI:
33768 case V8DI_FTYPE_V4DI_V8DI_QI:
33769 case V8DI_FTYPE_V8DI_V8DI_QI:
33770 case V8DI_FTYPE_V8DI_V8DI_V8DI:
33771 case V8DI_FTYPE_V8HI_V8DI_QI:
33772 case V8DI_FTYPE_V8SI_V8DI_QI:
33773 case V8HI_FTYPE_V8DI_V8HI_QI:
33774 case V8SF_FTYPE_V8DF_V8SF_QI:
33775 case V8SI_FTYPE_V8DF_V8SI_QI:
33776 case V8SI_FTYPE_V8DI_V8SI_QI:
33777 case V4SI_FTYPE_V4SI_V4SI_V4SI:
33778 nargs = 3;
33779 break;
33780 case V32QI_FTYPE_V32QI_V32QI_INT:
33781 case V16HI_FTYPE_V16HI_V16HI_INT:
33782 case V16QI_FTYPE_V16QI_V16QI_INT:
33783 case V4DI_FTYPE_V4DI_V4DI_INT:
33784 case V8HI_FTYPE_V8HI_V8HI_INT:
33785 case V8SI_FTYPE_V8SI_V8SI_INT:
33786 case V8SI_FTYPE_V8SI_V4SI_INT:
33787 case V8SF_FTYPE_V8SF_V8SF_INT:
33788 case V8SF_FTYPE_V8SF_V4SF_INT:
33789 case V4SI_FTYPE_V4SI_V4SI_INT:
33790 case V4DF_FTYPE_V4DF_V4DF_INT:
33791 case V16SF_FTYPE_V16SF_V16SF_INT:
33792 case V16SF_FTYPE_V16SF_V4SF_INT:
33793 case V16SI_FTYPE_V16SI_V4SI_INT:
33794 case V4DF_FTYPE_V4DF_V2DF_INT:
33795 case V4SF_FTYPE_V4SF_V4SF_INT:
33796 case V2DI_FTYPE_V2DI_V2DI_INT:
33797 case V4DI_FTYPE_V4DI_V2DI_INT:
33798 case V2DF_FTYPE_V2DF_V2DF_INT:
33799 case QI_FTYPE_V8DI_V8DI_INT:
33800 case QI_FTYPE_V8DF_V8DF_INT:
33801 case QI_FTYPE_V2DF_V2DF_INT:
33802 case QI_FTYPE_V4SF_V4SF_INT:
33803 case HI_FTYPE_V16SI_V16SI_INT:
33804 case HI_FTYPE_V16SF_V16SF_INT:
33805 nargs = 3;
33806 nargs_constant = 1;
33807 break;
33808 case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT:
33809 nargs = 3;
33810 rmode = V4DImode;
33811 nargs_constant = 1;
33812 break;
33813 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
33814 nargs = 3;
33815 rmode = V2DImode;
33816 nargs_constant = 1;
33817 break;
33818 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
33819 nargs = 3;
33820 rmode = DImode;
33821 nargs_constant = 1;
33822 break;
33823 case V2DI_FTYPE_V2DI_UINT_UINT:
33824 nargs = 3;
33825 nargs_constant = 2;
33826 break;
33827 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI:
33828 case V16SF_FTYPE_V16SF_V16SI_V16SF_HI:
33829 case V16SF_FTYPE_V16SI_V16SF_V16SF_HI:
33830 case V16SI_FTYPE_V16SI_V16SI_V16SI_HI:
33831 case V16SI_FTYPE_V16SI_V4SI_V16SI_HI:
33832 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI:
33833 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI:
33834 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI:
33835 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI:
33836 case V8DF_FTYPE_V8DF_V8DF_V8DF_QI:
33837 case V8DF_FTYPE_V8DF_V8DI_V8DF_QI:
33838 case V8DF_FTYPE_V8DI_V8DF_V8DF_QI:
33839 case V8DI_FTYPE_V16SI_V16SI_V8DI_QI:
33840 case V8DI_FTYPE_V8DI_SI_V8DI_V8DI:
33841 case V8DI_FTYPE_V8DI_V2DI_V8DI_QI:
33842 case V8DI_FTYPE_V8DI_V8DI_V8DI_QI:
33843 nargs = 4;
33844 break;
33845 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
33846 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
33847 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
33848 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
33849 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT:
33850 nargs = 4;
33851 nargs_constant = 1;
33852 break;
33853 case QI_FTYPE_V2DF_V2DF_INT_QI:
33854 case QI_FTYPE_V4SF_V4SF_INT_QI:
33855 nargs = 4;
33856 mask_pos = 1;
33857 nargs_constant = 1;
33858 break;
33859 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
33860 nargs = 4;
33861 nargs_constant = 2;
33862 break;
33863 case UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED:
33864 case UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG:
33865 nargs = 4;
33866 break;
33867 case QI_FTYPE_V8DI_V8DI_INT_QI:
33868 case HI_FTYPE_V16SI_V16SI_INT_HI:
33869 case QI_FTYPE_V8DF_V8DF_INT_QI:
33870 case HI_FTYPE_V16SF_V16SF_INT_HI:
33871 mask_pos = 1;
33872 nargs = 4;
33873 nargs_constant = 1;
33874 break;
33875 case V8DF_FTYPE_V8DF_INT_V8DF_QI:
33876 case V16SF_FTYPE_V16SF_INT_V16SF_HI:
33877 case V16HI_FTYPE_V16SF_INT_V16HI_HI:
33878 case V16SI_FTYPE_V16SI_INT_V16SI_HI:
33879 case V4SI_FTYPE_V16SI_INT_V4SI_QI:
33880 case V4DI_FTYPE_V8DI_INT_V4DI_QI:
33881 case V4DF_FTYPE_V8DF_INT_V4DF_QI:
33882 case V4SF_FTYPE_V16SF_INT_V4SF_QI:
33883 case V8DI_FTYPE_V8DI_INT_V8DI_QI:
33884 nargs = 4;
33885 mask_pos = 2;
33886 nargs_constant = 1;
33887 break;
33888 case V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI:
33889 case V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI:
33890 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI:
33891 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI:
33892 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI:
33893 case V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI:
33894 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI:
33895 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI:
33896 case V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI:
33897 case V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI:
33898 nargs = 5;
33899 mask_pos = 2;
33900 nargs_constant = 1;
33901 break;
33902 case V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI:
33903 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI:
33904 case V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI:
33905 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI:
33906 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI:
33907 nargs = 5;
33908 mask_pos = 1;
33909 nargs_constant = 1;
33910 break;
33911
33912 default:
33913 gcc_unreachable ();
33914 }
33915
33916 gcc_assert (nargs <= ARRAY_SIZE (args));
33917
33918 if (comparison != UNKNOWN)
33919 {
33920 gcc_assert (nargs == 2);
33921 return ix86_expand_sse_compare (d, exp, target, swap);
33922 }
33923
33924 if (rmode == VOIDmode || rmode == tmode)
33925 {
33926 if (optimize
33927 || target == 0
33928 || GET_MODE (target) != tmode
33929 || !insn_p->operand[0].predicate (target, tmode))
33930 target = gen_reg_rtx (tmode);
33931 real_target = target;
33932 }
33933 else
33934 {
33935 real_target = gen_reg_rtx (tmode);
33936 target = simplify_gen_subreg (rmode, real_target, tmode, 0);
33937 }
33938
33939 for (i = 0; i < nargs; i++)
33940 {
33941 tree arg = CALL_EXPR_ARG (exp, i);
33942 rtx op = expand_normal (arg);
33943 enum machine_mode mode = insn_p->operand[i + 1].mode;
33944 bool match = insn_p->operand[i + 1].predicate (op, mode);
33945
33946 if (last_arg_count && (i + 1) == nargs)
33947 {
33948 /* SIMD shift insns take either an 8-bit immediate or a
33949 register as the count. But the builtin functions take int as
33950 the count. If the count doesn't match, we put it in a register. */
33951 if (!match)
33952 {
33953 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
33954 if (!insn_p->operand[i + 1].predicate (op, mode))
33955 op = copy_to_reg (op);
33956 }
33957 }
33958 else if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) ||
33959 (!mask_pos && (nargs - i) <= nargs_constant))
33960 {
33961 if (!match)
33962 switch (icode)
33963 {
33964 case CODE_FOR_avx2_inserti128:
33965 case CODE_FOR_avx2_extracti128:
33966 error ("the last argument must be an 1-bit immediate");
33967 return const0_rtx;
33968
33969 case CODE_FOR_avx512f_cmpv8di3_mask:
33970 case CODE_FOR_avx512f_cmpv16si3_mask:
33971 case CODE_FOR_avx512f_ucmpv8di3_mask:
33972 case CODE_FOR_avx512f_ucmpv16si3_mask:
33973 error ("the last argument must be a 3-bit immediate");
33974 return const0_rtx;
33975
33976 case CODE_FOR_sse4_1_roundsd:
33977 case CODE_FOR_sse4_1_roundss:
33978
33979 case CODE_FOR_sse4_1_roundpd:
33980 case CODE_FOR_sse4_1_roundps:
33981 case CODE_FOR_avx_roundpd256:
33982 case CODE_FOR_avx_roundps256:
33983
33984 case CODE_FOR_sse4_1_roundpd_vec_pack_sfix:
33985 case CODE_FOR_sse4_1_roundps_sfix:
33986 case CODE_FOR_avx_roundpd_vec_pack_sfix256:
33987 case CODE_FOR_avx_roundps_sfix256:
33988
33989 case CODE_FOR_sse4_1_blendps:
33990 case CODE_FOR_avx_blendpd256:
33991 case CODE_FOR_avx_vpermilv4df:
33992 case CODE_FOR_avx512f_getmantv8df_mask:
33993 case CODE_FOR_avx512f_getmantv16sf_mask:
33994 error ("the last argument must be a 4-bit immediate");
33995 return const0_rtx;
33996
33997 case CODE_FOR_sha1rnds4:
33998 case CODE_FOR_sse4_1_blendpd:
33999 case CODE_FOR_avx_vpermilv2df:
34000 case CODE_FOR_xop_vpermil2v2df3:
34001 case CODE_FOR_xop_vpermil2v4sf3:
34002 case CODE_FOR_xop_vpermil2v4df3:
34003 case CODE_FOR_xop_vpermil2v8sf3:
34004 case CODE_FOR_avx512f_vinsertf32x4_mask:
34005 case CODE_FOR_avx512f_vinserti32x4_mask:
34006 case CODE_FOR_avx512f_vextractf32x4_mask:
34007 case CODE_FOR_avx512f_vextracti32x4_mask:
34008 error ("the last argument must be a 2-bit immediate");
34009 return const0_rtx;
34010
34011 case CODE_FOR_avx_vextractf128v4df:
34012 case CODE_FOR_avx_vextractf128v8sf:
34013 case CODE_FOR_avx_vextractf128v8si:
34014 case CODE_FOR_avx_vinsertf128v4df:
34015 case CODE_FOR_avx_vinsertf128v8sf:
34016 case CODE_FOR_avx_vinsertf128v8si:
34017 case CODE_FOR_avx512f_vinsertf64x4_mask:
34018 case CODE_FOR_avx512f_vinserti64x4_mask:
34019 case CODE_FOR_avx512f_vextractf64x4_mask:
34020 case CODE_FOR_avx512f_vextracti64x4_mask:
34021 error ("the last argument must be a 1-bit immediate");
34022 return const0_rtx;
34023
34024 case CODE_FOR_avx_vmcmpv2df3:
34025 case CODE_FOR_avx_vmcmpv4sf3:
34026 case CODE_FOR_avx_cmpv2df3:
34027 case CODE_FOR_avx_cmpv4sf3:
34028 case CODE_FOR_avx_cmpv4df3:
34029 case CODE_FOR_avx_cmpv8sf3:
34030 case CODE_FOR_avx512f_cmpv8df3_mask:
34031 case CODE_FOR_avx512f_cmpv16sf3_mask:
34032 case CODE_FOR_avx512f_vmcmpv2df3_mask:
34033 case CODE_FOR_avx512f_vmcmpv4sf3_mask:
34034 error ("the last argument must be a 5-bit immediate");
34035 return const0_rtx;
34036
34037 default:
34038 switch (nargs_constant)
34039 {
34040 case 2:
34041 if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) ||
34042 (!mask_pos && (nargs - i) == nargs_constant))
34043 {
34044 error ("the next to last argument must be an 8-bit immediate");
34045 break;
34046 }
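/* If the test above does not match, fall through to the generic
   last-argument diagnostic.  FALLTHRU.  */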
34047 case 1:
34048 error ("the last argument must be an 8-bit immediate");
34049 break;
34050 default:
34051 gcc_unreachable ();
34052 }
34053 return const0_rtx;
34054 }
34055 }
34056 else
34057 {
34058 if (VECTOR_MODE_P (mode))
34059 op = safe_vector_operand (op, mode);
34060
34061 /* If we aren't optimizing, only allow one memory operand to
34062 be generated. */
34063 if (memory_operand (op, mode))
34064 num_memory++;
34065
34066 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
34067 {
34068 if (optimize || !match || num_memory > 1)
34069 op = copy_to_mode_reg (mode, op);
34070 }
34071 else
34072 {
34073 op = copy_to_reg (op);
34074 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
34075 }
34076 }
34077
34078 args[i].op = op;
34079 args[i].mode = mode;
34080 }
34081
34082 switch (nargs)
34083 {
34084 case 1:
34085 pat = GEN_FCN (icode) (real_target, args[0].op);
34086 break;
34087 case 2:
34088 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
34089 break;
34090 case 3:
34091 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34092 args[2].op);
34093 break;
34094 case 4:
34095 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34096 args[2].op, args[3].op);
34097 break;
34098 case 5:
34099 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34100 args[2].op, args[3].op, args[4].op);
      break;
34101 case 6:
34102 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34103 args[2].op, args[3].op, args[4].op,
34104 args[5].op);
34105 break;
34106 default:
34107 gcc_unreachable ();
34108 }
34109
34110 if (! pat)
34111 return 0;
34112
34113 emit_insn (pat);
34114 return target;
34115 }
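/* A worked example of the classification above: a descriptor whose type is
   V4DF_FTYPE_V4DF_V4DF_INT sets nargs = 3 and nargs_constant = 1, so the
   first two call arguments are copied into registers as needed while the
   trailing INT must already satisfy the operand predicate (i.e. be an
   immediate); otherwise one of the error () branches above rejects the
   call at expansion time.  */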
34116
34117 /* Transform a pattern of the following layout:
34118 (parallel [
34119 (set A B)
34120 (unspec [C] UNSPEC_EMBEDDED_ROUNDING)
34121 ])
34122 into:
34123 (set A B)
34124
34125 Or:
34126 (parallel [ A B
34127 ...
34128 (unspec [C] UNSPEC_EMBEDDED_ROUNDING)
34129 ...
34130 ])
34131 into:
34132 (parallel [ A B ... ]) */
34133
34134 static rtx
34135 ix86_erase_embedded_rounding (rtx pat)
34136 {
34137 if (GET_CODE (pat) == INSN)
34138 pat = PATTERN (pat);
34139
34140 gcc_assert (GET_CODE (pat) == PARALLEL);
34141
34142 if (XVECLEN (pat, 0) == 2)
34143 {
34144 rtx p0 = XVECEXP (pat, 0, 0);
34145 rtx p1 = XVECEXP (pat, 0, 1);
34146
34147 gcc_assert (GET_CODE (p0) == SET
34148 && GET_CODE (p1) == UNSPEC
34149 && XINT (p1, 1) == UNSPEC_EMBEDDED_ROUNDING);
34150
34151 return p0;
34152 }
34153 else
34154 {
34155 rtx *res = XALLOCAVEC (rtx, XVECLEN (pat, 0));
34156 int i = 0;
34157 int j = 0;
34158
34159 for (; i < XVECLEN (pat, 0); ++i)
34160 {
34161 rtx elem = XVECEXP (pat, 0, i);
34162 if (GET_CODE (elem) != UNSPEC
34163 || XINT (elem, 1) != UNSPEC_EMBEDDED_ROUNDING)
34164 res [j++] = elem;
34165 }
34166
34167       /* No more than one occurrence was removed.  */
34168 gcc_assert (j >= XVECLEN (pat, 0) - 1);
34169
34170 return gen_rtx_PARALLEL (GET_MODE (pat), gen_rtvec_v (j, res));
34171 }
34172 }
34173
34174 /* Subroutine of ix86_expand_round_builtin to take care of comi insns
34175 with rounding. */
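/* Only the INT_FTYPE_V4SF_V4SF_INT_INT and INT_FTYPE_V2DF_V2DF_INT_INT
   cases of ix86_expand_round_builtin dispatch here, so the call is
   expected to have the shape (builtin) (vec1, vec2, comparison-immediate,
   rounding-immediate).  */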
34176 static rtx
34177 ix86_expand_sse_comi_round (const struct builtin_description *d,
34178 tree exp, rtx target)
34179 {
34180 rtx pat, set_dst;
34181 tree arg0 = CALL_EXPR_ARG (exp, 0);
34182 tree arg1 = CALL_EXPR_ARG (exp, 1);
34183 tree arg2 = CALL_EXPR_ARG (exp, 2);
34184 tree arg3 = CALL_EXPR_ARG (exp, 3);
34185 rtx op0 = expand_normal (arg0);
34186 rtx op1 = expand_normal (arg1);
34187 rtx op2 = expand_normal (arg2);
34188 rtx op3 = expand_normal (arg3);
34189 enum insn_code icode = d->icode;
34190 const struct insn_data_d *insn_p = &insn_data[icode];
34191 enum machine_mode mode0 = insn_p->operand[0].mode;
34192 enum machine_mode mode1 = insn_p->operand[1].mode;
34193 enum rtx_code comparison = UNEQ;
34194 bool need_ucomi = false;
34195
34196 /* See avxintrin.h for values. */
34197 enum rtx_code comi_comparisons[32] =
34198 {
34199 UNEQ, GT, GE, UNORDERED, LTGT, UNLE, UNLT, ORDERED, UNEQ, UNLT,
34200 UNLE, LT, LTGT, GE, GT, LT, UNEQ, GT, GE, UNORDERED, LTGT, UNLE,
34201 UNLT, ORDERED, UNEQ, UNLT, UNLE, LT, LTGT, GE, GT, LT
34202 };
34203 bool need_ucomi_values[32] =
34204 {
34205 true, false, false, true, true, false, false, true,
34206 true, false, false, true, true, false, false, true,
34207 false, true, true, false, false, true, true, false,
34208 false, true, true, false, false, true, true, false
34209 };
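  /* For example, an immediate of 0 selects comi_comparisons[0] (UNEQ) and,
     because need_ucomi_values[0] is true, the quiet (ucomi) form of the
     compare instruction is used below.  */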
34210
34211 if (!CONST_INT_P (op2))
34212 {
34213       error ("the third argument must be a comparison constant");
34214 return const0_rtx;
34215 }
34216 if (INTVAL (op2) < 0 || INTVAL (op2) >= 32)
34217 {
34218       error ("incorrect comparison mode");
34219 return const0_rtx;
34220 }
34221
34222 if (!insn_p->operand[2].predicate (op3, SImode))
34223 {
34224 error ("incorrect rounding operand");
34225 return const0_rtx;
34226 }
34227
34228 comparison = comi_comparisons[INTVAL (op2)];
34229 need_ucomi = need_ucomi_values[INTVAL (op2)];
34230
34231 if (VECTOR_MODE_P (mode0))
34232 op0 = safe_vector_operand (op0, mode0);
34233 if (VECTOR_MODE_P (mode1))
34234 op1 = safe_vector_operand (op1, mode1);
34235
34236 target = gen_reg_rtx (SImode);
34237 emit_move_insn (target, const0_rtx);
34238 target = gen_rtx_SUBREG (QImode, target, 0);
34239
34240 if ((optimize && !register_operand (op0, mode0))
34241 || !insn_p->operand[0].predicate (op0, mode0))
34242 op0 = copy_to_mode_reg (mode0, op0);
34243 if ((optimize && !register_operand (op1, mode1))
34244 || !insn_p->operand[1].predicate (op1, mode1))
34245 op1 = copy_to_mode_reg (mode1, op1);
34246
34247 if (need_ucomi)
34248 icode = icode == CODE_FOR_sse_comi_round
34249 ? CODE_FOR_sse_ucomi_round
34250 : CODE_FOR_sse2_ucomi_round;
34251
34252 pat = GEN_FCN (icode) (op0, op1, op3);
34253 if (! pat)
34254 return 0;
34255
34256 /* Rounding operand can be either NO_ROUND or ROUND_SAE at this point. */
34257 if (INTVAL (op3) == NO_ROUND)
34258 {
34259 pat = ix86_erase_embedded_rounding (pat);
34260 if (! pat)
34261 return 0;
34262
34263 set_dst = SET_DEST (pat);
34264 }
34265 else
34266 {
34267 gcc_assert (GET_CODE (XVECEXP (pat, 0, 0)) == SET);
34268 set_dst = SET_DEST (XVECEXP (pat, 0, 0));
34269 }
34270
34271 emit_insn (pat);
34272 emit_insn (gen_rtx_SET (VOIDmode,
34273 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
34274 gen_rtx_fmt_ee (comparison, QImode,
34275 set_dst,
34276 const0_rtx)));
34277
34278 return SUBREG_REG (target);
34279 }
34280
34281 static rtx
34282 ix86_expand_round_builtin (const struct builtin_description *d,
34283 tree exp, rtx target)
34284 {
34285 rtx pat;
34286 unsigned int i, nargs;
34287 struct
34288 {
34289 rtx op;
34290 enum machine_mode mode;
34291 } args[6];
34292 enum insn_code icode = d->icode;
34293 const struct insn_data_d *insn_p = &insn_data[icode];
34294 enum machine_mode tmode = insn_p->operand[0].mode;
34295 unsigned int nargs_constant = 0;
34296 unsigned int redundant_embed_rnd = 0;
34297
34298 switch ((enum ix86_builtin_func_type) d->flag)
34299 {
34300 case UINT64_FTYPE_V2DF_INT:
34301 case UINT64_FTYPE_V4SF_INT:
34302 case UINT_FTYPE_V2DF_INT:
34303 case UINT_FTYPE_V4SF_INT:
34304 case INT64_FTYPE_V2DF_INT:
34305 case INT64_FTYPE_V4SF_INT:
34306 case INT_FTYPE_V2DF_INT:
34307 case INT_FTYPE_V4SF_INT:
34308 nargs = 2;
34309 break;
34310 case V4SF_FTYPE_V4SF_UINT_INT:
34311 case V4SF_FTYPE_V4SF_UINT64_INT:
34312 case V2DF_FTYPE_V2DF_UINT64_INT:
34313 case V4SF_FTYPE_V4SF_INT_INT:
34314 case V4SF_FTYPE_V4SF_INT64_INT:
34315 case V2DF_FTYPE_V2DF_INT64_INT:
34316 case V4SF_FTYPE_V4SF_V4SF_INT:
34317 case V2DF_FTYPE_V2DF_V2DF_INT:
34318 case V4SF_FTYPE_V4SF_V2DF_INT:
34319 case V2DF_FTYPE_V2DF_V4SF_INT:
34320 nargs = 3;
34321 break;
34322 case V8SF_FTYPE_V8DF_V8SF_QI_INT:
34323 case V8DF_FTYPE_V8DF_V8DF_QI_INT:
34324 case V8SI_FTYPE_V8DF_V8SI_QI_INT:
34325 case V16SF_FTYPE_V16SF_V16SF_HI_INT:
34326 case V16SF_FTYPE_V16SI_V16SF_HI_INT:
34327 case V16SI_FTYPE_V16SF_V16SI_HI_INT:
34328 case V8DF_FTYPE_V8SF_V8DF_QI_INT:
34329 case V16SF_FTYPE_V16HI_V16SF_HI_INT:
34330 case V2DF_FTYPE_V2DF_V2DF_V2DF_INT:
34331 case V4SF_FTYPE_V4SF_V4SF_V4SF_INT:
34332 nargs = 4;
34333 break;
34334 case V4SF_FTYPE_V4SF_V4SF_INT_INT:
34335 case V2DF_FTYPE_V2DF_V2DF_INT_INT:
34336 nargs_constant = 2;
34337 nargs = 4;
34338 break;
34339 case INT_FTYPE_V4SF_V4SF_INT_INT:
34340 case INT_FTYPE_V2DF_V2DF_INT_INT:
34341 return ix86_expand_sse_comi_round (d, exp, target);
34342 case V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT:
34343 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT:
34344 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT:
34345 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI_INT:
34346 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT:
34347 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI_INT:
34348 nargs = 5;
34349 break;
34350 case V16SF_FTYPE_V16SF_INT_V16SF_HI_INT:
34351 case V8DF_FTYPE_V8DF_INT_V8DF_QI_INT:
34352 nargs_constant = 4;
34353 nargs = 5;
34354 break;
34355 case QI_FTYPE_V8DF_V8DF_INT_QI_INT:
34356 case QI_FTYPE_V2DF_V2DF_INT_QI_INT:
34357 case HI_FTYPE_V16SF_V16SF_INT_HI_INT:
34358 case QI_FTYPE_V4SF_V4SF_INT_QI_INT:
34359 nargs_constant = 3;
34360 nargs = 5;
34361 break;
34362 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI_INT:
34363 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI_INT:
34364 nargs = 6;
34365 nargs_constant = 4;
34366 break;
34367 case V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT:
34368 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT:
34369 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT:
34370 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT:
34371 nargs = 6;
34372 nargs_constant = 3;
34373 break;
34374 default:
34375 gcc_unreachable ();
34376 }
34377 gcc_assert (nargs <= ARRAY_SIZE (args));
34378
34379 if (optimize
34380 || target == 0
34381 || GET_MODE (target) != tmode
34382 || !insn_p->operand[0].predicate (target, tmode))
34383 target = gen_reg_rtx (tmode);
34384
34385 for (i = 0; i < nargs; i++)
34386 {
34387 tree arg = CALL_EXPR_ARG (exp, i);
34388 rtx op = expand_normal (arg);
34389 enum machine_mode mode = insn_p->operand[i + 1].mode;
34390 bool match = insn_p->operand[i + 1].predicate (op, mode);
34391
34392 if (i == nargs - nargs_constant)
34393 {
34394 if (!match)
34395 {
34396 switch (icode)
34397 {
34398 case CODE_FOR_avx512f_getmantv8df_mask_round:
34399 case CODE_FOR_avx512f_getmantv16sf_mask_round:
34400 case CODE_FOR_avx512f_getmantv2df_round:
34401 case CODE_FOR_avx512f_getmantv4sf_round:
34402 error ("the immediate argument must be a 4-bit immediate");
34403 return const0_rtx;
34404 case CODE_FOR_avx512f_cmpv8df3_mask_round:
34405 case CODE_FOR_avx512f_cmpv16sf3_mask_round:
34406 case CODE_FOR_avx512f_vmcmpv2df3_mask_round:
34407 case CODE_FOR_avx512f_vmcmpv4sf3_mask_round:
34408 error ("the immediate argument must be a 5-bit immediate");
34409 return const0_rtx;
34410 default:
34411 error ("the immediate argument must be an 8-bit immediate");
34412 return const0_rtx;
34413 }
34414 }
34415 }
34416       else if (i == nargs - 1)
34417 {
34418 if (!insn_p->operand[nargs].predicate (op, SImode))
34419 {
34420 error ("incorrect rounding operand");
34421 return const0_rtx;
34422 }
34423
34424 	  /* If there is no rounding, use the normal version of the pattern.  */
34425 if (INTVAL (op) == NO_ROUND)
34426 redundant_embed_rnd = 1;
34427 }
34428 else
34429 {
34430 if (VECTOR_MODE_P (mode))
34431 op = safe_vector_operand (op, mode);
34432
34433 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
34434 {
34435 if (optimize || !match)
34436 op = copy_to_mode_reg (mode, op);
34437 }
34438 else
34439 {
34440 op = copy_to_reg (op);
34441 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
34442 }
34443 }
34444
34445 args[i].op = op;
34446 args[i].mode = mode;
34447 }
34448
34449 switch (nargs)
34450 {
34451 case 1:
34452 pat = GEN_FCN (icode) (target, args[0].op);
34453 break;
34454 case 2:
34455 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
34456 break;
34457 case 3:
34458 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34459 args[2].op);
34460 break;
34461 case 4:
34462 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34463 args[2].op, args[3].op);
34464 break;
34465     case 5:
34466       pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34467 			     args[2].op, args[3].op, args[4].op);
      break;
34468     case 6:
34469 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34470 args[2].op, args[3].op, args[4].op,
34471 args[5].op);
34472 break;
34473 default:
34474 gcc_unreachable ();
34475 }
34476
34477 if (!pat)
34478 return 0;
34479
34480 if (redundant_embed_rnd)
34481 pat = ix86_erase_embedded_rounding (pat);
34482
34483 emit_insn (pat);
34484 return target;
34485 }
34486
34487 /* Subroutine of ix86_expand_builtin to take care of special insns
34488 with variable number of operands. */
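/* Each case below classifies the builtin as a load (the result is returned
   in TARGET) or a store (the first argument is the destination and the
   expander returns 0), and records in MEMORY which operand, if any, is a
   memory reference.  */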
34489
34490 static rtx
34491 ix86_expand_special_args_builtin (const struct builtin_description *d,
34492 tree exp, rtx target)
34493 {
34494 tree arg;
34495 rtx pat, op;
34496 unsigned int i, nargs, arg_adjust, memory;
34497 bool aligned_mem = false;
34498 struct
34499 {
34500 rtx op;
34501 enum machine_mode mode;
34502 } args[3];
34503 enum insn_code icode = d->icode;
34504 bool last_arg_constant = false;
34505 const struct insn_data_d *insn_p = &insn_data[icode];
34506 enum machine_mode tmode = insn_p->operand[0].mode;
34507 enum { load, store } klass;
34508
34509 switch ((enum ix86_builtin_func_type) d->flag)
34510 {
34511 case VOID_FTYPE_VOID:
34512 emit_insn (GEN_FCN (icode) (target));
34513 return 0;
34514 case VOID_FTYPE_UINT64:
34515 case VOID_FTYPE_UNSIGNED:
34516 nargs = 0;
34517 klass = store;
34518 memory = 0;
34519 break;
34520
34521 case INT_FTYPE_VOID:
34522 case UINT64_FTYPE_VOID:
34523 case UNSIGNED_FTYPE_VOID:
34524 nargs = 0;
34525 klass = load;
34526 memory = 0;
34527 break;
34528 case UINT64_FTYPE_PUNSIGNED:
34529 case V2DI_FTYPE_PV2DI:
34530 case V4DI_FTYPE_PV4DI:
34531 case V32QI_FTYPE_PCCHAR:
34532 case V16QI_FTYPE_PCCHAR:
34533 case V8SF_FTYPE_PCV4SF:
34534 case V8SF_FTYPE_PCFLOAT:
34535 case V4SF_FTYPE_PCFLOAT:
34536 case V4DF_FTYPE_PCV2DF:
34537 case V4DF_FTYPE_PCDOUBLE:
34538 case V2DF_FTYPE_PCDOUBLE:
34539 case VOID_FTYPE_PVOID:
34540 case V16SI_FTYPE_PV4SI:
34541 case V16SF_FTYPE_PV4SF:
34542 case V8DI_FTYPE_PV4DI:
34543 case V8DI_FTYPE_PV8DI:
34544 case V8DF_FTYPE_PV4DF:
34545 nargs = 1;
34546 klass = load;
34547 memory = 0;
34548 switch (icode)
34549 {
34550 case CODE_FOR_sse4_1_movntdqa:
34551 case CODE_FOR_avx2_movntdqa:
34552 case CODE_FOR_avx512f_movntdqa:
34553 aligned_mem = true;
34554 break;
34555 default:
34556 break;
34557 }
34558 break;
34559 case VOID_FTYPE_PV2SF_V4SF:
34560 case VOID_FTYPE_PV8DI_V8DI:
34561 case VOID_FTYPE_PV4DI_V4DI:
34562 case VOID_FTYPE_PV2DI_V2DI:
34563 case VOID_FTYPE_PCHAR_V32QI:
34564 case VOID_FTYPE_PCHAR_V16QI:
34565 case VOID_FTYPE_PFLOAT_V16SF:
34566 case VOID_FTYPE_PFLOAT_V8SF:
34567 case VOID_FTYPE_PFLOAT_V4SF:
34568 case VOID_FTYPE_PDOUBLE_V8DF:
34569 case VOID_FTYPE_PDOUBLE_V4DF:
34570 case VOID_FTYPE_PDOUBLE_V2DF:
34571 case VOID_FTYPE_PLONGLONG_LONGLONG:
34572 case VOID_FTYPE_PULONGLONG_ULONGLONG:
34573 case VOID_FTYPE_PINT_INT:
34574 nargs = 1;
34575 klass = store;
34576 /* Reserve memory operand for target. */
34577 memory = ARRAY_SIZE (args);
34578 switch (icode)
34579 {
34580 /* These builtins and instructions require the memory
34581 to be properly aligned. */
34582 case CODE_FOR_avx_movntv4di:
34583 case CODE_FOR_sse2_movntv2di:
34584 case CODE_FOR_avx_movntv8sf:
34585 case CODE_FOR_sse_movntv4sf:
34586 case CODE_FOR_sse4a_vmmovntv4sf:
34587 case CODE_FOR_avx_movntv4df:
34588 case CODE_FOR_sse2_movntv2df:
34589 case CODE_FOR_sse4a_vmmovntv2df:
34590 case CODE_FOR_sse2_movntidi:
34591 case CODE_FOR_sse_movntq:
34592 case CODE_FOR_sse2_movntisi:
34593 case CODE_FOR_avx512f_movntv16sf:
34594 case CODE_FOR_avx512f_movntv8df:
34595 case CODE_FOR_avx512f_movntv8di:
34596 aligned_mem = true;
34597 break;
34598 default:
34599 break;
34600 }
34601 break;
34602 case V4SF_FTYPE_V4SF_PCV2SF:
34603 case V2DF_FTYPE_V2DF_PCDOUBLE:
34604 nargs = 2;
34605 klass = load;
34606 memory = 1;
34607 break;
34608 case V8SF_FTYPE_PCV8SF_V8SI:
34609 case V4DF_FTYPE_PCV4DF_V4DI:
34610 case V4SF_FTYPE_PCV4SF_V4SI:
34611 case V2DF_FTYPE_PCV2DF_V2DI:
34612 case V8SI_FTYPE_PCV8SI_V8SI:
34613 case V4DI_FTYPE_PCV4DI_V4DI:
34614 case V4SI_FTYPE_PCV4SI_V4SI:
34615 case V2DI_FTYPE_PCV2DI_V2DI:
34616 nargs = 2;
34617 klass = load;
34618 memory = 0;
34619 break;
34620 case VOID_FTYPE_PV8DF_V8DF_QI:
34621 case VOID_FTYPE_PV16SF_V16SF_HI:
34622 case VOID_FTYPE_PV8DI_V8DI_QI:
34623 case VOID_FTYPE_PV16SI_V16SI_HI:
34624 switch (icode)
34625 {
34626 /* These builtins and instructions require the memory
34627 to be properly aligned. */
34628 case CODE_FOR_avx512f_storev16sf_mask:
34629 case CODE_FOR_avx512f_storev16si_mask:
34630 case CODE_FOR_avx512f_storev8df_mask:
34631 case CODE_FOR_avx512f_storev8di_mask:
34632 aligned_mem = true;
34633 break;
34634 default:
34635 break;
34636 }
34637 /* FALLTHRU */
34638 case VOID_FTYPE_PV8SF_V8SI_V8SF:
34639 case VOID_FTYPE_PV4DF_V4DI_V4DF:
34640 case VOID_FTYPE_PV4SF_V4SI_V4SF:
34641 case VOID_FTYPE_PV2DF_V2DI_V2DF:
34642 case VOID_FTYPE_PV8SI_V8SI_V8SI:
34643 case VOID_FTYPE_PV4DI_V4DI_V4DI:
34644 case VOID_FTYPE_PV4SI_V4SI_V4SI:
34645 case VOID_FTYPE_PV2DI_V2DI_V2DI:
34646 case VOID_FTYPE_PDOUBLE_V2DF_QI:
34647 case VOID_FTYPE_PFLOAT_V4SF_QI:
34648 case VOID_FTYPE_PV8SI_V8DI_QI:
34649 case VOID_FTYPE_PV8HI_V8DI_QI:
34650 case VOID_FTYPE_PV16HI_V16SI_HI:
34651 case VOID_FTYPE_PV16QI_V8DI_QI:
34652 case VOID_FTYPE_PV16QI_V16SI_HI:
34653 nargs = 2;
34654 klass = store;
34655 /* Reserve memory operand for target. */
34656 memory = ARRAY_SIZE (args);
34657 break;
34658 case V16SF_FTYPE_PCV16SF_V16SF_HI:
34659 case V16SI_FTYPE_PCV16SI_V16SI_HI:
34660 case V8DF_FTYPE_PCV8DF_V8DF_QI:
34661 case V8DI_FTYPE_PCV8DI_V8DI_QI:
34662 case V2DF_FTYPE_PCDOUBLE_V2DF_QI:
34663 case V4SF_FTYPE_PCFLOAT_V4SF_QI:
34664 nargs = 3;
34665 klass = load;
34666 memory = 0;
34667 switch (icode)
34668 {
34669 /* These builtins and instructions require the memory
34670 to be properly aligned. */
34671 case CODE_FOR_avx512f_loadv16sf_mask:
34672 case CODE_FOR_avx512f_loadv16si_mask:
34673 case CODE_FOR_avx512f_loadv8df_mask:
34674 case CODE_FOR_avx512f_loadv8di_mask:
34675 aligned_mem = true;
34676 break;
34677 default:
34678 break;
34679 }
34680 break;
34681 case VOID_FTYPE_UINT_UINT_UINT:
34682 case VOID_FTYPE_UINT64_UINT_UINT:
34683 case UCHAR_FTYPE_UINT_UINT_UINT:
34684 case UCHAR_FTYPE_UINT64_UINT_UINT:
34685 nargs = 3;
34686 klass = load;
34687 memory = ARRAY_SIZE (args);
34688 last_arg_constant = true;
34689 break;
34690 default:
34691 gcc_unreachable ();
34692 }
34693
34694 gcc_assert (nargs <= ARRAY_SIZE (args));
34695
34696 if (klass == store)
34697 {
34698 arg = CALL_EXPR_ARG (exp, 0);
34699 op = expand_normal (arg);
34700 gcc_assert (target == 0);
34701 if (memory)
34702 {
34703 op = ix86_zero_extend_to_Pmode (op);
34704 target = gen_rtx_MEM (tmode, op);
34705 /* target at this point has just BITS_PER_UNIT MEM_ALIGN
34706 on it. Try to improve it using get_pointer_alignment,
34707 and if the special builtin is one that requires strict
34708 	     mode alignment, also from its GET_MODE_ALIGNMENT.
34709 Failure to do so could lead to ix86_legitimate_combined_insn
34710 rejecting all changes to such insns. */
34711 unsigned int align = get_pointer_alignment (arg);
34712 if (aligned_mem && align < GET_MODE_ALIGNMENT (tmode))
34713 align = GET_MODE_ALIGNMENT (tmode);
34714 if (MEM_ALIGN (target) < align)
34715 set_mem_align (target, align);
34716 }
34717 else
34718 target = force_reg (tmode, op);
34719 arg_adjust = 1;
34720 }
34721 else
34722 {
34723 arg_adjust = 0;
34724 if (optimize
34725 || target == 0
34726 || !register_operand (target, tmode)
34727 || GET_MODE (target) != tmode)
34728 target = gen_reg_rtx (tmode);
34729 }
34730
34731 for (i = 0; i < nargs; i++)
34732 {
34733 enum machine_mode mode = insn_p->operand[i + 1].mode;
34734 bool match;
34735
34736 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
34737 op = expand_normal (arg);
34738 match = insn_p->operand[i + 1].predicate (op, mode);
34739
34740 if (last_arg_constant && (i + 1) == nargs)
34741 {
34742 if (!match)
34743 {
34744 if (icode == CODE_FOR_lwp_lwpvalsi3
34745 || icode == CODE_FOR_lwp_lwpinssi3
34746 || icode == CODE_FOR_lwp_lwpvaldi3
34747 || icode == CODE_FOR_lwp_lwpinsdi3)
34748 error ("the last argument must be a 32-bit immediate");
34749 else
34750 error ("the last argument must be an 8-bit immediate");
34751 return const0_rtx;
34752 }
34753 }
34754 else
34755 {
34756 if (i == memory)
34757 {
34758 /* This must be the memory operand. */
34759 op = ix86_zero_extend_to_Pmode (op);
34760 op = gen_rtx_MEM (mode, op);
34761 /* op at this point has just BITS_PER_UNIT MEM_ALIGN
34762 on it. Try to improve it using get_pointer_alignment,
34763 and if the special builtin is one that requires strict
34764 		 mode alignment, also from its GET_MODE_ALIGNMENT.
34765 Failure to do so could lead to ix86_legitimate_combined_insn
34766 rejecting all changes to such insns. */
34767 unsigned int align = get_pointer_alignment (arg);
34768 if (aligned_mem && align < GET_MODE_ALIGNMENT (mode))
34769 align = GET_MODE_ALIGNMENT (mode);
34770 if (MEM_ALIGN (op) < align)
34771 set_mem_align (op, align);
34772 }
34773 else
34774 {
34775 	      /* This must be a register.  */
34776 if (VECTOR_MODE_P (mode))
34777 op = safe_vector_operand (op, mode);
34778
34779 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
34780 op = copy_to_mode_reg (mode, op);
34781 else
34782 {
34783 op = copy_to_reg (op);
34784 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
34785 }
34786 }
34787 }
34788
34789 args[i].op = op;
34790 args[i].mode = mode;
34791 }
34792
34793 switch (nargs)
34794 {
34795 case 0:
34796 pat = GEN_FCN (icode) (target);
34797 break;
34798 case 1:
34799 pat = GEN_FCN (icode) (target, args[0].op);
34800 break;
34801 case 2:
34802 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
34803 break;
34804 case 3:
34805 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
34806 break;
34807 default:
34808 gcc_unreachable ();
34809 }
34810
34811 if (! pat)
34812 return 0;
34813 emit_insn (pat);
34814 return klass == store ? 0 : target;
34815 }
34816
34817 /* Return the integer constant in ARG. Constrain it to be in the range
34818 of the subparts of VEC_TYPE; issue an error if not. */
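/* For instance, for a vector type with four subparts the valid selectors
   are 0..3; a selector of 5 triggers the range error below and 0 is
   returned so that expansion can continue.  */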
34819
34820 static int
34821 get_element_number (tree vec_type, tree arg)
34822 {
34823 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
34824
34825 if (!tree_fits_uhwi_p (arg)
34826 || (elt = tree_to_uhwi (arg), elt > max))
34827 {
34828 error ("selector must be an integer constant in the range 0..%wi", max);
34829 return 0;
34830 }
34831
34832 return elt;
34833 }
34834
34835 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
34836 ix86_expand_vector_init. We DO have language-level syntax for this, in
34837 the form of (type){ init-list }. Except that since we can't place emms
34838 instructions from inside the compiler, we can't allow the use of MMX
34839 registers unless the user explicitly asks for it. So we do *not* define
34840 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
34841    we have builtins invoked by mmintrin.h that give us license to emit
34842 these sorts of instructions. */
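/* Illustrative use, assuming the usual builtin name exposed via the
   mmintrin.h wrappers such as _mm_set_pi32:
     __builtin_ia32_vec_init_v2si (e0, e1)
   expands through ix86_expand_vector_init below rather than through the
   (type){ init-list } syntax.  */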
34843
34844 static rtx
34845 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
34846 {
34847 enum machine_mode tmode = TYPE_MODE (type);
34848 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
34849 int i, n_elt = GET_MODE_NUNITS (tmode);
34850 rtvec v = rtvec_alloc (n_elt);
34851
34852 gcc_assert (VECTOR_MODE_P (tmode));
34853 gcc_assert (call_expr_nargs (exp) == n_elt);
34854
34855 for (i = 0; i < n_elt; ++i)
34856 {
34857 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
34858 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
34859 }
34860
34861 if (!target || !register_operand (target, tmode))
34862 target = gen_reg_rtx (tmode);
34863
34864 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
34865 return target;
34866 }
34867
34868 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
34869 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
34870 had a language-level syntax for referencing vector elements. */
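/* Illustrative use, assuming the usual builtin name behind intrinsics
   such as _mm_extract_epi16:
     __builtin_ia32_vec_ext_v8hi (v, 3)
   returns element 3 of the V8HI vector V via ix86_expand_vector_extract.  */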
34871
34872 static rtx
34873 ix86_expand_vec_ext_builtin (tree exp, rtx target)
34874 {
34875 enum machine_mode tmode, mode0;
34876 tree arg0, arg1;
34877 int elt;
34878 rtx op0;
34879
34880 arg0 = CALL_EXPR_ARG (exp, 0);
34881 arg1 = CALL_EXPR_ARG (exp, 1);
34882
34883 op0 = expand_normal (arg0);
34884 elt = get_element_number (TREE_TYPE (arg0), arg1);
34885
34886 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
34887 mode0 = TYPE_MODE (TREE_TYPE (arg0));
34888 gcc_assert (VECTOR_MODE_P (mode0));
34889
34890 op0 = force_reg (mode0, op0);
34891
34892 if (optimize || !target || !register_operand (target, tmode))
34893 target = gen_reg_rtx (tmode);
34894
34895 ix86_expand_vector_extract (true, target, op0, elt);
34896
34897 return target;
34898 }
34899
34900 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
34901 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
34902 a language-level syntax for referencing vector elements. */
34903
34904 static rtx
34905 ix86_expand_vec_set_builtin (tree exp)
34906 {
34907 enum machine_mode tmode, mode1;
34908 tree arg0, arg1, arg2;
34909 int elt;
34910 rtx op0, op1, target;
34911
34912 arg0 = CALL_EXPR_ARG (exp, 0);
34913 arg1 = CALL_EXPR_ARG (exp, 1);
34914 arg2 = CALL_EXPR_ARG (exp, 2);
34915
34916 tmode = TYPE_MODE (TREE_TYPE (arg0));
34917 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
34918 gcc_assert (VECTOR_MODE_P (tmode));
34919
34920 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
34921 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
34922 elt = get_element_number (TREE_TYPE (arg0), arg2);
34923
34924 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
34925 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
34926
34927 op0 = force_reg (tmode, op0);
34928 op1 = force_reg (mode1, op1);
34929
34930 /* OP0 is the source of these builtin functions and shouldn't be
34931 modified. Create a copy, use it and return it as target. */
34932 target = gen_reg_rtx (tmode);
34933 emit_move_insn (target, op0);
34934 ix86_expand_vector_set (true, target, op1, elt);
34935
34936 return target;
34937 }
34938
34939 /* Expand an expression EXP that calls a built-in function,
34940 with result going to TARGET if that's convenient
34941 (and in mode MODE if that's convenient).
34942 SUBTARGET may be used as the target for computing one of EXP's operands.
34943 IGNORE is nonzero if the value is to be ignored. */
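/* Expansion order: a number of builtins (CPU_*, MASKMOV*, MONITOR/MWAIT,
   gathers, scatters and prefetches, ...) are handled inline in the switch
   below; everything else is looked up in the bdesc_* descriptor tables at
   the end of the function.  */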
34944
34945 static rtx
34946 ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
34947 enum machine_mode mode, int ignore)
34948 {
34949 const struct builtin_description *d;
34950 size_t i;
34951 enum insn_code icode;
34952 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
34953 tree arg0, arg1, arg2, arg3, arg4;
34954 rtx op0, op1, op2, op3, op4, pat, insn;
34955 enum machine_mode mode0, mode1, mode2, mode3, mode4;
34956 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
34957
34958 /* For CPU builtins that can be folded, fold first and expand the fold. */
34959 switch (fcode)
34960 {
34961 case IX86_BUILTIN_CPU_INIT:
34962 {
34963 /* Make it call __cpu_indicator_init in libgcc. */
34964 tree call_expr, fndecl, type;
34965 type = build_function_type_list (integer_type_node, NULL_TREE);
34966 fndecl = build_fn_decl ("__cpu_indicator_init", type);
34967 call_expr = build_call_expr (fndecl, 0);
34968 return expand_expr (call_expr, target, mode, EXPAND_NORMAL);
34969 }
34970 case IX86_BUILTIN_CPU_IS:
34971 case IX86_BUILTIN_CPU_SUPPORTS:
34972 {
34973 tree arg0 = CALL_EXPR_ARG (exp, 0);
34974 tree fold_expr = fold_builtin_cpu (fndecl, &arg0);
34975 gcc_assert (fold_expr != NULL_TREE);
34976 return expand_expr (fold_expr, target, mode, EXPAND_NORMAL);
34977 }
34978 }
34979
34980 /* Determine whether the builtin function is available under the current ISA.
34981 Originally the builtin was not created if it wasn't applicable to the
34982 current ISA based on the command line switches. With function specific
34983 options, we need to check in the context of the function making the call
34984 whether it is supported. */
34985 if (ix86_builtins_isa[fcode].isa
34986 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
34987 {
34988 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
34989 NULL, (enum fpmath_unit) 0, false);
34990
34991 if (!opts)
34992 error ("%qE needs unknown isa option", fndecl);
34993 else
34994 {
34995 gcc_assert (opts != NULL);
34996 error ("%qE needs isa option %s", fndecl, opts);
34997 free (opts);
34998 }
34999 return const0_rtx;
35000 }
35001
35002 switch (fcode)
35003 {
35004 case IX86_BUILTIN_MASKMOVQ:
35005 case IX86_BUILTIN_MASKMOVDQU:
35006 icode = (fcode == IX86_BUILTIN_MASKMOVQ
35007 ? CODE_FOR_mmx_maskmovq
35008 : CODE_FOR_sse2_maskmovdqu);
35009 /* Note the arg order is different from the operand order. */
35010 arg1 = CALL_EXPR_ARG (exp, 0);
35011 arg2 = CALL_EXPR_ARG (exp, 1);
35012 arg0 = CALL_EXPR_ARG (exp, 2);
35013 op0 = expand_normal (arg0);
35014 op1 = expand_normal (arg1);
35015 op2 = expand_normal (arg2);
35016 mode0 = insn_data[icode].operand[0].mode;
35017 mode1 = insn_data[icode].operand[1].mode;
35018 mode2 = insn_data[icode].operand[2].mode;
35019
35020 op0 = ix86_zero_extend_to_Pmode (op0);
35021 op0 = gen_rtx_MEM (mode1, op0);
35022
35023 if (!insn_data[icode].operand[0].predicate (op0, mode0))
35024 op0 = copy_to_mode_reg (mode0, op0);
35025 if (!insn_data[icode].operand[1].predicate (op1, mode1))
35026 op1 = copy_to_mode_reg (mode1, op1);
35027 if (!insn_data[icode].operand[2].predicate (op2, mode2))
35028 op2 = copy_to_mode_reg (mode2, op2);
35029 pat = GEN_FCN (icode) (op0, op1, op2);
35030 if (! pat)
35031 return 0;
35032 emit_insn (pat);
35033 return 0;
35034
35035 case IX86_BUILTIN_LDMXCSR:
35036 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
35037 target = assign_386_stack_local (SImode, SLOT_TEMP);
35038 emit_move_insn (target, op0);
35039 emit_insn (gen_sse_ldmxcsr (target));
35040 return 0;
35041
35042 case IX86_BUILTIN_STMXCSR:
35043 target = assign_386_stack_local (SImode, SLOT_TEMP);
35044 emit_insn (gen_sse_stmxcsr (target));
35045 return copy_to_mode_reg (SImode, target);
35046
35047 case IX86_BUILTIN_CLFLUSH:
35048 arg0 = CALL_EXPR_ARG (exp, 0);
35049 op0 = expand_normal (arg0);
35050 icode = CODE_FOR_sse2_clflush;
35051 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
35052 op0 = ix86_zero_extend_to_Pmode (op0);
35053
35054 emit_insn (gen_sse2_clflush (op0));
35055 return 0;
35056
35057 case IX86_BUILTIN_MONITOR:
35058 arg0 = CALL_EXPR_ARG (exp, 0);
35059 arg1 = CALL_EXPR_ARG (exp, 1);
35060 arg2 = CALL_EXPR_ARG (exp, 2);
35061 op0 = expand_normal (arg0);
35062 op1 = expand_normal (arg1);
35063 op2 = expand_normal (arg2);
35064 if (!REG_P (op0))
35065 op0 = ix86_zero_extend_to_Pmode (op0);
35066 if (!REG_P (op1))
35067 op1 = copy_to_mode_reg (SImode, op1);
35068 if (!REG_P (op2))
35069 op2 = copy_to_mode_reg (SImode, op2);
35070 emit_insn (ix86_gen_monitor (op0, op1, op2));
35071 return 0;
35072
35073 case IX86_BUILTIN_MWAIT:
35074 arg0 = CALL_EXPR_ARG (exp, 0);
35075 arg1 = CALL_EXPR_ARG (exp, 1);
35076 op0 = expand_normal (arg0);
35077 op1 = expand_normal (arg1);
35078 if (!REG_P (op0))
35079 op0 = copy_to_mode_reg (SImode, op0);
35080 if (!REG_P (op1))
35081 op1 = copy_to_mode_reg (SImode, op1);
35082 emit_insn (gen_sse3_mwait (op0, op1));
35083 return 0;
35084
35085 case IX86_BUILTIN_VEC_INIT_V2SI:
35086 case IX86_BUILTIN_VEC_INIT_V4HI:
35087 case IX86_BUILTIN_VEC_INIT_V8QI:
35088 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
35089
35090 case IX86_BUILTIN_VEC_EXT_V2DF:
35091 case IX86_BUILTIN_VEC_EXT_V2DI:
35092 case IX86_BUILTIN_VEC_EXT_V4SF:
35093 case IX86_BUILTIN_VEC_EXT_V4SI:
35094 case IX86_BUILTIN_VEC_EXT_V8HI:
35095 case IX86_BUILTIN_VEC_EXT_V2SI:
35096 case IX86_BUILTIN_VEC_EXT_V4HI:
35097 case IX86_BUILTIN_VEC_EXT_V16QI:
35098 return ix86_expand_vec_ext_builtin (exp, target);
35099
35100 case IX86_BUILTIN_VEC_SET_V2DI:
35101 case IX86_BUILTIN_VEC_SET_V4SF:
35102 case IX86_BUILTIN_VEC_SET_V4SI:
35103 case IX86_BUILTIN_VEC_SET_V8HI:
35104 case IX86_BUILTIN_VEC_SET_V4HI:
35105 case IX86_BUILTIN_VEC_SET_V16QI:
35106 return ix86_expand_vec_set_builtin (exp);
35107
35108 case IX86_BUILTIN_INFQ:
35109 case IX86_BUILTIN_HUGE_VALQ:
35110 {
35111 REAL_VALUE_TYPE inf;
35112 rtx tmp;
35113
35114 real_inf (&inf);
35115 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
35116
35117 tmp = validize_mem (force_const_mem (mode, tmp));
35118
35119 if (target == 0)
35120 target = gen_reg_rtx (mode);
35121
35122 emit_move_insn (target, tmp);
35123 return target;
35124 }
35125
35126 case IX86_BUILTIN_RDPMC:
35127 case IX86_BUILTIN_RDTSC:
35128 case IX86_BUILTIN_RDTSCP:
35129
35130 op0 = gen_reg_rtx (DImode);
35131 op1 = gen_reg_rtx (DImode);
35132
35133 if (fcode == IX86_BUILTIN_RDPMC)
35134 {
35135 arg0 = CALL_EXPR_ARG (exp, 0);
35136 op2 = expand_normal (arg0);
35137 if (!register_operand (op2, SImode))
35138 op2 = copy_to_mode_reg (SImode, op2);
35139
35140 insn = (TARGET_64BIT
35141 ? gen_rdpmc_rex64 (op0, op1, op2)
35142 : gen_rdpmc (op0, op2));
35143 emit_insn (insn);
35144 }
35145 else if (fcode == IX86_BUILTIN_RDTSC)
35146 {
35147 insn = (TARGET_64BIT
35148 ? gen_rdtsc_rex64 (op0, op1)
35149 : gen_rdtsc (op0));
35150 emit_insn (insn);
35151 }
35152 else
35153 {
35154 op2 = gen_reg_rtx (SImode);
35155
35156 insn = (TARGET_64BIT
35157 ? gen_rdtscp_rex64 (op0, op1, op2)
35158 : gen_rdtscp (op0, op2));
35159 emit_insn (insn);
35160
35161 arg0 = CALL_EXPR_ARG (exp, 0);
35162 op4 = expand_normal (arg0);
35163 if (!address_operand (op4, VOIDmode))
35164 {
35165 op4 = convert_memory_address (Pmode, op4);
35166 op4 = copy_addr_to_reg (op4);
35167 }
35168 emit_move_insn (gen_rtx_MEM (SImode, op4), op2);
35169 }
35170
35171 if (target == 0)
35172 {
35173 /* mode is VOIDmode if __builtin_rd* has been called
35174 without lhs. */
35175 if (mode == VOIDmode)
35176 return target;
35177 target = gen_reg_rtx (mode);
35178 }
35179
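      /* rdtsc/rdpmc/rdtscp return the counter as two 32-bit halves, op0
	 holding the low part and op1 the high part, on 64-bit targets;
	 combine them into a single DImode value: op0 |= op1 << 32.  */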
35180 if (TARGET_64BIT)
35181 {
35182 op1 = expand_simple_binop (DImode, ASHIFT, op1, GEN_INT (32),
35183 op1, 1, OPTAB_DIRECT);
35184 op0 = expand_simple_binop (DImode, IOR, op0, op1,
35185 op0, 1, OPTAB_DIRECT);
35186 }
35187
35188 emit_move_insn (target, op0);
35189 return target;
35190
35191 case IX86_BUILTIN_FXSAVE:
35192 case IX86_BUILTIN_FXRSTOR:
35193 case IX86_BUILTIN_FXSAVE64:
35194 case IX86_BUILTIN_FXRSTOR64:
35195 case IX86_BUILTIN_FNSTENV:
35196 case IX86_BUILTIN_FLDENV:
35197 case IX86_BUILTIN_FNSTSW:
35198 mode0 = BLKmode;
35199 switch (fcode)
35200 {
35201 case IX86_BUILTIN_FXSAVE:
35202 icode = CODE_FOR_fxsave;
35203 break;
35204 case IX86_BUILTIN_FXRSTOR:
35205 icode = CODE_FOR_fxrstor;
35206 break;
35207 case IX86_BUILTIN_FXSAVE64:
35208 icode = CODE_FOR_fxsave64;
35209 break;
35210 case IX86_BUILTIN_FXRSTOR64:
35211 icode = CODE_FOR_fxrstor64;
35212 break;
35213 case IX86_BUILTIN_FNSTENV:
35214 icode = CODE_FOR_fnstenv;
35215 break;
35216 case IX86_BUILTIN_FLDENV:
35217 icode = CODE_FOR_fldenv;
35218 break;
35219 case IX86_BUILTIN_FNSTSW:
35220 icode = CODE_FOR_fnstsw;
35221 mode0 = HImode;
35222 break;
35223 default:
35224 gcc_unreachable ();
35225 }
35226
35227 arg0 = CALL_EXPR_ARG (exp, 0);
35228 op0 = expand_normal (arg0);
35229
35230 if (!address_operand (op0, VOIDmode))
35231 {
35232 op0 = convert_memory_address (Pmode, op0);
35233 op0 = copy_addr_to_reg (op0);
35234 }
35235 op0 = gen_rtx_MEM (mode0, op0);
35236
35237 pat = GEN_FCN (icode) (op0);
35238 if (pat)
35239 emit_insn (pat);
35240 return 0;
35241
35242 case IX86_BUILTIN_XSAVE:
35243 case IX86_BUILTIN_XRSTOR:
35244 case IX86_BUILTIN_XSAVE64:
35245 case IX86_BUILTIN_XRSTOR64:
35246 case IX86_BUILTIN_XSAVEOPT:
35247 case IX86_BUILTIN_XSAVEOPT64:
35248 arg0 = CALL_EXPR_ARG (exp, 0);
35249 arg1 = CALL_EXPR_ARG (exp, 1);
35250 op0 = expand_normal (arg0);
35251 op1 = expand_normal (arg1);
35252
35253 if (!address_operand (op0, VOIDmode))
35254 {
35255 op0 = convert_memory_address (Pmode, op0);
35256 op0 = copy_addr_to_reg (op0);
35257 }
35258 op0 = gen_rtx_MEM (BLKmode, op0);
35259
35260 op1 = force_reg (DImode, op1);
35261
35262 if (TARGET_64BIT)
35263 {
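	  /* On 64-bit targets the xsave/xrstor patterns take the feature
	     mask as two SImode halves (EDX:EAX); split op1 accordingly.  */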
35264 op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32),
35265 NULL, 1, OPTAB_DIRECT);
35266 switch (fcode)
35267 {
35268 case IX86_BUILTIN_XSAVE:
35269 icode = CODE_FOR_xsave_rex64;
35270 break;
35271 case IX86_BUILTIN_XRSTOR:
35272 icode = CODE_FOR_xrstor_rex64;
35273 break;
35274 case IX86_BUILTIN_XSAVE64:
35275 icode = CODE_FOR_xsave64;
35276 break;
35277 case IX86_BUILTIN_XRSTOR64:
35278 icode = CODE_FOR_xrstor64;
35279 break;
35280 case IX86_BUILTIN_XSAVEOPT:
35281 icode = CODE_FOR_xsaveopt_rex64;
35282 break;
35283 case IX86_BUILTIN_XSAVEOPT64:
35284 icode = CODE_FOR_xsaveopt64;
35285 break;
35286 default:
35287 gcc_unreachable ();
35288 }
35289
35290 op2 = gen_lowpart (SImode, op2);
35291 op1 = gen_lowpart (SImode, op1);
35292 pat = GEN_FCN (icode) (op0, op1, op2);
35293 }
35294 else
35295 {
35296 switch (fcode)
35297 {
35298 case IX86_BUILTIN_XSAVE:
35299 icode = CODE_FOR_xsave;
35300 break;
35301 case IX86_BUILTIN_XRSTOR:
35302 icode = CODE_FOR_xrstor;
35303 break;
35304 case IX86_BUILTIN_XSAVEOPT:
35305 icode = CODE_FOR_xsaveopt;
35306 break;
35307 default:
35308 gcc_unreachable ();
35309 }
35310 pat = GEN_FCN (icode) (op0, op1);
35311 }
35312
35313 if (pat)
35314 emit_insn (pat);
35315 return 0;
35316
35317 case IX86_BUILTIN_LLWPCB:
35318 arg0 = CALL_EXPR_ARG (exp, 0);
35319 op0 = expand_normal (arg0);
35320 icode = CODE_FOR_lwp_llwpcb;
35321 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
35322 op0 = ix86_zero_extend_to_Pmode (op0);
35323 emit_insn (gen_lwp_llwpcb (op0));
35324 return 0;
35325
35326 case IX86_BUILTIN_SLWPCB:
35327 icode = CODE_FOR_lwp_slwpcb;
35328 if (!target
35329 || !insn_data[icode].operand[0].predicate (target, Pmode))
35330 target = gen_reg_rtx (Pmode);
35331 emit_insn (gen_lwp_slwpcb (target));
35332 return target;
35333
35334 case IX86_BUILTIN_BEXTRI32:
35335 case IX86_BUILTIN_BEXTRI64:
35336 arg0 = CALL_EXPR_ARG (exp, 0);
35337 arg1 = CALL_EXPR_ARG (exp, 1);
35338 op0 = expand_normal (arg0);
35339 op1 = expand_normal (arg1);
35340 icode = (fcode == IX86_BUILTIN_BEXTRI32
35341 ? CODE_FOR_tbm_bextri_si
35342 : CODE_FOR_tbm_bextri_di);
35343 if (!CONST_INT_P (op1))
35344 {
35345 error ("last argument must be an immediate");
35346 return const0_rtx;
35347 }
35348 else
35349 {
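	  /* The bextri control immediate packs the field length in bits
	     15:8 and the starting bit index in bits 7:0; split it into the
	     two operands the pattern expects.  */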
35350 unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
35351 unsigned char lsb_index = INTVAL (op1) & 0xFF;
35352 op1 = GEN_INT (length);
35353 op2 = GEN_INT (lsb_index);
35354 pat = GEN_FCN (icode) (target, op0, op1, op2);
35355 if (pat)
35356 emit_insn (pat);
35357 return target;
35358 }
35359
35360 case IX86_BUILTIN_RDRAND16_STEP:
35361 icode = CODE_FOR_rdrandhi_1;
35362 mode0 = HImode;
35363 goto rdrand_step;
35364
35365 case IX86_BUILTIN_RDRAND32_STEP:
35366 icode = CODE_FOR_rdrandsi_1;
35367 mode0 = SImode;
35368 goto rdrand_step;
35369
35370 case IX86_BUILTIN_RDRAND64_STEP:
35371 icode = CODE_FOR_rdranddi_1;
35372 mode0 = DImode;
35373
35374 rdrand_step:
35375 op0 = gen_reg_rtx (mode0);
35376 emit_insn (GEN_FCN (icode) (op0));
35377
35378 arg0 = CALL_EXPR_ARG (exp, 0);
35379 op1 = expand_normal (arg0);
35380 if (!address_operand (op1, VOIDmode))
35381 {
35382 op1 = convert_memory_address (Pmode, op1);
35383 op1 = copy_addr_to_reg (op1);
35384 }
35385 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
35386
35387 op1 = gen_reg_rtx (SImode);
35388 emit_move_insn (op1, CONST1_RTX (SImode));
35389
35390 /* Emit SImode conditional move. */
35391 if (mode0 == HImode)
35392 {
35393 op2 = gen_reg_rtx (SImode);
35394 emit_insn (gen_zero_extendhisi2 (op2, op0));
35395 }
35396 else if (mode0 == SImode)
35397 op2 = op0;
35398 else
35399 op2 = gen_rtx_SUBREG (SImode, op0, 0);
35400
35401 if (target == 0)
35402 target = gen_reg_rtx (SImode);
35403
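      /* rdrand reports success via the carry flag and zeroes the
	 destination register on failure.  The conditional move below
	 therefore produces 1 when CF is set and the (zeroed) register
	 value otherwise, i.e. the builtin returns nonzero exactly when a
	 random value was generated.  */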
35404 pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
35405 const0_rtx);
35406 emit_insn (gen_rtx_SET (VOIDmode, target,
35407 gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
35408 return target;
35409
35410 case IX86_BUILTIN_RDSEED16_STEP:
35411 icode = CODE_FOR_rdseedhi_1;
35412 mode0 = HImode;
35413 goto rdseed_step;
35414
35415 case IX86_BUILTIN_RDSEED32_STEP:
35416 icode = CODE_FOR_rdseedsi_1;
35417 mode0 = SImode;
35418 goto rdseed_step;
35419
35420 case IX86_BUILTIN_RDSEED64_STEP:
35421 icode = CODE_FOR_rdseeddi_1;
35422 mode0 = DImode;
35423
35424 rdseed_step:
35425 op0 = gen_reg_rtx (mode0);
35426 emit_insn (GEN_FCN (icode) (op0));
35427
35428 arg0 = CALL_EXPR_ARG (exp, 0);
35429 op1 = expand_normal (arg0);
35430 if (!address_operand (op1, VOIDmode))
35431 {
35432 op1 = convert_memory_address (Pmode, op1);
35433 op1 = copy_addr_to_reg (op1);
35434 }
35435 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
35436
35437 op2 = gen_reg_rtx (QImode);
35438
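      /* rdseed likewise reports success via the carry flag; capture it as
	 a 0/1 QImode value and zero-extend it into the SImode result.  */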
35439 pat = gen_rtx_LTU (QImode, gen_rtx_REG (CCCmode, FLAGS_REG),
35440 const0_rtx);
35441 emit_insn (gen_rtx_SET (VOIDmode, op2, pat));
35442
35443 if (target == 0)
35444 target = gen_reg_rtx (SImode);
35445
35446 emit_insn (gen_zero_extendqisi2 (target, op2));
35447 return target;
35448
35449 case IX86_BUILTIN_ADDCARRYX32:
35450 icode = TARGET_ADX ? CODE_FOR_adcxsi3 : CODE_FOR_addsi3_carry;
35451 mode0 = SImode;
35452 goto addcarryx;
35453
35454 case IX86_BUILTIN_ADDCARRYX64:
35455 icode = TARGET_ADX ? CODE_FOR_adcxdi3 : CODE_FOR_adddi3_carry;
35456 mode0 = DImode;
35457
35458 addcarryx:
35459 arg0 = CALL_EXPR_ARG (exp, 0); /* unsigned char c_in. */
35460 arg1 = CALL_EXPR_ARG (exp, 1); /* unsigned int src1. */
35461 arg2 = CALL_EXPR_ARG (exp, 2); /* unsigned int src2. */
35462 arg3 = CALL_EXPR_ARG (exp, 3); /* unsigned int *sum_out. */
35463
35464 op0 = gen_reg_rtx (QImode);
35465
35466 /* Generate CF from input operand. */
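      /* c_in + 0xff overflows QImode whenever c_in is nonzero, so the
	 addqi3_cc insn below sets CF exactly when a carry-in was
	 requested; the adcx/adc pattern then consumes that CF.  */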
35467 op1 = expand_normal (arg0);
35468 op1 = copy_to_mode_reg (QImode, convert_to_mode (QImode, op1, 1));
35469 emit_insn (gen_addqi3_cc (op0, op1, constm1_rtx));
35470
35471       /* Generate an ADCX (or ADC) instruction to compute X + Y + CF.  */
35472 op2 = expand_normal (arg1);
35473 op3 = expand_normal (arg2);
35474
35475 if (!REG_P (op2))
35476 op2 = copy_to_mode_reg (mode0, op2);
35477 if (!REG_P (op3))
35478 op3 = copy_to_mode_reg (mode0, op3);
35479
35480 op0 = gen_reg_rtx (mode0);
35481
35482 op4 = gen_rtx_REG (CCCmode, FLAGS_REG);
35483 pat = gen_rtx_LTU (VOIDmode, op4, const0_rtx);
35484 emit_insn (GEN_FCN (icode) (op0, op2, op3, op4, pat));
35485
35486 /* Store the result. */
35487 op4 = expand_normal (arg3);
35488 if (!address_operand (op4, VOIDmode))
35489 {
35490 op4 = convert_memory_address (Pmode, op4);
35491 op4 = copy_addr_to_reg (op4);
35492 }
35493 emit_move_insn (gen_rtx_MEM (mode0, op4), op0);
35494
35495 /* Return current CF value. */
35496 if (target == 0)
35497 target = gen_reg_rtx (QImode);
35498
35499 PUT_MODE (pat, QImode);
35500 emit_insn (gen_rtx_SET (VOIDmode, target, pat));
35501 return target;
35502
35503 case IX86_BUILTIN_READ_FLAGS:
35504 emit_insn (gen_push (gen_rtx_REG (word_mode, FLAGS_REG)));
35505
35506 if (optimize
35507 || target == NULL_RTX
35508 || !nonimmediate_operand (target, word_mode)
35509 || GET_MODE (target) != word_mode)
35510 target = gen_reg_rtx (word_mode);
35511
35512 emit_insn (gen_pop (target));
35513 return target;
35514
35515 case IX86_BUILTIN_WRITE_FLAGS:
35516
35517 arg0 = CALL_EXPR_ARG (exp, 0);
35518 op0 = expand_normal (arg0);
35519 if (!general_no_elim_operand (op0, word_mode))
35520 op0 = copy_to_mode_reg (word_mode, op0);
35521
35522 emit_insn (gen_push (op0));
35523 emit_insn (gen_pop (gen_rtx_REG (word_mode, FLAGS_REG)));
35524 return 0;
35525
35526 case IX86_BUILTIN_KORTESTC16:
35527 icode = CODE_FOR_kortestchi;
35528 mode0 = HImode;
35529 mode1 = CCCmode;
35530 goto kortest;
35531
35532 case IX86_BUILTIN_KORTESTZ16:
35533 icode = CODE_FOR_kortestzhi;
35534 mode0 = HImode;
35535 mode1 = CCZmode;
35536
35537 kortest:
35538 arg0 = CALL_EXPR_ARG (exp, 0); /* Mask reg src1. */
35539 arg1 = CALL_EXPR_ARG (exp, 1); /* Mask reg src2. */
35540 op0 = expand_normal (arg0);
35541 op1 = expand_normal (arg1);
35542
35543 op0 = copy_to_reg (op0);
35544 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
35545 op1 = copy_to_reg (op1);
35546 op1 = simplify_gen_subreg (mode0, op1, GET_MODE (op1), 0);
35547
35548 target = gen_reg_rtx (QImode);
35549 emit_insn (gen_rtx_SET (mode0, target, const0_rtx));
35550
35551 /* Emit kortest. */
35552 emit_insn (GEN_FCN (icode) (op0, op1));
35553 /* And use setcc to return result from flags. */
35554 ix86_expand_setcc (target, EQ,
35555 gen_rtx_REG (mode1, FLAGS_REG), const0_rtx);
35556 return target;
35557
35558 case IX86_BUILTIN_GATHERSIV2DF:
35559 icode = CODE_FOR_avx2_gathersiv2df;
35560 goto gather_gen;
35561 case IX86_BUILTIN_GATHERSIV4DF:
35562 icode = CODE_FOR_avx2_gathersiv4df;
35563 goto gather_gen;
35564 case IX86_BUILTIN_GATHERDIV2DF:
35565 icode = CODE_FOR_avx2_gatherdiv2df;
35566 goto gather_gen;
35567 case IX86_BUILTIN_GATHERDIV4DF:
35568 icode = CODE_FOR_avx2_gatherdiv4df;
35569 goto gather_gen;
35570 case IX86_BUILTIN_GATHERSIV4SF:
35571 icode = CODE_FOR_avx2_gathersiv4sf;
35572 goto gather_gen;
35573 case IX86_BUILTIN_GATHERSIV8SF:
35574 icode = CODE_FOR_avx2_gathersiv8sf;
35575 goto gather_gen;
35576 case IX86_BUILTIN_GATHERDIV4SF:
35577 icode = CODE_FOR_avx2_gatherdiv4sf;
35578 goto gather_gen;
35579 case IX86_BUILTIN_GATHERDIV8SF:
35580 icode = CODE_FOR_avx2_gatherdiv8sf;
35581 goto gather_gen;
35582 case IX86_BUILTIN_GATHERSIV2DI:
35583 icode = CODE_FOR_avx2_gathersiv2di;
35584 goto gather_gen;
35585 case IX86_BUILTIN_GATHERSIV4DI:
35586 icode = CODE_FOR_avx2_gathersiv4di;
35587 goto gather_gen;
35588 case IX86_BUILTIN_GATHERDIV2DI:
35589 icode = CODE_FOR_avx2_gatherdiv2di;
35590 goto gather_gen;
35591 case IX86_BUILTIN_GATHERDIV4DI:
35592 icode = CODE_FOR_avx2_gatherdiv4di;
35593 goto gather_gen;
35594 case IX86_BUILTIN_GATHERSIV4SI:
35595 icode = CODE_FOR_avx2_gathersiv4si;
35596 goto gather_gen;
35597 case IX86_BUILTIN_GATHERSIV8SI:
35598 icode = CODE_FOR_avx2_gathersiv8si;
35599 goto gather_gen;
35600 case IX86_BUILTIN_GATHERDIV4SI:
35601 icode = CODE_FOR_avx2_gatherdiv4si;
35602 goto gather_gen;
35603 case IX86_BUILTIN_GATHERDIV8SI:
35604 icode = CODE_FOR_avx2_gatherdiv8si;
35605 goto gather_gen;
35606 case IX86_BUILTIN_GATHERALTSIV4DF:
35607 icode = CODE_FOR_avx2_gathersiv4df;
35608 goto gather_gen;
35609 case IX86_BUILTIN_GATHERALTDIV8SF:
35610 icode = CODE_FOR_avx2_gatherdiv8sf;
35611 goto gather_gen;
35612 case IX86_BUILTIN_GATHERALTSIV4DI:
35613 icode = CODE_FOR_avx2_gathersiv4di;
35614 goto gather_gen;
35615 case IX86_BUILTIN_GATHERALTDIV8SI:
35616 icode = CODE_FOR_avx2_gatherdiv8si;
35617 goto gather_gen;
35618 case IX86_BUILTIN_GATHER3SIV16SF:
35619 icode = CODE_FOR_avx512f_gathersiv16sf;
35620 goto gather_gen;
35621 case IX86_BUILTIN_GATHER3SIV8DF:
35622 icode = CODE_FOR_avx512f_gathersiv8df;
35623 goto gather_gen;
35624 case IX86_BUILTIN_GATHER3DIV16SF:
35625 icode = CODE_FOR_avx512f_gatherdiv16sf;
35626 goto gather_gen;
35627 case IX86_BUILTIN_GATHER3DIV8DF:
35628 icode = CODE_FOR_avx512f_gatherdiv8df;
35629 goto gather_gen;
35630 case IX86_BUILTIN_GATHER3SIV16SI:
35631 icode = CODE_FOR_avx512f_gathersiv16si;
35632 goto gather_gen;
35633 case IX86_BUILTIN_GATHER3SIV8DI:
35634 icode = CODE_FOR_avx512f_gathersiv8di;
35635 goto gather_gen;
35636 case IX86_BUILTIN_GATHER3DIV16SI:
35637 icode = CODE_FOR_avx512f_gatherdiv16si;
35638 goto gather_gen;
35639 case IX86_BUILTIN_GATHER3DIV8DI:
35640 icode = CODE_FOR_avx512f_gatherdiv8di;
35641 goto gather_gen;
35642 case IX86_BUILTIN_GATHER3ALTSIV8DF:
35643 icode = CODE_FOR_avx512f_gathersiv8df;
35644 goto gather_gen;
35645 case IX86_BUILTIN_GATHER3ALTDIV16SF:
35646 icode = CODE_FOR_avx512f_gatherdiv16sf;
35647 goto gather_gen;
35648 case IX86_BUILTIN_GATHER3ALTSIV8DI:
35649 icode = CODE_FOR_avx512f_gathersiv8di;
35650 goto gather_gen;
35651 case IX86_BUILTIN_GATHER3ALTDIV16SI:
35652 icode = CODE_FOR_avx512f_gatherdiv16si;
35653 goto gather_gen;
35654 case IX86_BUILTIN_SCATTERSIV16SF:
35655 icode = CODE_FOR_avx512f_scattersiv16sf;
35656 goto scatter_gen;
35657 case IX86_BUILTIN_SCATTERSIV8DF:
35658 icode = CODE_FOR_avx512f_scattersiv8df;
35659 goto scatter_gen;
35660 case IX86_BUILTIN_SCATTERDIV16SF:
35661 icode = CODE_FOR_avx512f_scatterdiv16sf;
35662 goto scatter_gen;
35663 case IX86_BUILTIN_SCATTERDIV8DF:
35664 icode = CODE_FOR_avx512f_scatterdiv8df;
35665 goto scatter_gen;
35666 case IX86_BUILTIN_SCATTERSIV16SI:
35667 icode = CODE_FOR_avx512f_scattersiv16si;
35668 goto scatter_gen;
35669 case IX86_BUILTIN_SCATTERSIV8DI:
35670 icode = CODE_FOR_avx512f_scattersiv8di;
35671 goto scatter_gen;
35672 case IX86_BUILTIN_SCATTERDIV16SI:
35673 icode = CODE_FOR_avx512f_scatterdiv16si;
35674 goto scatter_gen;
35675 case IX86_BUILTIN_SCATTERDIV8DI:
35676 icode = CODE_FOR_avx512f_scatterdiv8di;
35677 goto scatter_gen;
35678
35679 case IX86_BUILTIN_GATHERPFDPD:
35680 icode = CODE_FOR_avx512pf_gatherpfv8sidf;
35681 goto vec_prefetch_gen;
35682 case IX86_BUILTIN_GATHERPFDPS:
35683 icode = CODE_FOR_avx512pf_gatherpfv16sisf;
35684 goto vec_prefetch_gen;
35685 case IX86_BUILTIN_GATHERPFQPD:
35686 icode = CODE_FOR_avx512pf_gatherpfv8didf;
35687 goto vec_prefetch_gen;
35688 case IX86_BUILTIN_GATHERPFQPS:
35689 icode = CODE_FOR_avx512pf_gatherpfv8disf;
35690 goto vec_prefetch_gen;
35691 case IX86_BUILTIN_SCATTERPFDPD:
35692 icode = CODE_FOR_avx512pf_scatterpfv8sidf;
35693 goto vec_prefetch_gen;
35694 case IX86_BUILTIN_SCATTERPFDPS:
35695 icode = CODE_FOR_avx512pf_scatterpfv16sisf;
35696 goto vec_prefetch_gen;
35697 case IX86_BUILTIN_SCATTERPFQPD:
35698 icode = CODE_FOR_avx512pf_scatterpfv8didf;
35699 goto vec_prefetch_gen;
35700 case IX86_BUILTIN_SCATTERPFQPS:
35701 icode = CODE_FOR_avx512pf_scatterpfv8disf;
35702 goto vec_prefetch_gen;
35703
35704 gather_gen:
35705 rtx half;
35706 rtx (*gen) (rtx, rtx);
35707
35708 arg0 = CALL_EXPR_ARG (exp, 0);
35709 arg1 = CALL_EXPR_ARG (exp, 1);
35710 arg2 = CALL_EXPR_ARG (exp, 2);
35711 arg3 = CALL_EXPR_ARG (exp, 3);
35712 arg4 = CALL_EXPR_ARG (exp, 4);
35713 op0 = expand_normal (arg0);
35714 op1 = expand_normal (arg1);
35715 op2 = expand_normal (arg2);
35716 op3 = expand_normal (arg3);
35717 op4 = expand_normal (arg4);
35718 /* Note the arg order is different from the operand order. */
35719 mode0 = insn_data[icode].operand[1].mode;
35720 mode2 = insn_data[icode].operand[3].mode;
35721 mode3 = insn_data[icode].operand[4].mode;
35722 mode4 = insn_data[icode].operand[5].mode;
35723
35724 if (target == NULL_RTX
35725 || GET_MODE (target) != insn_data[icode].operand[0].mode
35726 || !insn_data[icode].operand[0].predicate (target,
35727 GET_MODE (target)))
35728 subtarget = gen_reg_rtx (insn_data[icode].operand[0].mode);
35729 else
35730 subtarget = target;
35731
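      /* The GATHERALT and GATHER3ALT variants pass an index or source/mask
	 vector that is twice as wide as the underlying gather pattern
	 needs; extract its low half before handing it to the pattern.  */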
35732 switch (fcode)
35733 {
35734 case IX86_BUILTIN_GATHER3ALTSIV8DF:
35735 case IX86_BUILTIN_GATHER3ALTSIV8DI:
35736 half = gen_reg_rtx (V8SImode);
35737 if (!nonimmediate_operand (op2, V16SImode))
35738 op2 = copy_to_mode_reg (V16SImode, op2);
35739 emit_insn (gen_vec_extract_lo_v16si (half, op2));
35740 op2 = half;
35741 break;
35742 case IX86_BUILTIN_GATHERALTSIV4DF:
35743 case IX86_BUILTIN_GATHERALTSIV4DI:
35744 half = gen_reg_rtx (V4SImode);
35745 if (!nonimmediate_operand (op2, V8SImode))
35746 op2 = copy_to_mode_reg (V8SImode, op2);
35747 emit_insn (gen_vec_extract_lo_v8si (half, op2));
35748 op2 = half;
35749 break;
35750 case IX86_BUILTIN_GATHER3ALTDIV16SF:
35751 case IX86_BUILTIN_GATHER3ALTDIV16SI:
35752 half = gen_reg_rtx (mode0);
35753 if (mode0 == V8SFmode)
35754 gen = gen_vec_extract_lo_v16sf;
35755 else
35756 gen = gen_vec_extract_lo_v16si;
35757 if (!nonimmediate_operand (op0, GET_MODE (op0)))
35758 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
35759 emit_insn (gen (half, op0));
35760 op0 = half;
35761 if (GET_MODE (op3) != VOIDmode)
35762 {
35763 if (!nonimmediate_operand (op3, GET_MODE (op3)))
35764 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
35765 emit_insn (gen (half, op3));
35766 op3 = half;
35767 }
35768 break;
35769 case IX86_BUILTIN_GATHERALTDIV8SF:
35770 case IX86_BUILTIN_GATHERALTDIV8SI:
35771 half = gen_reg_rtx (mode0);
35772 if (mode0 == V4SFmode)
35773 gen = gen_vec_extract_lo_v8sf;
35774 else
35775 gen = gen_vec_extract_lo_v8si;
35776 if (!nonimmediate_operand (op0, GET_MODE (op0)))
35777 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
35778 emit_insn (gen (half, op0));
35779 op0 = half;
35780 if (GET_MODE (op3) != VOIDmode)
35781 {
35782 if (!nonimmediate_operand (op3, GET_MODE (op3)))
35783 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
35784 emit_insn (gen (half, op3));
35785 op3 = half;
35786 }
35787 break;
35788 default:
35789 break;
35790 }
35791
35792 /* Force memory operand only with base register here. But we
35793 don't want to do it on memory operand for other builtin
35794 functions. */
35795 op1 = ix86_zero_extend_to_Pmode (op1);
35796
35797 if (!insn_data[icode].operand[1].predicate (op0, mode0))
35798 op0 = copy_to_mode_reg (mode0, op0);
35799 if (!insn_data[icode].operand[2].predicate (op1, Pmode))
35800 op1 = copy_to_mode_reg (Pmode, op1);
35801 if (!insn_data[icode].operand[3].predicate (op2, mode2))
35802 op2 = copy_to_mode_reg (mode2, op2);
35803 if (GET_MODE (op3) == mode3 || GET_MODE (op3) == VOIDmode)
35804 {
35805 if (!insn_data[icode].operand[4].predicate (op3, mode3))
35806 op3 = copy_to_mode_reg (mode3, op3);
35807 }
35808 else
35809 {
35810 op3 = copy_to_reg (op3);
35811 op3 = simplify_gen_subreg (mode3, op3, GET_MODE (op3), 0);
35812 }
35813 if (!insn_data[icode].operand[5].predicate (op4, mode4))
35814 {
35815 error ("the last argument must be scale 1, 2, 4, 8");
35816 return const0_rtx;
35817 }
35818
35819 /* Optimize. If mask is known to have all high bits set,
35820 replace op0 with pc_rtx to signal that the instruction
35821 overwrites the whole destination and doesn't use its
35822 previous contents. */
35823 if (optimize)
35824 {
35825 if (TREE_CODE (arg3) == INTEGER_CST)
35826 {
35827 if (integer_all_onesp (arg3))
35828 op0 = pc_rtx;
35829 }
35830 else if (TREE_CODE (arg3) == VECTOR_CST)
35831 {
35832 unsigned int negative = 0;
35833 for (i = 0; i < VECTOR_CST_NELTS (arg3); ++i)
35834 {
35835 tree cst = VECTOR_CST_ELT (arg3, i);
35836 if (TREE_CODE (cst) == INTEGER_CST
35837 && tree_int_cst_sign_bit (cst))
35838 negative++;
35839 else if (TREE_CODE (cst) == REAL_CST
35840 && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst)))
35841 negative++;
35842 }
35843 if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3)))
35844 op0 = pc_rtx;
35845 }
35846 else if (TREE_CODE (arg3) == SSA_NAME
35847 && TREE_CODE (TREE_TYPE (arg3)) == VECTOR_TYPE)
35848 {
35849 	      /* Also recognize the case where the mask is built like:
35850 __v2df src = _mm_setzero_pd ();
35851 __v2df mask = _mm_cmpeq_pd (src, src);
35852 or
35853 __v8sf src = _mm256_setzero_ps ();
35854 __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
35855 as that is a cheaper way to load all ones into
35856 a register than having to load a constant from
35857 memory. */
35858 gimple def_stmt = SSA_NAME_DEF_STMT (arg3);
35859 if (is_gimple_call (def_stmt))
35860 {
35861 tree fndecl = gimple_call_fndecl (def_stmt);
35862 if (fndecl
35863 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
35864 switch ((unsigned int) DECL_FUNCTION_CODE (fndecl))
35865 {
35866 case IX86_BUILTIN_CMPPD:
35867 case IX86_BUILTIN_CMPPS:
35868 case IX86_BUILTIN_CMPPD256:
35869 case IX86_BUILTIN_CMPPS256:
35870 if (!integer_zerop (gimple_call_arg (def_stmt, 2)))
35871 break;
35872 /* FALLTHRU */
35873 case IX86_BUILTIN_CMPEQPD:
35874 case IX86_BUILTIN_CMPEQPS:
35875 if (initializer_zerop (gimple_call_arg (def_stmt, 0))
35876 && initializer_zerop (gimple_call_arg (def_stmt,
35877 1)))
35878 op0 = pc_rtx;
35879 break;
35880 default:
35881 break;
35882 }
35883 }
35884 }
35885 }
35886
35887 pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4);
35888 if (! pat)
35889 return const0_rtx;
35890 emit_insn (pat);
35891
35892 switch (fcode)
35893 {
35894 case IX86_BUILTIN_GATHER3DIV16SF:
35895 if (target == NULL_RTX)
35896 target = gen_reg_rtx (V8SFmode);
35897 emit_insn (gen_vec_extract_lo_v16sf (target, subtarget));
35898 break;
35899 case IX86_BUILTIN_GATHER3DIV16SI:
35900 if (target == NULL_RTX)
35901 target = gen_reg_rtx (V8SImode);
35902 emit_insn (gen_vec_extract_lo_v16si (target, subtarget));
35903 break;
35904 case IX86_BUILTIN_GATHERDIV8SF:
35905 if (target == NULL_RTX)
35906 target = gen_reg_rtx (V4SFmode);
35907 emit_insn (gen_vec_extract_lo_v8sf (target, subtarget));
35908 break;
35909 case IX86_BUILTIN_GATHERDIV8SI:
35910 if (target == NULL_RTX)
35911 target = gen_reg_rtx (V4SImode);
35912 emit_insn (gen_vec_extract_lo_v8si (target, subtarget));
35913 break;
35914 default:
35915 target = subtarget;
35916 break;
35917 }
35918 return target;
35919
35920 scatter_gen:
35921 arg0 = CALL_EXPR_ARG (exp, 0);
35922 arg1 = CALL_EXPR_ARG (exp, 1);
35923 arg2 = CALL_EXPR_ARG (exp, 2);
35924 arg3 = CALL_EXPR_ARG (exp, 3);
35925 arg4 = CALL_EXPR_ARG (exp, 4);
35926 op0 = expand_normal (arg0);
35927 op1 = expand_normal (arg1);
35928 op2 = expand_normal (arg2);
35929 op3 = expand_normal (arg3);
35930 op4 = expand_normal (arg4);
35931 mode1 = insn_data[icode].operand[1].mode;
35932 mode2 = insn_data[icode].operand[2].mode;
35933 mode3 = insn_data[icode].operand[3].mode;
35934 mode4 = insn_data[icode].operand[4].mode;
35935
35936 /* Force memory operand only with base register here. But we
35937 don't want to do it on memory operand for other builtin
35938 functions. */
35939 op0 = force_reg (Pmode, convert_to_mode (Pmode, op0, 1));
35940
35941 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
35942 op0 = copy_to_mode_reg (Pmode, op0);
35943
35944 if (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)
35945 {
35946 if (!insn_data[icode].operand[1].predicate (op1, mode1))
35947 op1 = copy_to_mode_reg (mode1, op1);
35948 }
35949 else
35950 {
35951 op1 = copy_to_reg (op1);
35952 op1 = simplify_gen_subreg (mode1, op1, GET_MODE (op1), 0);
35953 }
35954
35955 if (!insn_data[icode].operand[2].predicate (op2, mode2))
35956 op2 = copy_to_mode_reg (mode2, op2);
35957
35958 if (!insn_data[icode].operand[3].predicate (op3, mode3))
35959 op3 = copy_to_mode_reg (mode3, op3);
35960
35961 if (!insn_data[icode].operand[4].predicate (op4, mode4))
35962 {
35963 error ("the last argument must be scale 1, 2, 4, 8");
35964 return const0_rtx;
35965 }
35966
35967 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
35968 if (! pat)
35969 return const0_rtx;
35970
35971 emit_insn (pat);
35972 return 0;
35973
35974 vec_prefetch_gen:
35975 arg0 = CALL_EXPR_ARG (exp, 0);
35976 arg1 = CALL_EXPR_ARG (exp, 1);
35977 arg2 = CALL_EXPR_ARG (exp, 2);
35978 arg3 = CALL_EXPR_ARG (exp, 3);
35979 arg4 = CALL_EXPR_ARG (exp, 4);
35980 op0 = expand_normal (arg0);
35981 op1 = expand_normal (arg1);
35982 op2 = expand_normal (arg2);
35983 op3 = expand_normal (arg3);
35984 op4 = expand_normal (arg4);
35985 mode0 = insn_data[icode].operand[0].mode;
35986 mode1 = insn_data[icode].operand[1].mode;
35987 mode3 = insn_data[icode].operand[3].mode;
35988 mode4 = insn_data[icode].operand[4].mode;
35989
35990 if (GET_MODE (op0) == mode0
35991 || (GET_MODE (op0) == VOIDmode && op0 != constm1_rtx))
35992 {
35993 if (!insn_data[icode].operand[0].predicate (op0, mode0))
35994 op0 = copy_to_mode_reg (mode0, op0);
35995 }
35996 else if (op0 != constm1_rtx)
35997 {
35998 op0 = copy_to_reg (op0);
35999 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
36000 }
36001
36002 if (!insn_data[icode].operand[1].predicate (op1, mode1))
36003 op1 = copy_to_mode_reg (mode1, op1);
36004
36005 /* Force the memory operand into a base register here. We don't
36006 want to do this for the memory operands of other builtin
36007 functions. */
36008 op2 = force_reg (Pmode, convert_to_mode (Pmode, op2, 1));
36009
36010 if (!insn_data[icode].operand[2].predicate (op2, Pmode))
36011 op2 = copy_to_mode_reg (Pmode, op2);
36012
36013 if (!insn_data[icode].operand[3].predicate (op3, mode3))
36014 {
36015 error ("the fourth argument must be scale 1, 2, 4, 8");
36016 return const0_rtx;
36017 }
36018
36019 if (!insn_data[icode].operand[4].predicate (op4, mode4))
36020 {
36021 error ("incorrect hint operand");
36022 return const0_rtx;
36023 }
36024
36025 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
36026 if (! pat)
36027 return const0_rtx;
36028
36029 emit_insn (pat);
36030
36031 return 0;
36032
36033 case IX86_BUILTIN_XABORT:
36034 icode = CODE_FOR_xabort;
36035 arg0 = CALL_EXPR_ARG (exp, 0);
36036 op0 = expand_normal (arg0);
36037 mode0 = insn_data[icode].operand[0].mode;
36038 if (!insn_data[icode].operand[0].predicate (op0, mode0))
36039 {
36040 error ("the xabort's argument must be an 8-bit immediate");
36041 return const0_rtx;
36042 }
36043 emit_insn (gen_xabort (op0));
36044 return 0;
36045
36046 default:
36047 break;
36048 }
36049
36050 for (i = 0, d = bdesc_special_args;
36051 i < ARRAY_SIZE (bdesc_special_args);
36052 i++, d++)
36053 if (d->code == fcode)
36054 return ix86_expand_special_args_builtin (d, exp, target);
36055
36056 for (i = 0, d = bdesc_args;
36057 i < ARRAY_SIZE (bdesc_args);
36058 i++, d++)
36059 if (d->code == fcode)
36060 switch (fcode)
36061 {
36062 case IX86_BUILTIN_FABSQ:
36063 case IX86_BUILTIN_COPYSIGNQ:
36064 if (!TARGET_SSE)
36065 /* Emit a normal call if SSE isn't available. */
36066 return expand_call (exp, target, ignore);
36067 default:
36068 return ix86_expand_args_builtin (d, exp, target);
36069 }
36070
36071 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
36072 if (d->code == fcode)
36073 return ix86_expand_sse_comi (d, exp, target);
36074
36075 for (i = 0, d = bdesc_round_args; i < ARRAY_SIZE (bdesc_round_args); i++, d++)
36076 if (d->code == fcode)
36077 return ix86_expand_round_builtin (d, exp, target);
36078
36079 for (i = 0, d = bdesc_pcmpestr;
36080 i < ARRAY_SIZE (bdesc_pcmpestr);
36081 i++, d++)
36082 if (d->code == fcode)
36083 return ix86_expand_sse_pcmpestr (d, exp, target);
36084
36085 for (i = 0, d = bdesc_pcmpistr;
36086 i < ARRAY_SIZE (bdesc_pcmpistr);
36087 i++, d++)
36088 if (d->code == fcode)
36089 return ix86_expand_sse_pcmpistr (d, exp, target);
36090
36091 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
36092 if (d->code == fcode)
36093 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
36094 (enum ix86_builtin_func_type)
36095 d->flag, d->comparison);
36096
36097 gcc_unreachable ();
36098 }
36099
36100 /* This returns the target-specific builtin with code CODE if
36101 current_function_decl has visibility on this builtin, which is checked
36102 using isa flags. Returns NULL_TREE otherwise. */
36103
36104 static tree ix86_get_builtin (enum ix86_builtins code)
36105 {
36106 struct cl_target_option *opts;
36107 tree target_tree = NULL_TREE;
36108
36109 /* Determine the isa flags of current_function_decl. */
36110
36111 if (current_function_decl)
36112 target_tree = DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl);
36113
36114 if (target_tree == NULL)
36115 target_tree = target_option_default_node;
36116
36117 opts = TREE_TARGET_OPTION (target_tree);
36118
36119 if (ix86_builtins_isa[(int) code].isa & opts->x_ix86_isa_flags)
36120 return ix86_builtin_decl (code, true);
36121 else
36122 return NULL_TREE;
36123 }
36124
36125 /* Returns a function decl for a vectorized version of the builtin function
36126 with builtin function code FN, output vector type TYPE_OUT and input vector
36127 type TYPE_IN, or NULL_TREE if it is not available. */
36128
36129 static tree
36130 ix86_builtin_vectorized_function (tree fndecl, tree type_out,
36131 tree type_in)
36132 {
36133 enum machine_mode in_mode, out_mode;
36134 int in_n, out_n;
36135 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
36136
36137 if (TREE_CODE (type_out) != VECTOR_TYPE
36138 || TREE_CODE (type_in) != VECTOR_TYPE
36139 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
36140 return NULL_TREE;
36141
36142 out_mode = TYPE_MODE (TREE_TYPE (type_out));
36143 out_n = TYPE_VECTOR_SUBPARTS (type_out);
36144 in_mode = TYPE_MODE (TREE_TYPE (type_in));
36145 in_n = TYPE_VECTOR_SUBPARTS (type_in);
36146
36147 switch (fn)
36148 {
36149 case BUILT_IN_SQRT:
36150 if (out_mode == DFmode && in_mode == DFmode)
36151 {
36152 if (out_n == 2 && in_n == 2)
36153 return ix86_get_builtin (IX86_BUILTIN_SQRTPD);
36154 else if (out_n == 4 && in_n == 4)
36155 return ix86_get_builtin (IX86_BUILTIN_SQRTPD256);
36156 else if (out_n == 8 && in_n == 8)
36157 return ix86_get_builtin (IX86_BUILTIN_SQRTPD512);
36158 }
36159 break;
36160
36161 case BUILT_IN_EXP2F:
36162 if (out_mode == SFmode && in_mode == SFmode)
36163 {
36164 if (out_n == 16 && in_n == 16)
36165 return ix86_get_builtin (IX86_BUILTIN_EXP2PS);
36166 }
36167 break;
36168
36169 case BUILT_IN_SQRTF:
36170 if (out_mode == SFmode && in_mode == SFmode)
36171 {
36172 if (out_n == 4 && in_n == 4)
36173 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR);
36174 else if (out_n == 8 && in_n == 8)
36175 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR256);
36176 else if (out_n == 16 && in_n == 16)
36177 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR512);
36178 }
36179 break;
36180
36181 case BUILT_IN_IFLOOR:
36182 case BUILT_IN_LFLOOR:
36183 case BUILT_IN_LLFLOOR:
36184 /* The round insn does not trap on denormals. */
36185 if (flag_trapping_math || !TARGET_ROUND)
36186 break;
36187
36188 if (out_mode == SImode && in_mode == DFmode)
36189 {
36190 if (out_n == 4 && in_n == 2)
36191 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX);
36192 else if (out_n == 8 && in_n == 4)
36193 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256);
36194 else if (out_n == 16 && in_n == 8)
36195 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512);
36196 }
36197 break;
36198
36199 case BUILT_IN_IFLOORF:
36200 case BUILT_IN_LFLOORF:
36201 case BUILT_IN_LLFLOORF:
36202 /* The round insn does not trap on denormals. */
36203 if (flag_trapping_math || !TARGET_ROUND)
36204 break;
36205
36206 if (out_mode == SImode && in_mode == SFmode)
36207 {
36208 if (out_n == 4 && in_n == 4)
36209 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX);
36210 else if (out_n == 8 && in_n == 8)
36211 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX256);
36212 }
36213 break;
36214
36215 case BUILT_IN_ICEIL:
36216 case BUILT_IN_LCEIL:
36217 case BUILT_IN_LLCEIL:
36218 /* The round insn does not trap on denormals. */
36219 if (flag_trapping_math || !TARGET_ROUND)
36220 break;
36221
36222 if (out_mode == SImode && in_mode == DFmode)
36223 {
36224 if (out_n == 4 && in_n == 2)
36225 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX);
36226 else if (out_n == 8 && in_n == 4)
36227 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256);
36228 else if (out_n == 16 && in_n == 8)
36229 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512);
36230 }
36231 break;
36232
36233 case BUILT_IN_ICEILF:
36234 case BUILT_IN_LCEILF:
36235 case BUILT_IN_LLCEILF:
36236 /* The round insn does not trap on denormals. */
36237 if (flag_trapping_math || !TARGET_ROUND)
36238 break;
36239
36240 if (out_mode == SImode && in_mode == SFmode)
36241 {
36242 if (out_n == 4 && in_n == 4)
36243 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX);
36244 else if (out_n == 8 && in_n == 8)
36245 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX256);
36246 }
36247 break;
36248
36249 case BUILT_IN_IRINT:
36250 case BUILT_IN_LRINT:
36251 case BUILT_IN_LLRINT:
36252 if (out_mode == SImode && in_mode == DFmode)
36253 {
36254 if (out_n == 4 && in_n == 2)
36255 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX);
36256 else if (out_n == 8 && in_n == 4)
36257 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX256);
36258 }
36259 break;
36260
36261 case BUILT_IN_IRINTF:
36262 case BUILT_IN_LRINTF:
36263 case BUILT_IN_LLRINTF:
36264 if (out_mode == SImode && in_mode == SFmode)
36265 {
36266 if (out_n == 4 && in_n == 4)
36267 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ);
36268 else if (out_n == 8 && in_n == 8)
36269 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ256);
36270 }
36271 break;
36272
36273 case BUILT_IN_IROUND:
36274 case BUILT_IN_LROUND:
36275 case BUILT_IN_LLROUND:
36276 /* The round insn does not trap on denormals. */
36277 if (flag_trapping_math || !TARGET_ROUND)
36278 break;
36279
36280 if (out_mode == SImode && in_mode == DFmode)
36281 {
36282 if (out_n == 4 && in_n == 2)
36283 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX);
36284 else if (out_n == 8 && in_n == 4)
36285 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256);
36286 else if (out_n == 16 && in_n == 8)
36287 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512);
36288 }
36289 break;
36290
36291 case BUILT_IN_IROUNDF:
36292 case BUILT_IN_LROUNDF:
36293 case BUILT_IN_LLROUNDF:
36294 /* The round insn does not trap on denormals. */
36295 if (flag_trapping_math || !TARGET_ROUND)
36296 break;
36297
36298 if (out_mode == SImode && in_mode == SFmode)
36299 {
36300 if (out_n == 4 && in_n == 4)
36301 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX);
36302 else if (out_n == 8 && in_n == 8)
36303 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX256);
36304 }
36305 break;
36306
36307 case BUILT_IN_COPYSIGN:
36308 if (out_mode == DFmode && in_mode == DFmode)
36309 {
36310 if (out_n == 2 && in_n == 2)
36311 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD);
36312 else if (out_n == 4 && in_n == 4)
36313 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD256);
36314 else if (out_n == 8 && in_n == 8)
36315 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD512);
36316 }
36317 break;
36318
36319 case BUILT_IN_COPYSIGNF:
36320 if (out_mode == SFmode && in_mode == SFmode)
36321 {
36322 if (out_n == 4 && in_n == 4)
36323 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS);
36324 else if (out_n == 8 && in_n == 8)
36325 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS256);
36326 else if (out_n == 16 && in_n == 16)
36327 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS512);
36328 }
36329 break;
36330
36331 case BUILT_IN_FLOOR:
36332 /* The round insn does not trap on denormals. */
36333 if (flag_trapping_math || !TARGET_ROUND)
36334 break;
36335
36336 if (out_mode == DFmode && in_mode == DFmode)
36337 {
36338 if (out_n == 2 && in_n == 2)
36339 return ix86_get_builtin (IX86_BUILTIN_FLOORPD);
36340 else if (out_n == 4 && in_n == 4)
36341 return ix86_get_builtin (IX86_BUILTIN_FLOORPD256);
36342 }
36343 break;
36344
36345 case BUILT_IN_FLOORF:
36346 /* The round insn does not trap on denormals. */
36347 if (flag_trapping_math || !TARGET_ROUND)
36348 break;
36349
36350 if (out_mode == SFmode && in_mode == SFmode)
36351 {
36352 if (out_n == 4 && in_n == 4)
36353 return ix86_get_builtin (IX86_BUILTIN_FLOORPS);
36354 else if (out_n == 8 && in_n == 8)
36355 return ix86_get_builtin (IX86_BUILTIN_FLOORPS256);
36356 }
36357 break;
36358
36359 case BUILT_IN_CEIL:
36360 /* The round insn does not trap on denormals. */
36361 if (flag_trapping_math || !TARGET_ROUND)
36362 break;
36363
36364 if (out_mode == DFmode && in_mode == DFmode)
36365 {
36366 if (out_n == 2 && in_n == 2)
36367 return ix86_get_builtin (IX86_BUILTIN_CEILPD);
36368 else if (out_n == 4 && in_n == 4)
36369 return ix86_get_builtin (IX86_BUILTIN_CEILPD256);
36370 }
36371 break;
36372
36373 case BUILT_IN_CEILF:
36374 /* The round insn does not trap on denormals. */
36375 if (flag_trapping_math || !TARGET_ROUND)
36376 break;
36377
36378 if (out_mode == SFmode && in_mode == SFmode)
36379 {
36380 if (out_n == 4 && in_n == 4)
36381 return ix86_get_builtin (IX86_BUILTIN_CEILPS);
36382 else if (out_n == 8 && in_n == 8)
36383 return ix86_get_builtin (IX86_BUILTIN_CEILPS256);
36384 }
36385 break;
36386
36387 case BUILT_IN_TRUNC:
36388 /* The round insn does not trap on denormals. */
36389 if (flag_trapping_math || !TARGET_ROUND)
36390 break;
36391
36392 if (out_mode == DFmode && in_mode == DFmode)
36393 {
36394 if (out_n == 2 && in_n == 2)
36395 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD);
36396 else if (out_n == 4 && in_n == 4)
36397 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD256);
36398 }
36399 break;
36400
36401 case BUILT_IN_TRUNCF:
36402 /* The round insn does not trap on denormals. */
36403 if (flag_trapping_math || !TARGET_ROUND)
36404 break;
36405
36406 if (out_mode == SFmode && in_mode == SFmode)
36407 {
36408 if (out_n == 4 && in_n == 4)
36409 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS);
36410 else if (out_n == 8 && in_n == 8)
36411 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS256);
36412 }
36413 break;
36414
36415 case BUILT_IN_RINT:
36416 /* The round insn does not trap on denormals. */
36417 if (flag_trapping_math || !TARGET_ROUND)
36418 break;
36419
36420 if (out_mode == DFmode && in_mode == DFmode)
36421 {
36422 if (out_n == 2 && in_n == 2)
36423 return ix86_get_builtin (IX86_BUILTIN_RINTPD);
36424 else if (out_n == 4 && in_n == 4)
36425 return ix86_get_builtin (IX86_BUILTIN_RINTPD256);
36426 }
36427 break;
36428
36429 case BUILT_IN_RINTF:
36430 /* The round insn does not trap on denormals. */
36431 if (flag_trapping_math || !TARGET_ROUND)
36432 break;
36433
36434 if (out_mode == SFmode && in_mode == SFmode)
36435 {
36436 if (out_n == 4 && in_n == 4)
36437 return ix86_get_builtin (IX86_BUILTIN_RINTPS);
36438 else if (out_n == 8 && in_n == 8)
36439 return ix86_get_builtin (IX86_BUILTIN_RINTPS256);
36440 }
36441 break;
36442
36443 case BUILT_IN_ROUND:
36444 /* The round insn does not trap on denormals. */
36445 if (flag_trapping_math || !TARGET_ROUND)
36446 break;
36447
36448 if (out_mode == DFmode && in_mode == DFmode)
36449 {
36450 if (out_n == 2 && in_n == 2)
36451 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ);
36452 else if (out_n == 4 && in_n == 4)
36453 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ256);
36454 }
36455 break;
36456
36457 case BUILT_IN_ROUNDF:
36458 /* The round insn does not trap on denormals. */
36459 if (flag_trapping_math || !TARGET_ROUND)
36460 break;
36461
36462 if (out_mode == SFmode && in_mode == SFmode)
36463 {
36464 if (out_n == 4 && in_n == 4)
36465 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ);
36466 else if (out_n == 8 && in_n == 8)
36467 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ256);
36468 }
36469 break;
36470
36471 case BUILT_IN_FMA:
36472 if (out_mode == DFmode && in_mode == DFmode)
36473 {
36474 if (out_n == 2 && in_n == 2)
36475 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD);
36476 if (out_n == 4 && in_n == 4)
36477 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD256);
36478 }
36479 break;
36480
36481 case BUILT_IN_FMAF:
36482 if (out_mode == SFmode && in_mode == SFmode)
36483 {
36484 if (out_n == 4 && in_n == 4)
36485 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS);
36486 if (out_n == 8 && in_n == 8)
36487 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS256);
36488 }
36489 break;
36490
36491 default:
36492 break;
36493 }
36494
36495 /* Dispatch to a handler for a vectorization library. */
36496 if (ix86_veclib_handler)
36497 return ix86_veclib_handler ((enum built_in_function) fn, type_out,
36498 type_in);
36499
36500 return NULL_TREE;
36501 }
36502
36503 /* Handler for an SVML-style interface to
36504 a library with vectorized intrinsics. */
36505
36506 static tree
36507 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
36508 {
36509 char name[20];
36510 tree fntype, new_fndecl, args;
36511 unsigned arity;
36512 const char *bname;
36513 enum machine_mode el_mode, in_mode;
36514 int n, in_n;
36515
36516 /* The SVML is suitable for unsafe math only. */
36517 if (!flag_unsafe_math_optimizations)
36518 return NULL_TREE;
36519
36520 el_mode = TYPE_MODE (TREE_TYPE (type_out));
36521 n = TYPE_VECTOR_SUBPARTS (type_out);
36522 in_mode = TYPE_MODE (TREE_TYPE (type_in));
36523 in_n = TYPE_VECTOR_SUBPARTS (type_in);
36524 if (el_mode != in_mode
36525 || n != in_n)
36526 return NULL_TREE;
36527
36528 switch (fn)
36529 {
36530 case BUILT_IN_EXP:
36531 case BUILT_IN_LOG:
36532 case BUILT_IN_LOG10:
36533 case BUILT_IN_POW:
36534 case BUILT_IN_TANH:
36535 case BUILT_IN_TAN:
36536 case BUILT_IN_ATAN:
36537 case BUILT_IN_ATAN2:
36538 case BUILT_IN_ATANH:
36539 case BUILT_IN_CBRT:
36540 case BUILT_IN_SINH:
36541 case BUILT_IN_SIN:
36542 case BUILT_IN_ASINH:
36543 case BUILT_IN_ASIN:
36544 case BUILT_IN_COSH:
36545 case BUILT_IN_COS:
36546 case BUILT_IN_ACOSH:
36547 case BUILT_IN_ACOS:
36548 if (el_mode != DFmode || n != 2)
36549 return NULL_TREE;
36550 break;
36551
36552 case BUILT_IN_EXPF:
36553 case BUILT_IN_LOGF:
36554 case BUILT_IN_LOG10F:
36555 case BUILT_IN_POWF:
36556 case BUILT_IN_TANHF:
36557 case BUILT_IN_TANF:
36558 case BUILT_IN_ATANF:
36559 case BUILT_IN_ATAN2F:
36560 case BUILT_IN_ATANHF:
36561 case BUILT_IN_CBRTF:
36562 case BUILT_IN_SINHF:
36563 case BUILT_IN_SINF:
36564 case BUILT_IN_ASINHF:
36565 case BUILT_IN_ASINF:
36566 case BUILT_IN_COSHF:
36567 case BUILT_IN_COSF:
36568 case BUILT_IN_ACOSHF:
36569 case BUILT_IN_ACOSF:
36570 if (el_mode != SFmode || n != 4)
36571 return NULL_TREE;
36572 break;
36573
36574 default:
36575 return NULL_TREE;
36576 }
36577
36578 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
36579
36580 if (fn == BUILT_IN_LOGF)
36581 strcpy (name, "vmlsLn4");
36582 else if (fn == BUILT_IN_LOG)
36583 strcpy (name, "vmldLn2");
36584 else if (n == 4)
36585 {
36586 sprintf (name, "vmls%s", bname+10);
36587 name[strlen (name)-1] = '4';
36588 }
36589 else
36590 sprintf (name, "vmld%s2", bname+10);
36591
36592 /* Convert to uppercase. */
36593 name[4] &= ~0x20;
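   /* For example, BUILT_IN_SINF with n == 4 becomes "vmlsSin4" and
      BUILT_IN_SIN with n == 2 becomes "vmldSin2", the SVML routine
      names used with -mveclibabi=svml. */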
36594
36595 arity = 0;
36596 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
36597 args;
36598 args = TREE_CHAIN (args))
36599 arity++;
36600
36601 if (arity == 1)
36602 fntype = build_function_type_list (type_out, type_in, NULL);
36603 else
36604 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
36605
36606 /* Build a function declaration for the vectorized function. */
36607 new_fndecl = build_decl (BUILTINS_LOCATION,
36608 FUNCTION_DECL, get_identifier (name), fntype);
36609 TREE_PUBLIC (new_fndecl) = 1;
36610 DECL_EXTERNAL (new_fndecl) = 1;
36611 DECL_IS_NOVOPS (new_fndecl) = 1;
36612 TREE_READONLY (new_fndecl) = 1;
36613
36614 return new_fndecl;
36615 }
36616
36617 /* Handler for an ACML-style interface to
36618 a library with vectorized intrinsics. */
36619
36620 static tree
36621 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
36622 {
36623 char name[20] = "__vr.._";
36624 tree fntype, new_fndecl, args;
36625 unsigned arity;
36626 const char *bname;
36627 enum machine_mode el_mode, in_mode;
36628 int n, in_n;
36629
36630 /* The ACML is 64-bit only and is suitable for unsafe math only, as
36631 it does not correctly support parts of IEEE with the required
36632 precision, such as denormals. */
36633 if (!TARGET_64BIT
36634 || !flag_unsafe_math_optimizations)
36635 return NULL_TREE;
36636
36637 el_mode = TYPE_MODE (TREE_TYPE (type_out));
36638 n = TYPE_VECTOR_SUBPARTS (type_out);
36639 in_mode = TYPE_MODE (TREE_TYPE (type_in));
36640 in_n = TYPE_VECTOR_SUBPARTS (type_in);
36641 if (el_mode != in_mode
36642 || n != in_n)
36643 return NULL_TREE;
36644
36645 switch (fn)
36646 {
36647 case BUILT_IN_SIN:
36648 case BUILT_IN_COS:
36649 case BUILT_IN_EXP:
36650 case BUILT_IN_LOG:
36651 case BUILT_IN_LOG2:
36652 case BUILT_IN_LOG10:
36653 name[4] = 'd';
36654 name[5] = '2';
36655 if (el_mode != DFmode
36656 || n != 2)
36657 return NULL_TREE;
36658 break;
36659
36660 case BUILT_IN_SINF:
36661 case BUILT_IN_COSF:
36662 case BUILT_IN_EXPF:
36663 case BUILT_IN_POWF:
36664 case BUILT_IN_LOGF:
36665 case BUILT_IN_LOG2F:
36666 case BUILT_IN_LOG10F:
36667 name[4] = 's';
36668 name[5] = '4';
36669 if (el_mode != SFmode
36670 || n != 4)
36671 return NULL_TREE;
36672 break;
36673
36674 default:
36675 return NULL_TREE;
36676 }
36677
36678 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
36679 sprintf (name + 7, "%s", bname+10);
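   /* For example, BUILT_IN_SIN becomes "__vrd2_sin" and BUILT_IN_SINF
      becomes "__vrs4_sinf", the ACML vector routine names used with
      -mveclibabi=acml. */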
36680
36681 arity = 0;
36682 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
36683 args;
36684 args = TREE_CHAIN (args))
36685 arity++;
36686
36687 if (arity == 1)
36688 fntype = build_function_type_list (type_out, type_in, NULL);
36689 else
36690 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
36691
36692 /* Build a function declaration for the vectorized function. */
36693 new_fndecl = build_decl (BUILTINS_LOCATION,
36694 FUNCTION_DECL, get_identifier (name), fntype);
36695 TREE_PUBLIC (new_fndecl) = 1;
36696 DECL_EXTERNAL (new_fndecl) = 1;
36697 DECL_IS_NOVOPS (new_fndecl) = 1;
36698 TREE_READONLY (new_fndecl) = 1;
36699
36700 return new_fndecl;
36701 }
36702
36703 /* Returns a decl of a function that implements a gather load with
36704 memory type MEM_VECTYPE, index type INDEX_TYPE and scale SCALE.
36705 Return NULL_TREE if it is not available. */
36706
36707 static tree
36708 ix86_vectorize_builtin_gather (const_tree mem_vectype,
36709 const_tree index_type, int scale)
36710 {
36711 bool si;
36712 enum ix86_builtins code;
36713
36714 if (! TARGET_AVX2)
36715 return NULL_TREE;
36716
36717 if ((TREE_CODE (index_type) != INTEGER_TYPE
36718 && !POINTER_TYPE_P (index_type))
36719 || (TYPE_MODE (index_type) != SImode
36720 && TYPE_MODE (index_type) != DImode))
36721 return NULL_TREE;
36722
36723 if (TYPE_PRECISION (index_type) > POINTER_SIZE)
36724 return NULL_TREE;
36725
36726 /* v*gather* insn sign extends index to pointer mode. */
36727 if (TYPE_PRECISION (index_type) < POINTER_SIZE
36728 && TYPE_UNSIGNED (index_type))
36729 return NULL_TREE;
36730
36731 if (scale <= 0
36732 || scale > 8
36733 || (scale & (scale - 1)) != 0)
36734 return NULL_TREE;
36735
36736 si = TYPE_MODE (index_type) == SImode;
36737 switch (TYPE_MODE (mem_vectype))
36738 {
36739 case V2DFmode:
36740 code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF;
36741 break;
36742 case V4DFmode:
36743 code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF;
36744 break;
36745 case V2DImode:
36746 code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI;
36747 break;
36748 case V4DImode:
36749 code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI;
36750 break;
36751 case V4SFmode:
36752 code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF;
36753 break;
36754 case V8SFmode:
36755 code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF;
36756 break;
36757 case V4SImode:
36758 code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI;
36759 break;
36760 case V8SImode:
36761 code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI;
36762 break;
36763 case V8DFmode:
36764 if (TARGET_AVX512F)
36765 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DF : IX86_BUILTIN_GATHER3DIV8DF;
36766 else
36767 return NULL_TREE;
36768 break;
36769 case V8DImode:
36770 if (TARGET_AVX512F)
36771 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DI : IX86_BUILTIN_GATHER3DIV8DI;
36772 else
36773 return NULL_TREE;
36774 break;
36775 case V16SFmode:
36776 if (TARGET_AVX512F)
36777 code = si ? IX86_BUILTIN_GATHER3SIV16SF : IX86_BUILTIN_GATHER3ALTDIV16SF;
36778 else
36779 return NULL_TREE;
36780 break;
36781 case V16SImode:
36782 if (TARGET_AVX512F)
36783 code = si ? IX86_BUILTIN_GATHER3SIV16SI : IX86_BUILTIN_GATHER3ALTDIV16SI;
36784 else
36785 return NULL_TREE;
36786 break;
36787 default:
36788 return NULL_TREE;
36789 }
36790
36791 return ix86_get_builtin (code);
36792 }
36793
36794 /* Returns a decl for a target-specific builtin that implements
36795 the reciprocal of the function FN, or NULL_TREE if not available. */
36796
36797 static tree
36798 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
36799 bool sqrt ATTRIBUTE_UNUSED)
36800 {
36801 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
36802 && flag_finite_math_only && !flag_trapping_math
36803 && flag_unsafe_math_optimizations))
36804 return NULL_TREE;
36805
36806 if (md_fn)
36807 /* Machine dependent builtins. */
36808 switch (fn)
36809 {
36810 /* Vectorized version of sqrt to rsqrt conversion. */
36811 case IX86_BUILTIN_SQRTPS_NR:
36812 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR);
36813
36814 case IX86_BUILTIN_SQRTPS_NR256:
36815 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR256);
36816
36817 default:
36818 return NULL_TREE;
36819 }
36820 else
36821 /* Normal builtins. */
36822 switch (fn)
36823 {
36824 /* Sqrt to rsqrt conversion. */
36825 case BUILT_IN_SQRTF:
36826 return ix86_get_builtin (IX86_BUILTIN_RSQRTF);
36827
36828 default:
36829 return NULL_TREE;
36830 }
36831 }
36832 \f
36833 /* Helper for avx_vpermilps256_operand et al. This is also used by
36834 the expansion functions to turn the parallel back into a mask.
36835 The return value is 0 for no match and the imm8+1 for a match. */
36836
36837 int
36838 avx_vpermilp_parallel (rtx par, enum machine_mode mode)
36839 {
36840 unsigned i, nelt = GET_MODE_NUNITS (mode);
36841 unsigned mask = 0;
36842 unsigned char ipar[16] = {}; /* Silence -Wuninitialized warning. */
36843
36844 if (XVECLEN (par, 0) != (int) nelt)
36845 return 0;
36846
36847 /* Validate that all of the elements are constants, and not totally
36848 out of range. Copy the data into an integral array to make the
36849 subsequent checks easier. */
36850 for (i = 0; i < nelt; ++i)
36851 {
36852 rtx er = XVECEXP (par, 0, i);
36853 unsigned HOST_WIDE_INT ei;
36854
36855 if (!CONST_INT_P (er))
36856 return 0;
36857 ei = INTVAL (er);
36858 if (ei >= nelt)
36859 return 0;
36860 ipar[i] = ei;
36861 }
36862
36863 switch (mode)
36864 {
36865 case V8DFmode:
36866 /* In the 512-bit DFmode case, we can only move elements within
36867 a 128-bit lane. First fill the second part of the mask,
36868 then fallthru. */
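	/* For example, the parallel (1 0 3 2 5 4 7 6), which swaps the
	   two elements within each 128-bit lane, yields imm8 0x55
	   (and this function therefore returns 0x56). */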
36869 for (i = 4; i < 6; ++i)
36870 {
36871 if (ipar[i] < 4 || ipar[i] >= 6)
36872 return 0;
36873 mask |= (ipar[i] - 4) << i;
36874 }
36875 for (i = 6; i < 8; ++i)
36876 {
36877 if (ipar[i] < 6)
36878 return 0;
36879 mask |= (ipar[i] - 6) << i;
36880 }
36881 /* FALLTHRU */
36882
36883 case V4DFmode:
36884 /* In the 256-bit DFmode case, we can only move elements within
36885 a 128-bit lane. */
36886 for (i = 0; i < 2; ++i)
36887 {
36888 if (ipar[i] >= 2)
36889 return 0;
36890 mask |= ipar[i] << i;
36891 }
36892 for (i = 2; i < 4; ++i)
36893 {
36894 if (ipar[i] < 2)
36895 return 0;
36896 mask |= (ipar[i] - 2) << i;
36897 }
36898 break;
36899
36900 case V16SFmode:
36901 /* In the 512-bit SFmode case, the permutation in the upper 256 bits
36902 must mirror the permutation in the lower 256 bits. */
36903 for (i = 0; i < 8; ++i)
36904 if (ipar[i] + 8 != ipar[i + 8])
36905 return 0;
36906 /* FALLTHRU */
36907
36908 case V8SFmode:
36909 /* In the 256-bit SFmode case, we have full freedom of
36910 movement within the low 128-bit lane, but the high 128-bit
36911 lane must mirror the exact same pattern. */
36912 for (i = 0; i < 4; ++i)
36913 if (ipar[i] + 4 != ipar[i + 4])
36914 return 0;
36915 nelt = 4;
36916 /* FALLTHRU */
36917
36918 case V2DFmode:
36919 case V4SFmode:
36920 /* In the 128-bit case, we have full freedom in the placement of
36921 the elements from the source operand. */
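	/* For example, for V4SFmode the parallel
	     (parallel [(const_int 3) (const_int 2) (const_int 1) (const_int 0)])
	   reverses the vector and yields imm8 0x1b (this function
	   returns 0x1c). */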
36922 for (i = 0; i < nelt; ++i)
36923 mask |= ipar[i] << (i * (nelt / 2));
36924 break;
36925
36926 default:
36927 gcc_unreachable ();
36928 }
36929
36930 /* Make sure success has a non-zero value by adding one. */
36931 return mask + 1;
36932 }
36933
36934 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
36935 the expansion functions to turn the parallel back into a mask.
36936 The return value is 0 for no match and the imm8+1 for a match. */
36937
36938 int
36939 avx_vperm2f128_parallel (rtx par, enum machine_mode mode)
36940 {
36941 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
36942 unsigned mask = 0;
36943 unsigned char ipar[8] = {}; /* Silence -Wuninitialized warning. */
36944
36945 if (XVECLEN (par, 0) != (int) nelt)
36946 return 0;
36947
36948 /* Validate that all of the elements are constants, and not totally
36949 out of range. Copy the data into an integral array to make the
36950 subsequent checks easier. */
36951 for (i = 0; i < nelt; ++i)
36952 {
36953 rtx er = XVECEXP (par, 0, i);
36954 unsigned HOST_WIDE_INT ei;
36955
36956 if (!CONST_INT_P (er))
36957 return 0;
36958 ei = INTVAL (er);
36959 if (ei >= 2 * nelt)
36960 return 0;
36961 ipar[i] = ei;
36962 }
36963
36964 /* Validate that each half of the permute selects consecutive elements. */
36965 for (i = 0; i < nelt2 - 1; ++i)
36966 if (ipar[i] + 1 != ipar[i + 1])
36967 return 0;
36968 for (i = nelt2; i < nelt - 1; ++i)
36969 if (ipar[i] + 1 != ipar[i + 1])
36970 return 0;
36971
36972 /* Reconstruct the mask. */
36973 for (i = 0; i < 2; ++i)
36974 {
36975 unsigned e = ipar[i * nelt2];
36976 if (e % nelt2)
36977 return 0;
36978 e /= nelt2;
36979 mask |= e << (i * 4);
36980 }
36981
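   /* For example, for V4DFmode the parallel (2 3 4 5) selects the high
      128-bit lane of the first operand and the low lane of the second,
      yielding imm8 0x21 (this function returns 0x22). */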
36982 /* Make sure success has a non-zero value by adding one. */
36983 return mask + 1;
36984 }
36985 \f
36986 /* Return a register priority for hard reg REGNO. */
36987 static int
36988 ix86_register_priority (int hard_regno)
36989 {
36990 /* ebp and r13 as the base always want a displacement, and r12 as the
36991 base always wants an index. So discourage their use in an
36992 address. */
36993 if (hard_regno == R12_REG || hard_regno == R13_REG)
36994 return 0;
36995 if (hard_regno == BP_REG)
36996 return 1;
36997 /* New x86-64 int registers result in bigger code size. Discourage
36998 them. */
36999 if (FIRST_REX_INT_REG <= hard_regno && hard_regno <= LAST_REX_INT_REG)
37000 return 2;
37001 /* New x86-64 SSE registers result in bigger code size. Discourage
37002 them. */
37003 if (FIRST_REX_SSE_REG <= hard_regno && hard_regno <= LAST_REX_SSE_REG)
37004 return 2;
37005 /* Usage of AX register results in smaller code. Prefer it. */
37006 if (hard_regno == 0)
37007 return 4;
37008 return 3;
37009 }
37010
37011 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
37012
37013 Put float CONST_DOUBLE in the constant pool instead of fp regs.
37014 QImode must go into class Q_REGS.
37015 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
37016 movdf to do mem-to-mem moves through integer regs. */
37017
37018 static reg_class_t
37019 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
37020 {
37021 enum machine_mode mode = GET_MODE (x);
37022
37023 /* We're only allowed to return a subclass of CLASS. Many of the
37024 following checks fail for NO_REGS, so eliminate that early. */
37025 if (regclass == NO_REGS)
37026 return NO_REGS;
37027
37028 /* All classes can load zeros. */
37029 if (x == CONST0_RTX (mode))
37030 return regclass;
37031
37032 /* Force constants into memory if we are loading a (nonzero) constant into
37033 an MMX, SSE or MASK register. This is because there are no MMX/SSE/MASK
37034 instructions to load from a constant. */
37035 if (CONSTANT_P (x)
37036 && (MAYBE_MMX_CLASS_P (regclass)
37037 || MAYBE_SSE_CLASS_P (regclass)
37038 || MAYBE_MASK_CLASS_P (regclass)))
37039 return NO_REGS;
37040
37041 /* Prefer SSE regs only, if we can use them for math. */
37042 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
37043 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
37044
37045 /* Floating-point constants need more complex checks. */
37046 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
37047 {
37048 /* General regs can load everything. */
37049 if (reg_class_subset_p (regclass, GENERAL_REGS))
37050 return regclass;
37051
37052 /* Floats can load 0 and 1 plus some others. Note that we eliminated
37053 zero above. We only want to wind up preferring 80387 registers if
37054 we plan on doing computation with them. */
37055 if (TARGET_80387
37056 && standard_80387_constant_p (x) > 0)
37057 {
37058 /* Limit class to non-sse. */
37059 if (regclass == FLOAT_SSE_REGS)
37060 return FLOAT_REGS;
37061 if (regclass == FP_TOP_SSE_REGS)
37062 return FP_TOP_REG;
37063 if (regclass == FP_SECOND_SSE_REGS)
37064 return FP_SECOND_REG;
37065 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
37066 return regclass;
37067 }
37068
37069 return NO_REGS;
37070 }
37071
37072 /* Generally when we see PLUS here, it's the function invariant
37073 (plus soft-fp const_int), which can only be computed into general
37074 regs. */
37075 if (GET_CODE (x) == PLUS)
37076 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
37077
37078 /* QImode constants are easy to load, but non-constant QImode data
37079 must go into Q_REGS. */
37080 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
37081 {
37082 if (reg_class_subset_p (regclass, Q_REGS))
37083 return regclass;
37084 if (reg_class_subset_p (Q_REGS, regclass))
37085 return Q_REGS;
37086 return NO_REGS;
37087 }
37088
37089 return regclass;
37090 }
37091
37092 /* Discourage putting floating-point values in SSE registers unless
37093 SSE math is being used, and likewise for the 387 registers. */
37094 static reg_class_t
37095 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
37096 {
37097 enum machine_mode mode = GET_MODE (x);
37098
37099 /* Restrict the output reload class to the register bank that we are doing
37100 math on. If we would like not to return a subset of CLASS, reject this
37101 alternative: if reload cannot do this, it will still use its choice. */
37103 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
37104 return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS;
37105
37106 if (X87_FLOAT_MODE_P (mode))
37107 {
37108 if (regclass == FP_TOP_SSE_REGS)
37109 return FP_TOP_REG;
37110 else if (regclass == FP_SECOND_SSE_REGS)
37111 return FP_SECOND_REG;
37112 else
37113 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
37114 }
37115
37116 return regclass;
37117 }
37118
37119 static reg_class_t
37120 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
37121 enum machine_mode mode, secondary_reload_info *sri)
37122 {
37123 /* Double-word spills from general registers to non-offsettable memory
37124 references (zero-extended addresses) require special handling. */
37125 if (TARGET_64BIT
37126 && MEM_P (x)
37127 && GET_MODE_SIZE (mode) > UNITS_PER_WORD
37128 && INTEGER_CLASS_P (rclass)
37129 && !offsettable_memref_p (x))
37130 {
37131 sri->icode = (in_p
37132 ? CODE_FOR_reload_noff_load
37133 : CODE_FOR_reload_noff_store);
37134 /* Add the cost of moving address to a temporary. */
37135 sri->extra_cost = 1;
37136
37137 return NO_REGS;
37138 }
37139
37140 /* QImode spills from non-QI registers require an
37141 intermediate register on 32-bit targets. */
37142 if (mode == QImode
37143 && (MAYBE_MASK_CLASS_P (rclass)
37144 || (!TARGET_64BIT && !in_p
37145 && INTEGER_CLASS_P (rclass)
37146 && MAYBE_NON_Q_CLASS_P (rclass))))
37147 {
37148 int regno;
37149
37150 if (REG_P (x))
37151 regno = REGNO (x);
37152 else
37153 regno = -1;
37154
37155 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
37156 regno = true_regnum (x);
37157
37158 /* Return Q_REGS if the operand is in memory. */
37159 if (regno == -1)
37160 return Q_REGS;
37161 }
37162
37163 /* This condition handles corner case where an expression involving
37164 pointers gets vectorized. We're trying to use the address of a
37165 stack slot as a vector initializer.
37166
37167 (set (reg:V2DI 74 [ vect_cst_.2 ])
37168 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
37169
37170 Eventually frame gets turned into sp+offset like this:
37171
37172 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
37173 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
37174 (const_int 392 [0x188]))))
37175
37176 That later gets turned into:
37177
37178 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
37179 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
37180 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
37181
37182 We'll have the following reload recorded:
37183
37184 Reload 0: reload_in (DI) =
37185 (plus:DI (reg/f:DI 7 sp)
37186 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
37187 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
37188 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
37189 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
37190 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
37191 reload_reg_rtx: (reg:V2DI 22 xmm1)
37192
37193 Which isn't going to work since SSE instructions can't handle scalar
37194 additions. Returning GENERAL_REGS forces the addition into integer
37195 register and reload can handle subsequent reloads without problems. */
37196
37197 if (in_p && GET_CODE (x) == PLUS
37198 && SSE_CLASS_P (rclass)
37199 && SCALAR_INT_MODE_P (mode))
37200 return GENERAL_REGS;
37201
37202 return NO_REGS;
37203 }
37204
37205 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
37206
37207 static bool
37208 ix86_class_likely_spilled_p (reg_class_t rclass)
37209 {
37210 switch (rclass)
37211 {
37212 case AREG:
37213 case DREG:
37214 case CREG:
37215 case BREG:
37216 case AD_REGS:
37217 case SIREG:
37218 case DIREG:
37219 case SSE_FIRST_REG:
37220 case FP_TOP_REG:
37221 case FP_SECOND_REG:
37222 return true;
37223
37224 default:
37225 break;
37226 }
37227
37228 return false;
37229 }
37230
37231 /* If we are copying between general and FP registers, we need a memory
37232 location. The same is true for SSE and MMX registers.
37233
37234 To optimize register_move_cost performance, allow inline variant.
37235
37236 The macro can't work reliably when one of the CLASSES is a class containing
37237 registers from multiple units (SSE, MMX, integer). We avoid this by never
37238 combining those units in a single alternative in the machine description.
37239 Ensure that this constraint holds to avoid unexpected surprises.
37240
37241 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
37242 enforce these sanity checks. */
37243
37244 static inline bool
37245 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
37246 enum machine_mode mode, int strict)
37247 {
37248 if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
37249 return false;
37250 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
37251 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
37252 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
37253 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
37254 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
37255 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
37256 {
37257 gcc_assert (!strict || lra_in_progress);
37258 return true;
37259 }
37260
37261 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
37262 return true;
37263
37264 /* ??? This is a lie. We do have moves between mmx/general, and between
37265 mmx/sse2. But by saying we need secondary memory we discourage the
37266 register allocator from using the mmx registers unless needed. */
37267 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
37268 return true;
37269
37270 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
37271 {
37272 /* SSE1 doesn't have any direct moves from other classes. */
37273 if (!TARGET_SSE2)
37274 return true;
37275
37276 /* If the target says that inter-unit moves are more expensive
37277 than moving through memory, then don't generate them. */
37278 if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
37279 || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
37280 return true;
37281
37282 /* Between SSE and general, we have moves no larger than word size. */
37283 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
37284 return true;
37285 }
37286
37287 return false;
37288 }
37289
37290 bool
37291 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
37292 enum machine_mode mode, int strict)
37293 {
37294 return inline_secondary_memory_needed (class1, class2, mode, strict);
37295 }
37296
37297 /* Implement the TARGET_CLASS_MAX_NREGS hook.
37298
37299 On the 80386, this is the size of MODE in words,
37300 except in the FP regs, where a single reg is always enough. */
37301
37302 static unsigned char
37303 ix86_class_max_nregs (reg_class_t rclass, enum machine_mode mode)
37304 {
37305 if (MAYBE_INTEGER_CLASS_P (rclass))
37306 {
37307 if (mode == XFmode)
37308 return (TARGET_64BIT ? 2 : 3);
37309 else if (mode == XCmode)
37310 return (TARGET_64BIT ? 4 : 6);
37311 else
37312 return ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
37313 }
37314 else
37315 {
37316 if (COMPLEX_MODE_P (mode))
37317 return 2;
37318 else
37319 return 1;
37320 }
37321 }
37322
37323 /* Return true if the registers in CLASS cannot represent the change from
37324 modes FROM to TO. */
37325
37326 bool
37327 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
37328 enum reg_class regclass)
37329 {
37330 if (from == to)
37331 return false;
37332
37333 /* x87 registers can't do subreg at all, as all values are reformatted
37334 to extended precision. */
37335 if (MAYBE_FLOAT_CLASS_P (regclass))
37336 return true;
37337
37338 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
37339 {
37340 /* Vector registers do not support QI or HImode loads. If we don't
37341 disallow a change to these modes, reload will assume it's ok to
37342 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
37343 the vec_dupv4hi pattern. */
37344 if (GET_MODE_SIZE (from) < 4)
37345 return true;
37346
37347 /* Vector registers do not support subreg with nonzero offsets, which
37348 are otherwise valid for integer registers. Since we can't see
37349 whether we have a nonzero offset from here, prohibit all
37350 nonparadoxical subregs changing size. */
37351 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
37352 return true;
37353 }
37354
37355 return false;
37356 }
37357
37358 /* Return the cost of moving data of mode M between a
37359 register and memory. A value of 2 is the default; this cost is
37360 relative to those in `REGISTER_MOVE_COST'.
37361
37362 This function is used extensively by register_move_cost that is used to
37363 build tables at startup. Make it inline in this case.
37364 When IN is 2, return maximum of in and out move cost.
37365
37366 If moving between registers and memory is more expensive than
37367 between two registers, you should define this macro to express the
37368 relative cost.
37369
37370 Also model the increased cost of moving QImode values in non-Q_REGS
37371 classes.
37372 */
37373 static inline int
37374 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
37375 int in)
37376 {
37377 int cost;
37378 if (FLOAT_CLASS_P (regclass))
37379 {
37380 int index;
37381 switch (mode)
37382 {
37383 case SFmode:
37384 index = 0;
37385 break;
37386 case DFmode:
37387 index = 1;
37388 break;
37389 case XFmode:
37390 index = 2;
37391 break;
37392 default:
37393 return 100;
37394 }
37395 if (in == 2)
37396 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
37397 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
37398 }
37399 if (SSE_CLASS_P (regclass))
37400 {
37401 int index;
37402 switch (GET_MODE_SIZE (mode))
37403 {
37404 case 4:
37405 index = 0;
37406 break;
37407 case 8:
37408 index = 1;
37409 break;
37410 case 16:
37411 index = 2;
37412 break;
37413 default:
37414 return 100;
37415 }
37416 if (in == 2)
37417 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
37418 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
37419 }
37420 if (MMX_CLASS_P (regclass))
37421 {
37422 int index;
37423 switch (GET_MODE_SIZE (mode))
37424 {
37425 case 4:
37426 index = 0;
37427 break;
37428 case 8:
37429 index = 1;
37430 break;
37431 default:
37432 return 100;
37433 }
37434 if (in == 2)
37435 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
37436 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
37437 }
37438 switch (GET_MODE_SIZE (mode))
37439 {
37440 case 1:
37441 if (Q_CLASS_P (regclass) || TARGET_64BIT)
37442 {
37443 if (!in)
37444 return ix86_cost->int_store[0];
37445 if (TARGET_PARTIAL_REG_DEPENDENCY
37446 && optimize_function_for_speed_p (cfun))
37447 cost = ix86_cost->movzbl_load;
37448 else
37449 cost = ix86_cost->int_load[0];
37450 if (in == 2)
37451 return MAX (cost, ix86_cost->int_store[0]);
37452 return cost;
37453 }
37454 else
37455 {
37456 if (in == 2)
37457 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
37458 if (in)
37459 return ix86_cost->movzbl_load;
37460 else
37461 return ix86_cost->int_store[0] + 4;
37462 }
37463 break;
37464 case 2:
37465 if (in == 2)
37466 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
37467 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
37468 default:
37469 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
37470 if (mode == TFmode)
37471 mode = XFmode;
37472 if (in == 2)
37473 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
37474 else if (in)
37475 cost = ix86_cost->int_load[2];
37476 else
37477 cost = ix86_cost->int_store[2];
37478 return (cost * (((int) GET_MODE_SIZE (mode)
37479 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
37480 }
37481 }
37482
37483 static int
37484 ix86_memory_move_cost (enum machine_mode mode, reg_class_t regclass,
37485 bool in)
37486 {
37487 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
37488 }
37489
37490
37491 /* Return the cost of moving data from a register in class CLASS1 to
37492 one in class CLASS2.
37493
37494 It is not required that the cost always equal 2 when FROM is the same as TO;
37495 on some machines it is expensive to move between registers if they are not
37496 general registers. */
37497
37498 static int
37499 ix86_register_move_cost (enum machine_mode mode, reg_class_t class1_i,
37500 reg_class_t class2_i)
37501 {
37502 enum reg_class class1 = (enum reg_class) class1_i;
37503 enum reg_class class2 = (enum reg_class) class2_i;
37504
37505 /* In case we require secondary memory, compute cost of the store followed
37506 by load. In order to avoid bad register allocation choices, we need
37507 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
37508
37509 if (inline_secondary_memory_needed (class1, class2, mode, 0))
37510 {
37511 int cost = 1;
37512
37513 cost += inline_memory_move_cost (mode, class1, 2);
37514 cost += inline_memory_move_cost (mode, class2, 2);
37515
37516 /* When copying from a general purpose register we may emit multiple
37517 stores followed by a single load, causing a memory size mismatch stall.
37518 Count this as an arbitrarily high cost of 20. */
37519 if (targetm.class_max_nregs (class1, mode)
37520 > targetm.class_max_nregs (class2, mode))
37521 cost += 20;
37522
37523 /* In the case of FP/MMX moves, the registers actually overlap, and we
37524 have to switch modes in order to treat them differently. */
37525 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
37526 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
37527 cost += 20;
37528
37529 return cost;
37530 }
37531
37532 /* Moves between SSE/MMX and integer unit are expensive. */
37533 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
37534 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
37535
37536 /* ??? By keeping the returned value relatively high, we limit the number
37537 of moves between integer and MMX/SSE registers for all targets.
37538 Additionally, a high value prevents a problem with x86_modes_tieable_p(),
37539 where integer modes in MMX/SSE registers are not tieable
37540 because of missing QImode and HImode moves to, from or between
37541 MMX/SSE registers. */
37542 return MAX (8, ix86_cost->mmxsse_to_integer);
37543
37544 if (MAYBE_FLOAT_CLASS_P (class1))
37545 return ix86_cost->fp_move;
37546 if (MAYBE_SSE_CLASS_P (class1))
37547 return ix86_cost->sse_move;
37548 if (MAYBE_MMX_CLASS_P (class1))
37549 return ix86_cost->mmx_move;
37550 return 2;
37551 }
37552
37553 /* Return TRUE if hard register REGNO can hold a value of machine-mode
37554 MODE. */
37555
37556 bool
37557 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
37558 {
37559 /* Flags, and only flags, can hold CCmode values. */
37560 if (CC_REGNO_P (regno))
37561 return GET_MODE_CLASS (mode) == MODE_CC;
37562 if (GET_MODE_CLASS (mode) == MODE_CC
37563 || GET_MODE_CLASS (mode) == MODE_RANDOM
37564 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
37565 return false;
37566 if (STACK_REGNO_P (regno))
37567 return VALID_FP_MODE_P (mode);
37568 if (MASK_REGNO_P (regno))
37569 return VALID_MASK_REG_MODE (mode);
37570 if (SSE_REGNO_P (regno))
37571 {
37572 /* We implement the move patterns for all vector modes into and
37573 out of SSE registers, even when no operation instructions
37574 are available. */
37575
37576 /* For AVX-512 we allow, regardless of regno:
37577 - XI mode
37578 - any of 512-bit wide vector mode
37579 - any scalar mode. */
37580 if (TARGET_AVX512F
37581 && (mode == XImode
37582 || VALID_AVX512F_REG_MODE (mode)
37583 || VALID_AVX512F_SCALAR_MODE (mode)))
37584 return true;
37585
37586 /* xmm16-xmm31 are only available for AVX-512. */
37587 if (EXT_REX_SSE_REGNO_P (regno))
37588 return false;
37589
37590 /* OImode and AVX modes are available only when AVX is enabled. */
37591 return ((TARGET_AVX
37592 && VALID_AVX256_REG_OR_OI_MODE (mode))
37593 || VALID_SSE_REG_MODE (mode)
37594 || VALID_SSE2_REG_MODE (mode)
37595 || VALID_MMX_REG_MODE (mode)
37596 || VALID_MMX_REG_MODE_3DNOW (mode));
37597 }
37598 if (MMX_REGNO_P (regno))
37599 {
37600 /* We implement the move patterns for 3DNOW modes even in MMX mode,
37601 so if the register is available at all, then we can move data of
37602 the given mode into or out of it. */
37603 return (VALID_MMX_REG_MODE (mode)
37604 || VALID_MMX_REG_MODE_3DNOW (mode));
37605 }
37606
37607 if (mode == QImode)
37608 {
37609 /* Take care with QImode values: they can live in non-QI regs,
37610 but they then cause partial register stalls. */
37611 if (ANY_QI_REGNO_P (regno))
37612 return true;
37613 if (!TARGET_PARTIAL_REG_STALL)
37614 return true;
37615 /* LRA checks if the hard register is OK for the given mode.
37616 QImode values can live in non-QI regs, so we allow all
37617 registers here. */
37618 if (lra_in_progress)
37619 return true;
37620 return !can_create_pseudo_p ();
37621 }
37622 /* We handle both integer and floats in the general purpose registers. */
37623 else if (VALID_INT_MODE_P (mode))
37624 return true;
37625 else if (VALID_FP_MODE_P (mode))
37626 return true;
37627 else if (VALID_DFP_MODE_P (mode))
37628 return true;
37629 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
37630 on to use that value in smaller contexts, this can easily force a
37631 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
37632 supporting DImode, allow it. */
37633 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
37634 return true;
37635
37636 return false;
37637 }
37638
37639 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
37640 tieable integer mode. */
37641
37642 static bool
37643 ix86_tieable_integer_mode_p (enum machine_mode mode)
37644 {
37645 switch (mode)
37646 {
37647 case HImode:
37648 case SImode:
37649 return true;
37650
37651 case QImode:
37652 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
37653
37654 case DImode:
37655 return TARGET_64BIT;
37656
37657 default:
37658 return false;
37659 }
37660 }
37661
37662 /* Return true if MODE1 is accessible in a register that can hold MODE2
37663 without copying. That is, all register classes that can hold MODE2
37664 can also hold MODE1. */
37665
37666 bool
37667 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
37668 {
37669 if (mode1 == mode2)
37670 return true;
37671
37672 if (ix86_tieable_integer_mode_p (mode1)
37673 && ix86_tieable_integer_mode_p (mode2))
37674 return true;
37675
37676 /* MODE2 being XFmode implies fp stack or general regs, which means we
37677 can tie any smaller floating point modes to it. Note that we do not
37678 tie this with TFmode. */
37679 if (mode2 == XFmode)
37680 return mode1 == SFmode || mode1 == DFmode;
37681
37682 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
37683 that we can tie it with SFmode. */
37684 if (mode2 == DFmode)
37685 return mode1 == SFmode;
37686
37687 /* If MODE2 is only appropriate for an SSE register, then tie with
37688 any other mode acceptable to SSE registers. */
37689 if (GET_MODE_SIZE (mode2) == 32
37690 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
37691 return (GET_MODE_SIZE (mode1) == 32
37692 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
37693 if (GET_MODE_SIZE (mode2) == 16
37694 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
37695 return (GET_MODE_SIZE (mode1) == 16
37696 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
37697
37698 /* If MODE2 is appropriate for an MMX register, then tie
37699 with any other mode acceptable to MMX registers. */
37700 if (GET_MODE_SIZE (mode2) == 8
37701 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
37702 return (GET_MODE_SIZE (mode1) == 8
37703 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
37704
37705 return false;
37706 }
37707
37708 /* Return the cost of moving between two registers of mode MODE. */
37709
37710 static int
37711 ix86_set_reg_reg_cost (enum machine_mode mode)
37712 {
37713 unsigned int units = UNITS_PER_WORD;
37714
37715 switch (GET_MODE_CLASS (mode))
37716 {
37717 default:
37718 break;
37719
37720 case MODE_CC:
37721 units = GET_MODE_SIZE (CCmode);
37722 break;
37723
37724 case MODE_FLOAT:
37725 if ((TARGET_SSE && mode == TFmode)
37726 || (TARGET_80387 && mode == XFmode)
37727 || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
37728 || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
37729 units = GET_MODE_SIZE (mode);
37730 break;
37731
37732 case MODE_COMPLEX_FLOAT:
37733 if ((TARGET_SSE && mode == TCmode)
37734 || (TARGET_80387 && mode == XCmode)
37735 || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
37736 || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
37737 units = GET_MODE_SIZE (mode);
37738 break;
37739
37740 case MODE_VECTOR_INT:
37741 case MODE_VECTOR_FLOAT:
37742 if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
37743 || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
37744 || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
37745 || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
37746 || (TARGET_MMX && VALID_MMX_REG_MODE (mode)))
37747 units = GET_MODE_SIZE (mode);
37748 }
37749
37750 /* Return the cost of moving between two registers of mode MODE,
37751 assuming that the move will be in pieces of at most UNITS bytes. */
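	/* For example, moving a 32-byte V8SFmode value costs COSTS_N_INSNS (1)
	   when AVX is enabled (units == 32), but COSTS_N_INSNS (4) on a 64-bit
	   target without AVX, where it is moved in word-size pieces. */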
37752 return COSTS_N_INSNS ((GET_MODE_SIZE (mode) + units - 1) / units);
37753 }
37754
37755 /* Compute a (partial) cost for rtx X. Return true if the complete
37756 cost has been computed, and false if subexpressions should be
37757 scanned. In either case, *TOTAL contains the cost result. */
37758
37759 static bool
37760 ix86_rtx_costs (rtx x, int code_i, int outer_code_i, int opno, int *total,
37761 bool speed)
37762 {
37763 rtx mask;
37764 enum rtx_code code = (enum rtx_code) code_i;
37765 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
37766 enum machine_mode mode = GET_MODE (x);
37767 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
37768
37769 switch (code)
37770 {
37771 case SET:
37772 if (register_operand (SET_DEST (x), VOIDmode)
37773 && reg_or_0_operand (SET_SRC (x), VOIDmode))
37774 {
37775 *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
37776 return true;
37777 }
37778 return false;
37779
37780 case CONST_INT:
37781 case CONST:
37782 case LABEL_REF:
37783 case SYMBOL_REF:
37784 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
37785 *total = 3;
37786 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
37787 *total = 2;
37788 else if (flag_pic && SYMBOLIC_CONST (x)
37789 && (!TARGET_64BIT
37790 || (GET_CODE (x) != LABEL_REF
37791 && (GET_CODE (x) != SYMBOL_REF
37792 || !SYMBOL_REF_LOCAL_P (x)))))
37793 *total = 1;
37794 else
37795 *total = 0;
37796 return true;
37797
37798 case CONST_DOUBLE:
37799 if (mode == VOIDmode)
37800 {
37801 *total = 0;
37802 return true;
37803 }
37804 switch (standard_80387_constant_p (x))
37805 {
37806 case 1: /* 0.0 */
37807 *total = 1;
37808 return true;
37809 default: /* Other constants */
37810 *total = 2;
37811 return true;
37812 case 0:
37813 case -1:
37814 break;
37815 }
37816 if (SSE_FLOAT_MODE_P (mode))
37817 {
37818 case CONST_VECTOR:
37819 switch (standard_sse_constant_p (x))
37820 {
37821 case 0:
37822 break;
37823 case 1: /* 0: xor eliminates false dependency */
37824 *total = 0;
37825 return true;
37826 default: /* -1: cmp contains false dependency */
37827 *total = 1;
37828 return true;
37829 }
37830 }
37831 /* Fall back to (MEM (SYMBOL_REF)), since that's where
37832 it'll probably end up. Add a penalty for size. */
37833 *total = (COSTS_N_INSNS (1)
37834 + (flag_pic != 0 && !TARGET_64BIT)
37835 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
37836 return true;
37837
37838 case ZERO_EXTEND:
37839 /* The zero extension is often completely free on x86_64, so make
37840 it as cheap as possible. */
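      /* (On x86_64 any write to a 32-bit register implicitly zeroes the
         upper 32 bits of the full register, so the extension usually needs
         no instruction at all.)  */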
37841 if (TARGET_64BIT && mode == DImode
37842 && GET_MODE (XEXP (x, 0)) == SImode)
37843 *total = 1;
37844 else if (TARGET_ZERO_EXTEND_WITH_AND)
37845 *total = cost->add;
37846 else
37847 *total = cost->movzx;
37848 return false;
37849
37850 case SIGN_EXTEND:
37851 *total = cost->movsx;
37852 return false;
37853
37854 case ASHIFT:
37855 if (SCALAR_INT_MODE_P (mode)
37856 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
37857 && CONST_INT_P (XEXP (x, 1)))
37858 {
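        /* A left shift by 1 can be done with an add, and shifts by 2 or 3
           can be folded into an lea with a scaled index, so cost those
           accordingly below.  */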
37859 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
37860 if (value == 1)
37861 {
37862 *total = cost->add;
37863 return false;
37864 }
37865 if ((value == 2 || value == 3)
37866 && cost->lea <= cost->shift_const)
37867 {
37868 *total = cost->lea;
37869 return false;
37870 }
37871 }
37872 /* FALLTHRU */
37873
37874 case ROTATE:
37875 case ASHIFTRT:
37876 case LSHIFTRT:
37877 case ROTATERT:
37878 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
37879 {
37880 /* ??? Should be SSE vector operation cost. */
37881 /* At least for published AMD latencies, this really is the same
37882 as the latency for a simple fpu operation like fabs. */
37883 /* V*QImode is emulated with 1-11 insns. */
37884 if (mode == V16QImode || mode == V32QImode)
37885 {
37886 int count = 11;
37887 if (TARGET_XOP && mode == V16QImode)
37888 {
37889 /* For XOP we use vpshab, which requires a broadcast of the
37890 value to the variable shift insn.  For constants this
37891 means a V16QImode constant in memory; even when we can perform the
37892 shift with one insn, set the cost so as to prefer paddb. */
37893 if (CONSTANT_P (XEXP (x, 1)))
37894 {
37895 *total = (cost->fabs
37896 + rtx_cost (XEXP (x, 0), code, 0, speed)
37897 + (speed ? 2 : COSTS_N_BYTES (16)));
37898 return true;
37899 }
37900 count = 3;
37901 }
37902 else if (TARGET_SSSE3)
37903 count = 7;
37904 *total = cost->fabs * count;
37905 }
37906 else
37907 *total = cost->fabs;
37908 }
37909 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
37910 {
37911 if (CONST_INT_P (XEXP (x, 1)))
37912 {
37913 if (INTVAL (XEXP (x, 1)) > 32)
37914 *total = cost->shift_const + COSTS_N_INSNS (2);
37915 else
37916 *total = cost->shift_const * 2;
37917 }
37918 else
37919 {
37920 if (GET_CODE (XEXP (x, 1)) == AND)
37921 *total = cost->shift_var * 2;
37922 else
37923 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
37924 }
37925 }
37926 else
37927 {
37928 if (CONST_INT_P (XEXP (x, 1)))
37929 *total = cost->shift_const;
37930 else if (GET_CODE (XEXP (x, 1)) == SUBREG
37931 && GET_CODE (XEXP (XEXP (x, 1), 0)) == AND)
37932 {
37933 /* The AND is subsumed by the shift's implicit truncation of the count; return just the variable-shift cost. */
37934 *total = cost->shift_var;
37935 return true;
37936 }
37937 else
37938 *total = cost->shift_var;
37939 }
37940 return false;
37941
37942 case FMA:
37943 {
37944 rtx sub;
37945
37946 gcc_assert (FLOAT_MODE_P (mode));
37947 gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F);
37948
37949 /* ??? SSE scalar/vector cost should be used here. */
37950 /* ??? Bald assumption that fma has the same cost as fmul. */
37951 *total = cost->fmul;
37952 *total += rtx_cost (XEXP (x, 1), FMA, 1, speed);
37953
37954 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
37955 sub = XEXP (x, 0);
37956 if (GET_CODE (sub) == NEG)
37957 sub = XEXP (sub, 0);
37958 *total += rtx_cost (sub, FMA, 0, speed);
37959
37960 sub = XEXP (x, 2);
37961 if (GET_CODE (sub) == NEG)
37962 sub = XEXP (sub, 0);
37963 *total += rtx_cost (sub, FMA, 2, speed);
37964 return true;
37965 }
37966
37967 case MULT:
37968 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
37969 {
37970 /* ??? SSE scalar cost should be used here. */
37971 *total = cost->fmul;
37972 return false;
37973 }
37974 else if (X87_FLOAT_MODE_P (mode))
37975 {
37976 *total = cost->fmul;
37977 return false;
37978 }
37979 else if (FLOAT_MODE_P (mode))
37980 {
37981 /* ??? SSE vector cost should be used here. */
37982 *total = cost->fmul;
37983 return false;
37984 }
37985 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
37986 {
37987 /* V*QImode is emulated with 7-13 insns. */
37988 if (mode == V16QImode || mode == V32QImode)
37989 {
37990 int extra = 11;
37991 if (TARGET_XOP && mode == V16QImode)
37992 extra = 5;
37993 else if (TARGET_SSSE3)
37994 extra = 6;
37995 *total = cost->fmul * 2 + cost->fabs * extra;
37996 }
37997 /* V*DImode is emulated with 5-8 insns. */
37998 else if (mode == V2DImode || mode == V4DImode)
37999 {
38000 if (TARGET_XOP && mode == V2DImode)
38001 *total = cost->fmul * 2 + cost->fabs * 3;
38002 else
38003 *total = cost->fmul * 3 + cost->fabs * 5;
38004 }
38005 /* Without sse4.1, we don't have PMULLD; it's emulated with 7
38006 insns, including two PMULUDQ. */
38007 else if (mode == V4SImode && !(TARGET_SSE4_1 || TARGET_AVX))
38008 *total = cost->fmul * 2 + cost->fabs * 5;
38009 else
38010 *total = cost->fmul;
38011 return false;
38012 }
38013 else
38014 {
38015 rtx op0 = XEXP (x, 0);
38016 rtx op1 = XEXP (x, 1);
38017 int nbits;
38018 if (CONST_INT_P (XEXP (x, 1)))
38019 {
38020 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
38021 for (nbits = 0; value != 0; value &= value - 1)
38022 nbits++;
38023 }
38024 else
38025 /* This is arbitrary. */
38026 nbits = 7;
38027
38028 /* Compute costs correctly for widening multiplication. */
38029 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
38030 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
38031 == GET_MODE_SIZE (mode))
38032 {
38033 int is_mulwiden = 0;
38034 enum machine_mode inner_mode = GET_MODE (op0);
38035
38036 if (GET_CODE (op0) == GET_CODE (op1))
38037 is_mulwiden = 1, op1 = XEXP (op1, 0);
38038 else if (CONST_INT_P (op1))
38039 {
38040 if (GET_CODE (op0) == SIGN_EXTEND)
38041 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
38042 == INTVAL (op1);
38043 else
38044 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
38045 }
38046
38047 if (is_mulwiden)
38048 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
38049 }
38050
38051 *total = (cost->mult_init[MODE_INDEX (mode)]
38052 + nbits * cost->mult_bit
38053 + rtx_cost (op0, outer_code, opno, speed)
38054 + rtx_cost (op1, outer_code, opno, speed));
38055
38056 return true;
38057 }
38058
38059 case DIV:
38060 case UDIV:
38061 case MOD:
38062 case UMOD:
38063 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38064 /* ??? SSE cost should be used here. */
38065 *total = cost->fdiv;
38066 else if (X87_FLOAT_MODE_P (mode))
38067 *total = cost->fdiv;
38068 else if (FLOAT_MODE_P (mode))
38069 /* ??? SSE vector cost should be used here. */
38070 *total = cost->fdiv;
38071 else
38072 *total = cost->divide[MODE_INDEX (mode)];
38073 return false;
38074
38075 case PLUS:
38076 if (GET_MODE_CLASS (mode) == MODE_INT
38077 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
38078 {
38079 if (GET_CODE (XEXP (x, 0)) == PLUS
38080 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
38081 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
38082 && CONSTANT_P (XEXP (x, 1)))
38083 {
38084 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
38085 if (val == 2 || val == 4 || val == 8)
38086 {
38087 *total = cost->lea;
38088 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
38089 outer_code, opno, speed);
38090 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
38091 outer_code, opno, speed);
38092 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
38093 return true;
38094 }
38095 }
38096 else if (GET_CODE (XEXP (x, 0)) == MULT
38097 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
38098 {
38099 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
38100 if (val == 2 || val == 4 || val == 8)
38101 {
38102 *total = cost->lea;
38103 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
38104 outer_code, opno, speed);
38105 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
38106 return true;
38107 }
38108 }
38109 else if (GET_CODE (XEXP (x, 0)) == PLUS)
38110 {
38111 *total = cost->lea;
38112 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
38113 outer_code, opno, speed);
38114 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
38115 outer_code, opno, speed);
38116 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
38117 return true;
38118 }
38119 }
38120 /* FALLTHRU */
38121
38122 case MINUS:
38123 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38124 {
38125 /* ??? SSE cost should be used here. */
38126 *total = cost->fadd;
38127 return false;
38128 }
38129 else if (X87_FLOAT_MODE_P (mode))
38130 {
38131 *total = cost->fadd;
38132 return false;
38133 }
38134 else if (FLOAT_MODE_P (mode))
38135 {
38136 /* ??? SSE vector cost should be used here. */
38137 *total = cost->fadd;
38138 return false;
38139 }
38140 /* FALLTHRU */
38141
38142 case AND:
38143 case IOR:
38144 case XOR:
38145 if (GET_MODE_CLASS (mode) == MODE_INT
38146 && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
38147 {
38148 *total = (cost->add * 2
38149 + (rtx_cost (XEXP (x, 0), outer_code, opno, speed)
38150 << (GET_MODE (XEXP (x, 0)) != DImode))
38151 + (rtx_cost (XEXP (x, 1), outer_code, opno, speed)
38152 << (GET_MODE (XEXP (x, 1)) != DImode)));
38153 return true;
38154 }
38155 /* FALLTHRU */
38156
38157 case NEG:
38158 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38159 {
38160 /* ??? SSE cost should be used here. */
38161 *total = cost->fchs;
38162 return false;
38163 }
38164 else if (X87_FLOAT_MODE_P (mode))
38165 {
38166 *total = cost->fchs;
38167 return false;
38168 }
38169 else if (FLOAT_MODE_P (mode))
38170 {
38171 /* ??? SSE vector cost should be used here. */
38172 *total = cost->fchs;
38173 return false;
38174 }
38175 /* FALLTHRU */
38176
38177 case NOT:
38178 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
38179 {
38180 /* ??? Should be SSE vector operation cost. */
38181 /* At least for published AMD latencies, this really is the same
38182 as the latency for a simple fpu operation like fabs. */
38183 *total = cost->fabs;
38184 }
38185 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
38186 *total = cost->add * 2;
38187 else
38188 *total = cost->add;
38189 return false;
38190
38191 case COMPARE:
38192 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
38193 && XEXP (XEXP (x, 0), 1) == const1_rtx
38194 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
38195 && XEXP (x, 1) == const0_rtx)
38196 {
38197 /* This kind of construct is implemented using test[bwl].
38198 Treat it as if we had an AND. */
38199 *total = (cost->add
38200 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, opno, speed)
38201 + rtx_cost (const1_rtx, outer_code, opno, speed));
38202 return true;
38203 }
38204 return false;
38205
38206 case FLOAT_EXTEND:
38207 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
38208 *total = 0;
38209 return false;
38210
38211 case ABS:
38212 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38213 /* ??? SSE cost should be used here. */
38214 *total = cost->fabs;
38215 else if (X87_FLOAT_MODE_P (mode))
38216 *total = cost->fabs;
38217 else if (FLOAT_MODE_P (mode))
38218 /* ??? SSE vector cost should be used here. */
38219 *total = cost->fabs;
38220 return false;
38221
38222 case SQRT:
38223 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38224 /* ??? SSE cost should be used here. */
38225 *total = cost->fsqrt;
38226 else if (X87_FLOAT_MODE_P (mode))
38227 *total = cost->fsqrt;
38228 else if (FLOAT_MODE_P (mode))
38229 /* ??? SSE vector cost should be used here. */
38230 *total = cost->fsqrt;
38231 return false;
38232
38233 case UNSPEC:
38234 if (XINT (x, 1) == UNSPEC_TP)
38235 *total = 0;
38236 return false;
38237
38238 case VEC_SELECT:
38239 case VEC_CONCAT:
38240 case VEC_DUPLICATE:
38241 /* ??? Assume all of these vector manipulation patterns are
38242 recognizable. In which case they all pretty much have the
38243 same cost. */
38244 *total = cost->fabs;
38245 return true;
38246 case VEC_MERGE:
38247 mask = XEXP (x, 2);
38248 /* This is a masked instruction; assume the same cost
38249 as the non-masked variant. */
38250 if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask)))
38251 *total = rtx_cost (XEXP (x, 0), outer_code, opno, speed);
38252 else
38253 *total = cost->fabs;
38254 return true;
38255
38256 default:
38257 return false;
38258 }
38259 }
38260
38261 #if TARGET_MACHO
38262
38263 static int current_machopic_label_num;
38264
38265 /* Given a symbol name and its associated stub, write out the
38266 definition of the stub. */
38267
38268 void
38269 machopic_output_stub (FILE *file, const char *symb, const char *stub)
38270 {
38271 unsigned int length;
38272 char *binder_name, *symbol_name, lazy_ptr_name[32];
38273 int label = ++current_machopic_label_num;
38274
38275 /* For 64-bit we shouldn't get here. */
38276 gcc_assert (!TARGET_64BIT);
38277
38278 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
38279 symb = targetm.strip_name_encoding (symb);
38280
38281 length = strlen (stub);
38282 binder_name = XALLOCAVEC (char, length + 32);
38283 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
38284
38285 length = strlen (symb);
38286 symbol_name = XALLOCAVEC (char, length + 32);
38287 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
38288
38289 sprintf (lazy_ptr_name, "L%d$lz", label);
38290
38291 if (MACHOPIC_ATT_STUB)
38292 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
38293 else if (MACHOPIC_PURE)
38294 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
38295 else
38296 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
38297
38298 fprintf (file, "%s:\n", stub);
38299 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
38300
38301 if (MACHOPIC_ATT_STUB)
38302 {
38303 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
38304 }
38305 else if (MACHOPIC_PURE)
38306 {
38307 /* PIC stub. */
38308 /* 25-byte PIC stub using "CALL get_pc_thunk". */
38309 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
38310 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
38311 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
38312 label, lazy_ptr_name, label);
38313 fprintf (file, "\tjmp\t*%%ecx\n");
38314 }
38315 else
38316 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
38317
38318 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
38319 it needs no stub-binding-helper. */
38320 if (MACHOPIC_ATT_STUB)
38321 return;
38322
38323 fprintf (file, "%s:\n", binder_name);
38324
38325 if (MACHOPIC_PURE)
38326 {
38327 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
38328 fprintf (file, "\tpushl\t%%ecx\n");
38329 }
38330 else
38331 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
38332
38333 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
38334
38335 /* N.B. Keep the correspondence of these
38336 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
38337 old-pic/new-pic/non-pic stubs; altering this will break
38338 compatibility with existing dylibs. */
38339 if (MACHOPIC_PURE)
38340 {
38341 /* 25-byte PIC stub using "CALL get_pc_thunk". */
38342 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
38343 }
38344 else
38345 /* 16-byte -mdynamic-no-pic stub. */
38346 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr3_section]);
38347
38348 fprintf (file, "%s:\n", lazy_ptr_name);
38349 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
38350 fprintf (file, ASM_LONG "%s\n", binder_name);
38351 }
38352 #endif /* TARGET_MACHO */
38353
38354 /* Order the registers for register allocator. */
38355
38356 void
38357 x86_order_regs_for_local_alloc (void)
38358 {
38359 int pos = 0;
38360 int i;
38361
38362 /* First allocate the local general purpose registers. */
38363 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
38364 if (GENERAL_REGNO_P (i) && call_used_regs[i])
38365 reg_alloc_order [pos++] = i;
38366
38367 /* Global general purpose registers. */
38368 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
38369 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
38370 reg_alloc_order [pos++] = i;
38371
38372 /* x87 registers come first in case we are doing FP math
38373 using them. */
38374 if (!TARGET_SSE_MATH)
38375 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
38376 reg_alloc_order [pos++] = i;
38377
38378 /* SSE registers. */
38379 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
38380 reg_alloc_order [pos++] = i;
38381 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
38382 reg_alloc_order [pos++] = i;
38383
38384 /* Extended REX SSE registers. */
38385 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
38386 reg_alloc_order [pos++] = i;
38387
38388 /* Mask registers. */
38389 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
38390 reg_alloc_order [pos++] = i;
38391
38392 /* x87 registers. */
38393 if (TARGET_SSE_MATH)
38394 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
38395 reg_alloc_order [pos++] = i;
38396
38397 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
38398 reg_alloc_order [pos++] = i;
38399
38400 /* Initialize the rest of the array, as we do not allocate some registers
38401 at all. */
38402 while (pos < FIRST_PSEUDO_REGISTER)
38403 reg_alloc_order [pos++] = 0;
38404 }
38405
38406 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
38407 in struct attribute_spec.handler. */
38408 static tree
38409 ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
38410 tree args,
38411 int flags ATTRIBUTE_UNUSED,
38412 bool *no_add_attrs)
38413 {
38414 if (TREE_CODE (*node) != FUNCTION_TYPE
38415 && TREE_CODE (*node) != METHOD_TYPE
38416 && TREE_CODE (*node) != FIELD_DECL
38417 && TREE_CODE (*node) != TYPE_DECL)
38418 {
38419 warning (OPT_Wattributes, "%qE attribute only applies to functions",
38420 name);
38421 *no_add_attrs = true;
38422 return NULL_TREE;
38423 }
38424 if (TARGET_64BIT)
38425 {
38426 warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
38427 name);
38428 *no_add_attrs = true;
38429 return NULL_TREE;
38430 }
38431 if (is_attribute_p ("callee_pop_aggregate_return", name))
38432 {
38433 tree cst;
38434
38435 cst = TREE_VALUE (args);
38436 if (TREE_CODE (cst) != INTEGER_CST)
38437 {
38438 warning (OPT_Wattributes,
38439 "%qE attribute requires an integer constant argument",
38440 name);
38441 *no_add_attrs = true;
38442 }
38443 else if (compare_tree_int (cst, 0) != 0
38444 && compare_tree_int (cst, 1) != 0)
38445 {
38446 warning (OPT_Wattributes,
38447 "argument to %qE attribute is neither zero, nor one",
38448 name);
38449 *no_add_attrs = true;
38450 }
38451
38452 return NULL_TREE;
38453 }
38454
38455 return NULL_TREE;
38456 }
38457
38458 /* Handle a "ms_abi" or "sysv_abi" attribute; arguments as in
38459 struct attribute_spec.handler. */
38460 static tree
38461 ix86_handle_abi_attribute (tree *node, tree name,
38462 tree args ATTRIBUTE_UNUSED,
38463 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
38464 {
38465 if (TREE_CODE (*node) != FUNCTION_TYPE
38466 && TREE_CODE (*node) != METHOD_TYPE
38467 && TREE_CODE (*node) != FIELD_DECL
38468 && TREE_CODE (*node) != TYPE_DECL)
38469 {
38470 warning (OPT_Wattributes, "%qE attribute only applies to functions",
38471 name);
38472 *no_add_attrs = true;
38473 return NULL_TREE;
38474 }
38475
38476 /* The ms_abi and sysv_abi attributes are mutually exclusive. */
38477 if (is_attribute_p ("ms_abi", name))
38478 {
38479 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
38480 {
38481 error ("ms_abi and sysv_abi attributes are not compatible");
38482 }
38483
38484 return NULL_TREE;
38485 }
38486 else if (is_attribute_p ("sysv_abi", name))
38487 {
38488 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
38489 {
38490 error ("ms_abi and sysv_abi attributes are not compatible");
38491 }
38492
38493 return NULL_TREE;
38494 }
38495
38496 return NULL_TREE;
38497 }
38498
38499 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
38500 struct attribute_spec.handler. */
38501 static tree
38502 ix86_handle_struct_attribute (tree *node, tree name,
38503 tree args ATTRIBUTE_UNUSED,
38504 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
38505 {
38506 tree *type = NULL;
38507 if (DECL_P (*node))
38508 {
38509 if (TREE_CODE (*node) == TYPE_DECL)
38510 type = &TREE_TYPE (*node);
38511 }
38512 else
38513 type = node;
38514
38515 if (!(type && RECORD_OR_UNION_TYPE_P (*type)))
38516 {
38517 warning (OPT_Wattributes, "%qE attribute ignored",
38518 name);
38519 *no_add_attrs = true;
38520 }
38521
38522 else if ((is_attribute_p ("ms_struct", name)
38523 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
38524 || ((is_attribute_p ("gcc_struct", name)
38525 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
38526 {
38527 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
38528 name);
38529 *no_add_attrs = true;
38530 }
38531
38532 return NULL_TREE;
38533 }
38534
38535 static tree
38536 ix86_handle_fndecl_attribute (tree *node, tree name,
38537 tree args ATTRIBUTE_UNUSED,
38538 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
38539 {
38540 if (TREE_CODE (*node) != FUNCTION_DECL)
38541 {
38542 warning (OPT_Wattributes, "%qE attribute only applies to functions",
38543 name);
38544 *no_add_attrs = true;
38545 }
38546 return NULL_TREE;
38547 }
38548
38549 static bool
38550 ix86_ms_bitfield_layout_p (const_tree record_type)
38551 {
38552 return ((TARGET_MS_BITFIELD_LAYOUT
38553 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
38554 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
38555 }
38556
38557 /* Returns an expression indicating where the this parameter is
38558 located on entry to the FUNCTION. */
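/* For example, with the 64-bit SysV ABI `this' arrives in %rdi, or in %rsi
   when the function returns an aggregate in memory, since the hidden
   return-pointer argument then occupies %rdi.  */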
38559
38560 static rtx
38561 x86_this_parameter (tree function)
38562 {
38563 tree type = TREE_TYPE (function);
38564 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
38565 int nregs;
38566
38567 if (TARGET_64BIT)
38568 {
38569 const int *parm_regs;
38570
38571 if (ix86_function_type_abi (type) == MS_ABI)
38572 parm_regs = x86_64_ms_abi_int_parameter_registers;
38573 else
38574 parm_regs = x86_64_int_parameter_registers;
38575 return gen_rtx_REG (Pmode, parm_regs[aggr]);
38576 }
38577
38578 nregs = ix86_function_regparm (type, function);
38579
38580 if (nregs > 0 && !stdarg_p (type))
38581 {
38582 int regno;
38583 unsigned int ccvt = ix86_get_callcvt (type);
38584
38585 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
38586 regno = aggr ? DX_REG : CX_REG;
38587 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
38588 {
38589 regno = CX_REG;
38590 if (aggr)
38591 return gen_rtx_MEM (SImode,
38592 plus_constant (Pmode, stack_pointer_rtx, 4));
38593 }
38594 else
38595 {
38596 regno = AX_REG;
38597 if (aggr)
38598 {
38599 regno = DX_REG;
38600 if (nregs == 1)
38601 return gen_rtx_MEM (SImode,
38602 plus_constant (Pmode,
38603 stack_pointer_rtx, 4));
38604 }
38605 }
38606 return gen_rtx_REG (SImode, regno);
38607 }
38608
38609 return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
38610 aggr ? 8 : 4));
38611 }
38612
38613 /* Determine whether x86_output_mi_thunk can succeed. */
38614
38615 static bool
38616 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
38617 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
38618 HOST_WIDE_INT vcall_offset, const_tree function)
38619 {
38620 /* 64-bit can handle anything. */
38621 if (TARGET_64BIT)
38622 return true;
38623
38624 /* For 32-bit, everything's fine if we have one free register. */
38625 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
38626 return true;
38627
38628 /* Need a free register for vcall_offset. */
38629 if (vcall_offset)
38630 return false;
38631
38632 /* Need a free register for GOT references. */
38633 if (flag_pic && !targetm.binds_local_p (function))
38634 return false;
38635
38636 /* Otherwise ok. */
38637 return true;
38638 }
38639
38640 /* Output the assembler code for a thunk function. THUNK_DECL is the
38641 declaration for the thunk function itself, FUNCTION is the decl for
38642 the target function. DELTA is an immediate constant offset to be
38643 added to THIS. If VCALL_OFFSET is nonzero, the word at
38644 *(*this + vcall_offset) should be added to THIS. */
38645
38646 static void
38647 x86_output_mi_thunk (FILE *file,
38648 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
38649 HOST_WIDE_INT vcall_offset, tree function)
38650 {
38651 rtx this_param = x86_this_parameter (function);
38652 rtx this_reg, tmp, fnaddr;
38653 unsigned int tmp_regno;
38654
38655 if (TARGET_64BIT)
38656 tmp_regno = R10_REG;
38657 else
38658 {
38659 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
38660 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
38661 tmp_regno = AX_REG;
38662 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
38663 tmp_regno = DX_REG;
38664 else
38665 tmp_regno = CX_REG;
38666 }
38667
38668 emit_note (NOTE_INSN_PROLOGUE_END);
38669
38670 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
38671 pull it in now and let DELTA benefit. */
38672 if (REG_P (this_param))
38673 this_reg = this_param;
38674 else if (vcall_offset)
38675 {
38676 /* Put the this parameter into %eax. */
38677 this_reg = gen_rtx_REG (Pmode, AX_REG);
38678 emit_move_insn (this_reg, this_param);
38679 }
38680 else
38681 this_reg = NULL_RTX;
38682
38683 /* Adjust the this parameter by a fixed constant. */
38684 if (delta)
38685 {
38686 rtx delta_rtx = GEN_INT (delta);
38687 rtx delta_dst = this_reg ? this_reg : this_param;
38688
38689 if (TARGET_64BIT)
38690 {
38691 if (!x86_64_general_operand (delta_rtx, Pmode))
38692 {
38693 tmp = gen_rtx_REG (Pmode, tmp_regno);
38694 emit_move_insn (tmp, delta_rtx);
38695 delta_rtx = tmp;
38696 }
38697 }
38698
38699 ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
38700 }
38701
38702 /* Adjust the this parameter by a value stored in the vtable. */
38703 if (vcall_offset)
38704 {
38705 rtx vcall_addr, vcall_mem, this_mem;
38706
38707 tmp = gen_rtx_REG (Pmode, tmp_regno);
38708
38709 this_mem = gen_rtx_MEM (ptr_mode, this_reg);
38710 if (Pmode != ptr_mode)
38711 this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
38712 emit_move_insn (tmp, this_mem);
38713
38714 /* Adjust the this parameter. */
38715 vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
38716 if (TARGET_64BIT
38717 && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
38718 {
38719 rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
38720 emit_move_insn (tmp2, GEN_INT (vcall_offset));
38721 vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
38722 }
38723
38724 vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
38725 if (Pmode != ptr_mode)
38726 emit_insn (gen_addsi_1_zext (this_reg,
38727 gen_rtx_REG (ptr_mode,
38728 REGNO (this_reg)),
38729 vcall_mem));
38730 else
38731 ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
38732 }
38733
38734 /* If necessary, drop THIS back to its stack slot. */
38735 if (this_reg && this_reg != this_param)
38736 emit_move_insn (this_param, this_reg);
38737
38738 fnaddr = XEXP (DECL_RTL (function), 0);
38739 if (TARGET_64BIT)
38740 {
38741 if (!flag_pic || targetm.binds_local_p (function)
38742 || TARGET_PECOFF)
38743 ;
38744 else
38745 {
38746 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
38747 tmp = gen_rtx_CONST (Pmode, tmp);
38748 fnaddr = gen_const_mem (Pmode, tmp);
38749 }
38750 }
38751 else
38752 {
38753 if (!flag_pic || targetm.binds_local_p (function))
38754 ;
38755 #if TARGET_MACHO
38756 else if (TARGET_MACHO)
38757 {
38758 fnaddr = machopic_indirect_call_target (DECL_RTL (function));
38759 fnaddr = XEXP (fnaddr, 0);
38760 }
38761 #endif /* TARGET_MACHO */
38762 else
38763 {
38764 tmp = gen_rtx_REG (Pmode, CX_REG);
38765 output_set_got (tmp, NULL_RTX);
38766
38767 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
38768 fnaddr = gen_rtx_CONST (Pmode, fnaddr);
38769 fnaddr = gen_rtx_PLUS (Pmode, tmp, fnaddr);
38770 fnaddr = gen_const_mem (Pmode, fnaddr);
38771 }
38772 }
38773
38774 /* Our sibling call patterns do not allow memories, because we have no
38775 predicate that can distinguish between frame and non-frame memory.
38776 For our purposes here, we can get away with (ab)using a jump pattern,
38777 because we're going to do no optimization. */
38778 if (MEM_P (fnaddr))
38779 emit_jump_insn (gen_indirect_jump (fnaddr));
38780 else
38781 {
38782 if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
38783 fnaddr = legitimize_pic_address (fnaddr,
38784 gen_rtx_REG (Pmode, tmp_regno));
38785
38786 if (!sibcall_insn_operand (fnaddr, word_mode))
38787 {
38788 tmp = gen_rtx_REG (word_mode, tmp_regno);
38789 if (GET_MODE (fnaddr) != word_mode)
38790 fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
38791 emit_move_insn (tmp, fnaddr);
38792 fnaddr = tmp;
38793 }
38794
38795 tmp = gen_rtx_MEM (QImode, fnaddr);
38796 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
38797 tmp = emit_call_insn (tmp);
38798 SIBLING_CALL_P (tmp) = 1;
38799 }
38800 emit_barrier ();
38801
38802 /* Emit just enough of rest_of_compilation to get the insns emitted.
38803 Note that use_thunk calls assemble_start_function et al. */
38804 tmp = get_insns ();
38805 shorten_branches (tmp);
38806 final_start_function (tmp, file, 1);
38807 final (tmp, file, 1);
38808 final_end_function ();
38809 }
38810
38811 static void
38812 x86_file_start (void)
38813 {
38814 default_file_start ();
38815 if (TARGET_16BIT)
38816 fputs ("\t.code16gcc\n", asm_out_file);
38817 #if TARGET_MACHO
38818 darwin_file_start ();
38819 #endif
38820 if (X86_FILE_START_VERSION_DIRECTIVE)
38821 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
38822 if (X86_FILE_START_FLTUSED)
38823 fputs ("\t.global\t__fltused\n", asm_out_file);
38824 if (ix86_asm_dialect == ASM_INTEL)
38825 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
38826 }
38827
38828 int
38829 x86_field_alignment (tree field, int computed)
38830 {
38831 enum machine_mode mode;
38832 tree type = TREE_TYPE (field);
38833
38834 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
38835 return computed;
38836 mode = TYPE_MODE (strip_array_types (type));
38837 if (mode == DFmode || mode == DCmode
38838 || GET_MODE_CLASS (mode) == MODE_INT
38839 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
38840 return MIN (32, computed);
38841 return computed;
38842 }
38843
38844 /* Output assembler code to FILE to increment profiler label # LABELNO
38845 for profiling a function entry. */
38846 void
38847 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
38848 {
38849 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
38850 : MCOUNT_NAME);
38851
38852 if (TARGET_64BIT)
38853 {
38854 #ifndef NO_PROFILE_COUNTERS
38855 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
38856 #endif
38857
38858 if (!TARGET_PECOFF && flag_pic)
38859 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
38860 else
38861 fprintf (file, "\tcall\t%s\n", mcount_name);
38862 }
38863 else if (flag_pic)
38864 {
38865 #ifndef NO_PROFILE_COUNTERS
38866 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
38867 LPREFIX, labelno);
38868 #endif
38869 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
38870 }
38871 else
38872 {
38873 #ifndef NO_PROFILE_COUNTERS
38874 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
38875 LPREFIX, labelno);
38876 #endif
38877 fprintf (file, "\tcall\t%s\n", mcount_name);
38878 }
38879 }
38880
38881 /* We don't have exact information about insn sizes, but we may quite
38882 safely assume that we know about all 1-byte insns and about memory
38883 address sizes.  This is enough to eliminate unnecessary padding in
38884 99% of cases. */
38885
38886 static int
38887 min_insn_size (rtx insn)
38888 {
38889 int l = 0, len;
38890
38891 if (!INSN_P (insn) || !active_insn_p (insn))
38892 return 0;
38893
38894 /* Discard alignments we've emitted, and jump instructions. */
38895 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
38896 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
38897 return 0;
38898
38899 /* Important case: calls are always 5 bytes (one opcode byte plus a
38900 32-bit displacement).  It is common to have many calls in a row. */
38901 if (CALL_P (insn)
38902 && symbolic_reference_mentioned_p (PATTERN (insn))
38903 && !SIBLING_CALL_P (insn))
38904 return 5;
38905 len = get_attr_length (insn);
38906 if (len <= 1)
38907 return 1;
38908
38909 /* For normal instructions we rely on get_attr_length being exact,
38910 with a few exceptions. */
38911 if (!JUMP_P (insn))
38912 {
38913 enum attr_type type = get_attr_type (insn);
38914
38915 switch (type)
38916 {
38917 case TYPE_MULTI:
38918 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
38919 || asm_noperands (PATTERN (insn)) >= 0)
38920 return 0;
38921 break;
38922 case TYPE_OTHER:
38923 case TYPE_FCMP:
38924 break;
38925 default:
38926 /* Otherwise trust get_attr_length. */
38927 return len;
38928 }
38929
38930 l = get_attr_length_address (insn);
38931 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
38932 l = 4;
38933 }
38934 if (l)
38935 return 1+l;
38936 else
38937 return 2;
38938 }
38939
38940 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
38941
38942 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
38943 window. */
38944
38945 static void
38946 ix86_avoid_jump_mispredicts (void)
38947 {
38948 rtx insn, start = get_insns ();
38949 int nbytes = 0, njumps = 0;
38950 int isjump = 0;
38951
38952 /* Look for all minimal intervals of instructions containing 4 jumps.
38953 The intervals are bounded by START and INSN. NBYTES is the total
38954 size of instructions in the interval including INSN and not including
38955 START.  When NBYTES is smaller than 16, it is possible that the end of
38956 START and INSN end up in the same 16-byte window.
38957
38958 The smallest offset in the window at which INSN can start is when START
38959 ends at offset 0; the offset of INSN is then NBYTES - sizeof (INSN).
38960 We add a p2align to the 16-byte window with maxskip 15 - NBYTES + sizeof (INSN).
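     For example, with NBYTES == 13 and a 2-byte INSN, the worst-case start
     offset of INSN is 13 - 2 == 11, and the padding requested below is
     15 - 13 + 2 == 4 bytes.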
38961
38962 Don't consider an asm goto to be a jump: while it can contain a jump, it
38963 doesn't have to, since control transfer to its label(s) can be performed
38964 through other means; also, we estimate the minimum length of all asm stmts as 0. */
38965 for (insn = start; insn; insn = NEXT_INSN (insn))
38966 {
38967 int min_size;
38968
38969 if (LABEL_P (insn))
38970 {
38971 int align = label_to_alignment (insn);
38972 int max_skip = label_to_max_skip (insn);
38973
38974 if (max_skip > 15)
38975 max_skip = 15;
38976 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
38977 already in the current 16 byte page, because otherwise
38978 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
38979 bytes to reach 16 byte boundary. */
38980 if (align <= 0
38981 || (align <= 3 && max_skip != (1 << align) - 1))
38982 max_skip = 0;
38983 if (dump_file)
38984 fprintf (dump_file, "Label %i with max_skip %i\n",
38985 INSN_UID (insn), max_skip);
38986 if (max_skip)
38987 {
38988 while (nbytes + max_skip >= 16)
38989 {
38990 start = NEXT_INSN (start);
38991 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
38992 || CALL_P (start))
38993 njumps--, isjump = 1;
38994 else
38995 isjump = 0;
38996 nbytes -= min_insn_size (start);
38997 }
38998 }
38999 continue;
39000 }
39001
39002 min_size = min_insn_size (insn);
39003 nbytes += min_size;
39004 if (dump_file)
39005 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
39006 INSN_UID (insn), min_size);
39007 if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0)
39008 || CALL_P (insn))
39009 njumps++;
39010 else
39011 continue;
39012
39013 while (njumps > 3)
39014 {
39015 start = NEXT_INSN (start);
39016 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
39017 || CALL_P (start))
39018 njumps--, isjump = 1;
39019 else
39020 isjump = 0;
39021 nbytes -= min_insn_size (start);
39022 }
39023 gcc_assert (njumps >= 0);
39024 if (dump_file)
39025 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
39026 INSN_UID (start), INSN_UID (insn), nbytes);
39027
39028 if (njumps == 3 && isjump && nbytes < 16)
39029 {
39030 int padsize = 15 - nbytes + min_insn_size (insn);
39031
39032 if (dump_file)
39033 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
39034 INSN_UID (insn), padsize);
39035 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
39036 }
39037 }
39038 }
39039 #endif
39040
39041 /* AMD Athlon works faster
39042 when RET is not the destination of a conditional jump or directly preceded
39043 by another jump instruction.  We avoid the penalty by inserting a NOP just
39044 before the RET instructions in such cases. */
39045 static void
39046 ix86_pad_returns (void)
39047 {
39048 edge e;
39049 edge_iterator ei;
39050
39051 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
39052 {
39053 basic_block bb = e->src;
39054 rtx ret = BB_END (bb);
39055 rtx prev;
39056 bool replace = false;
39057
39058 if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
39059 || optimize_bb_for_size_p (bb))
39060 continue;
39061 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
39062 if (active_insn_p (prev) || LABEL_P (prev))
39063 break;
39064 if (prev && LABEL_P (prev))
39065 {
39066 edge e;
39067 edge_iterator ei;
39068
39069 FOR_EACH_EDGE (e, ei, bb->preds)
39070 if (EDGE_FREQUENCY (e) && e->src->index >= 0
39071 && !(e->flags & EDGE_FALLTHRU))
39072 {
39073 replace = true;
39074 break;
39075 }
39076 }
39077 if (!replace)
39078 {
39079 prev = prev_active_insn (ret);
39080 if (prev
39081 && ((JUMP_P (prev) && any_condjump_p (prev))
39082 || CALL_P (prev)))
39083 replace = true;
39084 /* Empty functions get branch mispredict even when
39085 the jump destination is not visible to us. */
39086 if (!prev && !optimize_function_for_size_p (cfun))
39087 replace = true;
39088 }
39089 if (replace)
39090 {
39091 emit_jump_insn_before (gen_simple_return_internal_long (), ret);
39092 delete_insn (ret);
39093 }
39094 }
39095 }
39096
39097 /* Count the minimum number of instructions in BB. Return 4 if the
39098 number of instructions >= 4. */
39099
39100 static int
39101 ix86_count_insn_bb (basic_block bb)
39102 {
39103 rtx insn;
39104 int insn_count = 0;
39105
39106 /* Count number of instructions in this block. Return 4 if the number
39107 of instructions >= 4. */
39108 FOR_BB_INSNS (bb, insn)
39109 {
39110 /* Return jumps only happen in exit blocks. */
39111 if (JUMP_P (insn)
39112 && ANY_RETURN_P (PATTERN (insn)))
39113 break;
39114
39115 if (NONDEBUG_INSN_P (insn)
39116 && GET_CODE (PATTERN (insn)) != USE
39117 && GET_CODE (PATTERN (insn)) != CLOBBER)
39118 {
39119 insn_count++;
39120 if (insn_count >= 4)
39121 return insn_count;
39122 }
39123 }
39124
39125 return insn_count;
39126 }
39127
39128
39129 /* Count the minimum number of instructions in code path in BB.
39130 Return 4 if the number of instructions >= 4. */
39131
39132 static int
39133 ix86_count_insn (basic_block bb)
39134 {
39135 edge e;
39136 edge_iterator ei;
39137 int min_prev_count;
39138
39139 /* Only bother counting instructions along paths with no
39140 more than 2 basic blocks between entry and exit. Given
39141 that BB has an edge to exit, determine if a predecessor
39142 of BB has an edge from entry. If so, compute the number
39143 of instructions in the predecessor block. If there
39144 happen to be multiple such blocks, compute the minimum. */
39145 min_prev_count = 4;
39146 FOR_EACH_EDGE (e, ei, bb->preds)
39147 {
39148 edge prev_e;
39149 edge_iterator prev_ei;
39150
39151 if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
39152 {
39153 min_prev_count = 0;
39154 break;
39155 }
39156 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
39157 {
39158 if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
39159 {
39160 int count = ix86_count_insn_bb (e->src);
39161 if (count < min_prev_count)
39162 min_prev_count = count;
39163 break;
39164 }
39165 }
39166 }
39167
39168 if (min_prev_count < 4)
39169 min_prev_count += ix86_count_insn_bb (bb);
39170
39171 return min_prev_count;
39172 }
39173
39174 /* Pad short function to 4 instructions. */
39175
39176 static void
39177 ix86_pad_short_function (void)
39178 {
39179 edge e;
39180 edge_iterator ei;
39181
39182 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
39183 {
39184 rtx ret = BB_END (e->src);
39185 if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
39186 {
39187 int insn_count = ix86_count_insn (e->src);
39188
39189 /* Pad short function. */
39190 if (insn_count < 4)
39191 {
39192 rtx insn = ret;
39193
39194 /* Find epilogue. */
39195 while (insn
39196 && (!NOTE_P (insn)
39197 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
39198 insn = PREV_INSN (insn);
39199
39200 if (!insn)
39201 insn = ret;
39202
39203 /* Two NOPs count as one instruction. */
39204 insn_count = 2 * (4 - insn_count);
39205 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
39206 }
39207 }
39208 }
39209 }
39210
39211 /* Fix up a Windows system unwinder issue. If an EH region falls through into
39212 the epilogue, the Windows system unwinder will apply epilogue logic and
39213 produce incorrect offsets. This can be avoided by adding a nop between
39214 the last insn that can throw and the first insn of the epilogue. */
39215
39216 static void
39217 ix86_seh_fixup_eh_fallthru (void)
39218 {
39219 edge e;
39220 edge_iterator ei;
39221
39222 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
39223 {
39224 rtx insn, next;
39225
39226 /* Find the beginning of the epilogue. */
39227 for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn))
39228 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
39229 break;
39230 if (insn == NULL)
39231 continue;
39232
39233 /* We only care about preceding insns that can throw. */
39234 insn = prev_active_insn (insn);
39235 if (insn == NULL || !can_throw_internal (insn))
39236 continue;
39237
39238 /* Do not separate calls from their debug information. */
39239 for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next))
39240 if (NOTE_P (next)
39241 && (NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION
39242 || NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION))
39243 insn = next;
39244 else
39245 break;
39246
39247 emit_insn_after (gen_nops (const1_rtx), insn);
39248 }
39249 }
39250
39251 /* Implement machine specific optimizations. We implement padding of returns
39252 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
39253 static void
39254 ix86_reorg (void)
39255 {
39256 /* We are freeing block_for_insn in the toplev to keep compatibility
39257 with old MDEP_REORGS that are not CFG based. Recompute it now. */
39258 compute_bb_for_insn ();
39259
39260 if (TARGET_SEH && current_function_has_exception_handlers ())
39261 ix86_seh_fixup_eh_fallthru ();
39262
39263 if (optimize && optimize_function_for_speed_p (cfun))
39264 {
39265 if (TARGET_PAD_SHORT_FUNCTION)
39266 ix86_pad_short_function ();
39267 else if (TARGET_PAD_RETURNS)
39268 ix86_pad_returns ();
39269 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
39270 if (TARGET_FOUR_JUMP_LIMIT)
39271 ix86_avoid_jump_mispredicts ();
39272 #endif
39273 }
39274 }
39275
39276 /* Return nonzero when QImode register that must be represented via REX prefix
39277 is used. */
39278 bool
39279 x86_extended_QIreg_mentioned_p (rtx insn)
39280 {
39281 int i;
39282 extract_insn_cached (insn);
39283 for (i = 0; i < recog_data.n_operands; i++)
39284 if (GENERAL_REG_P (recog_data.operand[i])
39285 && !QI_REGNO_P (REGNO (recog_data.operand[i])))
39286 return true;
39287 return false;
39288 }
39289
39290 /* Return nonzero when P points to register encoded via REX prefix.
39291 Called via for_each_rtx. */
39292 static int
39293 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
39294 {
39295 unsigned int regno;
39296 if (!REG_P (*p))
39297 return 0;
39298 regno = REGNO (*p);
39299 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
39300 }
39301
39302 /* Return true when INSN mentions register that must be encoded using REX
39303 prefix. */
39304 bool
39305 x86_extended_reg_mentioned_p (rtx insn)
39306 {
39307 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
39308 extended_reg_mentioned_1, NULL);
39309 }
39310
39311 /* If profitable, negate (without causing overflow) integer constant
39312 of mode MODE at location LOC. Return true in this case. */
39313 bool
39314 x86_maybe_negate_const_int (rtx *loc, enum machine_mode mode)
39315 {
39316 HOST_WIDE_INT val;
39317
39318 if (!CONST_INT_P (*loc))
39319 return false;
39320
39321 switch (mode)
39322 {
39323 case DImode:
39324 /* DImode x86_64 constants must fit in 32 bits. */
39325 gcc_assert (x86_64_immediate_operand (*loc, mode));
39326
39327 mode = SImode;
39328 break;
39329
39330 case SImode:
39331 case HImode:
39332 case QImode:
39333 break;
39334
39335 default:
39336 gcc_unreachable ();
39337 }
39338
39339 /* Avoid overflows. */
39340 if (mode_signbit_p (mode, *loc))
39341 return false;
39342
39343 val = INTVAL (*loc);
39344
39345 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
39346 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
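  /* For example, (const_int -4) becomes (const_int 4) so the caller can emit
     `subl $4,%eax', and 128 becomes -128 because $-128 fits in a
     sign-extended 8-bit immediate while $128 does not.  */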
39347 if ((val < 0 && val != -128)
39348 || val == 128)
39349 {
39350 *loc = GEN_INT (-val);
39351 return true;
39352 }
39353
39354 return false;
39355 }
39356
39357 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
39358 optabs would emit if we didn't have TFmode patterns. */
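/* In C terms, when the input I would be negative as a signed value, the
   sequence emitted below is roughly

     j = (i >> 1) | (i & 1);    halve, folding the low bit back in
     f = (FLOAT) j;
     result = f + f;            scale back up

   and otherwise a plain signed conversion is used.  */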
39359
39360 void
39361 x86_emit_floatuns (rtx operands[2])
39362 {
39363 rtx neglab, donelab, i0, i1, f0, in, out;
39364 enum machine_mode mode, inmode;
39365
39366 inmode = GET_MODE (operands[1]);
39367 gcc_assert (inmode == SImode || inmode == DImode);
39368
39369 out = operands[0];
39370 in = force_reg (inmode, operands[1]);
39371 mode = GET_MODE (out);
39372 neglab = gen_label_rtx ();
39373 donelab = gen_label_rtx ();
39374 f0 = gen_reg_rtx (mode);
39375
39376 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
39377
39378 expand_float (out, in, 0);
39379
39380 emit_jump_insn (gen_jump (donelab));
39381 emit_barrier ();
39382
39383 emit_label (neglab);
39384
39385 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
39386 1, OPTAB_DIRECT);
39387 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
39388 1, OPTAB_DIRECT);
39389 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
39390
39391 expand_float (f0, i0, 0);
39392
39393 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
39394
39395 emit_label (donelab);
39396 }
39397 \f
39398 /* AVX512F does support 64-byte integer vector operations,
39399 thus the longest vector we are faced with is V64QImode. */
39400 #define MAX_VECT_LEN 64
39401
39402 struct expand_vec_perm_d
39403 {
39404 rtx target, op0, op1;
39405 unsigned char perm[MAX_VECT_LEN];
39406 enum machine_mode vmode;
39407 unsigned char nelt;
39408 bool one_operand_p;
39409 bool testing_p;
39410 };
39411
39412 static bool canonicalize_perm (struct expand_vec_perm_d *d);
39413 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
39414 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
39415
39416 /* Get a vector mode of the same size as the original but with elements
39417 twice as wide. This is only guaranteed to apply to integral vectors. */
39418
39419 static inline enum machine_mode
39420 get_mode_wider_vector (enum machine_mode o)
39421 {
39422 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
39423 enum machine_mode n = GET_MODE_WIDER_MODE (o);
39424 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
39425 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
39426 return n;
39427 }
39428
39429 /* A subroutine of ix86_expand_vector_init_duplicate. Tries to
39430 fill target with val via vec_duplicate. */
39431
39432 static bool
39433 ix86_vector_duplicate_value (enum machine_mode mode, rtx target, rtx val)
39434 {
39435 bool ok;
39436 rtx insn, dup;
39437
39438 /* First attempt to recognize VAL as-is. */
39439 dup = gen_rtx_VEC_DUPLICATE (mode, val);
39440 insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
39441 if (recog_memoized (insn) < 0)
39442 {
39443 rtx seq;
39444 /* If that fails, force VAL into a register. */
39445
39446 start_sequence ();
39447 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
39448 seq = get_insns ();
39449 end_sequence ();
39450 if (seq)
39451 emit_insn_before (seq, insn);
39452
39453 ok = recog_memoized (insn) >= 0;
39454 gcc_assert (ok);
39455 }
39456 return true;
39457 }
39458
39459 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
39460 with all elements equal to VAR. Return true if successful. */
39461
39462 static bool
39463 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
39464 rtx target, rtx val)
39465 {
39466 bool ok;
39467
39468 switch (mode)
39469 {
39470 case V2SImode:
39471 case V2SFmode:
39472 if (!mmx_ok)
39473 return false;
39474 /* FALLTHRU */
39475
39476 case V4DFmode:
39477 case V4DImode:
39478 case V8SFmode:
39479 case V8SImode:
39480 case V2DFmode:
39481 case V2DImode:
39482 case V4SFmode:
39483 case V4SImode:
39484 case V16SImode:
39485 case V8DImode:
39486 case V16SFmode:
39487 case V8DFmode:
39488 return ix86_vector_duplicate_value (mode, target, val);
39489
39490 case V4HImode:
39491 if (!mmx_ok)
39492 return false;
39493 if (TARGET_SSE || TARGET_3DNOW_A)
39494 {
39495 rtx x;
39496
39497 val = gen_lowpart (SImode, val);
39498 x = gen_rtx_TRUNCATE (HImode, val);
39499 x = gen_rtx_VEC_DUPLICATE (mode, x);
39500 emit_insn (gen_rtx_SET (VOIDmode, target, x));
39501 return true;
39502 }
39503 goto widen;
39504
39505 case V8QImode:
39506 if (!mmx_ok)
39507 return false;
39508 goto widen;
39509
39510 case V8HImode:
39511 if (TARGET_SSE2)
39512 {
39513 struct expand_vec_perm_d dperm;
39514 rtx tmp1, tmp2;
39515
39516 permute:
39517 memset (&dperm, 0, sizeof (dperm));
39518 dperm.target = target;
39519 dperm.vmode = mode;
39520 dperm.nelt = GET_MODE_NUNITS (mode);
39521 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
39522 dperm.one_operand_p = true;
39523
39524 /* Extend to SImode using a paradoxical SUBREG. */
39525 tmp1 = gen_reg_rtx (SImode);
39526 emit_move_insn (tmp1, gen_lowpart (SImode, val));
39527
39528 /* Insert the SImode value as low element of a V4SImode vector. */
39529 tmp2 = gen_reg_rtx (V4SImode);
39530 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
39531 emit_move_insn (dperm.op0, gen_lowpart (mode, tmp2));
39532
39533 ok = (expand_vec_perm_1 (&dperm)
39534 || expand_vec_perm_broadcast_1 (&dperm));
39535 gcc_assert (ok);
39536 return ok;
39537 }
39538 goto widen;
39539
39540 case V16QImode:
39541 if (TARGET_SSE2)
39542 goto permute;
39543 goto widen;
39544
39545 widen:
39546 /* Replicate the value once into the next wider mode and recurse. */
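      /* E.g. a QImode VAL is widened to the HImode value (VAL << 8) | VAL,
         which is then broadcast as a vector with half as many elements.  */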
39547 {
39548 enum machine_mode smode, wsmode, wvmode;
39549 rtx x;
39550
39551 smode = GET_MODE_INNER (mode);
39552 wvmode = get_mode_wider_vector (mode);
39553 wsmode = GET_MODE_INNER (wvmode);
39554
39555 val = convert_modes (wsmode, smode, val, true);
39556 x = expand_simple_binop (wsmode, ASHIFT, val,
39557 GEN_INT (GET_MODE_BITSIZE (smode)),
39558 NULL_RTX, 1, OPTAB_LIB_WIDEN);
39559 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
39560
39561 x = gen_reg_rtx (wvmode);
39562 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
39563 gcc_assert (ok);
39564 emit_move_insn (target, gen_lowpart (GET_MODE (target), x));
39565 return ok;
39566 }
39567
39568 case V16HImode:
39569 case V32QImode:
39570 {
39571 enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
39572 rtx x = gen_reg_rtx (hvmode);
39573
39574 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
39575 gcc_assert (ok);
39576
39577 x = gen_rtx_VEC_CONCAT (mode, x, x);
39578 emit_insn (gen_rtx_SET (VOIDmode, target, x));
39579 }
39580 return true;
39581
39582 default:
39583 return false;
39584 }
39585 }
39586
39587 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
39588 whose ONE_VAR element is VAR, and other elements are zero. Return true
39589 if successful. */
39590
39591 static bool
39592 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
39593 rtx target, rtx var, int one_var)
39594 {
39595 enum machine_mode vsimode;
39596 rtx new_target;
39597 rtx x, tmp;
39598 bool use_vector_set = false;
39599
39600 switch (mode)
39601 {
39602 case V2DImode:
39603 /* For SSE4.1, we normally use vector set. But if the second
39604 element is zero and inter-unit moves are OK, we use movq
39605 instead. */
39606 use_vector_set = (TARGET_64BIT && TARGET_SSE4_1
39607 && !(TARGET_INTER_UNIT_MOVES_TO_VEC
39608 && one_var == 0));
39609 break;
39610 case V16QImode:
39611 case V4SImode:
39612 case V4SFmode:
39613 use_vector_set = TARGET_SSE4_1;
39614 break;
39615 case V8HImode:
39616 use_vector_set = TARGET_SSE2;
39617 break;
39618 case V4HImode:
39619 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
39620 break;
39621 case V32QImode:
39622 case V16HImode:
39623 case V8SImode:
39624 case V8SFmode:
39625 case V4DFmode:
39626 use_vector_set = TARGET_AVX;
39627 break;
39628 case V4DImode:
39629 /* Use ix86_expand_vector_set in 64bit mode only. */
39630 use_vector_set = TARGET_AVX && TARGET_64BIT;
39631 break;
39632 default:
39633 break;
39634 }
39635
39636 if (use_vector_set)
39637 {
39638 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
39639 var = force_reg (GET_MODE_INNER (mode), var);
39640 ix86_expand_vector_set (mmx_ok, target, var, one_var);
39641 return true;
39642 }
39643
39644 switch (mode)
39645 {
39646 case V2SFmode:
39647 case V2SImode:
39648 if (!mmx_ok)
39649 return false;
39650 /* FALLTHRU */
39651
39652 case V2DFmode:
39653 case V2DImode:
39654 if (one_var != 0)
39655 return false;
39656 var = force_reg (GET_MODE_INNER (mode), var);
39657 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
39658 emit_insn (gen_rtx_SET (VOIDmode, target, x));
39659 return true;
39660
39661 case V4SFmode:
39662 case V4SImode:
39663 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
39664 new_target = gen_reg_rtx (mode);
39665 else
39666 new_target = target;
39667 var = force_reg (GET_MODE_INNER (mode), var);
39668 x = gen_rtx_VEC_DUPLICATE (mode, var);
39669 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
39670 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
39671 if (one_var != 0)
39672 {
39673 /* We need to shuffle the value to the correct position, so
39674 create a new pseudo to store the intermediate result. */
39675
39676 /* With SSE2, we can use the integer shuffle insns. */
39677 if (mode != V4SFmode && TARGET_SSE2)
39678 {
39679 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
39680 const1_rtx,
39681 GEN_INT (one_var == 1 ? 0 : 1),
39682 GEN_INT (one_var == 2 ? 0 : 1),
39683 GEN_INT (one_var == 3 ? 0 : 1)));
39684 if (target != new_target)
39685 emit_move_insn (target, new_target);
39686 return true;
39687 }
39688
39689 /* Otherwise convert the intermediate result to V4SFmode and
39690 use the SSE1 shuffle instructions. */
39691 if (mode != V4SFmode)
39692 {
39693 tmp = gen_reg_rtx (V4SFmode);
39694 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
39695 }
39696 else
39697 tmp = new_target;
39698
39699 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
39700 const1_rtx,
39701 GEN_INT (one_var == 1 ? 0 : 1),
39702 GEN_INT (one_var == 2 ? 0+4 : 1+4),
39703 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
39704
39705 if (mode != V4SFmode)
39706 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
39707 else if (tmp != target)
39708 emit_move_insn (target, tmp);
39709 }
39710 else if (target != new_target)
39711 emit_move_insn (target, new_target);
39712 return true;
39713
39714 case V8HImode:
39715 case V16QImode:
39716 vsimode = V4SImode;
39717 goto widen;
39718 case V4HImode:
39719 case V8QImode:
39720 if (!mmx_ok)
39721 return false;
39722 vsimode = V2SImode;
39723 goto widen;
39724 widen:
39725 if (one_var != 0)
39726 return false;
39727
39728 /* Zero extend the variable element to SImode and recurse. */
39729 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
39730
39731 x = gen_reg_rtx (vsimode);
39732 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
39733 var, one_var))
39734 gcc_unreachable ();
39735
39736 emit_move_insn (target, gen_lowpart (mode, x));
39737 return true;
39738
39739 default:
39740 return false;
39741 }
39742 }
39743
39744 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
39745 consisting of the values in VALS. It is known that all elements
39746 except ONE_VAR are constants. Return true if successful. */
39747
39748 static bool
39749 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
39750 rtx target, rtx vals, int one_var)
39751 {
39752 rtx var = XVECEXP (vals, 0, one_var);
39753 enum machine_mode wmode;
39754 rtx const_vec, x;
39755
39756 const_vec = copy_rtx (vals);
39757 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
39758 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
39759
39760 switch (mode)
39761 {
39762 case V2DFmode:
39763 case V2DImode:
39764 case V2SFmode:
39765 case V2SImode:
39766 /* For the two element vectors, it's just as easy to use
39767 the general case. */
39768 return false;
39769
39770 case V4DImode:
39771 /* Use ix86_expand_vector_set in 64bit mode only. */
39772 if (!TARGET_64BIT)
39773 return false;
39774 case V4DFmode:
39775 case V8SFmode:
39776 case V8SImode:
39777 case V16HImode:
39778 case V32QImode:
39779 case V4SFmode:
39780 case V4SImode:
39781 case V8HImode:
39782 case V4HImode:
39783 break;
39784
39785 case V16QImode:
39786 if (TARGET_SSE4_1)
39787 break;
39788 wmode = V8HImode;
39789 goto widen;
39790 case V8QImode:
39791 wmode = V4HImode;
39792 goto widen;
39793 widen:
39794 /* There's no way to set one QImode entry easily. Combine
39795 the variable value with its adjacent constant value, and
39796 promote to an HImode set. */
39797 x = XVECEXP (vals, 0, one_var ^ 1);
39798 if (one_var & 1)
39799 {
39800 var = convert_modes (HImode, QImode, var, true);
39801 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
39802 NULL_RTX, 1, OPTAB_LIB_WIDEN);
39803 x = GEN_INT (INTVAL (x) & 0xff);
39804 }
39805 else
39806 {
39807 var = convert_modes (HImode, QImode, var, true);
39808 x = gen_int_mode (INTVAL (x) << 8, HImode);
39809 }
39810 if (x != const0_rtx)
39811 var = expand_simple_binop (HImode, IOR, var, x, var,
39812 1, OPTAB_LIB_WIDEN);
39813
39814 x = gen_reg_rtx (wmode);
39815 emit_move_insn (x, gen_lowpart (wmode, const_vec));
39816 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
39817
39818 emit_move_insn (target, gen_lowpart (mode, x));
39819 return true;
39820
39821 default:
39822 return false;
39823 }
39824
39825 emit_move_insn (target, const_vec);
39826 ix86_expand_vector_set (mmx_ok, target, var, one_var);
39827 return true;
39828 }
39829
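/* Editor's note: an illustrative scalar model of the QImode "widen" path
   above, not part of the compiler.  Since no insn can set a single QImode
   lane here, the variable byte is folded together with its adjacent
   constant byte into one HImode value, and ix86_expand_vector_set then
   stores that value into lane one_var >> 1.  On this little-endian target
   (element 0 in the low byte) the packed value is:

     unsigned short
     pack_byte_pair (unsigned char var, unsigned char neighbour, int one_var)
     {
       if (one_var & 1)
         return (unsigned short) ((var << 8) | neighbour);  /* var is high */
       else
         return (unsigned short) ((neighbour << 8) | var);  /* var is low  */
     }

   pack_byte_pair is a hypothetical name used only for this sketch.  */
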
39830 /* A subroutine of ix86_expand_vector_init_general. Use vector
39831 concatenate to handle the most general case: all values variable,
39832 and none identical. */
39833
39834 static void
39835 ix86_expand_vector_init_concat (enum machine_mode mode,
39836 rtx target, rtx *ops, int n)
39837 {
39838 enum machine_mode cmode, hmode = VOIDmode, gmode = VOIDmode;
39839 rtx first[16], second[8], third[4];
39840 rtvec v;
39841 int i, j;
39842
39843 switch (n)
39844 {
39845 case 2:
39846 switch (mode)
39847 {
39848 case V16SImode:
39849 cmode = V8SImode;
39850 break;
39851 case V16SFmode:
39852 cmode = V8SFmode;
39853 break;
39854 case V8DImode:
39855 cmode = V4DImode;
39856 break;
39857 case V8DFmode:
39858 cmode = V4DFmode;
39859 break;
39860 case V8SImode:
39861 cmode = V4SImode;
39862 break;
39863 case V8SFmode:
39864 cmode = V4SFmode;
39865 break;
39866 case V4DImode:
39867 cmode = V2DImode;
39868 break;
39869 case V4DFmode:
39870 cmode = V2DFmode;
39871 break;
39872 case V4SImode:
39873 cmode = V2SImode;
39874 break;
39875 case V4SFmode:
39876 cmode = V2SFmode;
39877 break;
39878 case V2DImode:
39879 cmode = DImode;
39880 break;
39881 case V2SImode:
39882 cmode = SImode;
39883 break;
39884 case V2DFmode:
39885 cmode = DFmode;
39886 break;
39887 case V2SFmode:
39888 cmode = SFmode;
39889 break;
39890 default:
39891 gcc_unreachable ();
39892 }
39893
39894 if (!register_operand (ops[1], cmode))
39895 ops[1] = force_reg (cmode, ops[1]);
39896 if (!register_operand (ops[0], cmode))
39897 ops[0] = force_reg (cmode, ops[0]);
39898 emit_insn (gen_rtx_SET (VOIDmode, target,
39899 gen_rtx_VEC_CONCAT (mode, ops[0],
39900 ops[1])));
39901 break;
39902
39903 case 4:
39904 switch (mode)
39905 {
39906 case V4DImode:
39907 cmode = V2DImode;
39908 break;
39909 case V4DFmode:
39910 cmode = V2DFmode;
39911 break;
39912 case V4SImode:
39913 cmode = V2SImode;
39914 break;
39915 case V4SFmode:
39916 cmode = V2SFmode;
39917 break;
39918 default:
39919 gcc_unreachable ();
39920 }
39921 goto half;
39922
39923 case 8:
39924 switch (mode)
39925 {
39926 case V8DImode:
39927 cmode = V2DImode;
39928 hmode = V4DImode;
39929 break;
39930 case V8DFmode:
39931 cmode = V2DFmode;
39932 hmode = V4DFmode;
39933 break;
39934 case V8SImode:
39935 cmode = V2SImode;
39936 hmode = V4SImode;
39937 break;
39938 case V8SFmode:
39939 cmode = V2SFmode;
39940 hmode = V4SFmode;
39941 break;
39942 default:
39943 gcc_unreachable ();
39944 }
39945 goto half;
39946
39947 case 16:
39948 switch (mode)
39949 {
39950 case V16SImode:
39951 cmode = V2SImode;
39952 hmode = V4SImode;
39953 gmode = V8SImode;
39954 break;
39955 case V16SFmode:
39956 cmode = V2SFmode;
39957 hmode = V4SFmode;
39958 gmode = V8SFmode;
39959 break;
39960 default:
39961 gcc_unreachable ();
39962 }
39963 goto half;
39964
39965 half:
39966 /* FIXME: We process inputs backward to help RA. PR 36222. */
39967 i = n - 1;
39968 j = (n >> 1) - 1;
39969 for (; i > 0; i -= 2, j--)
39970 {
39971 first[j] = gen_reg_rtx (cmode);
39972 v = gen_rtvec (2, ops[i - 1], ops[i]);
39973 ix86_expand_vector_init (false, first[j],
39974 gen_rtx_PARALLEL (cmode, v));
39975 }
39976
39977 n >>= 1;
39978 if (n > 4)
39979 {
39980 gcc_assert (hmode != VOIDmode);
39981 gcc_assert (gmode != VOIDmode);
39982 for (i = j = 0; i < n; i += 2, j++)
39983 {
39984 second[j] = gen_reg_rtx (hmode);
39985 ix86_expand_vector_init_concat (hmode, second [j],
39986 &first [i], 2);
39987 }
39988 n >>= 1;
39989 for (i = j = 0; i < n; i += 2, j++)
39990 {
39991 third[j] = gen_reg_rtx (gmode);
39992 ix86_expand_vector_init_concat (gmode, third[j],
39993 &second[i], 2);
39994 }
39995 n >>= 1;
39996 ix86_expand_vector_init_concat (mode, target, third, n);
39997 }
39998 else if (n > 2)
39999 {
40000 gcc_assert (hmode != VOIDmode);
40001 for (i = j = 0; i < n; i += 2, j++)
40002 {
40003 second[j] = gen_reg_rtx (hmode);
40004 ix86_expand_vector_init_concat (hmode, second [j],
40005 &first [i], 2);
40006 }
40007 n >>= 1;
40008 ix86_expand_vector_init_concat (mode, target, second, n);
40009 }
40010 else
40011 ix86_expand_vector_init_concat (mode, target, first, n);
40012 break;
40013
40014 default:
40015 gcc_unreachable ();
40016 }
40017 }
40018
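/* Editor's note: an illustrative sketch of the recursion above, not part of
   the compiler.  The routine pairs neighbouring scalars into half-width
   vectors and keeps concatenating halves until one full-width vector
   remains.  For a V8SFmode build from eight scalars a0..a7 (the n == 8
   case) the VEC_CONCAT levels are, schematically:

     {a0,a1} {a2,a3} {a4,a5} {a6,a7}    -> four  2-element halves (first[])
     {a0..a3}        {a4..a7}           -> two   4-element halves (second[])
     {a0..a7}                           -> the final target vector

   The first level is built from the last pair backwards, per the FIXME
   about PR 36222 above; instruction selection for each level is left to
   the recursive ix86_expand_vector_init calls.  */
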
40019 /* A subroutine of ix86_expand_vector_init_general. Use vector
40020 interleave to handle the most general case: all values variable,
40021 and none identical. */
40022
40023 static void
40024 ix86_expand_vector_init_interleave (enum machine_mode mode,
40025 rtx target, rtx *ops, int n)
40026 {
40027 enum machine_mode first_imode, second_imode, third_imode, inner_mode;
40028 int i, j;
40029 rtx op0, op1;
40030 rtx (*gen_load_even) (rtx, rtx, rtx);
40031 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
40032 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
40033
40034 switch (mode)
40035 {
40036 case V8HImode:
40037 gen_load_even = gen_vec_setv8hi;
40038 gen_interleave_first_low = gen_vec_interleave_lowv4si;
40039 gen_interleave_second_low = gen_vec_interleave_lowv2di;
40040 inner_mode = HImode;
40041 first_imode = V4SImode;
40042 second_imode = V2DImode;
40043 third_imode = VOIDmode;
40044 break;
40045 case V16QImode:
40046 gen_load_even = gen_vec_setv16qi;
40047 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
40048 gen_interleave_second_low = gen_vec_interleave_lowv4si;
40049 inner_mode = QImode;
40050 first_imode = V8HImode;
40051 second_imode = V4SImode;
40052 third_imode = V2DImode;
40053 break;
40054 default:
40055 gcc_unreachable ();
40056 }
40057
40058 for (i = 0; i < n; i++)
40059 {
40060 /* Extend the odd element to SImode using a paradoxical SUBREG. */
40061 op0 = gen_reg_rtx (SImode);
40062 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
40063
40064 /* Insert the SImode value as low element of V4SImode vector. */
40065 op1 = gen_reg_rtx (V4SImode);
40066 op0 = gen_rtx_VEC_MERGE (V4SImode,
40067 gen_rtx_VEC_DUPLICATE (V4SImode,
40068 op0),
40069 CONST0_RTX (V4SImode),
40070 const1_rtx);
40071 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
40072
40073 /* Cast the V4SImode vector back to a vector in the original mode. */
40074 op0 = gen_reg_rtx (mode);
40075 emit_move_insn (op0, gen_lowpart (mode, op1));
40076
40077 /* Load even elements into the second position. */
40078 emit_insn (gen_load_even (op0,
40079 force_reg (inner_mode,
40080 ops [i + i + 1]),
40081 const1_rtx));
40082
40083 /* Cast vector to FIRST_IMODE vector. */
40084 ops[i] = gen_reg_rtx (first_imode);
40085 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
40086 }
40087
40088 /* Interleave low FIRST_IMODE vectors. */
40089 for (i = j = 0; i < n; i += 2, j++)
40090 {
40091 op0 = gen_reg_rtx (first_imode);
40092 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
40093
40094 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
40095 ops[j] = gen_reg_rtx (second_imode);
40096 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
40097 }
40098
40099 /* Interleave low SECOND_IMODE vectors. */
40100 switch (second_imode)
40101 {
40102 case V4SImode:
40103 for (i = j = 0; i < n / 2; i += 2, j++)
40104 {
40105 op0 = gen_reg_rtx (second_imode);
40106 emit_insn (gen_interleave_second_low (op0, ops[i],
40107 ops[i + 1]));
40108
40109 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
40110 vector. */
40111 ops[j] = gen_reg_rtx (third_imode);
40112 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
40113 }
40114 second_imode = V2DImode;
40115 gen_interleave_second_low = gen_vec_interleave_lowv2di;
40116 /* FALLTHRU */
40117
40118 case V2DImode:
40119 op0 = gen_reg_rtx (second_imode);
40120 emit_insn (gen_interleave_second_low (op0, ops[0],
40121 ops[1]));
40122
40123 /* Cast the SECOND_IMODE vector back to a vector in the original
40124 mode. */
40125 emit_insn (gen_rtx_SET (VOIDmode, target,
40126 gen_lowpart (mode, op0)));
40127 break;
40128
40129 default:
40130 gcc_unreachable ();
40131 }
40132 }
40133
40134 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
40135 all values variable, and none identical. */
40136
40137 static void
40138 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
40139 rtx target, rtx vals)
40140 {
40141 rtx ops[64], op0, op1;
40142 enum machine_mode half_mode = VOIDmode;
40143 int n, i;
40144
40145 switch (mode)
40146 {
40147 case V2SFmode:
40148 case V2SImode:
40149 if (!mmx_ok && !TARGET_SSE)
40150 break;
40151 /* FALLTHRU */
40152
40153 case V16SImode:
40154 case V16SFmode:
40155 case V8DFmode:
40156 case V8DImode:
40157 case V8SFmode:
40158 case V8SImode:
40159 case V4DFmode:
40160 case V4DImode:
40161 case V4SFmode:
40162 case V4SImode:
40163 case V2DFmode:
40164 case V2DImode:
40165 n = GET_MODE_NUNITS (mode);
40166 for (i = 0; i < n; i++)
40167 ops[i] = XVECEXP (vals, 0, i);
40168 ix86_expand_vector_init_concat (mode, target, ops, n);
40169 return;
40170
40171 case V32QImode:
40172 half_mode = V16QImode;
40173 goto half;
40174
40175 case V16HImode:
40176 half_mode = V8HImode;
40177 goto half;
40178
40179 half:
40180 n = GET_MODE_NUNITS (mode);
40181 for (i = 0; i < n; i++)
40182 ops[i] = XVECEXP (vals, 0, i);
40183 op0 = gen_reg_rtx (half_mode);
40184 op1 = gen_reg_rtx (half_mode);
40185 ix86_expand_vector_init_interleave (half_mode, op0, ops,
40186 n >> 2);
40187 ix86_expand_vector_init_interleave (half_mode, op1,
40188 &ops [n >> 1], n >> 2);
40189 emit_insn (gen_rtx_SET (VOIDmode, target,
40190 gen_rtx_VEC_CONCAT (mode, op0, op1)));
40191 return;
40192
40193 case V16QImode:
40194 if (!TARGET_SSE4_1)
40195 break;
40196 /* FALLTHRU */
40197
40198 case V8HImode:
40199 if (!TARGET_SSE2)
40200 break;
40201
40202 /* Don't use ix86_expand_vector_init_interleave if we can't
40203 move from GPR to SSE register directly. */
40204 if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
40205 break;
40206
40207 n = GET_MODE_NUNITS (mode);
40208 for (i = 0; i < n; i++)
40209 ops[i] = XVECEXP (vals, 0, i);
40210 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
40211 return;
40212
40213 case V4HImode:
40214 case V8QImode:
40215 break;
40216
40217 default:
40218 gcc_unreachable ();
40219 }
40220
40221 {
40222 int i, j, n_elts, n_words, n_elt_per_word;
40223 enum machine_mode inner_mode;
40224 rtx words[4], shift;
40225
40226 inner_mode = GET_MODE_INNER (mode);
40227 n_elts = GET_MODE_NUNITS (mode);
40228 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
40229 n_elt_per_word = n_elts / n_words;
40230 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
40231
40232 for (i = 0; i < n_words; ++i)
40233 {
40234 rtx word = NULL_RTX;
40235
40236 for (j = 0; j < n_elt_per_word; ++j)
40237 {
40238 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
40239 elt = convert_modes (word_mode, inner_mode, elt, true);
40240
40241 if (j == 0)
40242 word = elt;
40243 else
40244 {
40245 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
40246 word, 1, OPTAB_LIB_WIDEN);
40247 word = expand_simple_binop (word_mode, IOR, word, elt,
40248 word, 1, OPTAB_LIB_WIDEN);
40249 }
40250 }
40251
40252 words[i] = word;
40253 }
40254
40255 if (n_words == 1)
40256 emit_move_insn (target, gen_lowpart (mode, words[0]));
40257 else if (n_words == 2)
40258 {
40259 rtx tmp = gen_reg_rtx (mode);
40260 emit_clobber (tmp);
40261 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
40262 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
40263 emit_move_insn (target, tmp);
40264 }
40265 else if (n_words == 4)
40266 {
40267 rtx tmp = gen_reg_rtx (V4SImode);
40268 gcc_assert (word_mode == SImode);
40269 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
40270 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
40271 emit_move_insn (target, gen_lowpart (mode, tmp));
40272 }
40273 else
40274 gcc_unreachable ();
40275 }
40276 }
40277
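/* Editor's note: an illustrative scalar model of the word-packing fallback
   at the end of ix86_expand_vector_init_general, not part of the compiler.
   Each UNITS_PER_WORD chunk of the vector is assembled in an integer
   register, starting from the highest-indexed element of the chunk and
   shifting it left as the lower elements are IORed in, so the lowest lane
   ends up in the low bits (little endian).  For two HImode lanes in a
   32-bit word this is roughly:

     unsigned int
     pack_two_hi (unsigned short e0, unsigned short e1)
     {
       unsigned int word = e1;           /* highest element of the chunk */
       word = (word << 16) | e0;         /* shift, then IOR the next one */
       return word;                      /* e0 lands in the low half     */
     }

   pack_two_hi is a hypothetical name used only for this sketch.  */
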
40278 /* Initialize vector TARGET via VALS. Suppress the use of MMX
40279 instructions unless MMX_OK is true. */
40280
40281 void
40282 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
40283 {
40284 enum machine_mode mode = GET_MODE (target);
40285 enum machine_mode inner_mode = GET_MODE_INNER (mode);
40286 int n_elts = GET_MODE_NUNITS (mode);
40287 int n_var = 0, one_var = -1;
40288 bool all_same = true, all_const_zero = true;
40289 int i;
40290 rtx x;
40291
40292 for (i = 0; i < n_elts; ++i)
40293 {
40294 x = XVECEXP (vals, 0, i);
40295 if (!(CONST_INT_P (x)
40296 || GET_CODE (x) == CONST_DOUBLE
40297 || GET_CODE (x) == CONST_FIXED))
40298 n_var++, one_var = i;
40299 else if (x != CONST0_RTX (inner_mode))
40300 all_const_zero = false;
40301 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
40302 all_same = false;
40303 }
40304
40305 /* Constants are best loaded from the constant pool. */
40306 if (n_var == 0)
40307 {
40308 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
40309 return;
40310 }
40311
40312 /* If all values are identical, broadcast the value. */
40313 if (all_same
40314 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
40315 XVECEXP (vals, 0, 0)))
40316 return;
40317
40318 /* Values where only one field is non-constant are best loaded from
40319 the pool and overwritten via move later. */
40320 if (n_var == 1)
40321 {
40322 if (all_const_zero
40323 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
40324 XVECEXP (vals, 0, one_var),
40325 one_var))
40326 return;
40327
40328 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
40329 return;
40330 }
40331
40332 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
40333 }
40334
40335 void
40336 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
40337 {
40338 enum machine_mode mode = GET_MODE (target);
40339 enum machine_mode inner_mode = GET_MODE_INNER (mode);
40340 enum machine_mode half_mode;
40341 bool use_vec_merge = false;
40342 rtx tmp;
40343 static rtx (*gen_extract[6][2]) (rtx, rtx)
40344 = {
40345 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
40346 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
40347 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
40348 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
40349 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
40350 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
40351 };
40352 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
40353 = {
40354 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
40355 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
40356 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
40357 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
40358 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
40359 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
40360 };
40361 int i, j, n;
40362
40363 switch (mode)
40364 {
40365 case V2SFmode:
40366 case V2SImode:
40367 if (mmx_ok)
40368 {
40369 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
40370 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
40371 if (elt == 0)
40372 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
40373 else
40374 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
40375 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40376 return;
40377 }
40378 break;
40379
40380 case V2DImode:
40381 use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
40382 if (use_vec_merge)
40383 break;
40384
40385 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
40386 ix86_expand_vector_extract (false, tmp, target, 1 - elt);
40387 if (elt == 0)
40388 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
40389 else
40390 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
40391 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40392 return;
40393
40394 case V2DFmode:
40395 {
40396 rtx op0, op1;
40397
40398 /* For the two element vectors, we implement a VEC_CONCAT with
40399 the extraction of the other element. */
40400
40401 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
40402 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
40403
40404 if (elt == 0)
40405 op0 = val, op1 = tmp;
40406 else
40407 op0 = tmp, op1 = val;
40408
40409 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
40410 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40411 }
40412 return;
40413
40414 case V4SFmode:
40415 use_vec_merge = TARGET_SSE4_1;
40416 if (use_vec_merge)
40417 break;
40418
40419 switch (elt)
40420 {
40421 case 0:
40422 use_vec_merge = true;
40423 break;
40424
40425 case 1:
40426 /* tmp = target = A B C D */
40427 tmp = copy_to_reg (target);
40428 /* target = A A B B */
40429 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
40430 /* target = X A B B */
40431 ix86_expand_vector_set (false, target, val, 0);
40432 /* target = A X C D */
40433 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
40434 const1_rtx, const0_rtx,
40435 GEN_INT (2+4), GEN_INT (3+4)));
40436 return;
40437
40438 case 2:
40439 /* tmp = target = A B C D */
40440 tmp = copy_to_reg (target);
40441 /* tmp = X B C D */
40442 ix86_expand_vector_set (false, tmp, val, 0);
40443 /* target = A B X D */
40444 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
40445 const0_rtx, const1_rtx,
40446 GEN_INT (0+4), GEN_INT (3+4)));
40447 return;
40448
40449 case 3:
40450 /* tmp = target = A B C D */
40451 tmp = copy_to_reg (target);
40452 /* tmp = X B C D */
40453 ix86_expand_vector_set (false, tmp, val, 0);
40454 /* target = A B C X */
40455 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
40456 const0_rtx, const1_rtx,
40457 GEN_INT (2+4), GEN_INT (0+4)));
40458 return;
40459
40460 default:
40461 gcc_unreachable ();
40462 }
40463 break;
40464
40465 case V4SImode:
40466 use_vec_merge = TARGET_SSE4_1;
40467 if (use_vec_merge)
40468 break;
40469
40470 /* Element 0 handled by vec_merge below. */
40471 if (elt == 0)
40472 {
40473 use_vec_merge = true;
40474 break;
40475 }
40476
40477 if (TARGET_SSE2)
40478 {
40479 /* With SSE2, use integer shuffles to swap element 0 and ELT,
40480 store into element 0, then shuffle them back. */
40481
40482 rtx order[4];
40483
40484 order[0] = GEN_INT (elt);
40485 order[1] = const1_rtx;
40486 order[2] = const2_rtx;
40487 order[3] = GEN_INT (3);
40488 order[elt] = const0_rtx;
40489
40490 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
40491 order[1], order[2], order[3]));
40492
40493 ix86_expand_vector_set (false, target, val, 0);
40494
40495 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
40496 order[1], order[2], order[3]));
40497 }
40498 else
40499 {
40500 /* For SSE1, we have to reuse the V4SF code. */
40501 rtx t = gen_reg_rtx (V4SFmode);
40502 ix86_expand_vector_set (false, t, gen_lowpart (SFmode, val), elt);
40503 emit_move_insn (target, gen_lowpart (mode, t));
40504 }
40505 return;
40506
40507 case V8HImode:
40508 use_vec_merge = TARGET_SSE2;
40509 break;
40510 case V4HImode:
40511 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
40512 break;
40513
40514 case V16QImode:
40515 use_vec_merge = TARGET_SSE4_1;
40516 break;
40517
40518 case V8QImode:
40519 break;
40520
40521 case V32QImode:
40522 half_mode = V16QImode;
40523 j = 0;
40524 n = 16;
40525 goto half;
40526
40527 case V16HImode:
40528 half_mode = V8HImode;
40529 j = 1;
40530 n = 8;
40531 goto half;
40532
40533 case V8SImode:
40534 half_mode = V4SImode;
40535 j = 2;
40536 n = 4;
40537 goto half;
40538
40539 case V4DImode:
40540 half_mode = V2DImode;
40541 j = 3;
40542 n = 2;
40543 goto half;
40544
40545 case V8SFmode:
40546 half_mode = V4SFmode;
40547 j = 4;
40548 n = 4;
40549 goto half;
40550
40551 case V4DFmode:
40552 half_mode = V2DFmode;
40553 j = 5;
40554 n = 2;
40555 goto half;
40556
40557 half:
40558 /* Compute offset. */
40559 i = elt / n;
40560 elt %= n;
40561
40562 gcc_assert (i <= 1);
40563
40564 /* Extract the half. */
40565 tmp = gen_reg_rtx (half_mode);
40566 emit_insn (gen_extract[j][i] (tmp, target));
40567
40568 /* Put val in tmp at elt. */
40569 ix86_expand_vector_set (false, tmp, val, elt);
40570
40571 /* Put it back. */
40572 emit_insn (gen_insert[j][i] (target, target, tmp));
40573 return;
40574
40575 default:
40576 break;
40577 }
40578
40579 if (use_vec_merge)
40580 {
40581 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
40582 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
40583 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40584 }
40585 else
40586 {
40587 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
40588
40589 emit_move_insn (mem, target);
40590
40591 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
40592 emit_move_insn (tmp, val);
40593
40594 emit_move_insn (target, mem);
40595 }
40596 }
40597
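/* Editor's note: an illustrative model of the memory fallback at the end of
   ix86_expand_vector_set, not part of the compiler.  When no vec_merge
   pattern or mode-specific shuffle applies, the element is spilled through
   a stack temporary: store the whole vector, rewrite the ELT'th inner-mode
   slot at byte offset elt * GET_MODE_SIZE (inner_mode), then reload.  In C
   terms, for a four-float vector:

     void
     set_lane_via_memory (float vec[4], int elt, float val)
     {
       float tmp[4];
       __builtin_memcpy (tmp, vec, sizeof tmp);   /* spill the vector   */
       tmp[elt] = val;                            /* overwrite one lane */
       __builtin_memcpy (vec, tmp, sizeof tmp);   /* reload it          */
     }

   set_lane_via_memory is a hypothetical name used only for this sketch.  */
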
40598 void
40599 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
40600 {
40601 enum machine_mode mode = GET_MODE (vec);
40602 enum machine_mode inner_mode = GET_MODE_INNER (mode);
40603 bool use_vec_extr = false;
40604 rtx tmp;
40605
40606 switch (mode)
40607 {
40608 case V2SImode:
40609 case V2SFmode:
40610 if (!mmx_ok)
40611 break;
40612 /* FALLTHRU */
40613
40614 case V2DFmode:
40615 case V2DImode:
40616 use_vec_extr = true;
40617 break;
40618
40619 case V4SFmode:
40620 use_vec_extr = TARGET_SSE4_1;
40621 if (use_vec_extr)
40622 break;
40623
40624 switch (elt)
40625 {
40626 case 0:
40627 tmp = vec;
40628 break;
40629
40630 case 1:
40631 case 3:
40632 tmp = gen_reg_rtx (mode);
40633 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
40634 GEN_INT (elt), GEN_INT (elt),
40635 GEN_INT (elt+4), GEN_INT (elt+4)));
40636 break;
40637
40638 case 2:
40639 tmp = gen_reg_rtx (mode);
40640 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
40641 break;
40642
40643 default:
40644 gcc_unreachable ();
40645 }
40646 vec = tmp;
40647 use_vec_extr = true;
40648 elt = 0;
40649 break;
40650
40651 case V4SImode:
40652 use_vec_extr = TARGET_SSE4_1;
40653 if (use_vec_extr)
40654 break;
40655
40656 if (TARGET_SSE2)
40657 {
40658 switch (elt)
40659 {
40660 case 0:
40661 tmp = vec;
40662 break;
40663
40664 case 1:
40665 case 3:
40666 tmp = gen_reg_rtx (mode);
40667 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
40668 GEN_INT (elt), GEN_INT (elt),
40669 GEN_INT (elt), GEN_INT (elt)));
40670 break;
40671
40672 case 2:
40673 tmp = gen_reg_rtx (mode);
40674 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
40675 break;
40676
40677 default:
40678 gcc_unreachable ();
40679 }
40680 vec = tmp;
40681 use_vec_extr = true;
40682 elt = 0;
40683 }
40684 else
40685 {
40686 /* For SSE1, we have to reuse the V4SF code. */
40687 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
40688 gen_lowpart (V4SFmode, vec), elt);
40689 return;
40690 }
40691 break;
40692
40693 case V8HImode:
40694 use_vec_extr = TARGET_SSE2;
40695 break;
40696 case V4HImode:
40697 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
40698 break;
40699
40700 case V16QImode:
40701 use_vec_extr = TARGET_SSE4_1;
40702 break;
40703
40704 case V8SFmode:
40705 if (TARGET_AVX)
40706 {
40707 tmp = gen_reg_rtx (V4SFmode);
40708 if (elt < 4)
40709 emit_insn (gen_vec_extract_lo_v8sf (tmp, vec));
40710 else
40711 emit_insn (gen_vec_extract_hi_v8sf (tmp, vec));
40712 ix86_expand_vector_extract (false, target, tmp, elt & 3);
40713 return;
40714 }
40715 break;
40716
40717 case V4DFmode:
40718 if (TARGET_AVX)
40719 {
40720 tmp = gen_reg_rtx (V2DFmode);
40721 if (elt < 2)
40722 emit_insn (gen_vec_extract_lo_v4df (tmp, vec));
40723 else
40724 emit_insn (gen_vec_extract_hi_v4df (tmp, vec));
40725 ix86_expand_vector_extract (false, target, tmp, elt & 1);
40726 return;
40727 }
40728 break;
40729
40730 case V32QImode:
40731 if (TARGET_AVX)
40732 {
40733 tmp = gen_reg_rtx (V16QImode);
40734 if (elt < 16)
40735 emit_insn (gen_vec_extract_lo_v32qi (tmp, vec));
40736 else
40737 emit_insn (gen_vec_extract_hi_v32qi (tmp, vec));
40738 ix86_expand_vector_extract (false, target, tmp, elt & 15);
40739 return;
40740 }
40741 break;
40742
40743 case V16HImode:
40744 if (TARGET_AVX)
40745 {
40746 tmp = gen_reg_rtx (V8HImode);
40747 if (elt < 8)
40748 emit_insn (gen_vec_extract_lo_v16hi (tmp, vec));
40749 else
40750 emit_insn (gen_vec_extract_hi_v16hi (tmp, vec));
40751 ix86_expand_vector_extract (false, target, tmp, elt & 7);
40752 return;
40753 }
40754 break;
40755
40756 case V8SImode:
40757 if (TARGET_AVX)
40758 {
40759 tmp = gen_reg_rtx (V4SImode);
40760 if (elt < 4)
40761 emit_insn (gen_vec_extract_lo_v8si (tmp, vec));
40762 else
40763 emit_insn (gen_vec_extract_hi_v8si (tmp, vec));
40764 ix86_expand_vector_extract (false, target, tmp, elt & 3);
40765 return;
40766 }
40767 break;
40768
40769 case V4DImode:
40770 if (TARGET_AVX)
40771 {
40772 tmp = gen_reg_rtx (V2DImode);
40773 if (elt < 2)
40774 emit_insn (gen_vec_extract_lo_v4di (tmp, vec));
40775 else
40776 emit_insn (gen_vec_extract_hi_v4di (tmp, vec));
40777 ix86_expand_vector_extract (false, target, tmp, elt & 1);
40778 return;
40779 }
40780 break;
40781
40782 case V16SFmode:
40783 tmp = gen_reg_rtx (V8SFmode);
40784 if (elt < 8)
40785 emit_insn (gen_vec_extract_lo_v16sf (tmp, vec));
40786 else
40787 emit_insn (gen_vec_extract_hi_v16sf (tmp, vec));
40788 ix86_expand_vector_extract (false, target, tmp, elt & 7);
40789 return;
40790
40791 case V8DFmode:
40792 tmp = gen_reg_rtx (V4DFmode);
40793 if (elt < 4)
40794 emit_insn (gen_vec_extract_lo_v8df (tmp, vec));
40795 else
40796 emit_insn (gen_vec_extract_hi_v8df (tmp, vec));
40797 ix86_expand_vector_extract (false, target, tmp, elt & 3);
40798 return;
40799
40800 case V16SImode:
40801 tmp = gen_reg_rtx (V8SImode);
40802 if (elt < 8)
40803 emit_insn (gen_vec_extract_lo_v16si (tmp, vec));
40804 else
40805 emit_insn (gen_vec_extract_hi_v16si (tmp, vec));
40806 ix86_expand_vector_extract (false, target, tmp, elt & 7);
40807 return;
40808
40809 case V8DImode:
40810 tmp = gen_reg_rtx (V4DImode);
40811 if (elt < 4)
40812 emit_insn (gen_vec_extract_lo_v8di (tmp, vec));
40813 else
40814 emit_insn (gen_vec_extract_hi_v8di (tmp, vec));
40815 ix86_expand_vector_extract (false, target, tmp, elt & 3);
40816 return;
40817
40818 case V8QImode:
40819 /* ??? Could extract the appropriate HImode element and shift. */
40820 default:
40821 break;
40822 }
40823
40824 if (use_vec_extr)
40825 {
40826 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
40827 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
40828
40829 /* Let the rtl optimizers know about the zero extension performed. */
40830 if (inner_mode == QImode || inner_mode == HImode)
40831 {
40832 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
40833 target = gen_lowpart (SImode, target);
40834 }
40835
40836 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40837 }
40838 else
40839 {
40840 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
40841
40842 emit_move_insn (mem, vec);
40843
40844 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
40845 emit_move_insn (target, tmp);
40846 }
40847 }
40848
40849 /* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
40850 to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
40851 The upper bits of DEST are undefined, though they shouldn't cause
40852 exceptions (some bits from src or all zeros are ok). */
40853
40854 static void
40855 emit_reduc_half (rtx dest, rtx src, int i)
40856 {
40857 rtx tem, d = dest;
40858 switch (GET_MODE (src))
40859 {
40860 case V4SFmode:
40861 if (i == 128)
40862 tem = gen_sse_movhlps (dest, src, src);
40863 else
40864 tem = gen_sse_shufps_v4sf (dest, src, src, const1_rtx, const1_rtx,
40865 GEN_INT (1 + 4), GEN_INT (1 + 4));
40866 break;
40867 case V2DFmode:
40868 tem = gen_vec_interleave_highv2df (dest, src, src);
40869 break;
40870 case V16QImode:
40871 case V8HImode:
40872 case V4SImode:
40873 case V2DImode:
40874 d = gen_reg_rtx (V1TImode);
40875 tem = gen_sse2_lshrv1ti3 (d, gen_lowpart (V1TImode, src),
40876 GEN_INT (i / 2));
40877 break;
40878 case V8SFmode:
40879 if (i == 256)
40880 tem = gen_avx_vperm2f128v8sf3 (dest, src, src, const1_rtx);
40881 else
40882 tem = gen_avx_shufps256 (dest, src, src,
40883 GEN_INT (i == 128 ? 2 + (3 << 2) : 1));
40884 break;
40885 case V4DFmode:
40886 if (i == 256)
40887 tem = gen_avx_vperm2f128v4df3 (dest, src, src, const1_rtx);
40888 else
40889 tem = gen_avx_shufpd256 (dest, src, src, const1_rtx);
40890 break;
40891 case V32QImode:
40892 case V16HImode:
40893 case V8SImode:
40894 case V4DImode:
40895 if (i == 256)
40896 {
40897 if (GET_MODE (dest) != V4DImode)
40898 d = gen_reg_rtx (V4DImode);
40899 tem = gen_avx2_permv2ti (d, gen_lowpart (V4DImode, src),
40900 gen_lowpart (V4DImode, src),
40901 const1_rtx);
40902 }
40903 else
40904 {
40905 d = gen_reg_rtx (V2TImode);
40906 tem = gen_avx2_lshrv2ti3 (d, gen_lowpart (V2TImode, src),
40907 GEN_INT (i / 2));
40908 }
40909 break;
40910 case V16SImode:
40911 case V16SFmode:
40912 case V8DImode:
40913 case V8DFmode:
40914 if (i > 128)
40915 tem = gen_avx512f_shuf_i32x4_1 (gen_lowpart (V16SImode, dest),
40916 gen_lowpart (V16SImode, src),
40917 gen_lowpart (V16SImode, src),
40918 GEN_INT (0x4 + (i == 512 ? 4 : 0)),
40919 GEN_INT (0x5 + (i == 512 ? 4 : 0)),
40920 GEN_INT (0x6 + (i == 512 ? 4 : 0)),
40921 GEN_INT (0x7 + (i == 512 ? 4 : 0)),
40922 GEN_INT (0xC), GEN_INT (0xD),
40923 GEN_INT (0xE), GEN_INT (0xF),
40924 GEN_INT (0x10), GEN_INT (0x11),
40925 GEN_INT (0x12), GEN_INT (0x13),
40926 GEN_INT (0x14), GEN_INT (0x15),
40927 GEN_INT (0x16), GEN_INT (0x17));
40928 else
40929 tem = gen_avx512f_pshufd_1 (gen_lowpart (V16SImode, dest),
40930 gen_lowpart (V16SImode, src),
40931 GEN_INT (i == 128 ? 0x2 : 0x1),
40932 GEN_INT (0x3),
40933 GEN_INT (0x3),
40934 GEN_INT (0x3),
40935 GEN_INT (i == 128 ? 0x6 : 0x5),
40936 GEN_INT (0x7),
40937 GEN_INT (0x7),
40938 GEN_INT (0x7),
40939 GEN_INT (i == 128 ? 0xA : 0x9),
40940 GEN_INT (0xB),
40941 GEN_INT (0xB),
40942 GEN_INT (0xB),
40943 GEN_INT (i == 128 ? 0xE : 0xD),
40944 GEN_INT (0xF),
40945 GEN_INT (0xF),
40946 GEN_INT (0xF));
40947 break;
40948 default:
40949 gcc_unreachable ();
40950 }
40951 emit_insn (tem);
40952 if (d != dest)
40953 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
40954 }
40955
40956 /* Expand a vector reduction. FN is the binary pattern to reduce;
40957 DEST is the destination; IN is the input vector. */
40958
40959 void
40960 ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
40961 {
40962 rtx half, dst, vec = in;
40963 enum machine_mode mode = GET_MODE (in);
40964 int i;
40965
40966 /* SSE4 has a special instruction for V8HImode UMIN reduction. */
40967 if (TARGET_SSE4_1
40968 && mode == V8HImode
40969 && fn == gen_uminv8hi3)
40970 {
40971 emit_insn (gen_sse4_1_phminposuw (dest, in));
40972 return;
40973 }
40974
40975 for (i = GET_MODE_BITSIZE (mode);
40976 i > GET_MODE_BITSIZE (GET_MODE_INNER (mode));
40977 i >>= 1)
40978 {
40979 half = gen_reg_rtx (mode);
40980 emit_reduc_half (half, vec, i);
40981 if (i == GET_MODE_BITSIZE (GET_MODE_INNER (mode)) * 2)
40982 dst = dest;
40983 else
40984 dst = gen_reg_rtx (mode);
40985 emit_insn (fn (dst, half, vec));
40986 vec = dst;
40987 }
40988 }
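
/* Editor's note: an illustrative scalar model of the reduction loop above,
   not part of the compiler.  Each emit_reduc_half call copies the upper
   half of the still-live lanes down to the low positions and FN combines
   the two halves, so log2(nelts) steps are needed and the result lands in
   lane 0 of DEST.  For a four-lane maximum (FN == smax):

     float
     reduce_max_v4 (const float v[4])
     {
       float a0, a1, b0;
       /* i == 128: fold lanes 2,3 onto lanes 0,1.  */
       a0 = v[0] > v[2] ? v[0] : v[2];
       a1 = v[1] > v[3] ? v[1] : v[3];
       /* i == 64: fold lane 1 onto lane 0; this step writes DEST.  */
       b0 = a0 > a1 ? a0 : a1;
       return b0;
     }

   reduce_max_v4 is a hypothetical name used only for this sketch.  */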
40989 \f
40990 /* Target hook for scalar_mode_supported_p. */
40991 static bool
40992 ix86_scalar_mode_supported_p (enum machine_mode mode)
40993 {
40994 if (DECIMAL_FLOAT_MODE_P (mode))
40995 return default_decimal_float_supported_p ();
40996 else if (mode == TFmode)
40997 return true;
40998 else
40999 return default_scalar_mode_supported_p (mode);
41000 }
41001
41002 /* Implements target hook vector_mode_supported_p. */
41003 static bool
41004 ix86_vector_mode_supported_p (enum machine_mode mode)
41005 {
41006 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
41007 return true;
41008 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
41009 return true;
41010 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
41011 return true;
41012 if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
41013 return true;
41014 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
41015 return true;
41016 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
41017 return true;
41018 return false;
41019 }
41020
41021 /* Target hook for c_mode_for_suffix. */
41022 static enum machine_mode
41023 ix86_c_mode_for_suffix (char suffix)
41024 {
41025 if (suffix == 'q')
41026 return TFmode;
41027 if (suffix == 'w')
41028 return XFmode;
41029
41030 return VOIDmode;
41031 }
41032
41033 /* Worker function for TARGET_MD_ASM_CLOBBERS.
41034
41035 We do this in the new i386 backend to maintain source compatibility
41036 with the old cc0-based compiler. */
41037
41038 static tree
41039 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
41040 tree inputs ATTRIBUTE_UNUSED,
41041 tree clobbers)
41042 {
41043 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
41044 clobbers);
41045 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
41046 clobbers);
41047 return clobbers;
41048 }
41049
41050 /* Implements target vector targetm.asm.encode_section_info. */
41051
41052 static void ATTRIBUTE_UNUSED
41053 ix86_encode_section_info (tree decl, rtx rtl, int first)
41054 {
41055 default_encode_section_info (decl, rtl, first);
41056
41057 if (TREE_CODE (decl) == VAR_DECL
41058 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
41059 && ix86_in_large_data_p (decl))
41060 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
41061 }
41062
41063 /* Worker function for REVERSE_CONDITION. */
41064
41065 enum rtx_code
41066 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
41067 {
41068 return (mode != CCFPmode && mode != CCFPUmode
41069 ? reverse_condition (code)
41070 : reverse_condition_maybe_unordered (code));
41071 }
41072
41073 /* Output code to perform an x87 FP register move, from OPERANDS[1]
41074 to OPERANDS[0]. */
41075
41076 const char *
41077 output_387_reg_move (rtx insn, rtx *operands)
41078 {
41079 if (REG_P (operands[0]))
41080 {
41081 if (REG_P (operands[1])
41082 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
41083 {
41084 if (REGNO (operands[0]) == FIRST_STACK_REG)
41085 return output_387_ffreep (operands, 0);
41086 return "fstp\t%y0";
41087 }
41088 if (STACK_TOP_P (operands[0]))
41089 return "fld%Z1\t%y1";
41090 return "fst\t%y0";
41091 }
41092 else if (MEM_P (operands[0]))
41093 {
41094 gcc_assert (REG_P (operands[1]));
41095 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
41096 return "fstp%Z0\t%y0";
41097 else
41098 {
41099 /* There is no non-popping store to memory for XFmode.
41100 So if we need one, follow the store with a load. */
41101 if (GET_MODE (operands[0]) == XFmode)
41102 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
41103 else
41104 return "fst%Z0\t%y0";
41105 }
41106 }
41107 else
41108 gcc_unreachable ();
41109 }
41110
41111 /* Output code to perform a conditional jump to LABEL, if C2 flag in
41112 FP status register is set. */
41113
41114 void
41115 ix86_emit_fp_unordered_jump (rtx label)
41116 {
41117 rtx reg = gen_reg_rtx (HImode);
41118 rtx temp;
41119
41120 emit_insn (gen_x86_fnstsw_1 (reg));
41121
41122 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
41123 {
41124 emit_insn (gen_x86_sahf_1 (reg));
41125
41126 temp = gen_rtx_REG (CCmode, FLAGS_REG);
41127 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
41128 }
41129 else
41130 {
41131 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
41132
41133 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
41134 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
41135 }
41136
41137 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
41138 gen_rtx_LABEL_REF (VOIDmode, label),
41139 pc_rtx);
41140 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
41141
41142 emit_jump_insn (temp);
41143 predict_jump (REG_BR_PROB_BASE * 10 / 100);
41144 }
41145
41146 /* Output code to perform a log1p XFmode calculation. */
41147
41148 void ix86_emit_i387_log1p (rtx op0, rtx op1)
41149 {
41150 rtx label1 = gen_label_rtx ();
41151 rtx label2 = gen_label_rtx ();
41152
41153 rtx tmp = gen_reg_rtx (XFmode);
41154 rtx tmp2 = gen_reg_rtx (XFmode);
41155 rtx test;
41156
41157 emit_insn (gen_absxf2 (tmp, op1));
41158 test = gen_rtx_GE (VOIDmode, tmp,
41159 CONST_DOUBLE_FROM_REAL_VALUE (
41160 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
41161 XFmode));
41162 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
41163
41164 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
41165 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
41166 emit_jump (label2);
41167
41168 emit_label (label1);
41169 emit_move_insn (tmp, CONST1_RTX (XFmode));
41170 emit_insn (gen_addxf3 (tmp, op1, tmp));
41171 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
41172 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
41173
41174 emit_label (label2);
41175 }
41176
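/* Editor's note: a worked explanation of the threshold constant above, not
   part of the compiler.  fyl2xp1 is only specified for |x| < 1 - sqrt(2)/2
   ~= 0.29289321881345..., so the code uses it for small |op1| and falls
   back to fyl2x on 1 + op1 otherwise.  In both branches y is loaded as
   ln(2) (fldln2), converting the base-2 logarithm into a natural one:

     log1p(x) = ln(2) * log2(1 + x)
              = fyl2xp1 (ln2, x)        when |x| <  1 - sqrt(2)/2
              = fyl2x   (ln2, 1 + x)    when |x| >= 1 - sqrt(2)/2
*/
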
41177 /* Emit code for round calculation. */
41178 void ix86_emit_i387_round (rtx op0, rtx op1)
41179 {
41180 enum machine_mode inmode = GET_MODE (op1);
41181 enum machine_mode outmode = GET_MODE (op0);
41182 rtx e1, e2, res, tmp, tmp1, half;
41183 rtx scratch = gen_reg_rtx (HImode);
41184 rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
41185 rtx jump_label = gen_label_rtx ();
41186 rtx insn;
41187 rtx (*gen_abs) (rtx, rtx);
41188 rtx (*gen_neg) (rtx, rtx);
41189
41190 switch (inmode)
41191 {
41192 case SFmode:
41193 gen_abs = gen_abssf2;
41194 break;
41195 case DFmode:
41196 gen_abs = gen_absdf2;
41197 break;
41198 case XFmode:
41199 gen_abs = gen_absxf2;
41200 break;
41201 default:
41202 gcc_unreachable ();
41203 }
41204
41205 switch (outmode)
41206 {
41207 case SFmode:
41208 gen_neg = gen_negsf2;
41209 break;
41210 case DFmode:
41211 gen_neg = gen_negdf2;
41212 break;
41213 case XFmode:
41214 gen_neg = gen_negxf2;
41215 break;
41216 case HImode:
41217 gen_neg = gen_neghi2;
41218 break;
41219 case SImode:
41220 gen_neg = gen_negsi2;
41221 break;
41222 case DImode:
41223 gen_neg = gen_negdi2;
41224 break;
41225 default:
41226 gcc_unreachable ();
41227 }
41228
41229 e1 = gen_reg_rtx (inmode);
41230 e2 = gen_reg_rtx (inmode);
41231 res = gen_reg_rtx (outmode);
41232
41233 half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, inmode);
41234
41235 /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
41236
41237 /* scratch = fxam(op1) */
41238 emit_insn (gen_rtx_SET (VOIDmode, scratch,
41239 gen_rtx_UNSPEC (HImode, gen_rtvec (1, op1),
41240 UNSPEC_FXAM)));
41241 /* e1 = fabs(op1) */
41242 emit_insn (gen_abs (e1, op1));
41243
41244 /* e2 = e1 + 0.5 */
41245 half = force_reg (inmode, half);
41246 emit_insn (gen_rtx_SET (VOIDmode, e2,
41247 gen_rtx_PLUS (inmode, e1, half)));
41248
41249 /* res = floor(e2) */
41250 if (inmode != XFmode)
41251 {
41252 tmp1 = gen_reg_rtx (XFmode);
41253
41254 emit_insn (gen_rtx_SET (VOIDmode, tmp1,
41255 gen_rtx_FLOAT_EXTEND (XFmode, e2)));
41256 }
41257 else
41258 tmp1 = e2;
41259
41260 switch (outmode)
41261 {
41262 case SFmode:
41263 case DFmode:
41264 {
41265 rtx tmp0 = gen_reg_rtx (XFmode);
41266
41267 emit_insn (gen_frndintxf2_floor (tmp0, tmp1));
41268
41269 emit_insn (gen_rtx_SET (VOIDmode, res,
41270 gen_rtx_UNSPEC (outmode, gen_rtvec (1, tmp0),
41271 UNSPEC_TRUNC_NOOP)));
41272 }
41273 break;
41274 case XFmode:
41275 emit_insn (gen_frndintxf2_floor (res, tmp1));
41276 break;
41277 case HImode:
41278 emit_insn (gen_lfloorxfhi2 (res, tmp1));
41279 break;
41280 case SImode:
41281 emit_insn (gen_lfloorxfsi2 (res, tmp1));
41282 break;
41283 case DImode:
41284 emit_insn (gen_lfloorxfdi2 (res, tmp1));
41285 break;
41286 default:
41287 gcc_unreachable ();
41288 }
41289
41290 /* flags = signbit(a) */
41291 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x02)));
41292
41293 /* if (flags) then res = -res */
41294 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
41295 gen_rtx_EQ (VOIDmode, flags, const0_rtx),
41296 gen_rtx_LABEL_REF (VOIDmode, jump_label),
41297 pc_rtx);
41298 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
41299 predict_jump (REG_BR_PROB_BASE * 50 / 100);
41300 JUMP_LABEL (insn) = jump_label;
41301
41302 emit_insn (gen_neg (res, res));
41303
41304 emit_label (jump_label);
41305 LABEL_NUSES (jump_label) = 1;
41306
41307 emit_move_insn (op0, res);
41308 }
41309
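/* Editor's note: an illustrative scalar model of the sequence above, not
   part of the compiler.  It implements the identity noted in the function,
   round(a) = sgn(a) * floor(fabs(a) + 0.5), taking the sign from the fxam
   C1 status bit so that negative inputs (including -0.0) are negated back
   after the floor:

     double
     i387_round_model (double a)
     {
       int negative = __builtin_signbit (a);            /* fxam C1 bit */
       double r = __builtin_floor (__builtin_fabs (a) + 0.5);
       return negative ? -r : r;           /* halfway cases go away from 0 */
     }

   i387_round_model is a hypothetical name used only for this sketch.  */
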
41310 /* Output code to perform a Newton-Raphson approximation of a single precision
41311 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
41312
41313 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
41314 {
41315 rtx x0, x1, e0, e1;
41316
41317 x0 = gen_reg_rtx (mode);
41318 e0 = gen_reg_rtx (mode);
41319 e1 = gen_reg_rtx (mode);
41320 x1 = gen_reg_rtx (mode);
41321
41322 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
41323
41324 b = force_reg (mode, b);
41325
41326 /* x0 = rcp(b) estimate */
41327 if (mode == V16SFmode || mode == V8DFmode)
41328 emit_insn (gen_rtx_SET (VOIDmode, x0,
41329 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
41330 UNSPEC_RCP14)));
41331 else
41332 emit_insn (gen_rtx_SET (VOIDmode, x0,
41333 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
41334 UNSPEC_RCP)));
41335
41336 /* e0 = x0 * b */
41337 emit_insn (gen_rtx_SET (VOIDmode, e0,
41338 gen_rtx_MULT (mode, x0, b)));
41339
41340 /* e0 = x0 * e0 */
41341 emit_insn (gen_rtx_SET (VOIDmode, e0,
41342 gen_rtx_MULT (mode, x0, e0)));
41343
41344 /* e1 = x0 + x0 */
41345 emit_insn (gen_rtx_SET (VOIDmode, e1,
41346 gen_rtx_PLUS (mode, x0, x0)));
41347
41348 /* x1 = e1 - e0 */
41349 emit_insn (gen_rtx_SET (VOIDmode, x1,
41350 gen_rtx_MINUS (mode, e1, e0)));
41351
41352 /* res = a * x1 */
41353 emit_insn (gen_rtx_SET (VOIDmode, res,
41354 gen_rtx_MULT (mode, a, x1)));
41355 }
41356
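/* Editor's note: an illustrative derivation of the sequence above, not part
   of the compiler.  One Newton-Raphson step for the reciprocal refines an
   estimate x0 ~= 1/b as

     x1 = x0 * (2 - b * x0) = (x0 + x0) - (b * x0 * x0)

   which is exactly the e1 - e0 computed above; the right-hand form keeps
   the two multiplies independent.  A scalar model, where approx_rcp is a
   hypothetical stand-in for the RCPSS/RCP14 estimate:

     float
     swdiv_model (float a, float b)
     {
       float x0 = approx_rcp (b);        /* ~12-bit reciprocal estimate */
       float e0 = x0 * (x0 * b);         /* b * x0 * x0                 */
       float e1 = x0 + x0;               /* 2 * x0                      */
       return a * (e1 - e0);             /* a * x0 * (2 - b * x0)       */
     }
*/
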
41357 /* Output code to perform a Newton-Raphson approximation of a
41358 single precision floating point [reciprocal] square root. */
41359
41360 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
41361 bool recip)
41362 {
41363 rtx x0, e0, e1, e2, e3, mthree, mhalf;
41364 REAL_VALUE_TYPE r;
41365 int unspec;
41366
41367 x0 = gen_reg_rtx (mode);
41368 e0 = gen_reg_rtx (mode);
41369 e1 = gen_reg_rtx (mode);
41370 e2 = gen_reg_rtx (mode);
41371 e3 = gen_reg_rtx (mode);
41372
41373 real_from_integer (&r, VOIDmode, -3, -1, 0);
41374 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
41375
41376 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
41377 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
41378 unspec = UNSPEC_RSQRT;
41379
41380 if (VECTOR_MODE_P (mode))
41381 {
41382 mthree = ix86_build_const_vector (mode, true, mthree);
41383 mhalf = ix86_build_const_vector (mode, true, mhalf);
41384 /* There is no 512-bit rsqrt. There is however rsqrt14. */
41385 if (GET_MODE_SIZE (mode) == 64)
41386 unspec = UNSPEC_RSQRT14;
41387 }
41388
41389 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
41390 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
41391
41392 a = force_reg (mode, a);
41393
41394 /* x0 = rsqrt(a) estimate */
41395 emit_insn (gen_rtx_SET (VOIDmode, x0,
41396 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
41397 unspec)));
41398
41399 /* If a == 0.0, filter out the infinite rsqrt estimate to prevent NaN for sqrt(0.0). */
41400 if (!recip)
41401 {
41402 rtx zero, mask;
41403
41404 zero = gen_reg_rtx (mode);
41405 mask = gen_reg_rtx (mode);
41406
41407 zero = force_reg (mode, CONST0_RTX(mode));
41408
41409 /* Handle masked compare. */
41410 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
41411 {
41412 mask = gen_reg_rtx (HImode);
41413 /* Imm value 0x4 corresponds to not-equal comparison. */
41414 emit_insn (gen_avx512f_cmpv16sf3 (mask, zero, a, GEN_INT (0x4)));
41415 emit_insn (gen_avx512f_blendmv16sf (x0, zero, x0, mask));
41416 }
41417 else
41418 {
41419 emit_insn (gen_rtx_SET (VOIDmode, mask,
41420 gen_rtx_NE (mode, zero, a)));
41421
41422 emit_insn (gen_rtx_SET (VOIDmode, x0,
41423 gen_rtx_AND (mode, x0, mask)));
41424 }
41425 }
41426
41427 /* e0 = x0 * a */
41428 emit_insn (gen_rtx_SET (VOIDmode, e0,
41429 gen_rtx_MULT (mode, x0, a)));
41430 /* e1 = e0 * x0 */
41431 emit_insn (gen_rtx_SET (VOIDmode, e1,
41432 gen_rtx_MULT (mode, e0, x0)));
41433
41434 /* e2 = e1 - 3. */
41435 mthree = force_reg (mode, mthree);
41436 emit_insn (gen_rtx_SET (VOIDmode, e2,
41437 gen_rtx_PLUS (mode, e1, mthree)));
41438
41439 mhalf = force_reg (mode, mhalf);
41440 if (recip)
41441 /* e3 = -.5 * x0 */
41442 emit_insn (gen_rtx_SET (VOIDmode, e3,
41443 gen_rtx_MULT (mode, x0, mhalf)));
41444 else
41445 /* e3 = -.5 * e0 */
41446 emit_insn (gen_rtx_SET (VOIDmode, e3,
41447 gen_rtx_MULT (mode, e0, mhalf)));
41448 /* ret = e2 * e3 */
41449 emit_insn (gen_rtx_SET (VOIDmode, res,
41450 gen_rtx_MULT (mode, e2, e3)));
41451 }
41452
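/* Editor's note: an illustrative derivation of the sequence above, not part
   of the compiler.  With x0 ~= 1/sqrt(a), one Newton-Raphson step gives

     rsqrt(a) ~= -0.5 *  x0      * (a * x0 * x0 - 3.0)
     sqrt(a)  ~= -0.5 * (a * x0) * (a * x0 * x0 - 3.0)

   matching e0 = a*x0, e1 = e0*x0, e2 = e1 - 3, e3 = -0.5 * (x0 or e0) and
   res = e2 * e3 above.  A scalar model, where approx_rsqrt is a
   hypothetical stand-in for the RSQRTSS/RSQRT14 estimate:

     float
     swsqrt_model (float a, int recip)
     {
       float x0 = approx_rsqrt (a);
       float e2 = a * x0 * x0 - 3.0f;
       float e3 = -0.5f * (recip ? x0 : a * x0);
       return e2 * e3;
     }
*/
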
41453 #ifdef TARGET_SOLARIS
41454 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
41455
41456 static void
41457 i386_solaris_elf_named_section (const char *name, unsigned int flags,
41458 tree decl)
41459 {
41460 /* With Binutils 2.15, the "@unwind" marker must be specified on
41461 every occurrence of the ".eh_frame" section, not just the first
41462 one. */
41463 if (TARGET_64BIT
41464 && strcmp (name, ".eh_frame") == 0)
41465 {
41466 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
41467 flags & SECTION_WRITE ? "aw" : "a");
41468 return;
41469 }
41470
41471 #ifndef USE_GAS
41472 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
41473 {
41474 solaris_elf_asm_comdat_section (name, flags, decl);
41475 return;
41476 }
41477 #endif
41478
41479 default_elf_asm_named_section (name, flags, decl);
41480 }
41481 #endif /* TARGET_SOLARIS */
41482
41483 /* Return the mangling of TYPE if it is an extended fundamental type. */
41484
41485 static const char *
41486 ix86_mangle_type (const_tree type)
41487 {
41488 type = TYPE_MAIN_VARIANT (type);
41489
41490 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
41491 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
41492 return NULL;
41493
41494 switch (TYPE_MODE (type))
41495 {
41496 case TFmode:
41497 /* __float128 is "g". */
41498 return "g";
41499 case XFmode:
41500 /* "long double" or __float80 is "e". */
41501 return "e";
41502 default:
41503 return NULL;
41504 }
41505 }
41506
41507 /* For 32-bit code we can save PIC register setup by using
41508 __stack_chk_fail_local hidden function instead of calling
41509 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
41510 register, so it is better to call __stack_chk_fail directly. */
41511
41512 static tree ATTRIBUTE_UNUSED
41513 ix86_stack_protect_fail (void)
41514 {
41515 return TARGET_64BIT
41516 ? default_external_stack_protect_fail ()
41517 : default_hidden_stack_protect_fail ();
41518 }
41519
41520 /* Select a format to encode pointers in exception handling data. CODE
41521 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
41522 true if the symbol may be affected by dynamic relocations.
41523
41524 ??? All x86 object file formats are capable of representing this.
41525 After all, the relocation needed is the same as for the call insn.
41526 Whether or not a particular assembler allows us to enter such, I
41527 guess we'll have to see. */
41528 int
41529 asm_preferred_eh_data_format (int code, int global)
41530 {
41531 if (flag_pic)
41532 {
41533 int type = DW_EH_PE_sdata8;
41534 if (!TARGET_64BIT
41535 || ix86_cmodel == CM_SMALL_PIC
41536 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
41537 type = DW_EH_PE_sdata4;
41538 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
41539 }
41540 if (ix86_cmodel == CM_SMALL
41541 || (ix86_cmodel == CM_MEDIUM && code))
41542 return DW_EH_PE_udata4;
41543 return DW_EH_PE_absptr;
41544 }
41545 \f
41546 /* Expand copysign from SIGN to the positive value ABS_VALUE
41547 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
41548 the sign-bit. */
41549 static void
41550 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
41551 {
41552 enum machine_mode mode = GET_MODE (sign);
41553 rtx sgn = gen_reg_rtx (mode);
41554 if (mask == NULL_RTX)
41555 {
41556 enum machine_mode vmode;
41557
41558 if (mode == SFmode)
41559 vmode = V4SFmode;
41560 else if (mode == DFmode)
41561 vmode = V2DFmode;
41562 else
41563 vmode = mode;
41564
41565 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
41566 if (!VECTOR_MODE_P (mode))
41567 {
41568 /* We need to generate a scalar mode mask in this case. */
41569 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
41570 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
41571 mask = gen_reg_rtx (mode);
41572 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
41573 }
41574 }
41575 else
41576 mask = gen_rtx_NOT (mode, mask);
41577 emit_insn (gen_rtx_SET (VOIDmode, sgn,
41578 gen_rtx_AND (mode, mask, sign)));
41579 emit_insn (gen_rtx_SET (VOIDmode, result,
41580 gen_rtx_IOR (mode, abs_value, sgn)));
41581 }
41582
41583 /* Expand fabs (OP0) and return a new rtx that holds the result. The
41584 mask for masking out the sign-bit is stored in *SMASK, if that is
41585 non-null. */
41586 static rtx
41587 ix86_expand_sse_fabs (rtx op0, rtx *smask)
41588 {
41589 enum machine_mode vmode, mode = GET_MODE (op0);
41590 rtx xa, mask;
41591
41592 xa = gen_reg_rtx (mode);
41593 if (mode == SFmode)
41594 vmode = V4SFmode;
41595 else if (mode == DFmode)
41596 vmode = V2DFmode;
41597 else
41598 vmode = mode;
41599 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
41600 if (!VECTOR_MODE_P (mode))
41601 {
41602 /* We need to generate a scalar mode mask in this case. */
41603 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
41604 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
41605 mask = gen_reg_rtx (mode);
41606 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
41607 }
41608 emit_insn (gen_rtx_SET (VOIDmode, xa,
41609 gen_rtx_AND (mode, op0, mask)));
41610
41611 if (smask)
41612 *smask = mask;
41613
41614 return xa;
41615 }
41616
41617 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
41618 swapping the operands if SWAP_OPERANDS is true. The expanded
41619 code is a forward jump to a newly created label in case the
41620 comparison is true. The generated label rtx is returned. */
41621 static rtx
41622 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
41623 bool swap_operands)
41624 {
41625 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
41626 rtx label, tmp;
41627
41628 if (swap_operands)
41629 {
41630 tmp = op0;
41631 op0 = op1;
41632 op1 = tmp;
41633 }
41634
41635 label = gen_label_rtx ();
41636 tmp = gen_rtx_REG (fpcmp_mode, FLAGS_REG);
41637 emit_insn (gen_rtx_SET (VOIDmode, tmp,
41638 gen_rtx_COMPARE (fpcmp_mode, op0, op1)));
41639 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
41640 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
41641 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
41642 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
41643 JUMP_LABEL (tmp) = label;
41644
41645 return label;
41646 }
41647
41648 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
41649 using comparison code CODE. Operands are swapped for the comparison if
41650 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
41651 static rtx
41652 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
41653 bool swap_operands)
41654 {
41655 rtx (*insn)(rtx, rtx, rtx, rtx);
41656 enum machine_mode mode = GET_MODE (op0);
41657 rtx mask = gen_reg_rtx (mode);
41658
41659 if (swap_operands)
41660 {
41661 rtx tmp = op0;
41662 op0 = op1;
41663 op1 = tmp;
41664 }
41665
41666 insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;
41667
41668 emit_insn (insn (mask, op0, op1,
41669 gen_rtx_fmt_ee (code, mode, op0, op1)));
41670 return mask;
41671 }
41672
41673 /* Generate and return a rtx of mode MODE for 2**n where n is the number
41674 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
41675 static rtx
41676 ix86_gen_TWO52 (enum machine_mode mode)
41677 {
41678 REAL_VALUE_TYPE TWO52r;
41679 rtx TWO52;
41680
41681 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
41682 TWO52 = const_double_from_real_value (TWO52r, mode);
41683 TWO52 = force_reg (mode, TWO52);
41684
41685 return TWO52;
41686 }
41687
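/* Editor's note: a worked example of the 2**52 constant, not part of the
   compiler.  A double has a 52-bit mantissa (a float has 23, hence the 23
   above), so for 0 <= x < 2**52 the addition x + 2**52 leaves no room for
   fraction bits and rounds x to an integer by itself; subtracting 2**52
   again recovers that integer.  With round-to-nearest:

     x = 3.7
     x + 2**52       =  4503599627370499.7  ->  rounds to 4503599627370500.0
     result - 2**52  =  4.0

   The callers below bail out first when !isless (fabs (x), 2**52), since
   such magnitudes are already integral, and apply the trick otherwise.  */
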
41688 /* Expand SSE sequence for computing lround from OP1 storing
41689 into OP0. */
41690 void
41691 ix86_expand_lround (rtx op0, rtx op1)
41692 {
41693 /* C code for the stuff we're doing below:
41694 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
41695 return (long)tmp;
41696 */
41697 enum machine_mode mode = GET_MODE (op1);
41698 const struct real_format *fmt;
41699 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
41700 rtx adj;
41701
41702 /* load nextafter (0.5, 0.0) */
41703 fmt = REAL_MODE_FORMAT (mode);
41704 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
41705 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
41706
41707 /* adj = copysign (0.5, op1) */
41708 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
41709 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
41710
41711 /* adj = op1 + adj */
41712 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
41713
41714 /* op0 = (imode)adj */
41715 expand_fix (op0, adj, 0);
41716 }
41717
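/* Editor's note: an illustrative remark on the nextafter (0.5, 0.0)
   constant above, not part of the compiler.  Adding exactly 0.5 before the
   truncating expand_fix would mis-round the few inputs whose sum with 0.5
   rounds up to the next integer even though the input is below the halfway
   point.  In double precision:

     x = 0.49999999999999994           (largest double below 0.5)
     x + 0.5  rounds to 1.0, so (long) (x + 0.5) would give 1, not 0.

   Using copysign (nextafter (0.5, 0.0), op1) -- pred(0.5) with op1's sign
   -- keeps such sums below the next integer while exact halfway cases
   still reach it, so the final truncation yields lround's
   round-half-away-from-zero behaviour.  */
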
41718 /* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1 storing
41719 into OPERAND0. */
41720 void
41721 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
41722 {
41723 /* C code for the stuff we're doing below (for do_floor):
41724 xi = (long)op1;
41725 xi -= (double)xi > op1 ? 1 : 0;
41726 return xi;
41727 */
41728 enum machine_mode fmode = GET_MODE (op1);
41729 enum machine_mode imode = GET_MODE (op0);
41730 rtx ireg, freg, label, tmp;
41731
41732 /* reg = (long)op1 */
41733 ireg = gen_reg_rtx (imode);
41734 expand_fix (ireg, op1, 0);
41735
41736 /* freg = (double)reg */
41737 freg = gen_reg_rtx (fmode);
41738 expand_float (freg, ireg, 0);
41739
41740 /* ireg = (freg > op1) ? ireg - 1 : ireg */
41741 label = ix86_expand_sse_compare_and_jump (UNLE,
41742 freg, op1, !do_floor);
41743 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
41744 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
41745 emit_move_insn (ireg, tmp);
41746
41747 emit_label (label);
41748 LABEL_NUSES (label) = 1;
41749
41750 emit_move_insn (op0, ireg);
41751 }
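
/* A worked example, for illustration only (hypothetical input values):
   with do_floor and op1 == -2.5, the conversion truncates towards zero,
   so ireg == -2 and freg == -2.0; since freg > op1 the jump is not taken
   and ireg is decremented to -3 == floor (-2.5).  For ceil the comparison
   operands are swapped and PLUS is used, e.g. op1 == 2.3 gives ireg == 2,
   freg == 2.0 < op1, so ireg becomes 3 == ceil (2.3).  */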
41752
41753 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
41754 result in OPERAND0. */
41755 void
41756 ix86_expand_rint (rtx operand0, rtx operand1)
41757 {
41758 /* C code for the stuff we're doing below:
41759 xa = fabs (operand1);
41760 if (!isless (xa, 2**52))
41761 return operand1;
41762 xa = xa + 2**52 - 2**52;
41763 return copysign (xa, operand1);
41764 */
41765 enum machine_mode mode = GET_MODE (operand0);
41766 rtx res, xa, label, TWO52, mask;
41767
41768 res = gen_reg_rtx (mode);
41769 emit_move_insn (res, operand1);
41770
41771 /* xa = abs (operand1) */
41772 xa = ix86_expand_sse_fabs (res, &mask);
41773
41774 /* if (!isless (xa, TWO52)) goto label; */
41775 TWO52 = ix86_gen_TWO52 (mode);
41776 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
41777
41778 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
41779 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
41780
41781 ix86_sse_copysign_to_positive (res, xa, res, mask);
41782
41783 emit_label (label);
41784 LABEL_NUSES (label) = 1;
41785
41786 emit_move_insn (operand0, res);
41787 }
41788
41789 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
41790 into OPERAND0. */
41791 void
41792 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
41793 {
41794 /* C code for the stuff we expand below.
41795 double xa = fabs (x), x2;
41796 if (!isless (xa, TWO52))
41797 return x;
41798 xa = xa + TWO52 - TWO52;
41799 x2 = copysign (xa, x);
41800 Compensate. Floor:
41801 if (x2 > x)
41802 x2 -= 1;
41803 Compensate. Ceil:
41804 if (x2 < x)
41805 x2 -= -1;
41806 return x2;
41807 */
41808 enum machine_mode mode = GET_MODE (operand0);
41809 rtx xa, TWO52, tmp, label, one, res, mask;
41810
41811 TWO52 = ix86_gen_TWO52 (mode);
41812
41813 /* Temporary for holding the result, initialized to the input
41814 operand to ease control flow. */
41815 res = gen_reg_rtx (mode);
41816 emit_move_insn (res, operand1);
41817
41818 /* xa = abs (operand1) */
41819 xa = ix86_expand_sse_fabs (res, &mask);
41820
41821 /* if (!isless (xa, TWO52)) goto label; */
41822 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
41823
41824 /* xa = xa + TWO52 - TWO52; */
41825 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
41826 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
41827
41828 /* xa = copysign (xa, operand1) */
41829 ix86_sse_copysign_to_positive (xa, xa, res, mask);
41830
41831 /* generate 1.0 or -1.0 */
41832 one = force_reg (mode,
41833 const_double_from_real_value (do_floor
41834 ? dconst1 : dconstm1, mode));
41835
41836 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
41837 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
41838 emit_insn (gen_rtx_SET (VOIDmode, tmp,
41839 gen_rtx_AND (mode, one, tmp)));
41840 /* We always need to subtract here to preserve signed zero. */
41841 tmp = expand_simple_binop (mode, MINUS,
41842 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
41843 emit_move_insn (res, tmp);
41844
41845 emit_label (label);
41846 LABEL_NUSES (label) = 1;
41847
41848 emit_move_insn (operand0, res);
41849 }
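
/* For illustration only (hypothetical input): the compare mask makes the
   compensation branch-free.  For floor of x == -2.5, xa == 2.5 becomes 2.0
   after the TWO52 round trip (ties to even) and copysign gives -2.0; the
   UNGT compare of -2.0 against -2.5 yields an all-ones mask, the AND with
   1.0 turns it into 1.0, and the final subtraction produces -3.0
   == floor (-2.5).  When the compare is false the mask is zero and nothing
   is subtracted.  */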
41850
41851 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
41852 into OPERAND0. */
41853 void
41854 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
41855 {
41856 /* C code for the stuff we expand below.
41857 double xa = fabs (x), x2;
41858 if (!isless (xa, TWO52))
41859 return x;
41860 x2 = (double)(long)x;
41861 Compensate. Floor:
41862 if (x2 > x)
41863 x2 -= 1;
41864 Compensate. Ceil:
41865 if (x2 < x)
41866 x2 += 1;
41867 if (HONOR_SIGNED_ZEROS (mode))
41868 return copysign (x2, x);
41869 return x2;
41870 */
41871 enum machine_mode mode = GET_MODE (operand0);
41872 rtx xa, xi, TWO52, tmp, label, one, res, mask;
41873
41874 TWO52 = ix86_gen_TWO52 (mode);
41875
41876 /* Temporary for holding the result, initialized to the input
41877 operand to ease control flow. */
41878 res = gen_reg_rtx (mode);
41879 emit_move_insn (res, operand1);
41880
41881 /* xa = abs (operand1) */
41882 xa = ix86_expand_sse_fabs (res, &mask);
41883
41884 /* if (!isless (xa, TWO52)) goto label; */
41885 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
41886
41887 /* xa = (double)(long)x */
41888 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
41889 expand_fix (xi, res, 0);
41890 expand_float (xa, xi, 0);
41891
41892 /* generate 1.0 */
41893 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
41894
41895 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
41896 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
41897 emit_insn (gen_rtx_SET (VOIDmode, tmp,
41898 gen_rtx_AND (mode, one, tmp)));
41899 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
41900 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
41901 emit_move_insn (res, tmp);
41902
41903 if (HONOR_SIGNED_ZEROS (mode))
41904 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
41905
41906 emit_label (label);
41907 LABEL_NUSES (label) = 1;
41908
41909 emit_move_insn (operand0, res);
41910 }
41911
41912 /* Expand SSE sequence for computing round from OPERAND1 storing
41913 into OPERAND0. This sequence works without relying on DImode truncation
41914 via cvttsd2siq, which is only available on 64-bit targets. */
41915 void
41916 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
41917 {
41918 /* C code for the stuff we expand below.
41919 double xa = fabs (x), xa2, dxa, x2;
41920 if (!isless (xa, TWO52))
41921 return x;
41922 Using the absolute value and copying back sign makes
41923 -0.0 -> -0.0 correct.
41924 xa2 = xa + TWO52 - TWO52;
41925 Compensate.
41926 dxa = xa2 - xa;
41927 if (dxa <= -0.5)
41928 xa2 += 1;
41929 else if (dxa > 0.5)
41930 xa2 -= 1;
41931 x2 = copysign (xa2, x);
41932 return x2;
41933 */
41934 enum machine_mode mode = GET_MODE (operand0);
41935 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
41936
41937 TWO52 = ix86_gen_TWO52 (mode);
41938
41939 /* Temporary for holding the result, initialized to the input
41940 operand to ease control flow. */
41941 res = gen_reg_rtx (mode);
41942 emit_move_insn (res, operand1);
41943
41944 /* xa = abs (operand1) */
41945 xa = ix86_expand_sse_fabs (res, &mask);
41946
41947 /* if (!isless (xa, TWO52)) goto label; */
41948 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
41949
41950 /* xa2 = xa + TWO52 - TWO52; */
41951 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
41952 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
41953
41954 /* dxa = xa2 - xa; */
41955 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
41956
41957 /* generate 0.5, 1.0 and -0.5 */
41958 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
41959 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
41960 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
41961 0, OPTAB_DIRECT);
41962
41963 /* Compensate. */
41964 tmp = gen_reg_rtx (mode);
41965 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
41966 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
41967 emit_insn (gen_rtx_SET (VOIDmode, tmp,
41968 gen_rtx_AND (mode, one, tmp)));
41969 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
41970 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
41971 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
41972 emit_insn (gen_rtx_SET (VOIDmode, tmp,
41973 gen_rtx_AND (mode, one, tmp)));
41974 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
41975
41976 /* res = copysign (xa2, operand1) */
41977 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
41978
41979 emit_label (label);
41980 LABEL_NUSES (label) = 1;
41981
41982 emit_move_insn (operand0, res);
41983 }
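
/* A numeric illustration (hypothetical inputs, not emitted code) of why the
   two thresholds differ: for x == 2.5 the TWO52 round trip gives xa2 == 2.0
   (ties to even), dxa == -0.5 satisfies dxa <= -0.5, so 1.0 is added and
   the result is 3.0, i.e. round-half-away-from-zero.  For x == 3.5, xa2 is
   already 4.0, dxa == 0.5 triggers neither test, and 4.0 is kept.  */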
41984
41985 /* Expand SSE sequence for computing trunc from OPERAND1 storing
41986 into OPERAND0. */
41987 void
41988 ix86_expand_trunc (rtx operand0, rtx operand1)
41989 {
41990 /* C code for SSE variant we expand below.
41991 double xa = fabs (x), x2;
41992 if (!isless (xa, TWO52))
41993 return x;
41994 x2 = (double)(long)x;
41995 if (HONOR_SIGNED_ZEROS (mode))
41996 return copysign (x2, x);
41997 return x2;
41998 */
41999 enum machine_mode mode = GET_MODE (operand0);
42000 rtx xa, xi, TWO52, label, res, mask;
42001
42002 TWO52 = ix86_gen_TWO52 (mode);
42003
42004 /* Temporary for holding the result, initialized to the input
42005 operand to ease control flow. */
42006 res = gen_reg_rtx (mode);
42007 emit_move_insn (res, operand1);
42008
42009 /* xa = abs (operand1) */
42010 xa = ix86_expand_sse_fabs (res, &mask);
42011
42012 /* if (!isless (xa, TWO52)) goto label; */
42013 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42014
42015 /* x = (double)(long)x */
42016 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
42017 expand_fix (xi, res, 0);
42018 expand_float (res, xi, 0);
42019
42020 if (HONOR_SIGNED_ZEROS (mode))
42021 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
42022
42023 emit_label (label);
42024 LABEL_NUSES (label) = 1;
42025
42026 emit_move_insn (operand0, res);
42027 }
42028
42029 /* Expand SSE sequence for computing trunc from OPERAND1 storing
42030 into OPERAND0. */
42031 void
42032 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
42033 {
42034 enum machine_mode mode = GET_MODE (operand0);
42035 rtx xa, mask, TWO52, label, one, res, smask, tmp;
42036
42037 /* C code for SSE variant we expand below.
42038 double xa = fabs (x), xa2, x2;
42039 if (!isless (xa, TWO52))
42040 return x;
42041 xa2 = xa + TWO52 - TWO52;
42042 Compensate:
42043 if (xa2 > xa)
42044 xa2 -= 1.0;
42045 x2 = copysign (xa2, x);
42046 return x2;
42047 */
42048
42049 TWO52 = ix86_gen_TWO52 (mode);
42050
42051 /* Temporary for holding the result, initialized to the input
42052 operand to ease control flow. */
42053 res = gen_reg_rtx (mode);
42054 emit_move_insn (res, operand1);
42055
42056 /* xa = abs (operand1) */
42057 xa = ix86_expand_sse_fabs (res, &smask);
42058
42059 /* if (!isless (xa, TWO52)) goto label; */
42060 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42061
42062 /* res = xa + TWO52 - TWO52; */
42063 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
42064 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
42065 emit_move_insn (res, tmp);
42066
42067 /* generate 1.0 */
42068 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
42069
42070 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
42071 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
42072 emit_insn (gen_rtx_SET (VOIDmode, mask,
42073 gen_rtx_AND (mode, mask, one)));
42074 tmp = expand_simple_binop (mode, MINUS,
42075 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
42076 emit_move_insn (res, tmp);
42077
42078 /* res = copysign (res, operand1) */
42079 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
42080
42081 emit_label (label);
42082 LABEL_NUSES (label) = 1;
42083
42084 emit_move_insn (operand0, res);
42085 }
42086
42087 /* Expand SSE sequence for computing round from OPERAND1 storing
42088 into OPERAND0. */
42089 void
42090 ix86_expand_round (rtx operand0, rtx operand1)
42091 {
42092 /* C code for the stuff we're doing below:
42093 double xa = fabs (x);
42094 if (!isless (xa, TWO52))
42095 return x;
42096 xa = (double)(long)(xa + nextafter (0.5, 0.0));
42097 return copysign (xa, x);
42098 */
42099 enum machine_mode mode = GET_MODE (operand0);
42100 rtx res, TWO52, xa, label, xi, half, mask;
42101 const struct real_format *fmt;
42102 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
42103
42104 /* Temporary for holding the result, initialized to the input
42105 operand to ease control flow. */
42106 res = gen_reg_rtx (mode);
42107 emit_move_insn (res, operand1);
42108
42109 TWO52 = ix86_gen_TWO52 (mode);
42110 xa = ix86_expand_sse_fabs (res, &mask);
42111 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42112
42113 /* load nextafter (0.5, 0.0) */
42114 fmt = REAL_MODE_FORMAT (mode);
42115 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
42116 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
42117
42118 /* xa = xa + 0.5 */
42119 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
42120 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
42121
42122 /* xa = (double)(int64_t)xa */
42123 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
42124 expand_fix (xi, xa, 0);
42125 expand_float (xa, xi, 0);
42126
42127 /* res = copysign (xa, operand1) */
42128 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
42129
42130 emit_label (label);
42131 LABEL_NUSES (label) = 1;
42132
42133 emit_move_insn (operand0, res);
42134 }
42135
42136 /* Expand SSE sequence for computing round
42137 from OP1 storing into OP0 using sse4 round insn. */
42138 void
42139 ix86_expand_round_sse4 (rtx op0, rtx op1)
42140 {
42141 enum machine_mode mode = GET_MODE (op0);
42142 rtx e1, e2, res, half;
42143 const struct real_format *fmt;
42144 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
42145 rtx (*gen_copysign) (rtx, rtx, rtx);
42146 rtx (*gen_round) (rtx, rtx, rtx);
42147
42148 switch (mode)
42149 {
42150 case SFmode:
42151 gen_copysign = gen_copysignsf3;
42152 gen_round = gen_sse4_1_roundsf2;
42153 break;
42154 case DFmode:
42155 gen_copysign = gen_copysigndf3;
42156 gen_round = gen_sse4_1_rounddf2;
42157 break;
42158 default:
42159 gcc_unreachable ();
42160 }
42161
42162 /* round (a) = trunc (a + copysign (0.5, a)) */
42163
42164 /* load nextafter (0.5, 0.0) */
42165 fmt = REAL_MODE_FORMAT (mode);
42166 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
42167 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
42168 half = const_double_from_real_value (pred_half, mode);
42169
42170 /* e1 = copysign (0.5, op1) */
42171 e1 = gen_reg_rtx (mode);
42172 emit_insn (gen_copysign (e1, half, op1));
42173
42174 /* e2 = op1 + e1 */
42175 e2 = expand_simple_binop (mode, PLUS, op1, e1, NULL_RTX, 0, OPTAB_DIRECT);
42176
42177 /* res = trunc (e2) */
42178 res = gen_reg_rtx (mode);
42179 emit_insn (gen_round (res, e2, GEN_INT (ROUND_TRUNC)));
42180
42181 emit_move_insn (op0, res);
42182 }
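
/* For illustration only (hypothetical values): the identity above handles
   the halfway cases regardless of the rounding mode, e.g.
   trunc (2.5 + 0.49999999999999994) == trunc (3.0) == 3.0 and
   trunc (-2.5 + -0.49999999999999994) == -3.0, matching round()'s
   round-half-away-from-zero behaviour, while using the predecessor of 0.5
   avoids bumping values just below a .5 boundary (see ix86_expand_lround
   above).  */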
42183 \f
42184
42185 /* Table of valid machine attributes. */
42186 static const struct attribute_spec ix86_attribute_table[] =
42187 {
42188 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
42189 affects_type_identity } */
42190 /* Stdcall attribute says callee is responsible for popping arguments
42191 if they are not variable. */
42192 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42193 true },
42194 /* Fastcall attribute says callee is responsible for popping arguments
42195 if they are not variable. */
42196 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42197 true },
42198 /* Thiscall attribute says callee is responsible for popping arguments
42199 if they are not variable. */
42200 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42201 true },
42202 /* Cdecl attribute says the callee is a normal C declaration */
42203 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42204 true },
42205 /* Regparm attribute specifies how many integer arguments are to be
42206 passed in registers. */
42207 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute,
42208 true },
42209 /* Sseregparm attribute says we are using x86_64 calling conventions
42210 for FP arguments. */
42211 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42212 true },
42213 /* The transactional memory builtins are implicitly regparm or fastcall
42214 depending on the ABI. Override the generic do-nothing attribute that
42215 these builtins were declared with. */
42216 { "*tm regparm", 0, 0, false, true, true, ix86_handle_tm_regparm_attribute,
42217 true },
42218 /* force_align_arg_pointer says this function realigns the stack at entry. */
42219 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
42220 false, true, true, ix86_handle_cconv_attribute, false },
42221 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
42222 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
42223 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
42224 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute,
42225 false },
42226 #endif
42227 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
42228 false },
42229 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
42230 false },
42231 #ifdef SUBTARGET_ATTRIBUTE_TABLE
42232 SUBTARGET_ATTRIBUTE_TABLE,
42233 #endif
42234 /* ms_abi and sysv_abi calling convention function attributes. */
42235 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
42236 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
42237 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
42238 false },
42239 { "callee_pop_aggregate_return", 1, 1, false, true, true,
42240 ix86_handle_callee_pop_aggregate_return, true },
42241 /* End element. */
42242 { NULL, 0, 0, false, false, false, NULL, false }
42243 };
42244
42245 /* Implement targetm.vectorize.builtin_vectorization_cost. */
42246 static int
42247 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
42248 tree vectype,
42249 int misalign ATTRIBUTE_UNUSED)
42250 {
42251 unsigned elements;
42252
42253 switch (type_of_cost)
42254 {
42255 case scalar_stmt:
42256 return ix86_cost->scalar_stmt_cost;
42257
42258 case scalar_load:
42259 return ix86_cost->scalar_load_cost;
42260
42261 case scalar_store:
42262 return ix86_cost->scalar_store_cost;
42263
42264 case vector_stmt:
42265 return ix86_cost->vec_stmt_cost;
42266
42267 case vector_load:
42268 return ix86_cost->vec_align_load_cost;
42269
42270 case vector_store:
42271 return ix86_cost->vec_store_cost;
42272
42273 case vec_to_scalar:
42274 return ix86_cost->vec_to_scalar_cost;
42275
42276 case scalar_to_vec:
42277 return ix86_cost->scalar_to_vec_cost;
42278
42279 case unaligned_load:
42280 case unaligned_store:
42281 return ix86_cost->vec_unalign_load_cost;
42282
42283 case cond_branch_taken:
42284 return ix86_cost->cond_taken_branch_cost;
42285
42286 case cond_branch_not_taken:
42287 return ix86_cost->cond_not_taken_branch_cost;
42288
42289 case vec_perm:
42290 case vec_promote_demote:
42291 return ix86_cost->vec_stmt_cost;
42292
42293 case vec_construct:
42294 elements = TYPE_VECTOR_SUBPARTS (vectype);
42295 return elements / 2 + 1;
42296
42297 default:
42298 gcc_unreachable ();
42299 }
42300 }
42301
42302 /* A cached (set (nil) (vselect (vconcat (nil) (nil)) (parallel [])))
42303 insn, so that expand_vselect{,_vconcat} doesn't have to create a fresh
42304 insn every time. */
42305
42306 static GTY(()) rtx vselect_insn;
42307
42308 /* Initialize vselect_insn. */
42309
42310 static void
42311 init_vselect_insn (void)
42312 {
42313 unsigned i;
42314 rtx x;
42315
42316 x = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (MAX_VECT_LEN));
42317 for (i = 0; i < MAX_VECT_LEN; ++i)
42318 XVECEXP (x, 0, i) = const0_rtx;
42319 x = gen_rtx_VEC_SELECT (V2DFmode, gen_rtx_VEC_CONCAT (V4DFmode, const0_rtx,
42320 const0_rtx), x);
42321 x = gen_rtx_SET (VOIDmode, const0_rtx, x);
42322 start_sequence ();
42323 vselect_insn = emit_insn (x);
42324 end_sequence ();
42325 }
42326
42327 /* Construct (set target (vec_select op0 (parallel perm))) and
42328 return true if that's a valid instruction in the active ISA. */
42329
42330 static bool
42331 expand_vselect (rtx target, rtx op0, const unsigned char *perm,
42332 unsigned nelt, bool testing_p)
42333 {
42334 unsigned int i;
42335 rtx x, save_vconcat;
42336 int icode;
42337
42338 if (vselect_insn == NULL_RTX)
42339 init_vselect_insn ();
42340
42341 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 1);
42342 PUT_NUM_ELEM (XVEC (x, 0), nelt);
42343 for (i = 0; i < nelt; ++i)
42344 XVECEXP (x, 0, i) = GEN_INT (perm[i]);
42345 save_vconcat = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
42346 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = op0;
42347 PUT_MODE (SET_SRC (PATTERN (vselect_insn)), GET_MODE (target));
42348 SET_DEST (PATTERN (vselect_insn)) = target;
42349 icode = recog_memoized (vselect_insn);
42350
42351 if (icode >= 0 && !testing_p)
42352 emit_insn (copy_rtx (PATTERN (vselect_insn)));
42353
42354 SET_DEST (PATTERN (vselect_insn)) = const0_rtx;
42355 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = save_vconcat;
42356 INSN_CODE (vselect_insn) = -1;
42357
42358 return icode >= 0;
42359 }
42360
42361 /* Similar, but generate a vec_concat from op0 and op1 as well. */
42362
42363 static bool
42364 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
42365 const unsigned char *perm, unsigned nelt,
42366 bool testing_p)
42367 {
42368 enum machine_mode v2mode;
42369 rtx x;
42370 bool ok;
42371
42372 if (vselect_insn == NULL_RTX)
42373 init_vselect_insn ();
42374
42375 v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0));
42376 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
42377 PUT_MODE (x, v2mode);
42378 XEXP (x, 0) = op0;
42379 XEXP (x, 1) = op1;
42380 ok = expand_vselect (target, x, perm, nelt, testing_p);
42381 XEXP (x, 0) = const0_rtx;
42382 XEXP (x, 1) = const0_rtx;
42383 return ok;
42384 }
42385
42386 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
42387 in terms of blendp[sd] / pblendw / pblendvb / vpblendd. */
42388
42389 static bool
42390 expand_vec_perm_blend (struct expand_vec_perm_d *d)
42391 {
42392 enum machine_mode vmode = d->vmode;
42393 unsigned i, mask, nelt = d->nelt;
42394 rtx target, op0, op1, x;
42395 rtx rperm[32], vperm;
42396
42397 if (d->one_operand_p)
42398 return false;
42399 if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
42400 ;
42401 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
42402 ;
42403 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
42404 ;
42405 else
42406 return false;
42407
42408 /* This is a blend, not a permute. Elements must stay in their
42409 respective lanes. */
42410 for (i = 0; i < nelt; ++i)
42411 {
42412 unsigned e = d->perm[i];
42413 if (!(e == i || e == i + nelt))
42414 return false;
42415 }
42416
42417 if (d->testing_p)
42418 return true;
42419
42420 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
42421 decision should be extracted elsewhere, so that we only try that
42422 sequence once all budget==3 options have been tried. */
42423 target = d->target;
42424 op0 = d->op0;
42425 op1 = d->op1;
42426 mask = 0;
42427
42428 switch (vmode)
42429 {
42430 case V4DFmode:
42431 case V8SFmode:
42432 case V2DFmode:
42433 case V4SFmode:
42434 case V8HImode:
42435 case V8SImode:
42436 for (i = 0; i < nelt; ++i)
42437 mask |= (d->perm[i] >= nelt) << i;
42438 break;
42439
42440 case V2DImode:
42441 for (i = 0; i < 2; ++i)
42442 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
42443 vmode = V8HImode;
42444 goto do_subreg;
42445
42446 case V4SImode:
42447 for (i = 0; i < 4; ++i)
42448 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
42449 vmode = V8HImode;
42450 goto do_subreg;
42451
42452 case V16QImode:
42453 /* See if bytes move in pairs so we can use pblendw with
42454 an immediate argument, rather than pblendvb with a vector
42455 argument. */
42456 for (i = 0; i < 16; i += 2)
42457 if (d->perm[i] + 1 != d->perm[i + 1])
42458 {
42459 use_pblendvb:
42460 for (i = 0; i < nelt; ++i)
42461 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
42462
42463 finish_pblendvb:
42464 vperm = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
42465 vperm = force_reg (vmode, vperm);
42466
42467 if (GET_MODE_SIZE (vmode) == 16)
42468 emit_insn (gen_sse4_1_pblendvb (target, op0, op1, vperm));
42469 else
42470 emit_insn (gen_avx2_pblendvb (target, op0, op1, vperm));
42471 if (target != d->target)
42472 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
42473 return true;
42474 }
42475
42476 for (i = 0; i < 8; ++i)
42477 mask |= (d->perm[i * 2] >= 16) << i;
42478 vmode = V8HImode;
42479 /* FALLTHRU */
42480
42481 do_subreg:
42482 target = gen_reg_rtx (vmode);
42483 op0 = gen_lowpart (vmode, op0);
42484 op1 = gen_lowpart (vmode, op1);
42485 break;
42486
42487 case V32QImode:
42488 /* See if bytes move in pairs. If not, vpblendvb must be used. */
42489 for (i = 0; i < 32; i += 2)
42490 if (d->perm[i] + 1 != d->perm[i + 1])
42491 goto use_pblendvb;
42492 /* See if bytes move in quadruplets. If yes, vpblendd
42493 with immediate can be used. */
42494 for (i = 0; i < 32; i += 4)
42495 if (d->perm[i] + 2 != d->perm[i + 2])
42496 break;
42497 if (i < 32)
42498 {
42499 /* See if bytes move the same in both lanes. If yes,
42500 vpblendw with immediate can be used. */
42501 for (i = 0; i < 16; i += 2)
42502 if (d->perm[i] + 16 != d->perm[i + 16])
42503 goto use_pblendvb;
42504
42505 /* Use vpblendw. */
42506 for (i = 0; i < 16; ++i)
42507 mask |= (d->perm[i * 2] >= 32) << i;
42508 vmode = V16HImode;
42509 goto do_subreg;
42510 }
42511
42512 /* Use vpblendd. */
42513 for (i = 0; i < 8; ++i)
42514 mask |= (d->perm[i * 4] >= 32) << i;
42515 vmode = V8SImode;
42516 goto do_subreg;
42517
42518 case V16HImode:
42519 /* See if words move in pairs. If yes, vpblendd can be used. */
42520 for (i = 0; i < 16; i += 2)
42521 if (d->perm[i] + 1 != d->perm[i + 1])
42522 break;
42523 if (i < 16)
42524 {
42525 /* See if words move the same in both lanes. If not,
42526 vpblendvb must be used. */
42527 for (i = 0; i < 8; i++)
42528 if (d->perm[i] + 8 != d->perm[i + 8])
42529 {
42530 /* Use vpblendvb. */
42531 for (i = 0; i < 32; ++i)
42532 rperm[i] = (d->perm[i / 2] < 16 ? const0_rtx : constm1_rtx);
42533
42534 vmode = V32QImode;
42535 nelt = 32;
42536 target = gen_reg_rtx (vmode);
42537 op0 = gen_lowpart (vmode, op0);
42538 op1 = gen_lowpart (vmode, op1);
42539 goto finish_pblendvb;
42540 }
42541
42542 /* Use vpblendw. */
42543 for (i = 0; i < 16; ++i)
42544 mask |= (d->perm[i] >= 16) << i;
42545 break;
42546 }
42547
42548 /* Use vpblendd. */
42549 for (i = 0; i < 8; ++i)
42550 mask |= (d->perm[i * 2] >= 16) << i;
42551 vmode = V8SImode;
42552 goto do_subreg;
42553
42554 case V4DImode:
42555 /* Use vpblendd. */
42556 for (i = 0; i < 4; ++i)
42557 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
42558 vmode = V8SImode;
42559 goto do_subreg;
42560
42561 default:
42562 gcc_unreachable ();
42563 }
42564
42565 /* This matches five different patterns with the different modes. */
42566 x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
42567 x = gen_rtx_SET (VOIDmode, target, x);
42568 emit_insn (x);
42569 if (target != d->target)
42570 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
42571
42572 return true;
42573 }
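
/* For illustration (the permutation below is a made-up example): a V4SImode
   blend such as { 0, 5, 2, 7 } takes elements 1 and 3 from op1.  Via the
   do_subreg path it is rewritten as a V8HImode blend, each SImode element
   expanding to two mask bits, so

	mask = (3 << 2) | (3 << 6) == 0xcc

   and a single pblendw with immediate 0xcc performs the blend.  */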
42574
42575 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
42576 in terms of the variable form of vpermilps.
42577
42578 Note that we will have already failed the immediate input vpermilps,
42579 which requires that the high and low part shuffle be identical; the
42580 variable form doesn't require that. */
42581
42582 static bool
42583 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
42584 {
42585 rtx rperm[8], vperm;
42586 unsigned i;
42587
42588 if (!TARGET_AVX || d->vmode != V8SFmode || !d->one_operand_p)
42589 return false;
42590
42591 /* We can only permute within the 128-bit lane. */
42592 for (i = 0; i < 8; ++i)
42593 {
42594 unsigned e = d->perm[i];
42595 if (i < 4 ? e >= 4 : e < 4)
42596 return false;
42597 }
42598
42599 if (d->testing_p)
42600 return true;
42601
42602 for (i = 0; i < 8; ++i)
42603 {
42604 unsigned e = d->perm[i];
42605
42606 /* Within each 128-bit lane, the elements of op0 are numbered
42607 from 0 and the elements of op1 are numbered from 4. */
42608 if (e >= 8 + 4)
42609 e -= 8;
42610 else if (e >= 4)
42611 e -= 4;
42612
42613 rperm[i] = GEN_INT (e);
42614 }
42615
42616 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
42617 vperm = force_reg (V8SImode, vperm);
42618 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
42619
42620 return true;
42621 }
42622
42623 /* Return true if permutation D can be performed as VMODE permutation
42624 instead. */
42625
42626 static bool
42627 valid_perm_using_mode_p (enum machine_mode vmode, struct expand_vec_perm_d *d)
42628 {
42629 unsigned int i, j, chunk;
42630
42631 if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT
42632 || GET_MODE_CLASS (d->vmode) != MODE_VECTOR_INT
42633 || GET_MODE_SIZE (vmode) != GET_MODE_SIZE (d->vmode))
42634 return false;
42635
42636 if (GET_MODE_NUNITS (vmode) >= d->nelt)
42637 return true;
42638
42639 chunk = d->nelt / GET_MODE_NUNITS (vmode);
42640 for (i = 0; i < d->nelt; i += chunk)
42641 if (d->perm[i] & (chunk - 1))
42642 return false;
42643 else
42644 for (j = 1; j < chunk; ++j)
42645 if (d->perm[i] + j != d->perm[i + j])
42646 return false;
42647
42648 return true;
42649 }
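
/* For illustration (hypothetical input): the V16QImode permutation
   { 4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11 } is valid as the V4SImode
   permutation { 1, 0, 3, 2 }, because every chunk of four byte indices
   starts on a multiple of four and stays consecutive; a permutation such
   as { 1,2,3,4, ... } would be rejected by the checks above.  */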
42650
42651 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
42652 in terms of pshufb, vpperm, vpermq, vpermd, vpermps or vperm2i128. */
42653
42654 static bool
42655 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
42656 {
42657 unsigned i, nelt, eltsz, mask;
42658 unsigned char perm[32];
42659 enum machine_mode vmode = V16QImode;
42660 rtx rperm[32], vperm, target, op0, op1;
42661
42662 nelt = d->nelt;
42663
42664 if (!d->one_operand_p)
42665 {
42666 if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16)
42667 {
42668 if (TARGET_AVX2
42669 && valid_perm_using_mode_p (V2TImode, d))
42670 {
42671 if (d->testing_p)
42672 return true;
42673
42674 /* Use vperm2i128 insn. The pattern uses
42675 V4DImode instead of V2TImode. */
42676 target = d->target;
42677 if (d->vmode != V4DImode)
42678 target = gen_reg_rtx (V4DImode);
42679 op0 = gen_lowpart (V4DImode, d->op0);
42680 op1 = gen_lowpart (V4DImode, d->op1);
42681 rperm[0]
42682 = GEN_INT ((d->perm[0] / (nelt / 2))
42683 | ((d->perm[nelt / 2] / (nelt / 2)) * 16));
42684 emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0]));
42685 if (target != d->target)
42686 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
42687 return true;
42688 }
42689 return false;
42690 }
42691 }
42692 else
42693 {
42694 if (GET_MODE_SIZE (d->vmode) == 16)
42695 {
42696 if (!TARGET_SSSE3)
42697 return false;
42698 }
42699 else if (GET_MODE_SIZE (d->vmode) == 32)
42700 {
42701 if (!TARGET_AVX2)
42702 return false;
42703
42704 /* V4DImode should be already handled through
42705 expand_vselect by vpermq instruction. */
42706 gcc_assert (d->vmode != V4DImode);
42707
42708 vmode = V32QImode;
42709 if (d->vmode == V8SImode
42710 || d->vmode == V16HImode
42711 || d->vmode == V32QImode)
42712 {
42713 /* First see if vpermq can be used for
42714 V8SImode/V16HImode/V32QImode. */
42715 if (valid_perm_using_mode_p (V4DImode, d))
42716 {
42717 for (i = 0; i < 4; i++)
42718 perm[i] = (d->perm[i * nelt / 4] * 4 / nelt) & 3;
42719 if (d->testing_p)
42720 return true;
42721 target = gen_reg_rtx (V4DImode);
42722 if (expand_vselect (target, gen_lowpart (V4DImode, d->op0),
42723 perm, 4, false))
42724 {
42725 emit_move_insn (d->target,
42726 gen_lowpart (d->vmode, target));
42727 return true;
42728 }
42729 return false;
42730 }
42731
42732 /* Next see if vpermd can be used. */
42733 if (valid_perm_using_mode_p (V8SImode, d))
42734 vmode = V8SImode;
42735 }
42736 /* Or if vpermps can be used. */
42737 else if (d->vmode == V8SFmode)
42738 vmode = V8SImode;
42739
42740 if (vmode == V32QImode)
42741 {
42742 /* vpshufb only works intra lanes; it is not
42743 possible to shuffle bytes in between the lanes. */
42744 for (i = 0; i < nelt; ++i)
42745 if ((d->perm[i] ^ i) & (nelt / 2))
42746 return false;
42747 }
42748 }
42749 else
42750 return false;
42751 }
42752
42753 if (d->testing_p)
42754 return true;
42755
42756 if (vmode == V8SImode)
42757 for (i = 0; i < 8; ++i)
42758 rperm[i] = GEN_INT ((d->perm[i * nelt / 8] * 8 / nelt) & 7);
42759 else
42760 {
42761 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
42762 if (!d->one_operand_p)
42763 mask = 2 * nelt - 1;
42764 else if (vmode == V16QImode)
42765 mask = nelt - 1;
42766 else
42767 mask = nelt / 2 - 1;
42768
42769 for (i = 0; i < nelt; ++i)
42770 {
42771 unsigned j, e = d->perm[i] & mask;
42772 for (j = 0; j < eltsz; ++j)
42773 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
42774 }
42775 }
42776
42777 vperm = gen_rtx_CONST_VECTOR (vmode,
42778 gen_rtvec_v (GET_MODE_NUNITS (vmode), rperm));
42779 vperm = force_reg (vmode, vperm);
42780
42781 target = d->target;
42782 if (d->vmode != vmode)
42783 target = gen_reg_rtx (vmode);
42784 op0 = gen_lowpart (vmode, d->op0);
42785 if (d->one_operand_p)
42786 {
42787 if (vmode == V16QImode)
42788 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
42789 else if (vmode == V32QImode)
42790 emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
42791 else if (vmode == V8SFmode)
42792 emit_insn (gen_avx2_permvarv8sf (target, op0, vperm));
42793 else
42794 emit_insn (gen_avx2_permvarv8si (target, op0, vperm));
42795 }
42796 else
42797 {
42798 op1 = gen_lowpart (vmode, d->op1);
42799 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
42800 }
42801 if (target != d->target)
42802 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
42803
42804 return true;
42805 }
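
/* For illustration (hypothetical input): a single-operand V8HImode shuffle
   with d->perm == { 3,2,1,0, 7,6,5,4 } has eltsz == 2, so each halfword
   index is scaled to a pair of byte indices and the pshufb control becomes

	{ 6,7, 4,5, 2,3, 0,1, 14,15, 12,13, 10,11, 8,9 }.  */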
42806
42807 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
42808 in a single instruction. */
42809
42810 static bool
42811 expand_vec_perm_1 (struct expand_vec_perm_d *d)
42812 {
42813 unsigned i, nelt = d->nelt;
42814 unsigned char perm2[MAX_VECT_LEN];
42815
42816 /* Check plain VEC_SELECT first, because AVX has instructions that could
42817 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
42818 input where SEL+CONCAT may not. */
42819 if (d->one_operand_p)
42820 {
42821 int mask = nelt - 1;
42822 bool identity_perm = true;
42823 bool broadcast_perm = true;
42824
42825 for (i = 0; i < nelt; i++)
42826 {
42827 perm2[i] = d->perm[i] & mask;
42828 if (perm2[i] != i)
42829 identity_perm = false;
42830 if (perm2[i])
42831 broadcast_perm = false;
42832 }
42833
42834 if (identity_perm)
42835 {
42836 if (!d->testing_p)
42837 emit_move_insn (d->target, d->op0);
42838 return true;
42839 }
42840 else if (broadcast_perm && TARGET_AVX2)
42841 {
42842 /* Use vpbroadcast{b,w,d}. */
42843 rtx (*gen) (rtx, rtx) = NULL;
42844 switch (d->vmode)
42845 {
42846 case V32QImode:
42847 gen = gen_avx2_pbroadcastv32qi_1;
42848 break;
42849 case V16HImode:
42850 gen = gen_avx2_pbroadcastv16hi_1;
42851 break;
42852 case V8SImode:
42853 gen = gen_avx2_pbroadcastv8si_1;
42854 break;
42855 case V16QImode:
42856 gen = gen_avx2_pbroadcastv16qi;
42857 break;
42858 case V8HImode:
42859 gen = gen_avx2_pbroadcastv8hi;
42860 break;
42861 case V8SFmode:
42862 gen = gen_avx2_vec_dupv8sf_1;
42863 break;
42864 /* For other modes prefer other shuffles this function creates. */
42865 default: break;
42866 }
42867 if (gen != NULL)
42868 {
42869 if (!d->testing_p)
42870 emit_insn (gen (d->target, d->op0));
42871 return true;
42872 }
42873 }
42874
42875 if (expand_vselect (d->target, d->op0, perm2, nelt, d->testing_p))
42876 return true;
42877
42878 /* There are plenty of patterns in sse.md that are written for
42879 SEL+CONCAT and are not replicated for a single op. Perhaps
42880 that should be changed, to avoid the nastiness here. */
42881
42882 /* Recognize interleave style patterns, which means incrementing
42883 every other permutation operand. */
42884 for (i = 0; i < nelt; i += 2)
42885 {
42886 perm2[i] = d->perm[i] & mask;
42887 perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
42888 }
42889 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
42890 d->testing_p))
42891 return true;
42892
42893 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
42894 if (nelt >= 4)
42895 {
42896 for (i = 0; i < nelt; i += 4)
42897 {
42898 perm2[i + 0] = d->perm[i + 0] & mask;
42899 perm2[i + 1] = d->perm[i + 1] & mask;
42900 perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
42901 perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
42902 }
42903
42904 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
42905 d->testing_p))
42906 return true;
42907 }
42908 }
42909
42910 /* Finally, try the fully general two operand permute. */
42911 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt,
42912 d->testing_p))
42913 return true;
42914
42915 /* Recognize interleave style patterns with reversed operands. */
42916 if (!d->one_operand_p)
42917 {
42918 for (i = 0; i < nelt; ++i)
42919 {
42920 unsigned e = d->perm[i];
42921 if (e >= nelt)
42922 e -= nelt;
42923 else
42924 e += nelt;
42925 perm2[i] = e;
42926 }
42927
42928 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt,
42929 d->testing_p))
42930 return true;
42931 }
42932
42933 /* Try the SSE4.1 blend variable merge instructions. */
42934 if (expand_vec_perm_blend (d))
42935 return true;
42936
42937 /* Try one of the AVX vpermil variable permutations. */
42938 if (expand_vec_perm_vpermil (d))
42939 return true;
42940
42941 /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
42942 vpshufb, vpermd, vpermps or vpermq variable permutation. */
42943 if (expand_vec_perm_pshufb (d))
42944 return true;
42945
42946 /* Try the AVX512F vpermi2 instructions. */
42947 rtx vec[64];
42948 enum machine_mode mode = d->vmode;
42949 if (mode == V8DFmode)
42950 mode = V8DImode;
42951 else if (mode == V16SFmode)
42952 mode = V16SImode;
42953 for (i = 0; i < nelt; ++i)
42954 vec[i] = GEN_INT (d->perm[i]);
42955 rtx mask = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt, vec));
42956 if (ix86_expand_vec_perm_vpermi2 (d->target, d->op0, mask, d->op1))
42957 return true;
42958
42959 return false;
42960 }
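
/* Illustration of the interleave recognition above (hypothetical input):
   a single-operand V4SImode permutation { 0, 0, 1, 1 } is rewritten as
   perm2 == { 0, 4, 1, 5 } on the doubled operand (op0 concatenated with
   itself), which is exactly punpckldq of the operand with itself.  */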
42961
42962 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
42963 in terms of a pair of pshuflw + pshufhw instructions. */
42964
42965 static bool
42966 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
42967 {
42968 unsigned char perm2[MAX_VECT_LEN];
42969 unsigned i;
42970 bool ok;
42971
42972 if (d->vmode != V8HImode || !d->one_operand_p)
42973 return false;
42974
42975 /* The two permutations only operate in 64-bit lanes. */
42976 for (i = 0; i < 4; ++i)
42977 if (d->perm[i] >= 4)
42978 return false;
42979 for (i = 4; i < 8; ++i)
42980 if (d->perm[i] < 4)
42981 return false;
42982
42983 if (d->testing_p)
42984 return true;
42985
42986 /* Emit the pshuflw. */
42987 memcpy (perm2, d->perm, 4);
42988 for (i = 4; i < 8; ++i)
42989 perm2[i] = i;
42990 ok = expand_vselect (d->target, d->op0, perm2, 8, d->testing_p);
42991 gcc_assert (ok);
42992
42993 /* Emit the pshufhw. */
42994 memcpy (perm2 + 4, d->perm + 4, 4);
42995 for (i = 0; i < 4; ++i)
42996 perm2[i] = i;
42997 ok = expand_vselect (d->target, d->target, perm2, 8, d->testing_p);
42998 gcc_assert (ok);
42999
43000 return true;
43001 }
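
/* For illustration (hypothetical input): the V8HImode permutation
   { 3,2,1,0, 5,4,7,6 } is split into pshuflw with { 3,2,1,0,4,5,6,7 }
   followed by pshufhw with { 0,1,2,3,5,4,7,6 }, each of which only
   rearranges one 64-bit half.  */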
43002
43003 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
43004 the permutation using the SSSE3 palignr instruction. This succeeds
43005 when all of the elements in PERM fit within one vector and we merely
43006 need to shift them down so that a single vector permutation has a
43007 chance to succeed. */
43008
43009 static bool
43010 expand_vec_perm_palignr (struct expand_vec_perm_d *d)
43011 {
43012 unsigned i, nelt = d->nelt;
43013 unsigned min, max;
43014 bool in_order, ok;
43015 rtx shift, target;
43016 struct expand_vec_perm_d dcopy;
43017
43018 /* Even with AVX, palignr only operates on 128-bit vectors. */
43019 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
43020 return false;
43021
43022 min = nelt, max = 0;
43023 for (i = 0; i < nelt; ++i)
43024 {
43025 unsigned e = d->perm[i];
43026 if (e < min)
43027 min = e;
43028 if (e > max)
43029 max = e;
43030 }
43031 if (min == 0 || max - min >= nelt)
43032 return false;
43033
43034 /* Given that we have SSSE3, we know we'll be able to implement the
43035 single operand permutation after the palignr with pshufb. */
43036 if (d->testing_p)
43037 return true;
43038
43039 dcopy = *d;
43040 shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
43041 target = gen_reg_rtx (TImode);
43042 emit_insn (gen_ssse3_palignrti (target, gen_lowpart (TImode, d->op1),
43043 gen_lowpart (TImode, d->op0), shift));
43044
43045 dcopy.op0 = dcopy.op1 = gen_lowpart (d->vmode, target);
43046 dcopy.one_operand_p = true;
43047
43048 in_order = true;
43049 for (i = 0; i < nelt; ++i)
43050 {
43051 unsigned e = dcopy.perm[i] - min;
43052 if (e != i)
43053 in_order = false;
43054 dcopy.perm[i] = e;
43055 }
43056
43057 /* Test for the degenerate case where the alignment by itself
43058 produces the desired permutation. */
43059 if (in_order)
43060 {
43061 emit_move_insn (d->target, dcopy.op0);
43062 return true;
43063 }
43064
43065 ok = expand_vec_perm_1 (&dcopy);
43066 gcc_assert (ok);
43067
43068 return ok;
43069 }
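
/* For illustration (hypothetical input): a V16QImode permutation whose
   indices all lie in [5, 20] first emits palignr to shift the op1:op0
   pair (op0 in the low half) right by 5 bytes, placing bytes 5..20 of the
   concatenation at positions 0..15; the permutation indices are then
   rebased by -5 and handed to expand_vec_perm_1, which can finish with a
   single pshufb.  */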
43070
43071 static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d *d);
43072
43073 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
43074 a two vector permutation into a single vector permutation by using
43075 an interleave operation to merge the vectors. */
43076
43077 static bool
43078 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
43079 {
43080 struct expand_vec_perm_d dremap, dfinal;
43081 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
43082 unsigned HOST_WIDE_INT contents;
43083 unsigned char remap[2 * MAX_VECT_LEN];
43084 rtx seq;
43085 bool ok, same_halves = false;
43086
43087 if (GET_MODE_SIZE (d->vmode) == 16)
43088 {
43089 if (d->one_operand_p)
43090 return false;
43091 }
43092 else if (GET_MODE_SIZE (d->vmode) == 32)
43093 {
43094 if (!TARGET_AVX)
43095 return false;
43096 /* For 32-byte modes allow even d->one_operand_p.
43097 The lack of cross-lane shuffling in some instructions
43098 might prevent a single insn shuffle. */
43099 dfinal = *d;
43100 dfinal.testing_p = true;
43101 /* If expand_vec_perm_interleave3 can expand this into
43102 a 3 insn sequence, give up and let it be expanded that
43103 way instead. While that is one insn longer, it doesn't
43104 need a memory operand, and in the common case where the
43105 interleave low and interleave high permutations with the
43106 same operands appear adjacent, both together need only
43107 4 insns after CSE. */
43108 if (expand_vec_perm_interleave3 (&dfinal))
43109 return false;
43110 }
43111 else
43112 return false;
43113
43114 /* Examine from whence the elements come. */
43115 contents = 0;
43116 for (i = 0; i < nelt; ++i)
43117 contents |= ((unsigned HOST_WIDE_INT) 1) << d->perm[i];
43118
43119 memset (remap, 0xff, sizeof (remap));
43120 dremap = *d;
43121
43122 if (GET_MODE_SIZE (d->vmode) == 16)
43123 {
43124 unsigned HOST_WIDE_INT h1, h2, h3, h4;
43125
43126 /* Split the two input vectors into 4 halves. */
43127 h1 = (((unsigned HOST_WIDE_INT) 1) << nelt2) - 1;
43128 h2 = h1 << nelt2;
43129 h3 = h2 << nelt2;
43130 h4 = h3 << nelt2;
43131
43132 /* If the elements all come from the low halves of both operands, use
43133 interleave low; similarly for interleave high. If the elements come
43134 from mis-matched halves, we can use shufps for V4SF/V4SI or do a
DImode shuffle. */
43135 if ((contents & (h1 | h3)) == contents)
43136 {
43137 /* punpckl* */
43138 for (i = 0; i < nelt2; ++i)
43139 {
43140 remap[i] = i * 2;
43141 remap[i + nelt] = i * 2 + 1;
43142 dremap.perm[i * 2] = i;
43143 dremap.perm[i * 2 + 1] = i + nelt;
43144 }
43145 if (!TARGET_SSE2 && d->vmode == V4SImode)
43146 dremap.vmode = V4SFmode;
43147 }
43148 else if ((contents & (h2 | h4)) == contents)
43149 {
43150 /* punpckh* */
43151 for (i = 0; i < nelt2; ++i)
43152 {
43153 remap[i + nelt2] = i * 2;
43154 remap[i + nelt + nelt2] = i * 2 + 1;
43155 dremap.perm[i * 2] = i + nelt2;
43156 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
43157 }
43158 if (!TARGET_SSE2 && d->vmode == V4SImode)
43159 dremap.vmode = V4SFmode;
43160 }
43161 else if ((contents & (h1 | h4)) == contents)
43162 {
43163 /* shufps */
43164 for (i = 0; i < nelt2; ++i)
43165 {
43166 remap[i] = i;
43167 remap[i + nelt + nelt2] = i + nelt2;
43168 dremap.perm[i] = i;
43169 dremap.perm[i + nelt2] = i + nelt + nelt2;
43170 }
43171 if (nelt != 4)
43172 {
43173 /* shufpd */
43174 dremap.vmode = V2DImode;
43175 dremap.nelt = 2;
43176 dremap.perm[0] = 0;
43177 dremap.perm[1] = 3;
43178 }
43179 }
43180 else if ((contents & (h2 | h3)) == contents)
43181 {
43182 /* shufps */
43183 for (i = 0; i < nelt2; ++i)
43184 {
43185 remap[i + nelt2] = i;
43186 remap[i + nelt] = i + nelt2;
43187 dremap.perm[i] = i + nelt2;
43188 dremap.perm[i + nelt2] = i + nelt;
43189 }
43190 if (nelt != 4)
43191 {
43192 /* shufpd */
43193 dremap.vmode = V2DImode;
43194 dremap.nelt = 2;
43195 dremap.perm[0] = 1;
43196 dremap.perm[1] = 2;
43197 }
43198 }
43199 else
43200 return false;
43201 }
43202 else
43203 {
43204 unsigned int nelt4 = nelt / 4, nzcnt = 0;
43205 unsigned HOST_WIDE_INT q[8];
43206 unsigned int nonzero_halves[4];
43207
43208 /* Split the two input vectors into 8 quarters. */
43209 q[0] = (((unsigned HOST_WIDE_INT) 1) << nelt4) - 1;
43210 for (i = 1; i < 8; ++i)
43211 q[i] = q[0] << (nelt4 * i);
43212 for (i = 0; i < 4; ++i)
43213 if (((q[2 * i] | q[2 * i + 1]) & contents) != 0)
43214 {
43215 nonzero_halves[nzcnt] = i;
43216 ++nzcnt;
43217 }
43218
43219 if (nzcnt == 1)
43220 {
43221 gcc_assert (d->one_operand_p);
43222 nonzero_halves[1] = nonzero_halves[0];
43223 same_halves = true;
43224 }
43225 else if (d->one_operand_p)
43226 {
43227 gcc_assert (nonzero_halves[0] == 0);
43228 gcc_assert (nonzero_halves[1] == 1);
43229 }
43230
43231 if (nzcnt <= 2)
43232 {
43233 if (d->perm[0] / nelt2 == nonzero_halves[1])
43234 {
43235 /* Attempt to increase the likelihood that dfinal
43236 shuffle will be intra-lane. */
43237 char tmph = nonzero_halves[0];
43238 nonzero_halves[0] = nonzero_halves[1];
43239 nonzero_halves[1] = tmph;
43240 }
43241
43242 /* vperm2f128 or vperm2i128. */
43243 for (i = 0; i < nelt2; ++i)
43244 {
43245 remap[i + nonzero_halves[1] * nelt2] = i + nelt2;
43246 remap[i + nonzero_halves[0] * nelt2] = i;
43247 dremap.perm[i + nelt2] = i + nonzero_halves[1] * nelt2;
43248 dremap.perm[i] = i + nonzero_halves[0] * nelt2;
43249 }
43250
43251 if (d->vmode != V8SFmode
43252 && d->vmode != V4DFmode
43253 && d->vmode != V8SImode)
43254 {
43255 dremap.vmode = V8SImode;
43256 dremap.nelt = 8;
43257 for (i = 0; i < 4; ++i)
43258 {
43259 dremap.perm[i] = i + nonzero_halves[0] * 4;
43260 dremap.perm[i + 4] = i + nonzero_halves[1] * 4;
43261 }
43262 }
43263 }
43264 else if (d->one_operand_p)
43265 return false;
43266 else if (TARGET_AVX2
43267 && (contents & (q[0] | q[2] | q[4] | q[6])) == contents)
43268 {
43269 /* vpunpckl* */
43270 for (i = 0; i < nelt4; ++i)
43271 {
43272 remap[i] = i * 2;
43273 remap[i + nelt] = i * 2 + 1;
43274 remap[i + nelt2] = i * 2 + nelt2;
43275 remap[i + nelt + nelt2] = i * 2 + nelt2 + 1;
43276 dremap.perm[i * 2] = i;
43277 dremap.perm[i * 2 + 1] = i + nelt;
43278 dremap.perm[i * 2 + nelt2] = i + nelt2;
43279 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2;
43280 }
43281 }
43282 else if (TARGET_AVX2
43283 && (contents & (q[1] | q[3] | q[5] | q[7])) == contents)
43284 {
43285 /* vpunpckh* */
43286 for (i = 0; i < nelt4; ++i)
43287 {
43288 remap[i + nelt4] = i * 2;
43289 remap[i + nelt + nelt4] = i * 2 + 1;
43290 remap[i + nelt2 + nelt4] = i * 2 + nelt2;
43291 remap[i + nelt + nelt2 + nelt4] = i * 2 + nelt2 + 1;
43292 dremap.perm[i * 2] = i + nelt4;
43293 dremap.perm[i * 2 + 1] = i + nelt + nelt4;
43294 dremap.perm[i * 2 + nelt2] = i + nelt2 + nelt4;
43295 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2 + nelt4;
43296 }
43297 }
43298 else
43299 return false;
43300 }
43301
43302 /* Use the remapping array set up above to move the elements from their
43303 swizzled locations into their final destinations. */
43304 dfinal = *d;
43305 for (i = 0; i < nelt; ++i)
43306 {
43307 unsigned e = remap[d->perm[i]];
43308 gcc_assert (e < nelt);
43309 /* If same_halves is true, both halves of the remapped vector are the
43310 same. Avoid cross-lane accesses if possible. */
43311 if (same_halves && i >= nelt2)
43312 {
43313 gcc_assert (e < nelt2);
43314 dfinal.perm[i] = e + nelt2;
43315 }
43316 else
43317 dfinal.perm[i] = e;
43318 }
43319 if (!d->testing_p)
43320 {
43321 dremap.target = gen_reg_rtx (dremap.vmode);
43322 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
43323 }
43324 dfinal.op1 = dfinal.op0;
43325 dfinal.one_operand_p = true;
43326
43327 /* Test if the final remap can be done with a single insn. For V4SFmode or
43328 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
43329 start_sequence ();
43330 ok = expand_vec_perm_1 (&dfinal);
43331 seq = get_insns ();
43332 end_sequence ();
43333
43334 if (!ok)
43335 return false;
43336
43337 if (d->testing_p)
43338 return true;
43339
43340 if (dremap.vmode != dfinal.vmode)
43341 {
43342 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
43343 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
43344 }
43345
43346 ok = expand_vec_perm_1 (&dremap);
43347 gcc_assert (ok);
43348
43349 emit_insn (seq);
43350 return true;
43351 }
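
/* A worked illustration (hypothetical input): for the V4SImode permutation
   { 0, 5, 1, 4 } all used elements lie in the two low halves, so dremap
   becomes punpckldq, i.e. { 0, 4, 1, 5 }, remap maps the original indices
   0,1,4,5 to 0,2,1,3, and dfinal is the single-operand shuffle
   { 0, 3, 2, 1 } on the interleaved vector.  */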
43352
43353 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
43354 a single vector cross-lane permutation into vpermq followed
43355 by any of the single insn permutations. */
43356
43357 static bool
43358 expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d *d)
43359 {
43360 struct expand_vec_perm_d dremap, dfinal;
43361 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, nelt4 = nelt / 4;
43362 unsigned contents[2];
43363 bool ok;
43364
43365 if (!(TARGET_AVX2
43366 && (d->vmode == V32QImode || d->vmode == V16HImode)
43367 && d->one_operand_p))
43368 return false;
43369
43370 contents[0] = 0;
43371 contents[1] = 0;
43372 for (i = 0; i < nelt2; ++i)
43373 {
43374 contents[0] |= 1u << (d->perm[i] / nelt4);
43375 contents[1] |= 1u << (d->perm[i + nelt2] / nelt4);
43376 }
43377
43378 for (i = 0; i < 2; ++i)
43379 {
43380 unsigned int cnt = 0;
43381 for (j = 0; j < 4; ++j)
43382 if ((contents[i] & (1u << j)) != 0 && ++cnt > 2)
43383 return false;
43384 }
43385
43386 if (d->testing_p)
43387 return true;
43388
43389 dremap = *d;
43390 dremap.vmode = V4DImode;
43391 dremap.nelt = 4;
43392 dremap.target = gen_reg_rtx (V4DImode);
43393 dremap.op0 = gen_lowpart (V4DImode, d->op0);
43394 dremap.op1 = dremap.op0;
43395 dremap.one_operand_p = true;
43396 for (i = 0; i < 2; ++i)
43397 {
43398 unsigned int cnt = 0;
43399 for (j = 0; j < 4; ++j)
43400 if ((contents[i] & (1u << j)) != 0)
43401 dremap.perm[2 * i + cnt++] = j;
43402 for (; cnt < 2; ++cnt)
43403 dremap.perm[2 * i + cnt] = 0;
43404 }
43405
43406 dfinal = *d;
43407 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
43408 dfinal.op1 = dfinal.op0;
43409 dfinal.one_operand_p = true;
43410 for (i = 0, j = 0; i < nelt; ++i)
43411 {
43412 if (i == nelt2)
43413 j = 2;
43414 dfinal.perm[i] = (d->perm[i] & (nelt4 - 1)) | (j ? nelt2 : 0);
43415 if ((d->perm[i] / nelt4) == dremap.perm[j])
43416 ;
43417 else if ((d->perm[i] / nelt4) == dremap.perm[j + 1])
43418 dfinal.perm[i] |= nelt4;
43419 else
43420 gcc_unreachable ();
43421 }
43422
43423 ok = expand_vec_perm_1 (&dremap);
43424 gcc_assert (ok);
43425
43426 ok = expand_vec_perm_1 (&dfinal);
43427 gcc_assert (ok);
43428
43429 return true;
43430 }
43431
43432 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to expand
43433 a vector permutation using two instructions, vperm2f128 resp.
43434 vperm2i128 followed by any single in-lane permutation. */
43435
43436 static bool
43437 expand_vec_perm_vperm2f128 (struct expand_vec_perm_d *d)
43438 {
43439 struct expand_vec_perm_d dfirst, dsecond;
43440 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, perm;
43441 bool ok;
43442
43443 if (!TARGET_AVX
43444 || GET_MODE_SIZE (d->vmode) != 32
43445 || (d->vmode != V8SFmode && d->vmode != V4DFmode && !TARGET_AVX2))
43446 return false;
43447
43448 dsecond = *d;
43449 dsecond.one_operand_p = false;
43450 dsecond.testing_p = true;
43451
43452 /* ((perm << 2)|perm) & 0x33 is the vperm2[fi]128
43453 immediate. For perm < 16 the second permutation uses
43454 d->op0 as first operand, for perm >= 16 it uses d->op1
43455 as first operand. The second operand is the result of
43456 vperm2[fi]128. */
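
/* Background, for illustration: in the vperm2[fi]128 immediate, bits 1:0
   select the low 128-bit half of the result and bits 5:4 the high half,
   with 0/1 naming the low/high lane of the first source and 2/3 those of
   the second.  E.g. perm == 1 here maps to immediate 0x01, the pure lane
   swap of the first operand.  */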
43457 for (perm = 0; perm < 32; perm++)
43458 {
43459 /* Ignore permutations which do not move anything cross-lane. */
43460 if (perm < 16)
43461 {
43462 /* The second shuffle for e.g. V4DFmode has
43463 0123 and ABCD operands.
43464 Ignore AB23, as 23 is already in the second lane
43465 of the first operand. */
43466 if ((perm & 0xc) == (1 << 2)) continue;
43467 /* And 01CD, as 01 is in the first lane of the first
43468 operand. */
43469 if ((perm & 3) == 0) continue;
43470 /* And 4567, as then the vperm2[fi]128 doesn't change
43471 anything on the original 4567 second operand. */
43472 if ((perm & 0xf) == ((3 << 2) | 2)) continue;
43473 }
43474 else
43475 {
43476 /* The second shuffle for e.g. V4DFmode has
43477 4567 and ABCD operands.
43478 Ignore AB67, as 67 is already in the second lane
43479 of the first operand. */
43480 if ((perm & 0xc) == (3 << 2)) continue;
43481 /* And 45CD, as 45 is in the first lane of the first
43482 operand. */
43483 if ((perm & 3) == 2) continue;
43484 /* And 0123, as then the vperm2[fi]128 doesn't change
43485 anything on the original 0123 first operand. */
43486 if ((perm & 0xf) == (1 << 2)) continue;
43487 }
43488
43489 for (i = 0; i < nelt; i++)
43490 {
43491 j = d->perm[i] / nelt2;
43492 if (j == ((perm >> (2 * (i >= nelt2))) & 3))
43493 dsecond.perm[i] = nelt + (i & nelt2) + (d->perm[i] & (nelt2 - 1));
43494 else if (j == (unsigned) (i >= nelt2) + 2 * (perm >= 16))
43495 dsecond.perm[i] = d->perm[i] & (nelt - 1);
43496 else
43497 break;
43498 }
43499
43500 if (i == nelt)
43501 {
43502 start_sequence ();
43503 ok = expand_vec_perm_1 (&dsecond);
43504 end_sequence ();
43505 }
43506 else
43507 ok = false;
43508
43509 if (ok)
43510 {
43511 if (d->testing_p)
43512 return true;
43513
43514 /* Found a usable second shuffle. dfirst will be
43515 vperm2f128 on d->op0 and d->op1. */
43516 dsecond.testing_p = false;
43517 dfirst = *d;
43518 dfirst.target = gen_reg_rtx (d->vmode);
43519 for (i = 0; i < nelt; i++)
43520 dfirst.perm[i] = (i & (nelt2 - 1))
43521 + ((perm >> (2 * (i >= nelt2))) & 3) * nelt2;
43522
43523 ok = expand_vec_perm_1 (&dfirst);
43524 gcc_assert (ok);
43525
43526 /* And dsecond is some single insn shuffle, taking
43527 d->op0 and result of vperm2f128 (if perm < 16) or
43528 d->op1 and result of vperm2f128 (otherwise). */
43529 dsecond.op1 = dfirst.target;
43530 if (perm >= 16)
43531 dsecond.op0 = dfirst.op1;
43532
43533 ok = expand_vec_perm_1 (&dsecond);
43534 gcc_assert (ok);
43535
43536 return true;
43537 }
43538
43539 /* For one operand, the only useful vperm2f128 permutation is 0x01 (lane swap). */
43540 if (d->one_operand_p)
43541 return false;
43542 }
43543
43544 return false;
43545 }
43546
43547 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
43548 a two vector permutation using 2 intra-lane interleave insns
43549 and cross-lane shuffle for 32-byte vectors. */
43550
43551 static bool
43552 expand_vec_perm_interleave3 (struct expand_vec_perm_d *d)
43553 {
43554 unsigned i, nelt;
43555 rtx (*gen) (rtx, rtx, rtx);
43556
43557 if (d->one_operand_p)
43558 return false;
43559 if (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32)
43560 ;
43561 else if (TARGET_AVX && (d->vmode == V8SFmode || d->vmode == V4DFmode))
43562 ;
43563 else
43564 return false;
43565
43566 nelt = d->nelt;
43567 if (d->perm[0] != 0 && d->perm[0] != nelt / 2)
43568 return false;
43569 for (i = 0; i < nelt; i += 2)
43570 if (d->perm[i] != d->perm[0] + i / 2
43571 || d->perm[i + 1] != d->perm[0] + i / 2 + nelt)
43572 return false;
43573
43574 if (d->testing_p)
43575 return true;
43576
43577 switch (d->vmode)
43578 {
43579 case V32QImode:
43580 if (d->perm[0])
43581 gen = gen_vec_interleave_highv32qi;
43582 else
43583 gen = gen_vec_interleave_lowv32qi;
43584 break;
43585 case V16HImode:
43586 if (d->perm[0])
43587 gen = gen_vec_interleave_highv16hi;
43588 else
43589 gen = gen_vec_interleave_lowv16hi;
43590 break;
43591 case V8SImode:
43592 if (d->perm[0])
43593 gen = gen_vec_interleave_highv8si;
43594 else
43595 gen = gen_vec_interleave_lowv8si;
43596 break;
43597 case V4DImode:
43598 if (d->perm[0])
43599 gen = gen_vec_interleave_highv4di;
43600 else
43601 gen = gen_vec_interleave_lowv4di;
43602 break;
43603 case V8SFmode:
43604 if (d->perm[0])
43605 gen = gen_vec_interleave_highv8sf;
43606 else
43607 gen = gen_vec_interleave_lowv8sf;
43608 break;
43609 case V4DFmode:
43610 if (d->perm[0])
43611 gen = gen_vec_interleave_highv4df;
43612 else
43613 gen = gen_vec_interleave_lowv4df;
43614 break;
43615 default:
43616 gcc_unreachable ();
43617 }
43618
43619 emit_insn (gen (d->target, d->op0, d->op1));
43620 return true;
43621 }
43622
43623 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement
43624 a single vector permutation using a single intra-lane vector
43625 permutation, vperm2f128 swapping the lanes and vblend* insn blending
43626 the non-swapped and swapped vectors together. */
43627
43628 static bool
43629 expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d *d)
43630 {
43631 struct expand_vec_perm_d dfirst, dsecond;
43632 unsigned i, j, msk, nelt = d->nelt, nelt2 = nelt / 2;
43633 rtx seq;
43634 bool ok;
43635 rtx (*blend) (rtx, rtx, rtx, rtx) = NULL;
43636
43637 if (!TARGET_AVX
43638 || TARGET_AVX2
43639 || (d->vmode != V8SFmode && d->vmode != V4DFmode)
43640 || !d->one_operand_p)
43641 return false;
43642
43643 dfirst = *d;
43644 for (i = 0; i < nelt; i++)
43645 dfirst.perm[i] = 0xff;
43646 for (i = 0, msk = 0; i < nelt; i++)
43647 {
43648 j = (d->perm[i] & nelt2) ? i | nelt2 : i & ~nelt2;
43649 if (dfirst.perm[j] != 0xff && dfirst.perm[j] != d->perm[i])
43650 return false;
43651 dfirst.perm[j] = d->perm[i];
43652 if (j != i)
43653 msk |= (1 << i);
43654 }
43655 for (i = 0; i < nelt; i++)
43656 if (dfirst.perm[i] == 0xff)
43657 dfirst.perm[i] = i;
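/* E.g. for the one-operand V4DFmode permutation { 2, 1, 0, 3 } the
   loops above leave dfirst as the identity { 0, 1, 2, 3 } and set
   msk to 0x5; the lane swap built below plus a vblendpd with that
   mask then reproduce { 2, 1, 0, 3 }. */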
43658
43659 if (!d->testing_p)
43660 dfirst.target = gen_reg_rtx (dfirst.vmode);
43661
43662 start_sequence ();
43663 ok = expand_vec_perm_1 (&dfirst);
43664 seq = get_insns ();
43665 end_sequence ();
43666
43667 if (!ok)
43668 return false;
43669
43670 if (d->testing_p)
43671 return true;
43672
43673 emit_insn (seq);
43674
43675 dsecond = *d;
43676 dsecond.op0 = dfirst.target;
43677 dsecond.op1 = dfirst.target;
43678 dsecond.one_operand_p = true;
43679 dsecond.target = gen_reg_rtx (dsecond.vmode);
43680 for (i = 0; i < nelt; i++)
43681 dsecond.perm[i] = i ^ nelt2;
43682
43683 ok = expand_vec_perm_1 (&dsecond);
43684 gcc_assert (ok);
43685
43686 blend = d->vmode == V8SFmode ? gen_avx_blendps256 : gen_avx_blendpd256;
43687 emit_insn (blend (d->target, dfirst.target, dsecond.target, GEN_INT (msk)));
43688 return true;
43689 }
43690
43691 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement a V4DF
43692 permutation using two vperm2f128, followed by a vshufpd insn blending
43693 the two vectors together. */
43694
43695 static bool
43696 expand_vec_perm_2vperm2f128_vshuf (struct expand_vec_perm_d *d)
43697 {
43698 struct expand_vec_perm_d dfirst, dsecond, dthird;
43699 bool ok;
43700
43701 if (!TARGET_AVX || (d->vmode != V4DFmode))
43702 return false;
43703
43704 if (d->testing_p)
43705 return true;
43706
43707 dfirst = *d;
43708 dsecond = *d;
43709 dthird = *d;
43710
43711 dfirst.perm[0] = (d->perm[0] & ~1);
43712 dfirst.perm[1] = (d->perm[0] & ~1) + 1;
43713 dfirst.perm[2] = (d->perm[2] & ~1);
43714 dfirst.perm[3] = (d->perm[2] & ~1) + 1;
43715 dsecond.perm[0] = (d->perm[1] & ~1);
43716 dsecond.perm[1] = (d->perm[1] & ~1) + 1;
43717 dsecond.perm[2] = (d->perm[3] & ~1);
43718 dsecond.perm[3] = (d->perm[3] & ~1) + 1;
43719 dthird.perm[0] = (d->perm[0] % 2);
43720 dthird.perm[1] = (d->perm[1] % 2) + 4;
43721 dthird.perm[2] = (d->perm[2] % 2) + 2;
43722 dthird.perm[3] = (d->perm[3] % 2) + 6;
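/* E.g. for d->perm == { 2, 5, 1, 6 } this yields dfirst.perm
   == { 2, 3, 0, 1 }, dsecond.perm == { 4, 5, 6, 7 } and dthird.perm
   == { 0, 5, 3, 6 }.  dthird is by construction a vshufpd-style
   pattern (elements 0 and 2 from the low and high lane of the first
   input, elements 1 and 3 from the second), and applied to the two
   intermediate results it reproduces { 2, 5, 1, 6 }. */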
43723
43724 dfirst.target = gen_reg_rtx (dfirst.vmode);
43725 dsecond.target = gen_reg_rtx (dsecond.vmode);
43726 dthird.op0 = dfirst.target;
43727 dthird.op1 = dsecond.target;
43728 dthird.one_operand_p = false;
43729
43730 canonicalize_perm (&dfirst);
43731 canonicalize_perm (&dsecond);
43732
43733 ok = expand_vec_perm_1 (&dfirst)
43734 && expand_vec_perm_1 (&dsecond)
43735 && expand_vec_perm_1 (&dthird);
43736
43737 gcc_assert (ok);
43738
43739 return true;
43740 }
43741
43742 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
43743 permutation with two pshufb insns and an ior. We should have already
43744 failed all two instruction sequences. */
43745
43746 static bool
43747 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
43748 {
43749 rtx rperm[2][16], vperm, l, h, op, m128;
43750 unsigned int i, nelt, eltsz;
43751
43752 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
43753 return false;
43754 gcc_assert (!d->one_operand_p);
43755
43756 if (d->testing_p)
43757 return true;
43758
43759 nelt = d->nelt;
43760 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
43761
43762 /* Generate two permutation masks. If the required element is within
43763 the given vector it is shuffled into the proper lane. If the required
43764 element is in the other vector, force a zero into the lane by setting
43765 bit 7 in the permutation mask. */
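/* E.g. for a V8HImode permutation with d->perm[3] == 11 (halfword 3
   of op1), eltsz is 2, so bytes 6 and 7 of the op1 mask select
   bytes 6 and 7 of op1, while bytes 6 and 7 of the op0 mask are set
   to -128 and force zeros there. */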
43766 m128 = GEN_INT (-128);
43767 for (i = 0; i < nelt; ++i)
43768 {
43769 unsigned j, e = d->perm[i];
43770 unsigned which = (e >= nelt);
43771 if (e >= nelt)
43772 e -= nelt;
43773
43774 for (j = 0; j < eltsz; ++j)
43775 {
43776 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
43777 rperm[1-which][i*eltsz + j] = m128;
43778 }
43779 }
43780
43781 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
43782 vperm = force_reg (V16QImode, vperm);
43783
43784 l = gen_reg_rtx (V16QImode);
43785 op = gen_lowpart (V16QImode, d->op0);
43786 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
43787
43788 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
43789 vperm = force_reg (V16QImode, vperm);
43790
43791 h = gen_reg_rtx (V16QImode);
43792 op = gen_lowpart (V16QImode, d->op1);
43793 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
43794
43795 op = d->target;
43796 if (d->vmode != V16QImode)
43797 op = gen_reg_rtx (V16QImode);
43798 emit_insn (gen_iorv16qi3 (op, l, h));
43799 if (op != d->target)
43800 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
43801
43802 return true;
43803 }
43804
43805 /* Implement arbitrary permutation of one V32QImode or V16QImode operand
43806 with two vpshufb insns, vpermq and vpor. We should have already failed
43807 all two or three instruction sequences. */
43808
43809 static bool
43810 expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d *d)
43811 {
43812 rtx rperm[2][32], vperm, l, h, hp, op, m128;
43813 unsigned int i, nelt, eltsz;
43814
43815 if (!TARGET_AVX2
43816 || !d->one_operand_p
43817 || (d->vmode != V32QImode && d->vmode != V16HImode))
43818 return false;
43819
43820 if (d->testing_p)
43821 return true;
43822
43823 nelt = d->nelt;
43824 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
43825
43826 /* Generate two permutation masks. If the required element is within
43827 the same lane, it is shuffled in. If the required element is from the
43828 other lane, force a zero by setting bit 7 in the permutation mask.
43829 The other mask has non-negative elements wherever an element is
43830 requested from the other lane; those elements are also moved to the
43831 other lane, so that the result of vpshufb has its two V2TImode halves
43832 swapped. */
43833 m128 = GEN_INT (-128);
43834 for (i = 0; i < nelt; ++i)
43835 {
43836 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
43837 unsigned which = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
43838
43839 for (j = 0; j < eltsz; ++j)
43840 {
43841 rperm[!!which][(i * eltsz + j) ^ which] = GEN_INT (e * eltsz + j);
43842 rperm[!which][(i * eltsz + j) ^ (which ^ 16)] = m128;
43843 }
43844 }
43845
43846 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
43847 vperm = force_reg (V32QImode, vperm);
43848
43849 h = gen_reg_rtx (V32QImode);
43850 op = gen_lowpart (V32QImode, d->op0);
43851 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
43852
43853 /* Swap the 128-bit lanes of h into hp. */
43854 hp = gen_reg_rtx (V4DImode);
43855 op = gen_lowpart (V4DImode, h);
43856 emit_insn (gen_avx2_permv4di_1 (hp, op, const2_rtx, GEN_INT (3), const0_rtx,
43857 const1_rtx));
43858
43859 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
43860 vperm = force_reg (V32QImode, vperm);
43861
43862 l = gen_reg_rtx (V32QImode);
43863 op = gen_lowpart (V32QImode, d->op0);
43864 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
43865
43866 op = d->target;
43867 if (d->vmode != V32QImode)
43868 op = gen_reg_rtx (V32QImode);
43869 emit_insn (gen_iorv32qi3 (op, l, gen_lowpart (V32QImode, hp)));
43870 if (op != d->target)
43871 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
43872
43873 return true;
43874 }
43875
43876 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
43877 and extract-odd permutations of two V32QImode or V16QImode operands
43878 with two vpshufb insns, vpor and vpermq. We should have already
43879 failed all two or three instruction sequences. */
43880
43881 static bool
43882 expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d)
43883 {
43884 rtx rperm[2][32], vperm, l, h, ior, op, m128;
43885 unsigned int i, nelt, eltsz;
43886
43887 if (!TARGET_AVX2
43888 || d->one_operand_p
43889 || (d->vmode != V32QImode && d->vmode != V16HImode))
43890 return false;
43891
43892 for (i = 0; i < d->nelt; ++i)
43893 if ((d->perm[i] ^ (i * 2)) & (3 * d->nelt / 2))
43894 return false;
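/* I.e. element i must come from the same operand and the same
   128-bit lane as element i * 2 of the concatenated operands;
   for V32QImode that means d->perm[i] must agree with i * 2 in
   bits 4 and 5. */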
43895
43896 if (d->testing_p)
43897 return true;
43898
43899 nelt = d->nelt;
43900 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
43901
43902 /* Generate two permutation masks. In the first permutation mask
43903 the first quarter will contain indexes for the first half
43904 of the op0, the second quarter will contain bit 7 set, third quarter
43905 will contain indexes for the second half of the op0 and the
43906 last quarter bit 7 set. In the second permutation mask
43907 the first quarter will contain bit 7 set, the second quarter
43908 indexes for the first half of the op1, the third quarter bit 7 set
43909 and last quarter indexes for the second half of the op1.
43910 I.e. the first mask e.g. for V32QImode extract even will be:
43911 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
43912 (all values masked with 0xf except for -128) and second mask
43913 for extract even will be
43914 -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe. */
43915 m128 = GEN_INT (-128);
43916 for (i = 0; i < nelt; ++i)
43917 {
43918 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
43919 unsigned which = d->perm[i] >= nelt;
43920 unsigned xorv = (i >= nelt / 4 && i < 3 * nelt / 4) ? 24 : 0;
43921
43922 for (j = 0; j < eltsz; ++j)
43923 {
43924 rperm[which][(i * eltsz + j) ^ xorv] = GEN_INT (e * eltsz + j);
43925 rperm[1 - which][(i * eltsz + j) ^ xorv] = m128;
43926 }
43927 }
43928
43929 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
43930 vperm = force_reg (V32QImode, vperm);
43931
43932 l = gen_reg_rtx (V32QImode);
43933 op = gen_lowpart (V32QImode, d->op0);
43934 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
43935
43936 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
43937 vperm = force_reg (V32QImode, vperm);
43938
43939 h = gen_reg_rtx (V32QImode);
43940 op = gen_lowpart (V32QImode, d->op1);
43941 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
43942
43943 ior = gen_reg_rtx (V32QImode);
43944 emit_insn (gen_iorv32qi3 (ior, l, h));
43945
43946 /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation. */
43947 op = gen_reg_rtx (V4DImode);
43948 ior = gen_lowpart (V4DImode, ior);
43949 emit_insn (gen_avx2_permv4di_1 (op, ior, const0_rtx, const2_rtx,
43950 const1_rtx, GEN_INT (3)));
43951 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
43952
43953 return true;
43954 }
43955
43956 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
43957 and extract-odd permutations. */
43958
43959 static bool
43960 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
43961 {
43962 rtx t1, t2, t3, t4, t5;
43963
43964 switch (d->vmode)
43965 {
43966 case V4DFmode:
43967 if (d->testing_p)
43968 break;
43969 t1 = gen_reg_rtx (V4DFmode);
43970 t2 = gen_reg_rtx (V4DFmode);
43971
43972 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
43973 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
43974 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
43975
43976 /* Now an unpck[lh]pd will produce the result required. */
43977 if (odd)
43978 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
43979 else
43980 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
43981 emit_insn (t3);
43982 break;
43983
43984 case V8SFmode:
43985 {
43986 int mask = odd ? 0xdd : 0x88;
43987
43988 if (d->testing_p)
43989 break;
43990 t1 = gen_reg_rtx (V8SFmode);
43991 t2 = gen_reg_rtx (V8SFmode);
43992 t3 = gen_reg_rtx (V8SFmode);
43993
43994 /* Shuffle within the 128-bit lanes to produce:
43995 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
43996 emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
43997 GEN_INT (mask)));
43998
43999 /* Shuffle the lanes around to produce:
44000 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
44001 emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
44002 GEN_INT (0x3)));
44003
44004 /* Shuffle within the 128-bit lanes to produce:
44005 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
44006 emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
44007
44008 /* Shuffle within the 128-bit lanes to produce:
44009 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
44010 emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
44011
44012 /* Shuffle the lanes around to produce:
44013 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
44014 emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
44015 GEN_INT (0x20)));
44016 }
44017 break;
44018
44019 case V2DFmode:
44020 case V4SFmode:
44021 case V2DImode:
44022 case V4SImode:
44023 /* These are always directly implementable by expand_vec_perm_1. */
44024 gcc_unreachable ();
44025
44026 case V8HImode:
44027 if (TARGET_SSSE3 && !TARGET_SLOW_PSHUFB)
44028 return expand_vec_perm_pshufb2 (d);
44029 else
44030 {
44031 if (d->testing_p)
44032 break;
44033 /* We need 2*log2(N)-1 operations to achieve odd/even
44034 with interleave. */
44035 t1 = gen_reg_rtx (V8HImode);
44036 t2 = gen_reg_rtx (V8HImode);
44037 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
44038 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
44039 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
44040 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
44041 if (odd)
44042 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
44043 else
44044 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
44045 emit_insn (t3);
44046 }
44047 break;
44048
44049 case V16QImode:
44050 if (TARGET_SSSE3 && !TARGET_SLOW_PSHUFB)
44051 return expand_vec_perm_pshufb2 (d);
44052 else
44053 {
44054 if (d->testing_p)
44055 break;
44056 t1 = gen_reg_rtx (V16QImode);
44057 t2 = gen_reg_rtx (V16QImode);
44058 t3 = gen_reg_rtx (V16QImode);
44059 emit_insn (gen_vec_interleave_highv16qi (t1, d->op0, d->op1));
44060 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->op0, d->op1));
44061 emit_insn (gen_vec_interleave_highv16qi (t2, d->target, t1));
44062 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t1));
44063 emit_insn (gen_vec_interleave_highv16qi (t3, d->target, t2));
44064 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t2));
44065 if (odd)
44066 t3 = gen_vec_interleave_highv16qi (d->target, d->target, t3);
44067 else
44068 t3 = gen_vec_interleave_lowv16qi (d->target, d->target, t3);
44069 emit_insn (t3);
44070 }
44071 break;
44072
44073 case V16HImode:
44074 case V32QImode:
44075 return expand_vec_perm_vpshufb2_vpermq_even_odd (d);
44076
44077 case V4DImode:
44078 if (!TARGET_AVX2)
44079 {
44080 struct expand_vec_perm_d d_copy = *d;
44081 d_copy.vmode = V4DFmode;
44082 if (d->testing_p)
44083 d_copy.target = gen_lowpart (V4DFmode, d->target);
44084 else
44085 d_copy.target = gen_reg_rtx (V4DFmode);
44086 d_copy.op0 = gen_lowpart (V4DFmode, d->op0);
44087 d_copy.op1 = gen_lowpart (V4DFmode, d->op1);
44088 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
44089 {
44090 if (!d->testing_p)
44091 emit_move_insn (d->target,
44092 gen_lowpart (V4DImode, d_copy.target));
44093 return true;
44094 }
44095 return false;
44096 }
44097
44098 if (d->testing_p)
44099 break;
44100
44101 t1 = gen_reg_rtx (V4DImode);
44102 t2 = gen_reg_rtx (V4DImode);
44103
44104 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
44105 emit_insn (gen_avx2_permv2ti (t1, d->op0, d->op1, GEN_INT (0x20)));
44106 emit_insn (gen_avx2_permv2ti (t2, d->op0, d->op1, GEN_INT (0x31)));
44107
44108 /* Now a vpunpck[lh]qdq will produce the result required. */
44109 if (odd)
44110 t3 = gen_avx2_interleave_highv4di (d->target, t1, t2);
44111 else
44112 t3 = gen_avx2_interleave_lowv4di (d->target, t1, t2);
44113 emit_insn (t3);
44114 break;
44115
44116 case V8SImode:
44117 if (!TARGET_AVX2)
44118 {
44119 struct expand_vec_perm_d d_copy = *d;
44120 d_copy.vmode = V8SFmode;
44121 if (d->testing_p)
44122 d_copy.target = gen_lowpart (V8SFmode, d->target);
44123 else
44124 d_copy.target = gen_reg_rtx (V8SFmode);
44125 d_copy.op0 = gen_lowpart (V8SFmode, d->op0);
44126 d_copy.op1 = gen_lowpart (V8SFmode, d->op1);
44127 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
44128 {
44129 if (!d->testing_p)
44130 emit_move_insn (d->target,
44131 gen_lowpart (V8SImode, d_copy.target));
44132 return true;
44133 }
44134 return false;
44135 }
44136
44137 if (d->testing_p)
44138 break;
44139
44140 t1 = gen_reg_rtx (V8SImode);
44141 t2 = gen_reg_rtx (V8SImode);
44142 t3 = gen_reg_rtx (V4DImode);
44143 t4 = gen_reg_rtx (V4DImode);
44144 t5 = gen_reg_rtx (V4DImode);
44145
44146 /* Shuffle the lanes around into
44147 { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }. */
44148 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, d->op0),
44149 gen_lowpart (V4DImode, d->op1),
44150 GEN_INT (0x20)));
44151 emit_insn (gen_avx2_permv2ti (t4, gen_lowpart (V4DImode, d->op0),
44152 gen_lowpart (V4DImode, d->op1),
44153 GEN_INT (0x31)));
44154
44155 /* Swap the 2nd and 3rd position in each lane into
44156 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
44157 emit_insn (gen_avx2_pshufdv3 (t1, gen_lowpart (V8SImode, t3),
44158 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
44159 emit_insn (gen_avx2_pshufdv3 (t2, gen_lowpart (V8SImode, t4),
44160 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
44161
44162 /* Now a vpunpck[lh]qdq will produce
44163 { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }. */
44164 if (odd)
44165 t3 = gen_avx2_interleave_highv4di (t5, gen_lowpart (V4DImode, t1),
44166 gen_lowpart (V4DImode, t2));
44167 else
44168 t3 = gen_avx2_interleave_lowv4di (t5, gen_lowpart (V4DImode, t1),
44169 gen_lowpart (V4DImode, t2));
44170 emit_insn (t3);
44171 emit_move_insn (d->target, gen_lowpart (V8SImode, t5));
44172 break;
44173
44174 default:
44175 gcc_unreachable ();
44176 }
44177
44178 return true;
44179 }
44180
44181 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
44182 extract-even and extract-odd permutations. */
44183
44184 static bool
44185 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
44186 {
44187 unsigned i, odd, nelt = d->nelt;
44188
44189 odd = d->perm[0];
44190 if (odd != 0 && odd != 1)
44191 return false;
44192
44193 for (i = 1; i < nelt; ++i)
44194 if (d->perm[i] != 2 * i + odd)
44195 return false;
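/* E.g. for V8SImode only { 0, 2, 4, 6, 8, 10, 12, 14 } (even)
   and { 1, 3, 5, 7, 9, 11, 13, 15 } (odd) are matched here. */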
44196
44197 return expand_vec_perm_even_odd_1 (d, odd);
44198 }
44199
44200 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
44201 permutations. We assume that expand_vec_perm_1 has already failed. */
44202
44203 static bool
44204 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
44205 {
44206 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
44207 enum machine_mode vmode = d->vmode;
44208 unsigned char perm2[4];
44209 rtx op0 = d->op0, dest;
44210 bool ok;
44211
44212 switch (vmode)
44213 {
44214 case V4DFmode:
44215 case V8SFmode:
44216 /* These are special-cased in sse.md so that we can optionally
44217 use the vbroadcast instruction. They expand to two insns
44218 if the input happens to be in a register. */
44219 gcc_unreachable ();
44220
44221 case V2DFmode:
44222 case V2DImode:
44223 case V4SFmode:
44224 case V4SImode:
44225 /* These are always implementable using standard shuffle patterns. */
44226 gcc_unreachable ();
44227
44228 case V8HImode:
44229 case V16QImode:
44230 /* These can be implemented via interleave. We save one insn by
44231 stopping once we have promoted to V4SImode and then use pshufd. */
44232 if (d->testing_p)
44233 return true;
44234 do
44235 {
44236 rtx dest;
44237 rtx (*gen) (rtx, rtx, rtx)
44238 = vmode == V16QImode ? gen_vec_interleave_lowv16qi
44239 : gen_vec_interleave_lowv8hi;
44240
44241 if (elt >= nelt2)
44242 {
44243 gen = vmode == V16QImode ? gen_vec_interleave_highv16qi
44244 : gen_vec_interleave_highv8hi;
44245 elt -= nelt2;
44246 }
44247 nelt2 /= 2;
44248
44249 dest = gen_reg_rtx (vmode);
44250 emit_insn (gen (dest, op0, op0));
44251 vmode = get_mode_wider_vector (vmode);
44252 op0 = gen_lowpart (vmode, dest);
44253 }
44254 while (vmode != V4SImode);
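/* E.g. to broadcast byte 5 of a V16QImode vector: the first (low)
   interleave duplicates each byte, leaving the copies of byte 5 in
   halfword 5; the second (high) interleave then leaves four copies
   in element 1 of the resulting V4SImode value, which the pshufd
   emitted below broadcasts. */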
44255
44256 memset (perm2, elt, 4);
44257 dest = gen_reg_rtx (V4SImode);
44258 ok = expand_vselect (dest, op0, perm2, 4, d->testing_p);
44259 gcc_assert (ok);
44260 if (!d->testing_p)
44261 emit_move_insn (d->target, gen_lowpart (d->vmode, dest));
44262 return true;
44263
44264 case V32QImode:
44265 case V16HImode:
44266 case V8SImode:
44267 case V4DImode:
44268 /* For AVX2 broadcasts of the first element vpbroadcast* or
44269 vpermq should be used by expand_vec_perm_1. */
44270 gcc_assert (!TARGET_AVX2 || d->perm[0]);
44271 return false;
44272
44273 default:
44274 gcc_unreachable ();
44275 }
44276 }
44277
44278 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
44279 broadcast permutations. */
44280
44281 static bool
44282 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
44283 {
44284 unsigned i, elt, nelt = d->nelt;
44285
44286 if (!d->one_operand_p)
44287 return false;
44288
44289 elt = d->perm[0];
44290 for (i = 1; i < nelt; ++i)
44291 if (d->perm[i] != elt)
44292 return false;
44293
44294 return expand_vec_perm_broadcast_1 (d);
44295 }
44296
44297 /* Implement arbitrary permutation of two V32QImode or V16QImode operands
44298 with 4 vpshufb insns, 2 vpermq and 3 vpor. We should have already failed
44299 all the shorter instruction sequences. */
44300
44301 static bool
44302 expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d *d)
44303 {
44304 rtx rperm[4][32], vperm, l[2], h[2], op, m128;
44305 unsigned int i, nelt, eltsz;
44306 bool used[4];
44307
44308 if (!TARGET_AVX2
44309 || d->one_operand_p
44310 || (d->vmode != V32QImode && d->vmode != V16HImode))
44311 return false;
44312
44313 if (d->testing_p)
44314 return true;
44315
44316 nelt = d->nelt;
44317 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
44318
44319 /* Generate 4 permutation masks. If the required element is within
44320 the same lane, it is shuffled in. If the required element is from the
44321 other lane, force a zero by setting bit 7 in the permutation mask.
44322 The other mask has non-negative elements wherever an element is
44323 requested from the other lane; those elements are also moved to the
44324 other lane, so that the result of vpshufb can have its two V2TImode
44325 halves swapped. */
44326 m128 = GEN_INT (-128);
44327 for (i = 0; i < 32; ++i)
44328 {
44329 rperm[0][i] = m128;
44330 rperm[1][i] = m128;
44331 rperm[2][i] = m128;
44332 rperm[3][i] = m128;
44333 }
44334 used[0] = false;
44335 used[1] = false;
44336 used[2] = false;
44337 used[3] = false;
44338 for (i = 0; i < nelt; ++i)
44339 {
44340 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
44341 unsigned xlane = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
44342 unsigned int which = ((d->perm[i] & nelt) ? 2 : 0) + (xlane ? 1 : 0);
44343
44344 for (j = 0; j < eltsz; ++j)
44345 rperm[which][(i * eltsz + j) ^ xlane] = GEN_INT (e * eltsz + j);
44346 used[which] = true;
44347 }
44348
44349 for (i = 0; i < 2; ++i)
44350 {
44351 if (!used[2 * i + 1])
44352 {
44353 h[i] = NULL_RTX;
44354 continue;
44355 }
44356 vperm = gen_rtx_CONST_VECTOR (V32QImode,
44357 gen_rtvec_v (32, rperm[2 * i + 1]));
44358 vperm = force_reg (V32QImode, vperm);
44359 h[i] = gen_reg_rtx (V32QImode);
44360 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
44361 emit_insn (gen_avx2_pshufbv32qi3 (h[i], op, vperm));
44362 }
44363
44364 /* Swap the 128-bit lanes of h[X]. */
44365 for (i = 0; i < 2; ++i)
44366 {
44367 if (h[i] == NULL_RTX)
44368 continue;
44369 op = gen_reg_rtx (V4DImode);
44370 emit_insn (gen_avx2_permv4di_1 (op, gen_lowpart (V4DImode, h[i]),
44371 const2_rtx, GEN_INT (3), const0_rtx,
44372 const1_rtx));
44373 h[i] = gen_lowpart (V32QImode, op);
44374 }
44375
44376 for (i = 0; i < 2; ++i)
44377 {
44378 if (!used[2 * i])
44379 {
44380 l[i] = NULL_RTX;
44381 continue;
44382 }
44383 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[2 * i]));
44384 vperm = force_reg (V32QImode, vperm);
44385 l[i] = gen_reg_rtx (V32QImode);
44386 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
44387 emit_insn (gen_avx2_pshufbv32qi3 (l[i], op, vperm));
44388 }
44389
44390 for (i = 0; i < 2; ++i)
44391 {
44392 if (h[i] && l[i])
44393 {
44394 op = gen_reg_rtx (V32QImode);
44395 emit_insn (gen_iorv32qi3 (op, l[i], h[i]));
44396 l[i] = op;
44397 }
44398 else if (h[i])
44399 l[i] = h[i];
44400 }
44401
44402 gcc_assert (l[0] && l[1]);
44403 op = d->target;
44404 if (d->vmode != V32QImode)
44405 op = gen_reg_rtx (V32QImode);
44406 emit_insn (gen_iorv32qi3 (op, l[0], l[1]));
44407 if (op != d->target)
44408 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
44409 return true;
44410 }
44411
44412 /* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
44413 With all of the interface bits taken care of, perform the expansion
44414 in D and return true on success. */
44415
44416 static bool
44417 ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
44418 {
44419 /* Try a single instruction expansion. */
44420 if (expand_vec_perm_1 (d))
44421 return true;
44422
44423 /* Try sequences of two instructions. */
44424
44425 if (expand_vec_perm_pshuflw_pshufhw (d))
44426 return true;
44427
44428 if (expand_vec_perm_palignr (d))
44429 return true;
44430
44431 if (expand_vec_perm_interleave2 (d))
44432 return true;
44433
44434 if (expand_vec_perm_broadcast (d))
44435 return true;
44436
44437 if (expand_vec_perm_vpermq_perm_1 (d))
44438 return true;
44439
44440 if (expand_vec_perm_vperm2f128 (d))
44441 return true;
44442
44443 /* Try sequences of three instructions. */
44444
44445 if (expand_vec_perm_2vperm2f128_vshuf (d))
44446 return true;
44447
44448 if (expand_vec_perm_pshufb2 (d))
44449 return true;
44450
44451 if (expand_vec_perm_interleave3 (d))
44452 return true;
44453
44454 if (expand_vec_perm_vperm2f128_vblend (d))
44455 return true;
44456
44457 /* Try sequences of four instructions. */
44458
44459 if (expand_vec_perm_vpshufb2_vpermq (d))
44460 return true;
44461
44462 if (expand_vec_perm_vpshufb2_vpermq_even_odd (d))
44463 return true;
44464
44465 /* ??? Look for narrow permutations whose element orderings would
44466 allow the promotion to a wider mode. */
44467
44468 /* ??? Look for sequences of interleave or a wider permute that place
44469 the data into the correct lanes for a half-vector shuffle like
44470 pshuf[lh]w or vpermilps. */
44471
44472 /* ??? Look for sequences of interleave that produce the desired results.
44473 The combinatorics of punpck[lh] get pretty ugly... */
44474
44475 if (expand_vec_perm_even_odd (d))
44476 return true;
44477
44478 /* Even longer sequences. */
44479 if (expand_vec_perm_vpshufb4_vpermq2 (d))
44480 return true;
44481
44482 return false;
44483 }
44484
44485 /* If a permutation only uses one operand, make it clear. Returns true
44486 if the permutation references both operands. */
44487
44488 static bool
44489 canonicalize_perm (struct expand_vec_perm_d *d)
44490 {
44491 int i, which, nelt = d->nelt;
44492
44493 for (i = which = 0; i < nelt; ++i)
44494 which |= (d->perm[i] < nelt ? 1 : 2);
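/* Bit 0 of WHICH is set if any element comes from the first operand,
   bit 1 if any element comes from the second. */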
44495
44496 d->one_operand_p = true;
44497 switch (which)
44498 {
44499 default:
44500 gcc_unreachable();
44501
44502 case 3:
44503 if (!rtx_equal_p (d->op0, d->op1))
44504 {
44505 d->one_operand_p = false;
44506 break;
44507 }
44508 /* The elements of PERM do not suggest that only the first operand
44509 is used, but both operands are identical. Allow easier matching
44510 of the permutation by folding the permutation into the single
44511 input vector. */
44512 /* FALLTHRU */
44513
44514 case 2:
44515 for (i = 0; i < nelt; ++i)
44516 d->perm[i] &= nelt - 1;
44517 d->op0 = d->op1;
44518 break;
44519
44520 case 1:
44521 d->op1 = d->op0;
44522 break;
44523 }
44524
44525 return (which == 3);
44526 }
44527
44528 bool
44529 ix86_expand_vec_perm_const (rtx operands[4])
44530 {
44531 struct expand_vec_perm_d d;
44532 unsigned char perm[MAX_VECT_LEN];
44533 int i, nelt;
44534 bool two_args;
44535 rtx sel;
44536
44537 d.target = operands[0];
44538 d.op0 = operands[1];
44539 d.op1 = operands[2];
44540 sel = operands[3];
44541
44542 d.vmode = GET_MODE (d.target);
44543 gcc_assert (VECTOR_MODE_P (d.vmode));
44544 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
44545 d.testing_p = false;
44546
44547 gcc_assert (GET_CODE (sel) == CONST_VECTOR);
44548 gcc_assert (XVECLEN (sel, 0) == nelt);
44549 gcc_checking_assert (sizeof (d.perm) == sizeof (perm));
44550
44551 for (i = 0; i < nelt; ++i)
44552 {
44553 rtx e = XVECEXP (sel, 0, i);
44554 int ei = INTVAL (e) & (2 * nelt - 1);
44555 d.perm[i] = ei;
44556 perm[i] = ei;
44557 }
44558
44559 two_args = canonicalize_perm (&d);
44560
44561 if (ix86_expand_vec_perm_const_1 (&d))
44562 return true;
44563
44564 /* If the selector says both arguments are needed, but the operands are the
44565 same, the above tried to expand with one_operand_p and flattened selector.
44566 If that didn't work, retry without one_operand_p; we succeeded with that
44567 during testing. */
44568 if (two_args && d.one_operand_p)
44569 {
44570 d.one_operand_p = false;
44571 memcpy (d.perm, perm, sizeof (perm));
44572 return ix86_expand_vec_perm_const_1 (&d);
44573 }
44574
44575 return false;
44576 }
44577
44578 /* Implement targetm.vectorize.vec_perm_const_ok. */
44579
44580 static bool
44581 ix86_vectorize_vec_perm_const_ok (enum machine_mode vmode,
44582 const unsigned char *sel)
44583 {
44584 struct expand_vec_perm_d d;
44585 unsigned int i, nelt, which;
44586 bool ret;
44587
44588 d.vmode = vmode;
44589 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
44590 d.testing_p = true;
44591
44592 /* Given sufficient ISA support we can just return true here
44593 for selected vector modes. */
44594 if (d.vmode == V16SImode || d.vmode == V16SFmode
44595 || d.vmode == V8DFmode || d.vmode == V8DImode)
44596 /* All implementable with a single vpermi2 insn. */
44597 return true;
44598 if (GET_MODE_SIZE (d.vmode) == 16)
44599 {
44600 /* All implementable with a single vpperm insn. */
44601 if (TARGET_XOP)
44602 return true;
44603 /* All implementable with 2 pshufb + 1 ior. */
44604 if (TARGET_SSSE3)
44605 return true;
44606 /* All implementable with shufpd or unpck[lh]pd. */
44607 if (d.nelt == 2)
44608 return true;
44609 }
44610
44611 /* Extract the values from the vector CST into the permutation
44612 array in D. */
44613 memcpy (d.perm, sel, nelt);
44614 for (i = which = 0; i < nelt; ++i)
44615 {
44616 unsigned char e = d.perm[i];
44617 gcc_assert (e < 2 * nelt);
44618 which |= (e < nelt ? 1 : 2);
44619 }
44620
44621 /* For all elements from second vector, fold the elements to first. */
44622 if (which == 2)
44623 for (i = 0; i < nelt; ++i)
44624 d.perm[i] -= nelt;
44625
44626 /* Check whether the mask can be applied to the vector type. */
44627 d.one_operand_p = (which != 3);
44628
44629 /* Implementable with shufps or pshufd. */
44630 if (d.one_operand_p && (d.vmode == V4SFmode || d.vmode == V4SImode))
44631 return true;
44632
44633 /* Otherwise we have to go through the motions and see if we can
44634 figure out how to generate the requested permutation. */
44635 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
44636 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
44637 if (!d.one_operand_p)
44638 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
44639
44640 start_sequence ();
44641 ret = ix86_expand_vec_perm_const_1 (&d);
44642 end_sequence ();
44643
44644 return ret;
44645 }
44646
44647 void
44648 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
44649 {
44650 struct expand_vec_perm_d d;
44651 unsigned i, nelt;
44652
44653 d.target = targ;
44654 d.op0 = op0;
44655 d.op1 = op1;
44656 d.vmode = GET_MODE (targ);
44657 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
44658 d.one_operand_p = false;
44659 d.testing_p = false;
44660
44661 for (i = 0; i < nelt; ++i)
44662 d.perm[i] = i * 2 + odd;
44663
44664 /* We'll either be able to implement the permutation directly... */
44665 if (expand_vec_perm_1 (&d))
44666 return;
44667
44668 /* ... or we use the special-case patterns. */
44669 expand_vec_perm_even_odd_1 (&d, odd);
44670 }
44671
44672 static void
44673 ix86_expand_vec_interleave (rtx targ, rtx op0, rtx op1, bool high_p)
44674 {
44675 struct expand_vec_perm_d d;
44676 unsigned i, nelt, base;
44677 bool ok;
44678
44679 d.target = targ;
44680 d.op0 = op0;
44681 d.op1 = op1;
44682 d.vmode = GET_MODE (targ);
44683 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
44684 d.one_operand_p = false;
44685 d.testing_p = false;
44686
44687 base = high_p ? nelt / 2 : 0;
44688 for (i = 0; i < nelt / 2; ++i)
44689 {
44690 d.perm[i * 2] = i + base;
44691 d.perm[i * 2 + 1] = i + base + nelt;
44692 }
44693
44694 /* Note that for AVX this isn't one instruction. */
44695 ok = ix86_expand_vec_perm_const_1 (&d);
44696 gcc_assert (ok);
44697 }
44698
44699
44700 /* Expand a vector operation CODE for a V*QImode in terms of the
44701 same operation on V*HImode. */
44702
44703 void
44704 ix86_expand_vecop_qihi (enum rtx_code code, rtx dest, rtx op1, rtx op2)
44705 {
44706 enum machine_mode qimode = GET_MODE (dest);
44707 enum machine_mode himode;
44708 rtx (*gen_il) (rtx, rtx, rtx);
44709 rtx (*gen_ih) (rtx, rtx, rtx);
44710 rtx op1_l, op1_h, op2_l, op2_h, res_l, res_h;
44711 struct expand_vec_perm_d d;
44712 bool ok, full_interleave;
44713 bool uns_p = false;
44714 int i;
44715
44716 switch (qimode)
44717 {
44718 case V16QImode:
44719 himode = V8HImode;
44720 gen_il = gen_vec_interleave_lowv16qi;
44721 gen_ih = gen_vec_interleave_highv16qi;
44722 break;
44723 case V32QImode:
44724 himode = V16HImode;
44725 gen_il = gen_avx2_interleave_lowv32qi;
44726 gen_ih = gen_avx2_interleave_highv32qi;
44727 break;
44728 default:
44729 gcc_unreachable ();
44730 }
44731
44732 op2_l = op2_h = op2;
44733 switch (code)
44734 {
44735 case MULT:
44736 /* Unpack data such that we've got a source byte in each low byte of
44737 each word. We don't care what goes into the high byte of each word.
44738 Rather than trying to get zero in there, it is most convenient to let
44739 it be a copy of the low byte. */
44740 op2_l = gen_reg_rtx (qimode);
44741 op2_h = gen_reg_rtx (qimode);
44742 emit_insn (gen_il (op2_l, op2, op2));
44743 emit_insn (gen_ih (op2_h, op2, op2));
44744 /* FALLTHRU */
44745
44746 op1_l = gen_reg_rtx (qimode);
44747 op1_h = gen_reg_rtx (qimode);
44748 emit_insn (gen_il (op1_l, op1, op1));
44749 emit_insn (gen_ih (op1_h, op1, op1));
44750 full_interleave = qimode == V16QImode;
44751 break;
44752
44753 case ASHIFT:
44754 case LSHIFTRT:
44755 uns_p = true;
44756 /* FALLTHRU */
44757 case ASHIFTRT:
44758 op1_l = gen_reg_rtx (himode);
44759 op1_h = gen_reg_rtx (himode);
44760 ix86_expand_sse_unpack (op1_l, op1, uns_p, false);
44761 ix86_expand_sse_unpack (op1_h, op1, uns_p, true);
44762 full_interleave = true;
44763 break;
44764 default:
44765 gcc_unreachable ();
44766 }
44767
44768 /* Perform the operation. */
44769 res_l = expand_simple_binop (himode, code, op1_l, op2_l, NULL_RTX,
44770 1, OPTAB_DIRECT);
44771 res_h = expand_simple_binop (himode, code, op1_h, op2_h, NULL_RTX,
44772 1, OPTAB_DIRECT);
44773 gcc_assert (res_l && res_h);
44774
44775 /* Merge the data back into the right place. */
44776 d.target = dest;
44777 d.op0 = gen_lowpart (qimode, res_l);
44778 d.op1 = gen_lowpart (qimode, res_h);
44779 d.vmode = qimode;
44780 d.nelt = GET_MODE_NUNITS (qimode);
44781 d.one_operand_p = false;
44782 d.testing_p = false;
44783
44784 if (full_interleave)
44785 {
44786 /* For SSE2, we used a full interleave, so the desired
44787 results are in the even elements. */
44788 for (i = 0; i < 32; ++i)
44789 d.perm[i] = i * 2;
44790 }
44791 else
44792 {
44793 /* For AVX, the interleave used above was not cross-lane, so we extract
44794 the even elements but with the second and third quarters swapped.
44795 Happily, that is even one insn shorter than a plain even extraction. */
44796 for (i = 0; i < 32; ++i)
44797 d.perm[i] = i * 2 + ((i & 24) == 8 ? 16 : (i & 24) == 16 ? -16 : 0);
44798 }
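/* For V32QImode the selector built above is { 0, 2, ..., 14,
   32, 34, ..., 46, 16, 18, ..., 30, 48, 50, ..., 62 }. */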
44799
44800 ok = ix86_expand_vec_perm_const_1 (&d);
44801 gcc_assert (ok);
44802
44803 set_unique_reg_note (get_last_insn (), REG_EQUAL,
44804 gen_rtx_fmt_ee (code, qimode, op1, op2));
44805 }
44806
44807 /* Helper function of ix86_expand_mul_widen_evenodd. Return true
44808 if op is CONST_VECTOR with all odd elements equal to their
44809 preceding element. */
44810
44811 static bool
44812 const_vector_equal_evenodd_p (rtx op)
44813 {
44814 enum machine_mode mode = GET_MODE (op);
44815 int i, nunits = GET_MODE_NUNITS (mode);
44816 if (GET_CODE (op) != CONST_VECTOR
44817 || nunits != CONST_VECTOR_NUNITS (op))
44818 return false;
44819 for (i = 0; i < nunits; i += 2)
44820 if (CONST_VECTOR_ELT (op, i) != CONST_VECTOR_ELT (op, i + 1))
44821 return false;
44822 return true;
44823 }
44824
44825 void
44826 ix86_expand_mul_widen_evenodd (rtx dest, rtx op1, rtx op2,
44827 bool uns_p, bool odd_p)
44828 {
44829 enum machine_mode mode = GET_MODE (op1);
44830 enum machine_mode wmode = GET_MODE (dest);
44831 rtx x;
44832 rtx orig_op1 = op1, orig_op2 = op2;
44833
44834 if (!nonimmediate_operand (op1, mode))
44835 op1 = force_reg (mode, op1);
44836 if (!nonimmediate_operand (op2, mode))
44837 op2 = force_reg (mode, op2);
44838
44839 /* We only play even/odd games with vectors of SImode. */
44840 gcc_assert (mode == V4SImode || mode == V8SImode || mode == V16SImode);
44841
44842 /* If we're looking for the odd results, shift those members down to
44843 the even slots. For some cpus this is faster than a PSHUFD. */
44844 if (odd_p)
44845 {
44846 /* For XOP use vpmacsdqh, but only for smult, as it is only
44847 signed. */
44848 if (TARGET_XOP && mode == V4SImode && !uns_p)
44849 {
44850 x = force_reg (wmode, CONST0_RTX (wmode));
44851 emit_insn (gen_xop_pmacsdqh (dest, op1, op2, x));
44852 return;
44853 }
44854
44855 x = GEN_INT (GET_MODE_UNIT_BITSIZE (mode));
44856 if (!const_vector_equal_evenodd_p (orig_op1))
44857 op1 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op1),
44858 x, NULL, 1, OPTAB_DIRECT);
44859 if (!const_vector_equal_evenodd_p (orig_op2))
44860 op2 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op2),
44861 x, NULL, 1, OPTAB_DIRECT);
44862 op1 = gen_lowpart (mode, op1);
44863 op2 = gen_lowpart (mode, op2);
44864 }
44865
44866 if (mode == V16SImode)
44867 {
44868 if (uns_p)
44869 x = gen_vec_widen_umult_even_v16si (dest, op1, op2);
44870 else
44871 x = gen_vec_widen_smult_even_v16si (dest, op1, op2);
44872 }
44873 else if (mode == V8SImode)
44874 {
44875 if (uns_p)
44876 x = gen_vec_widen_umult_even_v8si (dest, op1, op2);
44877 else
44878 x = gen_vec_widen_smult_even_v8si (dest, op1, op2);
44879 }
44880 else if (uns_p)
44881 x = gen_vec_widen_umult_even_v4si (dest, op1, op2);
44882 else if (TARGET_SSE4_1)
44883 x = gen_sse4_1_mulv2siv2di3 (dest, op1, op2);
44884 else
44885 {
44886 rtx s1, s2, t0, t1, t2;
44887
44888 /* The easiest way to implement this without PMULDQ is to go through
44889 the motions as if we were performing a full 64-bit multiply, except
44890 that we need to do less shuffling of the elements. */
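/* Specifically, with A and B the elements of op1 and op2 read as
   unsigned 32-bit values,
     a * b == A * B - 2^32 * (B * (a < 0) + A * (b < 0))  (mod 2^64).
   The GT comparisons below produce all-ones masks for the negative
   elements; as -1 reads as 2^32 - 1, the widening unsigned products
   of those masks with the other operand, summed and shifted left by
   32, supply exactly the two correction terms modulo 2^64. */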
44891
44892 /* Compute the sign-extension, aka highparts, of the two operands. */
44893 s1 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
44894 op1, pc_rtx, pc_rtx);
44895 s2 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
44896 op2, pc_rtx, pc_rtx);
44897
44898 /* Multiply LO(A) * HI(B), and vice-versa. */
44899 t1 = gen_reg_rtx (wmode);
44900 t2 = gen_reg_rtx (wmode);
44901 emit_insn (gen_vec_widen_umult_even_v4si (t1, s1, op2));
44902 emit_insn (gen_vec_widen_umult_even_v4si (t2, s2, op1));
44903
44904 /* Multiply LO(A) * LO(B). */
44905 t0 = gen_reg_rtx (wmode);
44906 emit_insn (gen_vec_widen_umult_even_v4si (t0, op1, op2));
44907
44908 /* Combine and shift the highparts into place. */
44909 t1 = expand_binop (wmode, add_optab, t1, t2, t1, 1, OPTAB_DIRECT);
44910 t1 = expand_binop (wmode, ashl_optab, t1, GEN_INT (32), t1,
44911 1, OPTAB_DIRECT);
44912
44913 /* Combine high and low parts. */
44914 force_expand_binop (wmode, add_optab, t0, t1, dest, 1, OPTAB_DIRECT);
44915 return;
44916 }
44917 emit_insn (x);
44918 }
44919
44920 void
44921 ix86_expand_mul_widen_hilo (rtx dest, rtx op1, rtx op2,
44922 bool uns_p, bool high_p)
44923 {
44924 enum machine_mode wmode = GET_MODE (dest);
44925 enum machine_mode mode = GET_MODE (op1);
44926 rtx t1, t2, t3, t4, mask;
44927
44928 switch (mode)
44929 {
44930 case V4SImode:
44931 t1 = gen_reg_rtx (mode);
44932 t2 = gen_reg_rtx (mode);
44933 if (TARGET_XOP && !uns_p)
44934 {
44935 /* With XOP, we have pmacsdqh, aka mul_widen_odd. In this case,
44936 shuffle the elements once so that all elements are in the right
44937 place for immediate use: { A C B D }. */
44938 emit_insn (gen_sse2_pshufd_1 (t1, op1, const0_rtx, const2_rtx,
44939 const1_rtx, GEN_INT (3)));
44940 emit_insn (gen_sse2_pshufd_1 (t2, op2, const0_rtx, const2_rtx,
44941 const1_rtx, GEN_INT (3)));
44942 }
44943 else
44944 {
44945 /* Put the elements into place for the multiply. */
44946 ix86_expand_vec_interleave (t1, op1, op1, high_p);
44947 ix86_expand_vec_interleave (t2, op2, op2, high_p);
44948 high_p = false;
44949 }
44950 ix86_expand_mul_widen_evenodd (dest, t1, t2, uns_p, high_p);
44951 break;
44952
44953 case V8SImode:
44954 /* Shuffle the elements between the lanes. After this we
44955 have { A B E F | C D G H } for each operand. */
44956 t1 = gen_reg_rtx (V4DImode);
44957 t2 = gen_reg_rtx (V4DImode);
44958 emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, op1),
44959 const0_rtx, const2_rtx,
44960 const1_rtx, GEN_INT (3)));
44961 emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, op2),
44962 const0_rtx, const2_rtx,
44963 const1_rtx, GEN_INT (3)));
44964
44965 /* Shuffle the elements within the lanes. After this we
44966 have { A A B B | C C D D } or { E E F F | G G H H }. */
44967 t3 = gen_reg_rtx (V8SImode);
44968 t4 = gen_reg_rtx (V8SImode);
44969 mask = GEN_INT (high_p
44970 ? 2 + (2 << 2) + (3 << 4) + (3 << 6)
44971 : 0 + (0 << 2) + (1 << 4) + (1 << 6));
44972 emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1), mask));
44973 emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2), mask));
44974
44975 ix86_expand_mul_widen_evenodd (dest, t3, t4, uns_p, false);
44976 break;
44977
44978 case V8HImode:
44979 case V16HImode:
44980 t1 = expand_binop (mode, smul_optab, op1, op2, NULL_RTX,
44981 uns_p, OPTAB_DIRECT);
44982 t2 = expand_binop (mode,
44983 uns_p ? umul_highpart_optab : smul_highpart_optab,
44984 op1, op2, NULL_RTX, uns_p, OPTAB_DIRECT);
44985 gcc_assert (t1 && t2);
44986
44987 t3 = gen_reg_rtx (mode);
44988 ix86_expand_vec_interleave (t3, t1, t2, high_p);
44989 emit_move_insn (dest, gen_lowpart (wmode, t3));
44990 break;
44991
44992 case V16QImode:
44993 case V32QImode:
44994 t1 = gen_reg_rtx (wmode);
44995 t2 = gen_reg_rtx (wmode);
44996 ix86_expand_sse_unpack (t1, op1, uns_p, high_p);
44997 ix86_expand_sse_unpack (t2, op2, uns_p, high_p);
44998
44999 emit_insn (gen_rtx_SET (VOIDmode, dest, gen_rtx_MULT (wmode, t1, t2)));
45000 break;
45001
45002 default:
45003 gcc_unreachable ();
45004 }
45005 }
45006
45007 void
45008 ix86_expand_sse2_mulv4si3 (rtx op0, rtx op1, rtx op2)
45009 {
45010 rtx res_1, res_2, res_3, res_4;
45011
45012 res_1 = gen_reg_rtx (V4SImode);
45013 res_2 = gen_reg_rtx (V4SImode);
45014 res_3 = gen_reg_rtx (V2DImode);
45015 res_4 = gen_reg_rtx (V2DImode);
45016 ix86_expand_mul_widen_evenodd (res_3, op1, op2, true, false);
45017 ix86_expand_mul_widen_evenodd (res_4, op1, op2, true, true);
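/* At this point res_3 is { a0*b0, a2*b2 } and res_4 is
   { a1*b1, a3*b3 } as V2DImode products; only the low 32-bit half
   of each product is needed for the V4SImode result. */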
45018
45019 /* Move the results in element 2 down to element 1; we don't care
45020 what goes in elements 2 and 3. Then we can merge the parts
45021 back together with an interleave.
45022
45023 Note that two other sequences were tried:
45024 (1) Use interleaves at the start instead of psrldq, which allows
45025 us to use a single shufps to merge things back at the end.
45026 (2) Use shufps here to combine the two vectors, then pshufd to
45027 put the elements in the correct order.
45028 In both cases the cost of the reformatting stall was too high
45029 and the overall sequence slower. */
45030
45031 emit_insn (gen_sse2_pshufd_1 (res_1, gen_lowpart (V4SImode, res_3),
45032 const0_rtx, const2_rtx,
45033 const0_rtx, const0_rtx));
45034 emit_insn (gen_sse2_pshufd_1 (res_2, gen_lowpart (V4SImode, res_4),
45035 const0_rtx, const2_rtx,
45036 const0_rtx, const0_rtx));
45037 res_1 = emit_insn (gen_vec_interleave_lowv4si (op0, res_1, res_2));
45038
45039 set_unique_reg_note (res_1, REG_EQUAL, gen_rtx_MULT (V4SImode, op1, op2));
45040 }
45041
45042 void
45043 ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2)
45044 {
45045 enum machine_mode mode = GET_MODE (op0);
45046 rtx t1, t2, t3, t4, t5, t6;
45047
45048 if (TARGET_XOP && mode == V2DImode)
45049 {
45050 /* op1: A,B,C,D, op2: E,F,G,H */
45051 op1 = gen_lowpart (V4SImode, op1);
45052 op2 = gen_lowpart (V4SImode, op2);
45053
45054 t1 = gen_reg_rtx (V4SImode);
45055 t2 = gen_reg_rtx (V4SImode);
45056 t3 = gen_reg_rtx (V2DImode);
45057 t4 = gen_reg_rtx (V2DImode);
45058
45059 /* t1: B,A,D,C */
45060 emit_insn (gen_sse2_pshufd_1 (t1, op1,
45061 GEN_INT (1),
45062 GEN_INT (0),
45063 GEN_INT (3),
45064 GEN_INT (2)));
45065
45066 /* t2: (B*E),(A*F),(D*G),(C*H) */
45067 emit_insn (gen_mulv4si3 (t2, t1, op2));
45068
45069 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
45070 emit_insn (gen_xop_phadddq (t3, t2));
45071
45072 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
45073 emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
45074
45075 /* op0: (((B*E)+(A*F))<<32)+(B*F), (((D*G)+(C*H))<<32)+(D*H) */
45076 emit_insn (gen_xop_pmacsdql (op0, op1, op2, t4));
45077 }
45078 else
45079 {
45080 enum machine_mode nmode;
45081 rtx (*umul) (rtx, rtx, rtx);
45082
45083 if (mode == V2DImode)
45084 {
45085 umul = gen_vec_widen_umult_even_v4si;
45086 nmode = V4SImode;
45087 }
45088 else if (mode == V4DImode)
45089 {
45090 umul = gen_vec_widen_umult_even_v8si;
45091 nmode = V8SImode;
45092 }
45093 else if (mode == V8DImode)
45094 {
45095 umul = gen_vec_widen_umult_even_v16si;
45096 nmode = V16SImode;
45097 }
45098 else
45099 gcc_unreachable ();
45100
45101
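/* Build each 64-bit product from 32-bit halves: writing each operand
   as hi * 2^32 + lo, op1 * op2 == lo1 * lo2
   + ((hi1 * lo2 + hi2 * lo1) << 32)  (mod 2^64); the hi1 * hi2 term
   is shifted out entirely. */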
45102 /* Multiply low parts. */
45103 t1 = gen_reg_rtx (mode);
45104 emit_insn (umul (t1, gen_lowpart (nmode, op1), gen_lowpart (nmode, op2)));
45105
45106 /* Shift input vectors right 32 bits so we can multiply high parts. */
45107 t6 = GEN_INT (32);
45108 t2 = expand_binop (mode, lshr_optab, op1, t6, NULL, 1, OPTAB_DIRECT);
45109 t3 = expand_binop (mode, lshr_optab, op2, t6, NULL, 1, OPTAB_DIRECT);
45110
45111 /* Multiply high parts by low parts. */
45112 t4 = gen_reg_rtx (mode);
45113 t5 = gen_reg_rtx (mode);
45114 emit_insn (umul (t4, gen_lowpart (nmode, t2), gen_lowpart (nmode, op2)));
45115 emit_insn (umul (t5, gen_lowpart (nmode, t3), gen_lowpart (nmode, op1)));
45116
45117 /* Combine and shift the highparts back. */
45118 t4 = expand_binop (mode, add_optab, t4, t5, t4, 1, OPTAB_DIRECT);
45119 t4 = expand_binop (mode, ashl_optab, t4, t6, t4, 1, OPTAB_DIRECT);
45120
45121 /* Combine high and low parts. */
45122 force_expand_binop (mode, add_optab, t1, t4, op0, 1, OPTAB_DIRECT);
45123 }
45124
45125 set_unique_reg_note (get_last_insn (), REG_EQUAL,
45126 gen_rtx_MULT (mode, op1, op2));
45127 }
45128
45129 /* Calculate integer abs() using only SSE2 instructions. */
45130
45131 void
45132 ix86_expand_sse2_abs (rtx target, rtx input)
45133 {
45134 enum machine_mode mode = GET_MODE (target);
45135 rtx tmp0, tmp1, x;
45136
45137 switch (mode)
45138 {
45139 /* For 32-bit signed integer X, the best way to calculate the absolute
45140 value of X is (((signed) X >> (W-1)) ^ X) - ((signed) X >> (W-1)). */
45141 case V4SImode:
45142 tmp0 = expand_simple_binop (mode, ASHIFTRT, input,
45143 GEN_INT (GET_MODE_BITSIZE
45144 (GET_MODE_INNER (mode)) - 1),
45145 NULL, 0, OPTAB_DIRECT);
45146 tmp1 = expand_simple_binop (mode, XOR, tmp0, input,
45147 NULL, 0, OPTAB_DIRECT);
45148 x = expand_simple_binop (mode, MINUS, tmp1, tmp0,
45149 target, 0, OPTAB_DIRECT);
45150 break;
45151
45152 /* For 16-bit signed integer X, the best way to calculate the absolute
45153 value of X is max (X, -X), as SSE2 provides the PMAXSW insn. */
45154 case V8HImode:
45155 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
45156
45157 x = expand_simple_binop (mode, SMAX, tmp0, input,
45158 target, 0, OPTAB_DIRECT);
45159 break;
45160
45161 /* For 8-bit signed integer X, the best way to calculate the absolute
45162 value of X is min ((unsigned char) X, (unsigned char) (-X)),
45163 as SSE2 provides the PMINUB insn. */
45164 case V16QImode:
45165 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
45166
45167 x = expand_simple_binop (V16QImode, UMIN, tmp0, input,
45168 target, 0, OPTAB_DIRECT);
45169 break;
45170
45171 default:
45172 gcc_unreachable ();
45173 }
45174
45175 if (x != target)
45176 emit_move_insn (target, x);
45177 }
45178
45179 /* Expand an insert into a vector register through pinsr insn.
45180 Return true if successful. */
45181
45182 bool
45183 ix86_expand_pinsr (rtx *operands)
45184 {
45185 rtx dst = operands[0];
45186 rtx src = operands[3];
45187
45188 unsigned int size = INTVAL (operands[1]);
45189 unsigned int pos = INTVAL (operands[2]);
45190
45191 if (GET_CODE (dst) == SUBREG)
45192 {
45193 pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
45194 dst = SUBREG_REG (dst);
45195 }
45196
45197 if (GET_CODE (src) == SUBREG)
45198 src = SUBREG_REG (src);
45199
45200 switch (GET_MODE (dst))
45201 {
45202 case V16QImode:
45203 case V8HImode:
45204 case V4SImode:
45205 case V2DImode:
45206 {
45207 enum machine_mode srcmode, dstmode;
45208 rtx (*pinsr)(rtx, rtx, rtx, rtx);
45209
45210 srcmode = mode_for_size (size, MODE_INT, 0);
45211
45212 switch (srcmode)
45213 {
45214 case QImode:
45215 if (!TARGET_SSE4_1)
45216 return false;
45217 dstmode = V16QImode;
45218 pinsr = gen_sse4_1_pinsrb;
45219 break;
45220
45221 case HImode:
45222 if (!TARGET_SSE2)
45223 return false;
45224 dstmode = V8HImode;
45225 pinsr = gen_sse2_pinsrw;
45226 break;
45227
45228 case SImode:
45229 if (!TARGET_SSE4_1)
45230 return false;
45231 dstmode = V4SImode;
45232 pinsr = gen_sse4_1_pinsrd;
45233 break;
45234
45235 case DImode:
45236 gcc_assert (TARGET_64BIT);
45237 if (!TARGET_SSE4_1)
45238 return false;
45239 dstmode = V2DImode;
45240 pinsr = gen_sse4_1_pinsrq;
45241 break;
45242
45243 default:
45244 return false;
45245 }
45246
45247 rtx d = dst;
45248 if (GET_MODE (dst) != dstmode)
45249 d = gen_reg_rtx (dstmode);
45250 src = gen_lowpart (srcmode, src);
45251
45252 pos /= size;
45253
45254 emit_insn (pinsr (d, gen_lowpart (dstmode, dst), src,
45255 GEN_INT (1 << pos)));
45256 if (d != dst)
45257 emit_move_insn (dst, gen_lowpart (GET_MODE (dst), d));
45258 return true;
45259 }
45260
45261 default:
45262 return false;
45263 }
45264 }
45265 \f
45266 /* This function returns the calling ABI specific va_list type node.
45267 It returns the FNDECL specific va_list type. */
45268
45269 static tree
45270 ix86_fn_abi_va_list (tree fndecl)
45271 {
45272 if (!TARGET_64BIT)
45273 return va_list_type_node;
45274 gcc_assert (fndecl != NULL_TREE);
45275
45276 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
45277 return ms_va_list_type_node;
45278 else
45279 return sysv_va_list_type_node;
45280 }
45281
45282 /* Returns the canonical va_list type specified by TYPE. If there
45283 is no valid TYPE provided, it returns NULL_TREE. */
45284
45285 static tree
45286 ix86_canonical_va_list_type (tree type)
45287 {
45288 tree wtype, htype;
45289
45290 /* Resolve references and pointers to va_list type. */
45291 if (TREE_CODE (type) == MEM_REF)
45292 type = TREE_TYPE (type);
45293 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
45294 type = TREE_TYPE (type);
45295 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
45296 type = TREE_TYPE (type);
45297
45298 if (TARGET_64BIT && va_list_type_node != NULL_TREE)
45299 {
45300 wtype = va_list_type_node;
45301 gcc_assert (wtype != NULL_TREE);
45302 htype = type;
45303 if (TREE_CODE (wtype) == ARRAY_TYPE)
45304 {
45305 /* If va_list is an array type, the argument may have decayed
45306 to a pointer type, e.g. by being passed to another function.
45307 In that case, unwrap both types so that we can compare the
45308 underlying records. */
45309 if (TREE_CODE (htype) == ARRAY_TYPE
45310 || POINTER_TYPE_P (htype))
45311 {
45312 wtype = TREE_TYPE (wtype);
45313 htype = TREE_TYPE (htype);
45314 }
45315 }
45316 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
45317 return va_list_type_node;
45318 wtype = sysv_va_list_type_node;
45319 gcc_assert (wtype != NULL_TREE);
45320 htype = type;
45321 if (TREE_CODE (wtype) == ARRAY_TYPE)
45322 {
45323 /* If va_list is an array type, the argument may have decayed
45324 to a pointer type, e.g. by being passed to another function.
45325 In that case, unwrap both types so that we can compare the
45326 underlying records. */
45327 if (TREE_CODE (htype) == ARRAY_TYPE
45328 || POINTER_TYPE_P (htype))
45329 {
45330 wtype = TREE_TYPE (wtype);
45331 htype = TREE_TYPE (htype);
45332 }
45333 }
45334 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
45335 return sysv_va_list_type_node;
45336 wtype = ms_va_list_type_node;
45337 gcc_assert (wtype != NULL_TREE);
45338 htype = type;
45339 if (TREE_CODE (wtype) == ARRAY_TYPE)
45340 {
45341 /* If va_list is an array type, the argument may have decayed
45342 to a pointer type, e.g. by being passed to another function.
45343 In that case, unwrap both types so that we can compare the
45344 underlying records. */
45345 if (TREE_CODE (htype) == ARRAY_TYPE
45346 || POINTER_TYPE_P (htype))
45347 {
45348 wtype = TREE_TYPE (wtype);
45349 htype = TREE_TYPE (htype);
45350 }
45351 }
45352 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
45353 return ms_va_list_type_node;
45354 return NULL_TREE;
45355 }
45356 return std_canonical_va_list_type (type);
45357 }
45358
45359 /* Iterate through the target-specific builtin types for va_list.
45360 IDX denotes the iterator, *PTREE is set to the result type of
45361 the va_list builtin, and *PNAME to its internal type.
45362 Returns zero if there is no element for this index, otherwise
45363 IDX should be increased upon the next call.
45364 Note, do not iterate a base builtin's name like __builtin_va_list.
45365 Used from c_common_nodes_and_builtins. */
45366
45367 static int
45368 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
45369 {
45370 if (TARGET_64BIT)
45371 {
45372 switch (idx)
45373 {
45374 default:
45375 break;
45376
45377 case 0:
45378 *ptree = ms_va_list_type_node;
45379 *pname = "__builtin_ms_va_list";
45380 return 1;
45381
45382 case 1:
45383 *ptree = sysv_va_list_type_node;
45384 *pname = "__builtin_sysv_va_list";
45385 return 1;
45386 }
45387 }
45388
45389 return 0;
45390 }
45391
45392 #undef TARGET_SCHED_DISPATCH
45393 #define TARGET_SCHED_DISPATCH has_dispatch
45394 #undef TARGET_SCHED_DISPATCH_DO
45395 #define TARGET_SCHED_DISPATCH_DO do_dispatch
45396 #undef TARGET_SCHED_REASSOCIATION_WIDTH
45397 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
45398 #undef TARGET_SCHED_REORDER
45399 #define TARGET_SCHED_REORDER ix86_sched_reorder
45400 #undef TARGET_SCHED_ADJUST_PRIORITY
45401 #define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
45402 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
45403 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
45404 ix86_dependencies_evaluation_hook
45405
45406 /* The size of the dispatch window is the total number of bytes of
45407 object code allowed in a window. */
45408 #define DISPATCH_WINDOW_SIZE 16
45409
45410 /* Number of dispatch windows considered for scheduling. */
45411 #define MAX_DISPATCH_WINDOWS 3
45412
45413 /* Maximum number of instructions in a window. */
45414 #define MAX_INSN 4
45415
45416 /* Maximum number of immediate operands in a window. */
45417 #define MAX_IMM 4
45418
45419 /* Maximum number of immediate bits allowed in a window. */
45420 #define MAX_IMM_SIZE 128
45421
45422 /* Maximum number of 32 bit immediates allowed in a window. */
45423 #define MAX_IMM_32 4
45424
45425 /* Maximum number of 64 bit immediates allowed in a window. */
45426 #define MAX_IMM_64 2
45427
45428 /* Maximum total of loads or prefetches allowed in a window. */
45429 #define MAX_LOAD 2
45430
45431 /* Maximum total of stores allowed in a window. */
45432 #define MAX_STORE 1
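/* Taken together, the limits above mean each dispatch window holds at most
   MAX_INSN (4) instructions and DISPATCH_WINDOW_SIZE (16) bytes of object
   code, with MAX_DISPATCH_WINDOWS (3) windows, i.e. 48 bytes, considered
   for scheduling; a window may carry at most 4 immediate operands totaling
   128 bits, 2 loads and 1 store.  */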
45433
45434 #undef BIG
45435 #define BIG 100
45436
45437
45438 /* Dispatch groups. Instructions that affect the mix in a dispatch window. */
45439 enum dispatch_group {
45440 disp_no_group = 0,
45441 disp_load,
45442 disp_store,
45443 disp_load_store,
45444 disp_prefetch,
45445 disp_imm,
45446 disp_imm_32,
45447 disp_imm_64,
45448 disp_branch,
45449 disp_cmp,
45450 disp_jcc,
45451 disp_last
45452 };
45453
45454 /* Number of allowable groups in a dispatch window. It is an array
45455 indexed by the dispatch_group enum. 100 is used as a big number,
45456 because the number of these kinds of operations does not have any
45457 effect on the dispatch window, but we need them for other reasons in
45458 the table. */
45459 static unsigned int num_allowable_groups[disp_last] = {
45460 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
45461 };
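/* Read positionally against enum dispatch_group, the table above allows,
   for example, at most 2 loads (disp_load), 1 store (disp_store) and
   4 immediate-carrying insns (disp_imm) per window, while disp_cmp and
   disp_jcc are effectively unlimited (BIG).  */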
45462
45463 char group_name[disp_last + 1][16] = {
45464 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
45465 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
45466 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
45467 };
45468
45469 /* Instruction path. */
45470 enum insn_path {
45471 no_path = 0,
45472 path_single, /* Single micro op. */
45473 path_double, /* Double micro op. */
45474 path_multi, /* Instructions with more than 2 micro ops. */
45475 last_path
45476 };
45477
45478 /* sched_insn_info defines a window to the instructions scheduled in
45479 the basic block. It contains a pointer to the insn_info table and
45480 the instruction scheduled.
45481
45482 Windows are allocated for each basic block and are linked
45483 together. */
45484 typedef struct sched_insn_info_s {
45485 rtx insn;
45486 enum dispatch_group group;
45487 enum insn_path path;
45488 int byte_len;
45489 int imm_bytes;
45490 } sched_insn_info;
45491
45492 /* Linked list of dispatch windows. This is a two-way list of
45493 dispatch windows of a basic block. It contains information about
45494 the number of uops in the window and the total number of
45495 instructions and of bytes in the object code for this dispatch
45496 window. */
45497 typedef struct dispatch_windows_s {
45498 int num_insn; /* Number of insn in the window. */
45499 int num_uops; /* Number of uops in the window. */
45500 int window_size; /* Number of bytes in the window. */
45501 int window_num; /* Window number, either 0 or 1. */
45502 int num_imm; /* Number of immediates in the window. */
45503 int num_imm_32; /* Number of 32 bit immediates in the window. */
45504 int num_imm_64; /* Number of 64 bit immediates in the window. */
45505 int imm_size; /* Total size in bytes of immediates in the window. */
45506 int num_loads; /* Total memory loads in the window. */
45507 int num_stores; /* Total memory stores in the window. */
45508 int violation; /* Violation exists in window. */
45509 sched_insn_info *window; /* Pointer to the window. */
45510 struct dispatch_windows_s *next;
45511 struct dispatch_windows_s *prev;
45512 } dispatch_windows;
45513
45514 /* Immediate values used in an insn. */
45515 typedef struct imm_info_s
45516 {
45517 int imm;
45518 int imm32;
45519 int imm64;
45520 } imm_info;
45521
45522 static dispatch_windows *dispatch_window_list;
45523 static dispatch_windows *dispatch_window_list1;
45524
45525 /* Get dispatch group of insn. */
45526
45527 static enum dispatch_group
45528 get_mem_group (rtx insn)
45529 {
45530 enum attr_memory memory;
45531
45532 if (INSN_CODE (insn) < 0)
45533 return disp_no_group;
45534 memory = get_attr_memory (insn);
45535 if (memory == MEMORY_STORE)
45536 return disp_store;
45537
45538 if (memory == MEMORY_LOAD)
45539 return disp_load;
45540
45541 if (memory == MEMORY_BOTH)
45542 return disp_load_store;
45543
45544 return disp_no_group;
45545 }
45546
45547 /* Return true if insn is a compare instruction. */
45548
45549 static bool
45550 is_cmp (rtx insn)
45551 {
45552 enum attr_type type;
45553
45554 type = get_attr_type (insn);
45555 return (type == TYPE_TEST
45556 || type == TYPE_ICMP
45557 || type == TYPE_FCMP
45558 || GET_CODE (PATTERN (insn)) == COMPARE);
45559 }
45560
45561 /* Return true if a dispatch violation was encountered. */
45562
45563 static bool
45564 dispatch_violation (void)
45565 {
45566 if (dispatch_window_list->next)
45567 return dispatch_window_list->next->violation;
45568 return dispatch_window_list->violation;
45569 }
45570
45571 /* Return true if insn is a branch instruction. */
45572
45573 static bool
45574 is_branch (rtx insn)
45575 {
45576 return (CALL_P (insn) || JUMP_P (insn));
45577 }
45578
45579 /* Return true if insn is a prefetch instruction. */
45580
45581 static bool
45582 is_prefetch (rtx insn)
45583 {
45584 return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
45585 }
45586
45587 /* This function initializes a dispatch window and the list container holding a
45588 pointer to the window. */
45589
45590 static void
45591 init_window (int window_num)
45592 {
45593 int i;
45594 dispatch_windows *new_list;
45595
45596 if (window_num == 0)
45597 new_list = dispatch_window_list;
45598 else
45599 new_list = dispatch_window_list1;
45600
45601 new_list->num_insn = 0;
45602 new_list->num_uops = 0;
45603 new_list->window_size = 0;
45604 new_list->next = NULL;
45605 new_list->prev = NULL;
45606 new_list->window_num = window_num;
45607 new_list->num_imm = 0;
45608 new_list->num_imm_32 = 0;
45609 new_list->num_imm_64 = 0;
45610 new_list->imm_size = 0;
45611 new_list->num_loads = 0;
45612 new_list->num_stores = 0;
45613 new_list->violation = false;
45614
45615 for (i = 0; i < MAX_INSN; i++)
45616 {
45617 new_list->window[i].insn = NULL;
45618 new_list->window[i].group = disp_no_group;
45619 new_list->window[i].path = no_path;
45620 new_list->window[i].byte_len = 0;
45621 new_list->window[i].imm_bytes = 0;
45622 }
45623 return;
45624 }
45625
45626 /* This function allocates and initializes a dispatch window and the
45627 list container holding a pointer to the window. */
45628
45629 static dispatch_windows *
45630 allocate_window (void)
45631 {
45632 dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
45633 new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);
45634
45635 return new_list;
45636 }
45637
45638 /* This routine initializes the dispatch scheduling information. It
45639 initiates building dispatch scheduler tables and constructs the
45640 first dispatch window. */
45641
45642 static void
45643 init_dispatch_sched (void)
45644 {
45645 /* Allocate a dispatch list and a window. */
45646 dispatch_window_list = allocate_window ();
45647 dispatch_window_list1 = allocate_window ();
45648 init_window (0);
45649 init_window (1);
45650 }
45651
45652 /* This function returns true if a branch is detected. The end of a basic block
45653 does not have to be a branch, but here we assume only branches end a
45654 window. */
45655
45656 static bool
45657 is_end_basic_block (enum dispatch_group group)
45658 {
45659 return group == disp_branch;
45660 }
45661
45662 /* This function is called when the end of a window processing is reached. */
45663
45664 static void
45665 process_end_window (void)
45666 {
45667 gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
45668 if (dispatch_window_list->next)
45669 {
45670 gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
45671 gcc_assert (dispatch_window_list->window_size
45672 + dispatch_window_list1->window_size <= 48);
45673 init_window (1);
45674 }
45675 init_window (0);
45676 }
45677
45678 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
45679 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
45680 for 48 bytes of instructions. Note that these windows are not dispatch
45681 windows whose sizes are DISPATCH_WINDOW_SIZE. */
45682
45683 static dispatch_windows *
45684 allocate_next_window (int window_num)
45685 {
45686 if (window_num == 0)
45687 {
45688 if (dispatch_window_list->next)
45689 init_window (1);
45690 init_window (0);
45691 return dispatch_window_list;
45692 }
45693
45694 dispatch_window_list->next = dispatch_window_list1;
45695 dispatch_window_list1->prev = dispatch_window_list;
45696
45697 return dispatch_window_list1;
45698 }
45699
45700 /* Increment the immediate-operand counts in IMM_VALUES for *IN_RTX. */
45701
45702 static int
45703 find_constant_1 (rtx *in_rtx, imm_info *imm_values)
45704 {
45705 if (*in_rtx == 0)
45706 return 0;
45707
45708 switch (GET_CODE (*in_rtx))
45709 {
45710 case CONST:
45711 case SYMBOL_REF:
45712 case CONST_INT:
45713 (imm_values->imm)++;
45714 if (x86_64_immediate_operand (*in_rtx, SImode))
45715 (imm_values->imm32)++;
45716 else
45717 (imm_values->imm64)++;
45718 break;
45719
45720 case CONST_DOUBLE:
45721 (imm_values->imm)++;
45722 (imm_values->imm64)++;
45723 break;
45724
45725 case CODE_LABEL:
45726 if (LABEL_KIND (*in_rtx) == LABEL_NORMAL)
45727 {
45728 (imm_values->imm)++;
45729 (imm_values->imm32)++;
45730 }
45731 break;
45732
45733 default:
45734 break;
45735 }
45736
45737 return 0;
45738 }
45739
45740 /* Compute number of immediate operands of an instruction. */
45741
45742 static void
45743 find_constant (rtx in_rtx, imm_info *imm_values)
45744 {
45745 for_each_rtx (INSN_P (in_rtx) ? &PATTERN (in_rtx) : &in_rtx,
45746 (rtx_function) find_constant_1, (void *) imm_values);
45747 }
45748
45749 /* Return the total size of immediate operands of an instruction along with
45750 the number of corresponding immediate operands. It initializes its parameters
45751 to zero before calling FIND_CONSTANT.
45752 INSN is the input instruction. IMM is the total of immediates.
45753 IMM32 is the number of 32 bit immediates. IMM64 is the number of 64
45754 bit immediates. */
45755
45756 static int
45757 get_num_immediates (rtx insn, int *imm, int *imm32, int *imm64)
45758 {
45759 imm_info imm_values = {0, 0, 0};
45760
45761 find_constant (insn, &imm_values);
45762 *imm = imm_values.imm;
45763 *imm32 = imm_values.imm32;
45764 *imm64 = imm_values.imm64;
45765 return imm_values.imm32 * 4 + imm_values.imm64 * 8;
45766 }
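/* For example, get_num_immediates on an insn carrying one 32-bit and one
   64-bit immediate returns 4 + 8 = 12 and sets *IMM = 2, *IMM32 = 1 and
   *IMM64 = 1.  */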
45767
45768 /* This function indicates whether INSN has any immediate
45769 operands. */
45770
45771 static bool
45772 has_immediate (rtx insn)
45773 {
45774 int num_imm_operand;
45775 int num_imm32_operand;
45776 int num_imm64_operand;
45777
45778 if (insn)
45779 return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
45780 &num_imm64_operand);
45781 return false;
45782 }
45783
45784 /* Return single or double path for instructions. */
45785
45786 static enum insn_path
45787 get_insn_path (rtx insn)
45788 {
45789 enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);
45790
45791 if ((int)path == 0)
45792 return path_single;
45793
45794 if ((int)path == 1)
45795 return path_double;
45796
45797 return path_multi;
45798 }
45799
45800 /* Return insn dispatch group. */
45801
45802 static enum dispatch_group
45803 get_insn_group (rtx insn)
45804 {
45805 enum dispatch_group group = get_mem_group (insn);
45806 if (group)
45807 return group;
45808
45809 if (is_branch (insn))
45810 return disp_branch;
45811
45812 if (is_cmp (insn))
45813 return disp_cmp;
45814
45815 if (has_immediate (insn))
45816 return disp_imm;
45817
45818 if (is_prefetch (insn))
45819 return disp_prefetch;
45820
45821 return disp_no_group;
45822 }
45823
45824 /* Count number of GROUP restricted instructions in a dispatch
45825 window WINDOW_LIST. */
45826
45827 static int
45828 count_num_restricted (rtx insn, dispatch_windows *window_list)
45829 {
45830 enum dispatch_group group = get_insn_group (insn);
45831 int imm_size;
45832 int num_imm_operand;
45833 int num_imm32_operand;
45834 int num_imm64_operand;
45835
45836 if (group == disp_no_group)
45837 return 0;
45838
45839 if (group == disp_imm)
45840 {
45841 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
45842 &num_imm64_operand);
45843 if (window_list->imm_size + imm_size > MAX_IMM_SIZE
45844 || num_imm_operand + window_list->num_imm > MAX_IMM
45845 || (num_imm32_operand > 0
45846 && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
45847 || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
45848 || (num_imm64_operand > 0
45849 && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
45850 || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
45851 || (window_list->imm_size + imm_size == MAX_IMM_SIZE
45852 && num_imm64_operand > 0
45853 && ((window_list->num_imm_64 > 0
45854 && window_list->num_insn >= 2)
45855 || window_list->num_insn >= 3)))
45856 return BIG;
45857
45858 return 1;
45859 }
45860
45861 if ((group == disp_load_store
45862 && (window_list->num_loads >= MAX_LOAD
45863 || window_list->num_stores >= MAX_STORE))
45864 || ((group == disp_load
45865 || group == disp_prefetch)
45866 && window_list->num_loads >= MAX_LOAD)
45867 || (group == disp_store
45868 && window_list->num_stores >= MAX_STORE))
45869 return BIG;
45870
45871 return 1;
45872 }
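/* As an illustration of the checks above: once a window already holds
   MAX_STORE stores, any further disp_store insn is counted as BIG, which
   exceeds its num_allowable_groups entry and makes fits_dispatch_window
   reject it.  */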
45873
45874 /* This function returns true if insn satisfies dispatch rules on the
45875 last window scheduled. */
45876
45877 static bool
45878 fits_dispatch_window (rtx insn)
45879 {
45880 dispatch_windows *window_list = dispatch_window_list;
45881 dispatch_windows *window_list_next = dispatch_window_list->next;
45882 unsigned int num_restrict;
45883 enum dispatch_group group = get_insn_group (insn);
45884 enum insn_path path = get_insn_path (insn);
45885 int sum;
45886
45887 /* Make disp_cmp and disp_jcc get scheduled at the latest. These
45888 instructions should be given the lowest priority in the
45889 scheduling process in the Haifa scheduler to make sure they will be
45890 scheduled in the same dispatch window as the reference to them. */
45891 if (group == disp_jcc || group == disp_cmp)
45892 return false;
45893
45894 /* Check nonrestricted. */
45895 if (group == disp_no_group || group == disp_branch)
45896 return true;
45897
45898 /* Get last dispatch window. */
45899 if (window_list_next)
45900 window_list = window_list_next;
45901
45902 if (window_list->window_num == 1)
45903 {
45904 sum = window_list->prev->window_size + window_list->window_size;
45905
45906 if (sum == 32
45907 || (min_insn_size (insn) + sum) >= 48)
45908 /* Window 1 is full. Go for next window. */
45909 return true;
45910 }
45911
45912 num_restrict = count_num_restricted (insn, window_list);
45913
45914 if (num_restrict > num_allowable_groups[group])
45915 return false;
45916
45917 /* See if it fits in the first window. */
45918 if (window_list->window_num == 0)
45919 {
45920 /* The first window should have only single and double path
45921 uops. */
45922 if (path == path_double
45923 && (window_list->num_uops + 2) > MAX_INSN)
45924 return false;
45925 else if (path != path_single)
45926 return false;
45927 }
45928 return true;
45929 }
45930
45931 /* Add an instruction INSN with NUM_UOPS micro-operations to the
45932 dispatch window WINDOW_LIST. */
45933
45934 static void
45935 add_insn_window (rtx insn, dispatch_windows *window_list, int num_uops)
45936 {
45937 int byte_len = min_insn_size (insn);
45938 int num_insn = window_list->num_insn;
45939 int imm_size;
45940 sched_insn_info *window = window_list->window;
45941 enum dispatch_group group = get_insn_group (insn);
45942 enum insn_path path = get_insn_path (insn);
45943 int num_imm_operand;
45944 int num_imm32_operand;
45945 int num_imm64_operand;
45946
45947 if (!window_list->violation && group != disp_cmp
45948 && !fits_dispatch_window (insn))
45949 window_list->violation = true;
45950
45951 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
45952 &num_imm64_operand);
45953
45954 /* Initialize window with new instruction. */
45955 window[num_insn].insn = insn;
45956 window[num_insn].byte_len = byte_len;
45957 window[num_insn].group = group;
45958 window[num_insn].path = path;
45959 window[num_insn].imm_bytes = imm_size;
45960
45961 window_list->window_size += byte_len;
45962 window_list->num_insn = num_insn + 1;
45963 window_list->num_uops = window_list->num_uops + num_uops;
45964 window_list->imm_size += imm_size;
45965 window_list->num_imm += num_imm_operand;
45966 window_list->num_imm_32 += num_imm32_operand;
45967 window_list->num_imm_64 += num_imm64_operand;
45968
45969 if (group == disp_store)
45970 window_list->num_stores += 1;
45971 else if (group == disp_load
45972 || group == disp_prefetch)
45973 window_list->num_loads += 1;
45974 else if (group == disp_load_store)
45975 {
45976 window_list->num_stores += 1;
45977 window_list->num_loads += 1;
45978 }
45979 }
45980
45981 /* Adds a scheduled instruction, INSN, to the current dispatch window.
45982 If the total bytes of instructions or the number of instructions in
45983 the window exceeds the allowable limit, it allocates a new window. */
45984
45985 static void
45986 add_to_dispatch_window (rtx insn)
45987 {
45988 int byte_len;
45989 dispatch_windows *window_list;
45990 dispatch_windows *next_list;
45991 dispatch_windows *window0_list;
45992 enum insn_path path;
45993 enum dispatch_group insn_group;
45994 bool insn_fits;
45995 int num_insn;
45996 int num_uops;
45997 int window_num;
45998 int insn_num_uops;
45999 int sum;
46000
46001 if (INSN_CODE (insn) < 0)
46002 return;
46003
46004 byte_len = min_insn_size (insn);
46005 window_list = dispatch_window_list;
46006 next_list = window_list->next;
46007 path = get_insn_path (insn);
46008 insn_group = get_insn_group (insn);
46009
46010 /* Get the last dispatch window. */
46011 if (next_list)
46012 window_list = dispatch_window_list->next;
46013
46014 if (path == path_single)
46015 insn_num_uops = 1;
46016 else if (path == path_double)
46017 insn_num_uops = 2;
46018 else
46019 insn_num_uops = (int) path;
46020
46021 /* If the current window is full, get a new window.
46022 Window number zero is full if MAX_INSN uops are scheduled in it.
46023 Window number one is full if window zero's bytes plus window
46024 one's bytes is 32, if the bytes of the new instruction added
46025 to the total make it greater than 48, or if it already has MAX_INSN
46026 instructions in it. */
46027 num_insn = window_list->num_insn;
46028 num_uops = window_list->num_uops;
46029 window_num = window_list->window_num;
46030 insn_fits = fits_dispatch_window (insn);
46031
46032 if (num_insn >= MAX_INSN
46033 || num_uops + insn_num_uops > MAX_INSN
46034 || !(insn_fits))
46035 {
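/* Flip to the other window (0 <-> 1) and start a fresh one there.  */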
46036 window_num = ~window_num & 1;
46037 window_list = allocate_next_window (window_num);
46038 }
46039
46040 if (window_num == 0)
46041 {
46042 add_insn_window (insn, window_list, insn_num_uops);
46043 if (window_list->num_insn >= MAX_INSN
46044 && insn_group == disp_branch)
46045 {
46046 process_end_window ();
46047 return;
46048 }
46049 }
46050 else if (window_num == 1)
46051 {
46052 window0_list = window_list->prev;
46053 sum = window0_list->window_size + window_list->window_size;
46054 if (sum == 32
46055 || (byte_len + sum) >= 48)
46056 {
46057 process_end_window ();
46058 window_list = dispatch_window_list;
46059 }
46060
46061 add_insn_window (insn, window_list, insn_num_uops);
46062 }
46063 else
46064 gcc_unreachable ();
46065
46066 if (is_end_basic_block (insn_group))
46067 {
46068 /* End of basic block is reached; do end-basic-block processing. */
46069 process_end_window ();
46070 return;
46071 }
46072 }
46073
46074 /* Print the dispatch window, WINDOW_NUM, to FILE. */
46075
46076 DEBUG_FUNCTION static void
46077 debug_dispatch_window_file (FILE *file, int window_num)
46078 {
46079 dispatch_windows *list;
46080 int i;
46081
46082 if (window_num == 0)
46083 list = dispatch_window_list;
46084 else
46085 list = dispatch_window_list1;
46086
46087 fprintf (file, "Window #%d:\n", list->window_num);
46088 fprintf (file, " num_insn = %d, num_uops = %d, window_size = %d\n",
46089 list->num_insn, list->num_uops, list->window_size);
46090 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
46091 list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);
46092
46093 fprintf (file, " num_loads = %d, num_stores = %d\n", list->num_loads,
46094 list->num_stores);
46095 fprintf (file, " insn info:\n");
46096
46097 for (i = 0; i < MAX_INSN; i++)
46098 {
46099 if (!list->window[i].insn)
46100 break;
46101 fprintf (file, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
46102 i, group_name[list->window[i].group],
46103 i, (void *)list->window[i].insn,
46104 i, list->window[i].path,
46105 i, list->window[i].byte_len,
46106 i, list->window[i].imm_bytes);
46107 }
46108 }
46109
46110 /* Print to stdout a dispatch window. */
46111
46112 DEBUG_FUNCTION void
46113 debug_dispatch_window (int window_num)
46114 {
46115 debug_dispatch_window_file (stdout, window_num);
46116 }
46117
46118 /* Print INSN dispatch information to FILE. */
46119
46120 DEBUG_FUNCTION static void
46121 debug_insn_dispatch_info_file (FILE *file, rtx insn)
46122 {
46123 int byte_len;
46124 enum insn_path path;
46125 enum dispatch_group group;
46126 int imm_size;
46127 int num_imm_operand;
46128 int num_imm32_operand;
46129 int num_imm64_operand;
46130
46131 if (INSN_CODE (insn) < 0)
46132 return;
46133
46134 byte_len = min_insn_size (insn);
46135 path = get_insn_path (insn);
46136 group = get_insn_group (insn);
46137 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
46138 &num_imm64_operand);
46139
46140 fprintf (file, " insn info:\n");
46141 fprintf (file, " group = %s, path = %d, byte_len = %d\n",
46142 group_name[group], path, byte_len);
46143 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
46144 num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
46145 }
46146
46147 /* Print to STDOUT the status of the ready list with respect to
46148 dispatch windows. */
46149
46150 DEBUG_FUNCTION void
46151 debug_ready_dispatch (void)
46152 {
46153 int i;
46154 int no_ready = number_in_ready ();
46155
46156 fprintf (stdout, "Number of ready: %d\n", no_ready);
46157
46158 for (i = 0; i < no_ready; i++)
46159 debug_insn_dispatch_info_file (stdout, get_ready_element (i));
46160 }
46161
46162 /* This routine is the driver of the dispatch scheduler. */
46163
46164 static void
46165 do_dispatch (rtx insn, int mode)
46166 {
46167 if (mode == DISPATCH_INIT)
46168 init_dispatch_sched ();
46169 else if (mode == ADD_TO_DISPATCH_WINDOW)
46170 add_to_dispatch_window (insn);
46171 }
46172
46173 /* Return TRUE if Dispatch Scheduling is supported. */
46174
46175 static bool
46176 has_dispatch (rtx insn, int action)
46177 {
46178 if ((TARGET_BDVER1 || TARGET_BDVER2 || TARGET_BDVER3 || TARGET_BDVER4)
46179 && flag_dispatch_scheduler)
46180 switch (action)
46181 {
46182 default:
46183 return false;
46184
46185 case IS_DISPATCH_ON:
46186 return true;
46187 break;
46188
46189 case IS_CMP:
46190 return is_cmp (insn);
46191
46192 case DISPATCH_VIOLATION:
46193 return dispatch_violation ();
46194
46195 case FITS_DISPATCH_WINDOW:
46196 return fits_dispatch_window (insn);
46197 }
46198
46199 return false;
46200 }
46201
46202 /* Implementation of the reassociation_width target hook, used by the
46203 reassoc phase to identify the parallelism level in a reassociated
46204 tree. The statement's tree_code is passed in OPC. The arguments'
46205 type is passed in MODE.
46206
46207 Currently parallel reassociation is enabled for Atom
46208 processors only and we set reassociation width to be 2
46209 because Atom may issue up to 2 instructions per cycle.
46210
46211 Return value should be fixed if parallel reassociation is
46212 enabled for other processors. */
46213
46214 static int
46215 ix86_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED,
46216 enum machine_mode mode)
46217 {
46218 int res = 1;
46219
46220 if (INTEGRAL_MODE_P (mode) && TARGET_REASSOC_INT_TO_PARALLEL)
46221 res = 2;
46222 else if (FLOAT_MODE_P (mode) && TARGET_REASSOC_FP_TO_PARALLEL)
46223 res = 2;
46224
46225 return res;
46226 }
46227
46228 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
46229 place emms and femms instructions. */
46230
46231 static enum machine_mode
46232 ix86_preferred_simd_mode (enum machine_mode mode)
46233 {
46234 if (!TARGET_SSE)
46235 return word_mode;
46236
46237 switch (mode)
46238 {
46239 case QImode:
46240 return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V32QImode : V16QImode;
46241 case HImode:
46242 return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V16HImode : V8HImode;
46243 case SImode:
46244 return TARGET_AVX512F ? V16SImode :
46245 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V8SImode : V4SImode;
46246 case DImode:
46247 return TARGET_AVX512F ? V8DImode :
46248 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V4DImode : V2DImode;
46249
46250 case SFmode:
46251 if (TARGET_AVX512F)
46252 return V16SFmode;
46253 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
46254 return V8SFmode;
46255 else
46256 return V4SFmode;
46257
46258 case DFmode:
46259 if (!TARGET_VECTORIZE_DOUBLE)
46260 return word_mode;
46261 else if (TARGET_AVX512F)
46262 return V8DFmode;
46263 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
46264 return V4DFmode;
46265 else if (TARGET_SSE2)
46266 return V2DFmode;
46267 /* FALLTHRU */
46268
46269 default:
46270 return word_mode;
46271 }
46272 }
46273
46274 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
46275 vectors. If AVX512F is enabled then try vectorizing with 512bit,
46276 256bit and 128bit vectors. */
46277
46278 static unsigned int
46279 ix86_autovectorize_vector_sizes (void)
46280 {
46281 return TARGET_AVX512F ? 64 | 32 | 16 :
46282 (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
46283 }
46284
46285 \f
46286
46287 /* Return class of registers which could be used for pseudo of MODE
46288 and of class RCLASS for spilling instead of memory. Return NO_REGS
46289 if it is not possible or not profitable. */
46290 static reg_class_t
46291 ix86_spill_class (reg_class_t rclass, enum machine_mode mode)
46292 {
46293 if (TARGET_SSE && TARGET_GENERAL_REGS_SSE_SPILL && ! TARGET_MMX
46294 && (mode == SImode || (TARGET_64BIT && mode == DImode))
46295 && INTEGER_CLASS_P (rclass))
46296 return ALL_SSE_REGS;
46297 return NO_REGS;
46298 }
46299
46300 /* Implement targetm.vectorize.init_cost. */
46301
46302 static void *
46303 ix86_init_cost (struct loop *loop_info ATTRIBUTE_UNUSED)
46304 {
46305 unsigned *cost = XNEWVEC (unsigned, 3);
46306 cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
46307 return cost;
46308 }
46309
46310 /* Implement targetm.vectorize.add_stmt_cost. */
46311
46312 static unsigned
46313 ix86_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
46314 struct _stmt_vec_info *stmt_info, int misalign,
46315 enum vect_cost_model_location where)
46316 {
46317 unsigned *cost = (unsigned *) data;
46318 unsigned retval = 0;
46319
46320 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
46321 int stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
46322
46323 /* Statements in an inner loop relative to the loop being
46324 vectorized are weighted more heavily. The value here is
46325 arbitrary and could potentially be improved with analysis. */
46326 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
46327 count *= 50; /* FIXME. */
46328
46329 retval = (unsigned) (count * stmt_cost);
46330
46331 /* We need to multiply all vector stmt costs by 1.7 (estimated cost)
46332 for Silvermont, as it has an out-of-order integer pipeline and can execute
46333 2 scalar instructions per tick, but has an in-order SIMD pipeline. */
46334 if (TARGET_SILVERMONT || TARGET_INTEL)
46335 if (stmt_info && stmt_info->stmt)
46336 {
46337 tree lhs_op = gimple_get_lhs (stmt_info->stmt);
46338 if (lhs_op && TREE_CODE (TREE_TYPE (lhs_op)) == INTEGER_TYPE)
46339 retval = (retval * 17) / 10;
46340 }
46341
46342 cost[where] += retval;
46343
46344 return retval;
46345 }
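/* E.g. on Silvermont, a statement with an integer lhs and
   count * stmt_cost = 10 is accounted as (10 * 17) / 10 = 17 by the
   scaling above.  */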
46346
46347 /* Implement targetm.vectorize.finish_cost. */
46348
46349 static void
46350 ix86_finish_cost (void *data, unsigned *prologue_cost,
46351 unsigned *body_cost, unsigned *epilogue_cost)
46352 {
46353 unsigned *cost = (unsigned *) data;
46354 *prologue_cost = cost[vect_prologue];
46355 *body_cost = cost[vect_body];
46356 *epilogue_cost = cost[vect_epilogue];
46357 }
46358
46359 /* Implement targetm.vectorize.destroy_cost_data. */
46360
46361 static void
46362 ix86_destroy_cost_data (void *data)
46363 {
46364 free (data);
46365 }
46366
46367 /* Validate target specific memory model bits in VAL. */
46368
46369 static unsigned HOST_WIDE_INT
46370 ix86_memmodel_check (unsigned HOST_WIDE_INT val)
46371 {
46372 unsigned HOST_WIDE_INT model = val & MEMMODEL_MASK;
46373 bool strong;
46374
46375 if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE
46376 |MEMMODEL_MASK)
46377 || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE)))
46378 {
46379 warning (OPT_Winvalid_memory_model,
46380 "Unknown architecture specific memory model");
46381 return MEMMODEL_SEQ_CST;
46382 }
46383 strong = (model == MEMMODEL_ACQ_REL || model == MEMMODEL_SEQ_CST);
46384 if (val & IX86_HLE_ACQUIRE && !(model == MEMMODEL_ACQUIRE || strong))
46385 {
46386 warning (OPT_Winvalid_memory_model,
46387 "HLE_ACQUIRE not used with ACQUIRE or stronger memory model");
46388 return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE;
46389 }
46390 if (val & IX86_HLE_RELEASE && !(model == MEMMODEL_RELEASE || strong))
46391 {
46392 warning (OPT_Winvalid_memory_model,
46393 "HLE_RELEASE not used with RELEASE or stronger memory model");
46394 return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE;
46395 }
46396 return val;
46397 }
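/* For instance, combining IX86_HLE_ACQUIRE with a relaxed memory model
   triggers the warning above and falls back to
   MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE.  */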
46398
46399 /* Set CLONEI->vecsize_mangle, CLONEI->vecsize_int,
46400 CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
46401 CLONEI->simdlen. Return 0 if SIMD clones shouldn't be emitted,
46402 or number of vecsize_mangle variants that should be emitted. */
46403
46404 static int
46405 ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
46406 struct cgraph_simd_clone *clonei,
46407 tree base_type, int num)
46408 {
46409 int ret = 1;
46410
46411 if (clonei->simdlen
46412 && (clonei->simdlen < 2
46413 || clonei->simdlen > 16
46414 || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
46415 {
46416 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
46417 "unsupported simdlen %d", clonei->simdlen);
46418 return 0;
46419 }
46420
46421 tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
46422 if (TREE_CODE (ret_type) != VOID_TYPE)
46423 switch (TYPE_MODE (ret_type))
46424 {
46425 case QImode:
46426 case HImode:
46427 case SImode:
46428 case DImode:
46429 case SFmode:
46430 case DFmode:
46431 /* case SCmode: */
46432 /* case DCmode: */
46433 break;
46434 default:
46435 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
46436 "unsupported return type %qT for simd\n", ret_type);
46437 return 0;
46438 }
46439
46440 tree t;
46441 int i;
46442
46443 for (t = DECL_ARGUMENTS (node->decl), i = 0; t; t = DECL_CHAIN (t), i++)
46444 /* FIXME: Shouldn't we allow such arguments if they are uniform? */
46445 switch (TYPE_MODE (TREE_TYPE (t)))
46446 {
46447 case QImode:
46448 case HImode:
46449 case SImode:
46450 case DImode:
46451 case SFmode:
46452 case DFmode:
46453 /* case SCmode: */
46454 /* case DCmode: */
46455 break;
46456 default:
46457 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
46458 "unsupported argument type %qT for simd\n", TREE_TYPE (t));
46459 return 0;
46460 }
46461
46462 if (clonei->cilk_elemental)
46463 {
46464 /* Parse the processor clause here. If not present, default to 'b'. */
46465 clonei->vecsize_mangle = 'b';
46466 }
46467 else if (!TREE_PUBLIC (node->decl))
46468 {
46469 /* If the function isn't exported, we can pick up just one ISA
46470 for the clones. */
46471 if (TARGET_AVX2)
46472 clonei->vecsize_mangle = 'd';
46473 else if (TARGET_AVX)
46474 clonei->vecsize_mangle = 'c';
46475 else
46476 clonei->vecsize_mangle = 'b';
46477 ret = 1;
46478 }
46479 else
46480 {
46481 clonei->vecsize_mangle = "bcd"[num];
46482 ret = 3;
46483 }
46484 switch (clonei->vecsize_mangle)
46485 {
46486 case 'b':
46487 clonei->vecsize_int = 128;
46488 clonei->vecsize_float = 128;
46489 break;
46490 case 'c':
46491 clonei->vecsize_int = 128;
46492 clonei->vecsize_float = 256;
46493 break;
46494 case 'd':
46495 clonei->vecsize_int = 256;
46496 clonei->vecsize_float = 256;
46497 break;
46498 }
46499 if (clonei->simdlen == 0)
46500 {
46501 if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
46502 clonei->simdlen = clonei->vecsize_int;
46503 else
46504 clonei->simdlen = clonei->vecsize_float;
46505 clonei->simdlen /= GET_MODE_BITSIZE (TYPE_MODE (base_type));
46506 if (clonei->simdlen > 16)
46507 clonei->simdlen = 16;
46508 }
46509 return ret;
46510 }
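/* For instance, with a double (64-bit) BASE_TYPE and a vecsize_float of 256,
   the default simdlen computed above is 256 / 64 = 4.  */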
46511
46512 /* Add target attribute to SIMD clone NODE if needed. */
46513
46514 static void
46515 ix86_simd_clone_adjust (struct cgraph_node *node)
46516 {
46517 const char *str = NULL;
46518 gcc_assert (node->decl == cfun->decl);
46519 switch (node->simdclone->vecsize_mangle)
46520 {
46521 case 'b':
46522 if (!TARGET_SSE2)
46523 str = "sse2";
46524 break;
46525 case 'c':
46526 if (!TARGET_AVX)
46527 str = "avx";
46528 break;
46529 case 'd':
46530 if (!TARGET_AVX2)
46531 str = "avx2";
46532 break;
46533 default:
46534 gcc_unreachable ();
46535 }
46536 if (str == NULL)
46537 return;
46538 push_cfun (NULL);
46539 tree args = build_tree_list (NULL_TREE, build_string (strlen (str), str));
46540 bool ok = ix86_valid_target_attribute_p (node->decl, NULL, args, 0);
46541 gcc_assert (ok);
46542 pop_cfun ();
46543 ix86_previous_fndecl = NULL_TREE;
46544 ix86_set_current_function (node->decl);
46545 }
46546
46547 /* If SIMD clone NODE can't be used in a vectorized loop
46548 in current function, return -1, otherwise return a badness of using it
46549 (0 if it is most desirable from vecsize_mangle point of view, 1
46550 slightly less desirable, etc.). */
46551
46552 static int
46553 ix86_simd_clone_usable (struct cgraph_node *node)
46554 {
46555 switch (node->simdclone->vecsize_mangle)
46556 {
46557 case 'b':
46558 if (!TARGET_SSE2)
46559 return -1;
46560 if (!TARGET_AVX)
46561 return 0;
46562 return TARGET_AVX2 ? 2 : 1;
46563 case 'c':
46564 if (!TARGET_AVX)
46565 return -1;
46566 return TARGET_AVX2 ? 1 : 0;
46567 break;
46568 case 'd':
46569 if (!TARGET_AVX2)
46570 return -1;
46571 return 0;
46572 default:
46573 gcc_unreachable ();
46574 }
46575 }
46576
46577 /* This function counts the number of memory references.
46578 This value determines the unrolling factor for the
46579 bdver3 and bdver4 architectures. */
46580
46581 static int
46582 ix86_loop_memcount (rtx *x, unsigned *mem_count)
46583 {
46584 if (*x != NULL_RTX && MEM_P (*x))
46585 {
46586 enum machine_mode mode;
46587 unsigned int n_words;
46588
46589 mode = GET_MODE (*x);
46590 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
46591
46592 if (n_words > 4)
46593 (*mem_count) += 2;
46594 else
46595 (*mem_count) += 1;
46596 }
46597 return 0;
46598 }
46599
46600 /* This function adjusts the unroll factor based on
46601 the hardware capabilities. For example, bdver3 has
46602 a loop buffer which makes unrolling of smaller
46603 loops less important. This function decides the
46604 unroll factor using the number of memory references
46605 (the value 32 is used) as a heuristic. */
46606
46607 static unsigned
46608 ix86_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
46609 {
46610 basic_block *bbs;
46611 rtx insn;
46612 unsigned i;
46613 unsigned mem_count = 0;
46614
46615 if (!TARGET_ADJUST_UNROLL)
46616 return nunroll;
46617
46618 /* Count the number of memory references within the loop body. */
46619 bbs = get_loop_body (loop);
46620 for (i = 0; i < loop->num_nodes; i++)
46621 {
46622 for (insn = BB_HEAD (bbs[i]); insn != BB_END (bbs[i]); insn = NEXT_INSN (insn))
46623 if (NONDEBUG_INSN_P (insn))
46624 for_each_rtx (&insn, (rtx_function) ix86_loop_memcount, &mem_count);
46625 }
46626 free (bbs);
46627
46628 if (mem_count && mem_count <= 32)
46629 return 32 / mem_count;
46630
46631 return nunroll;
46632 }
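/* For example, a loop body with 8 counted memory references gets an unroll
   factor of 32 / 8 = 4; loops with no references or more than 32 of them
   keep NUNROLL.  */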
46633
46634
46635 /* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P. */
46636
46637 static bool
46638 ix86_float_exceptions_rounding_supported_p (void)
46639 {
46640 /* For x87 floating point with standard excess precision handling,
46641 there is no adddf3 pattern (since x87 floating point only has
46642 XFmode operations) so the default hook implementation gets this
46643 wrong. */
46644 return TARGET_80387 || TARGET_SSE_MATH;
46645 }
46646
46647 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
46648
46649 static void
46650 ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
46651 {
46652 if (!TARGET_80387 && !TARGET_SSE_MATH)
46653 return;
46654 tree exceptions_var = create_tmp_var (integer_type_node, NULL);
46655 if (TARGET_80387)
46656 {
46657 tree fenv_index_type = build_index_type (size_int (6));
46658 tree fenv_type = build_array_type (unsigned_type_node, fenv_index_type);
46659 tree fenv_var = create_tmp_var (fenv_type, NULL);
46660 mark_addressable (fenv_var);
46661 tree fenv_ptr = build_pointer_type (fenv_type);
46662 tree fenv_addr = build1 (ADDR_EXPR, fenv_ptr, fenv_var);
46663 fenv_addr = fold_convert (ptr_type_node, fenv_addr);
46664 tree fnstenv = ix86_builtins[IX86_BUILTIN_FNSTENV];
46665 tree fldenv = ix86_builtins[IX86_BUILTIN_FLDENV];
46666 tree fnstsw = ix86_builtins[IX86_BUILTIN_FNSTSW];
46667 tree fnclex = ix86_builtins[IX86_BUILTIN_FNCLEX];
46668 tree hold_fnstenv = build_call_expr (fnstenv, 1, fenv_addr);
46669 tree hold_fnclex = build_call_expr (fnclex, 0);
46670 *hold = build2 (COMPOUND_EXPR, void_type_node, hold_fnstenv,
46671 hold_fnclex);
46672 *clear = build_call_expr (fnclex, 0);
46673 tree sw_var = create_tmp_var (short_unsigned_type_node, NULL);
46674 mark_addressable (sw_var);
46675 tree su_ptr = build_pointer_type (short_unsigned_type_node);
46676 tree sw_addr = build1 (ADDR_EXPR, su_ptr, sw_var);
46677 tree fnstsw_call = build_call_expr (fnstsw, 1, sw_addr);
46678 tree exceptions_x87 = fold_convert (integer_type_node, sw_var);
46679 tree update_mod = build2 (MODIFY_EXPR, integer_type_node,
46680 exceptions_var, exceptions_x87);
46681 *update = build2 (COMPOUND_EXPR, integer_type_node,
46682 fnstsw_call, update_mod);
46683 tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr);
46684 *update = build2 (COMPOUND_EXPR, void_type_node, *update, update_fldenv);
46685 }
46686 if (TARGET_SSE_MATH)
46687 {
46688 tree mxcsr_orig_var = create_tmp_var (unsigned_type_node, NULL);
46689 tree mxcsr_mod_var = create_tmp_var (unsigned_type_node, NULL);
46690 tree stmxcsr = ix86_builtins[IX86_BUILTIN_STMXCSR];
46691 tree ldmxcsr = ix86_builtins[IX86_BUILTIN_LDMXCSR];
46692 tree stmxcsr_hold_call = build_call_expr (stmxcsr, 0);
46693 tree hold_assign_orig = build2 (MODIFY_EXPR, unsigned_type_node,
46694 mxcsr_orig_var, stmxcsr_hold_call);
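/* The constants below: 0x1f80 sets all six MXCSR exception mask bits,
   while 0xffffffc0 clears the six exception flag bits, so exceptions are
   masked and cleared in the copy loaded for the hold step.  */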
46695 tree hold_mod_val = build2 (BIT_IOR_EXPR, unsigned_type_node,
46696 mxcsr_orig_var,
46697 build_int_cst (unsigned_type_node, 0x1f80));
46698 hold_mod_val = build2 (BIT_AND_EXPR, unsigned_type_node, hold_mod_val,
46699 build_int_cst (unsigned_type_node, 0xffffffc0));
46700 tree hold_assign_mod = build2 (MODIFY_EXPR, unsigned_type_node,
46701 mxcsr_mod_var, hold_mod_val);
46702 tree ldmxcsr_hold_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
46703 tree hold_all = build2 (COMPOUND_EXPR, unsigned_type_node,
46704 hold_assign_orig, hold_assign_mod);
46705 hold_all = build2 (COMPOUND_EXPR, void_type_node, hold_all,
46706 ldmxcsr_hold_call);
46707 if (*hold)
46708 *hold = build2 (COMPOUND_EXPR, void_type_node, *hold, hold_all);
46709 else
46710 *hold = hold_all;
46711 tree ldmxcsr_clear_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
46712 if (*clear)
46713 *clear = build2 (COMPOUND_EXPR, void_type_node, *clear,
46714 ldmxcsr_clear_call);
46715 else
46716 *clear = ldmxcsr_clear_call;
46717 tree stxmcsr_update_call = build_call_expr (stmxcsr, 0);
46718 tree exceptions_sse = fold_convert (integer_type_node,
46719 stxmcsr_update_call);
46720 if (*update)
46721 {
46722 tree exceptions_mod = build2 (BIT_IOR_EXPR, integer_type_node,
46723 exceptions_var, exceptions_sse);
46724 tree exceptions_assign = build2 (MODIFY_EXPR, integer_type_node,
46725 exceptions_var, exceptions_mod);
46726 *update = build2 (COMPOUND_EXPR, integer_type_node, *update,
46727 exceptions_assign);
46728 }
46729 else
46730 *update = build2 (MODIFY_EXPR, integer_type_node,
46731 exceptions_var, exceptions_sse);
46732 tree ldmxcsr_update_call = build_call_expr (ldmxcsr, 1, mxcsr_orig_var);
46733 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
46734 ldmxcsr_update_call);
46735 }
46736 tree atomic_feraiseexcept
46737 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
46738 tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept,
46739 1, exceptions_var);
46740 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
46741 atomic_feraiseexcept_call);
46742 }
46743
46744 /* Initialize the GCC target structure. */
46745 #undef TARGET_RETURN_IN_MEMORY
46746 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
46747
46748 #undef TARGET_LEGITIMIZE_ADDRESS
46749 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
46750
46751 #undef TARGET_ATTRIBUTE_TABLE
46752 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
46753 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
46754 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
46755 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
46756 # undef TARGET_MERGE_DECL_ATTRIBUTES
46757 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
46758 #endif
46759
46760 #undef TARGET_COMP_TYPE_ATTRIBUTES
46761 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
46762
46763 #undef TARGET_INIT_BUILTINS
46764 #define TARGET_INIT_BUILTINS ix86_init_builtins
46765 #undef TARGET_BUILTIN_DECL
46766 #define TARGET_BUILTIN_DECL ix86_builtin_decl
46767 #undef TARGET_EXPAND_BUILTIN
46768 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
46769
46770 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
46771 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
46772 ix86_builtin_vectorized_function
46773
46774 #undef TARGET_VECTORIZE_BUILTIN_TM_LOAD
46775 #define TARGET_VECTORIZE_BUILTIN_TM_LOAD ix86_builtin_tm_load
46776
46777 #undef TARGET_VECTORIZE_BUILTIN_TM_STORE
46778 #define TARGET_VECTORIZE_BUILTIN_TM_STORE ix86_builtin_tm_store
46779
46780 #undef TARGET_VECTORIZE_BUILTIN_GATHER
46781 #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
46782
46783 #undef TARGET_BUILTIN_RECIPROCAL
46784 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
46785
46786 #undef TARGET_ASM_FUNCTION_EPILOGUE
46787 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
46788
46789 #undef TARGET_ENCODE_SECTION_INFO
46790 #ifndef SUBTARGET_ENCODE_SECTION_INFO
46791 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
46792 #else
46793 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
46794 #endif
46795
46796 #undef TARGET_ASM_OPEN_PAREN
46797 #define TARGET_ASM_OPEN_PAREN ""
46798 #undef TARGET_ASM_CLOSE_PAREN
46799 #define TARGET_ASM_CLOSE_PAREN ""
46800
46801 #undef TARGET_ASM_BYTE_OP
46802 #define TARGET_ASM_BYTE_OP ASM_BYTE
46803
46804 #undef TARGET_ASM_ALIGNED_HI_OP
46805 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
46806 #undef TARGET_ASM_ALIGNED_SI_OP
46807 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
46808 #ifdef ASM_QUAD
46809 #undef TARGET_ASM_ALIGNED_DI_OP
46810 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
46811 #endif
46812
46813 #undef TARGET_PROFILE_BEFORE_PROLOGUE
46814 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
46815
46816 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
46817 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name
46818
46819 #undef TARGET_ASM_UNALIGNED_HI_OP
46820 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
46821 #undef TARGET_ASM_UNALIGNED_SI_OP
46822 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
46823 #undef TARGET_ASM_UNALIGNED_DI_OP
46824 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
46825
46826 #undef TARGET_PRINT_OPERAND
46827 #define TARGET_PRINT_OPERAND ix86_print_operand
46828 #undef TARGET_PRINT_OPERAND_ADDRESS
46829 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
46830 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
46831 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
46832 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
46833 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
46834
46835 #undef TARGET_SCHED_INIT_GLOBAL
46836 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
46837 #undef TARGET_SCHED_ADJUST_COST
46838 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
46839 #undef TARGET_SCHED_ISSUE_RATE
46840 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
46841 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
46842 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
46843 ia32_multipass_dfa_lookahead
46844 #undef TARGET_SCHED_MACRO_FUSION_P
46845 #define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
46846 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
46847 #define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p
46848
46849 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
46850 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
46851
46852 #undef TARGET_MEMMODEL_CHECK
46853 #define TARGET_MEMMODEL_CHECK ix86_memmodel_check
46854
46855 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
46856 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv
46857
46858 #ifdef HAVE_AS_TLS
46859 #undef TARGET_HAVE_TLS
46860 #define TARGET_HAVE_TLS true
46861 #endif
46862 #undef TARGET_CANNOT_FORCE_CONST_MEM
46863 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
46864 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
46865 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
46866
46867 #undef TARGET_DELEGITIMIZE_ADDRESS
46868 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
46869
46870 #undef TARGET_MS_BITFIELD_LAYOUT_P
46871 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
46872
46873 #if TARGET_MACHO
46874 #undef TARGET_BINDS_LOCAL_P
46875 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
46876 #endif
46877 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
46878 #undef TARGET_BINDS_LOCAL_P
46879 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
46880 #endif
46881
46882 #undef TARGET_ASM_OUTPUT_MI_THUNK
46883 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
46884 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
46885 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
46886
46887 #undef TARGET_ASM_FILE_START
46888 #define TARGET_ASM_FILE_START x86_file_start
46889
46890 #undef TARGET_OPTION_OVERRIDE
46891 #define TARGET_OPTION_OVERRIDE ix86_option_override
46892
46893 #undef TARGET_REGISTER_MOVE_COST
46894 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
46895 #undef TARGET_MEMORY_MOVE_COST
46896 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
46897 #undef TARGET_RTX_COSTS
46898 #define TARGET_RTX_COSTS ix86_rtx_costs
46899 #undef TARGET_ADDRESS_COST
46900 #define TARGET_ADDRESS_COST ix86_address_cost
46901
46902 #undef TARGET_FIXED_CONDITION_CODE_REGS
46903 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
46904 #undef TARGET_CC_MODES_COMPATIBLE
46905 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
46906
46907 #undef TARGET_MACHINE_DEPENDENT_REORG
46908 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
46909
46910 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
46911 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
46912
46913 #undef TARGET_BUILD_BUILTIN_VA_LIST
46914 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
46915
46916 #undef TARGET_FOLD_BUILTIN
46917 #define TARGET_FOLD_BUILTIN ix86_fold_builtin
46918
46919 #undef TARGET_COMPARE_VERSION_PRIORITY
46920 #define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority
46921
46922 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
46923 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
46924 ix86_generate_version_dispatcher_body
46925
46926 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
46927 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
46928 ix86_get_function_versions_dispatcher
46929
46930 #undef TARGET_ENUM_VA_LIST_P
46931 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
46932
46933 #undef TARGET_FN_ABI_VA_LIST
46934 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
46935
46936 #undef TARGET_CANONICAL_VA_LIST_TYPE
46937 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
46938
46939 #undef TARGET_EXPAND_BUILTIN_VA_START
46940 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
46941
46942 #undef TARGET_MD_ASM_CLOBBERS
46943 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
46944
46945 #undef TARGET_PROMOTE_PROTOTYPES
46946 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
46947 #undef TARGET_SETUP_INCOMING_VARARGS
46948 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
46949 #undef TARGET_MUST_PASS_IN_STACK
46950 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
46951 #undef TARGET_FUNCTION_ARG_ADVANCE
46952 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
46953 #undef TARGET_FUNCTION_ARG
46954 #define TARGET_FUNCTION_ARG ix86_function_arg
46955 #undef TARGET_FUNCTION_ARG_BOUNDARY
46956 #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
46957 #undef TARGET_PASS_BY_REFERENCE
46958 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
46959 #undef TARGET_INTERNAL_ARG_POINTER
46960 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
46961 #undef TARGET_UPDATE_STACK_BOUNDARY
46962 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
46963 #undef TARGET_GET_DRAP_RTX
46964 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
46965 #undef TARGET_STRICT_ARGUMENT_NAMING
46966 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
46967 #undef TARGET_STATIC_CHAIN
46968 #define TARGET_STATIC_CHAIN ix86_static_chain
46969 #undef TARGET_TRAMPOLINE_INIT
46970 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
46971 #undef TARGET_RETURN_POPS_ARGS
46972 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
46973
46974 #undef TARGET_LEGITIMATE_COMBINED_INSN
46975 #define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn
46976
46977 #undef TARGET_ASAN_SHADOW_OFFSET
46978 #define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset
46979
46980 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
46981 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
46982
46983 #undef TARGET_SCALAR_MODE_SUPPORTED_P
46984 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
46985
46986 #undef TARGET_VECTOR_MODE_SUPPORTED_P
46987 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
46988
46989 #undef TARGET_C_MODE_FOR_SUFFIX
46990 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
46991
46992 #ifdef HAVE_AS_TLS
46993 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
46994 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
46995 #endif
46996
46997 #ifdef SUBTARGET_INSERT_ATTRIBUTES
46998 #undef TARGET_INSERT_ATTRIBUTES
46999 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
47000 #endif
47001
47002 #undef TARGET_MANGLE_TYPE
47003 #define TARGET_MANGLE_TYPE ix86_mangle_type
47004
47005 #if !TARGET_MACHO
47006 #undef TARGET_STACK_PROTECT_FAIL
47007 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
47008 #endif
47009
47010 #undef TARGET_FUNCTION_VALUE
47011 #define TARGET_FUNCTION_VALUE ix86_function_value
47012
47013 #undef TARGET_FUNCTION_VALUE_REGNO_P
47014 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
47015
47016 #undef TARGET_PROMOTE_FUNCTION_MODE
47017 #define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode
47018
47019 #undef TARGET_MEMBER_TYPE_FORCES_BLK
47020 #define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk
47021
47022 #undef TARGET_INSTANTIATE_DECLS
47023 #define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls
47024
47025 #undef TARGET_SECONDARY_RELOAD
47026 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
47027
47028 #undef TARGET_CLASS_MAX_NREGS
47029 #define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs
47030
47031 #undef TARGET_PREFERRED_RELOAD_CLASS
47032 #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
47033 #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
47034 #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
47035 #undef TARGET_CLASS_LIKELY_SPILLED_P
47036 #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
47037
47038 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
47039 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
47040 ix86_builtin_vectorization_cost
47041 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
47042 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
47043 ix86_vectorize_vec_perm_const_ok
47044 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
47045 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
47046 ix86_preferred_simd_mode
47047 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
47048 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
47049 ix86_autovectorize_vector_sizes
47050 #undef TARGET_VECTORIZE_INIT_COST
47051 #define TARGET_VECTORIZE_INIT_COST ix86_init_cost
47052 #undef TARGET_VECTORIZE_ADD_STMT_COST
47053 #define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
47054 #undef TARGET_VECTORIZE_FINISH_COST
47055 #define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
47056 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
47057 #define TARGET_VECTORIZE_DESTROY_COST_DATA ix86_destroy_cost_data
47058
47059 #undef TARGET_SET_CURRENT_FUNCTION
47060 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
47061
47062 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
47063 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
47064
47065 #undef TARGET_OPTION_SAVE
47066 #define TARGET_OPTION_SAVE ix86_function_specific_save
47067
47068 #undef TARGET_OPTION_RESTORE
47069 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
47070
47071 #undef TARGET_OPTION_PRINT
47072 #define TARGET_OPTION_PRINT ix86_function_specific_print
47073
47074 #undef TARGET_OPTION_FUNCTION_VERSIONS
47075 #define TARGET_OPTION_FUNCTION_VERSIONS ix86_function_versions
47076
47077 #undef TARGET_CAN_INLINE_P
47078 #define TARGET_CAN_INLINE_P ix86_can_inline_p
47079
47080 #undef TARGET_EXPAND_TO_RTL_HOOK
47081 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
47082
47083 #undef TARGET_LEGITIMATE_ADDRESS_P
47084 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
47085
47086 #undef TARGET_LRA_P
47087 #define TARGET_LRA_P hook_bool_void_true
47088
47089 #undef TARGET_REGISTER_PRIORITY
47090 #define TARGET_REGISTER_PRIORITY ix86_register_priority
47091
47092 #undef TARGET_REGISTER_USAGE_LEVELING_P
47093 #define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true
47094
47095 #undef TARGET_LEGITIMATE_CONSTANT_P
47096 #define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p
47097
47098 #undef TARGET_FRAME_POINTER_REQUIRED
47099 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
47100
47101 #undef TARGET_CAN_ELIMINATE
47102 #define TARGET_CAN_ELIMINATE ix86_can_eliminate
47103
47104 #undef TARGET_EXTRA_LIVE_ON_ENTRY
47105 #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
47106
47107 #undef TARGET_ASM_CODE_END
47108 #define TARGET_ASM_CODE_END ix86_code_end
47109
47110 #undef TARGET_CONDITIONAL_REGISTER_USAGE
47111 #define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
47112
47113 #if TARGET_MACHO
47114 #undef TARGET_INIT_LIBFUNCS
47115 #define TARGET_INIT_LIBFUNCS darwin_rename_builtins
47116 #endif
47117
47118 #undef TARGET_LOOP_UNROLL_ADJUST
47119 #define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust
47120
47121 #undef TARGET_SPILL_CLASS
47122 #define TARGET_SPILL_CLASS ix86_spill_class
47123
47124 #undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
47125 #define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
47126 ix86_simd_clone_compute_vecsize_and_simdlen
47127
47128 #undef TARGET_SIMD_CLONE_ADJUST
47129 #define TARGET_SIMD_CLONE_ADJUST \
47130 ix86_simd_clone_adjust
47131
47132 #undef TARGET_SIMD_CLONE_USABLE
47133 #define TARGET_SIMD_CLONE_USABLE \
47134 ix86_simd_clone_usable
47135
47136 #undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
47137 #define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
47138 ix86_float_exceptions_rounding_supported_p
47139
47140 struct gcc_target targetm = TARGET_INITIALIZER;
47141 \f
47142 #include "gt-i386.h"